In [10]:
import numpy as np
import pandas as pd
import requests
from cryptography.fernet import Fernet
import logging

# Configure the root logger once at import time: records at INFO level and
# above are emitted, each formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def download_and_save_csv(url, filename, timeout=30):
    """Download a CSV file from a URL and save it to a local file.

    Args:
        url: Address fetched with an HTTP GET request.
        filename: Local path the response body is written to (binary mode,
            overwriting any existing file).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        True when the file was downloaded and written; False when the request
        failed (connection error, timeout, HTTP error status) or the file
        could not be written. Failures are logged, not raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError for bad responses
    except requests.exceptions.RequestException as e:
        logging.error(f"Failed to download the file: {e}")
        return False

    # Disk errors are reported through the same boolean contract as network
    # errors instead of escaping as an unhandled exception.
    try:
        with open(filename, 'wb') as f:
            f.write(response.content)
    except OSError as e:
        logging.error(f"Failed to save the file: {e}")
        return False

    logging.info(f"File downloaded and saved as {filename}")
    return True

def generate_key():
    """Create a fresh Fernet key.

    Returns:
        The value produced by ``Fernet.generate_key()``, suitable for passing
        to the ``Fernet`` constructor.
    """
    new_key = Fernet.generate_key()
    return new_key

def encrypt_data(data, cipher_suite):
    """Encode a string to bytes and encrypt the result.

    Args:
        data: Text to encrypt; converted with ``str.encode()`` using the
            default encoding.
        cipher_suite: Object exposing ``encrypt(bytes)``.

    Returns:
        Whatever ``cipher_suite.encrypt`` returns for the encoded text.
    """
    plaintext_bytes = data.encode()
    token = cipher_suite.encrypt(plaintext_bytes)
    return token

def decrypt_data(encrypted_data, cipher_suite):
    """Decrypt a token and decode the plaintext bytes to a string.

    Args:
        encrypted_data: Token handed unchanged to ``cipher_suite.decrypt``.
        cipher_suite: Object exposing ``decrypt(token)`` that returns bytes.

    Returns:
        The decrypted bytes decoded with ``bytes.decode()`` using the default
        encoding.
    """
    plaintext_bytes = cipher_suite.decrypt(encrypted_data)
    text = plaintext_bytes.decode()
    return text

def secure_mdav(data, k, cipher_suite):
    """Group records MDAV-style and return every record encrypted.

    Distances are computed on the plaintext values in ``data``; only the
    records placed in the output are encrypted (through ``encrypt_data``).

    While more than ``k`` rows remain, the row furthest from the centroid of
    the remaining rows is located and the ``k`` rows nearest to that row are
    removed together as one group. Whatever is left afterwards (at most ``k``
    rows) forms the final group.

    Args:
        data: DataFrame whose columns are all numeric. The caller's frame is
            not modified.
        k: Number of rows per group; must be at least 1.
        cipher_suite: Cipher object forwarded to ``encrypt_data``.

    Returns:
        A dict with the single key ``k`` mapping to a list holding the
        encrypted JSON form of every row, in the order the groups were
        formed: each full group contributes ``k`` consecutive entries and the
        final leftover group comes last. An empty dict is returned when
        ``data`` has no rows, when ``k`` is invalid, when every distance is
        NaN, or when any other error occurs (errors are logged, not raised).
    """
    try:
        if k < 1:
            # With k <= 0 no rows would ever be removed and the loop below
            # would never terminate.
            raise ValueError(f"k must be a positive integer, got {k!r}")

        # Work on a relabelled copy so .loc lookups are unambiguous even if
        # the caller's index contains duplicate labels.
        data = data.reset_index(drop=True)

        # Rows of ALL groups are accumulated here. Previously each group was
        # assigned to groups[k], overwriting the one before it, so only the
        # final leftover rows survived.
        encrypted_rows = []

        # NOTE(review): with the "> k" threshold the final leftover group can
        # hold fewer than k rows; confirm whether a ">= 2 * k" threshold is
        # wanted so that every group has at least k members.
        while len(data) > k:
            centroid = data.mean().values
            distances = pd.Series(
                np.linalg.norm(data.values - centroid, axis=1),
                index=data.index,
            )
            if distances.isna().all():
                logging.error("All distances are NaN, likely due to improper data normalization or input data issues.")
                return {}
            # Rows with an undefined distance are treated as being as far
            # away as the furthest well-defined row.
            distances = distances.fillna(distances.max())
            furthest_point = data.loc[distances.idxmax()]

            # Build the group from the k rows closest to the furthest row.
            distances = pd.Series(
                np.linalg.norm(data.values - furthest_point.values, axis=1),
                index=data.index,
            )
            nearest_indices = distances.nsmallest(k).index
            group = data.loc[nearest_indices]
            encrypted_rows.extend(
                encrypt_data(row.to_json(), cipher_suite)
                for _, row in group.iterrows()
            )
            data = data.drop(nearest_indices).reset_index(drop=True)

        if len(data) > 0:
            encrypted_rows.extend(
                encrypt_data(row.to_json(), cipher_suite)
                for _, row in data.iterrows()
            )

        # Keep the original "empty dict means nothing was produced" contract.
        return {k: encrypted_rows} if encrypted_rows else {}
    except Exception as e:
        logging.error(f"Error during secure MDAV execution: {str(e)}")
        return {}

def main(
    url='https://raw.githubusercontent.com/raccamateo/PP_MDAV_/main/PP_airline_passenger_satisfaction.csv',
    local_file='/Users/usernamemateo/Downloads/PP_airline_passengers_satisfaction.csv',
    output_dir='/Users/usernamemateo/Downloads',
    k_values=(3, 5, 10),
):
    """Download the dataset, min-max scale it, and write MDAV results per k.

    For every ``k`` a fresh Fernet key is generated, ``secure_mdav`` is run
    on the scaled numeric columns, the returned tokens are decrypted again
    with the same key, and the decrypted JSON strings are written to
    ``<output_dir>/MDAV_Results_k<k>.csv`` (one string per row). The key is
    not stored anywhere after its iteration ends.

    Args:
        url: Location of the source CSV.
        local_file: Path the downloaded CSV is saved to and read back from.
        output_dir: Directory that receives the per-k result files.
        k_values: Group sizes to run, one result file each.
    """
    if not download_and_save_csv(url, local_file):
        print("Failed to download the data file.")
        return

    data = pd.read_csv(local_file)
    numeric_columns = data.select_dtypes(include=[np.number]).columns

    # Min-max scale every numeric column to [0, 1]. A constant column has a
    # zero range; dividing by it would turn the whole column into NaN and
    # make every distance in secure_mdav NaN, so such ranges are replaced by
    # 1 and the column simply becomes all zeros.
    col_min = data[numeric_columns].min()
    col_range = (data[numeric_columns].max() - col_min).replace(0, 1)
    data[numeric_columns] = (data[numeric_columns] - col_min) / col_range

    for k in k_values:
        cipher_suite = Fernet(generate_key())
        mdav_results = secure_mdav(data[numeric_columns], k, cipher_suite)
        if not mdav_results:
            print(f"Failed to process MDAV for k={k}.")
            continue

        decrypted_group = [decrypt_data(encrypted, cipher_suite) for encrypted in mdav_results[k]]
        result_df = pd.DataFrame(decrypted_group)
        result_df.to_csv(f'{output_dir}/MDAV_Results_k{k}.csv', index=False)
        print(f"MDAV Results for k={k}: Saved to CSV")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


2024-05-31 23:02:11,168 - INFO - File downloaded and saved as /Users/usernamemateo/Downloads/PP_airline_passengers_satisfaction.csv


MDAV Results for k=3: Saved to CSV
MDAV Results for k=5: Saved to CSV
MDAV Results for k=10: Saved to CSV
