In [28]:
import os
import pandas as pd
import numpy as np


from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics.pairwise import haversine_distances
from sklearn.metrics import accuracy_score


import matplotlib.pyplot as plt


In [10]:
def process_directory(file_directory, use_alternate_columns = False, strategy_ln = True, weights_f = 'uniform', metrics = 'manhattan', num_runs = 1):

    all_files = os.listdir(file_directory)
    csv_files = [file for file in all_files if file.endswith('.csv')]
    #csv_files.sort(0)

    bayes_risk = [0] * len(csv_files)

    for _ in range(num_runs):

        for idx, file_name in enumerate(csv_files):

            file_path = os.path.join(file_directory, file_name)
            data = pd.read_csv(file_path)

            if use_alternate_columns:
                observations = data[['reported_lat', 'reported_lon']]
            else:
                observations = data[['perturbed_latitude', 'perturbed_longitude']]

            secrets = data['location_id']

            #L = secrets.nunique() #########3 This will be for uniform priors
            #Rangom_guessing = (L-1)/L
            #### for non uniform priors  Rangom_guessing = 1 - max(P(secrets))

            X_train, X_test, y_train, y_test = train_test_split(observations, secrets, test_size=0.20)
            
            if strategy_ln:
                k = round(np.log(len(X_train)))
            else:
                k = round(np.log10(len(X_train)))
            #weights_f = 'distance', 'uniform'
            #metrics = manhattan, ‘euclidean’, ‘haversine’, ‘cosine’
            knn = KNeighborsClassifier(n_neighbors=k, weights = weights_f, metric=metrics, n_jobs=-1)    

            knn.fit(X_train, y_train)

            predicted_location = knn.predict(X_test)

            bayes_risk[idx] += (1 - accuracy_score(y_test, predicted_location))

    bayesRisk_avg = [x / num_runs for x in bayes_risk]

    return bayesRisk_avg

In [13]:
file_directory = r'C:\Users\ss6365\Desktop\location_privacy_final\collected\machine_learning\attack1\laplace\400'

In [14]:
d=process_directory(file_directory, use_alternate_columns = False, strategy_ln = True, weights_f = 'uniform', metrics = 'manhattan', num_runs = 1)
d

[0.43815967523680655,
 0.27731168245376636,
 0.16986919260261613,
 0.14000902119981962,
 0.12214704555705913,
 0.11078033378439334,
 0.11150202976995938,
 0.11375732972485342]

In [15]:
d=process_directory(file_directory, use_alternate_columns = False, strategy_ln = False, weights_f = 'uniform', metrics = 'manhattan', num_runs = 1)
d

[0.44808299503834015,
 0.2694632386107352,
 0.13946774921064498,
 0.08967072620658545,
 0.07153811456923775,
 0.06838069463238605,
 0.0625169147496617,
 0.06567433468651329]

In [23]:
d=process_directory(file_directory, use_alternate_columns = False, strategy_ln = True, weights_f = 'distance', metrics ='manhattan', num_runs = 1)
d

[0.43509246729815065,
 0.2553901668921966,
 0.11935047361299056,
 0.06152458276950834,
 0.03391971132160576,
 0.02733423545331526,
 0.0262516914749662,
 0.018764095624718036]

In [24]:
d=process_directory(file_directory, use_alternate_columns = False, strategy_ln = True, weights_f = 'uniform', metrics ='manhattan', num_runs = 1)
d

[0.4310329273793414,
 0.28362652232746954,
 0.17185385656292285,
 0.13676138926477222,
 0.1200721695985566,
 0.11907983761840324,
 0.11330626973387459,
 0.11204330175913402]

In [26]:
d=process_directory(file_directory, use_alternate_columns = False, strategy_ln = False, weights_f = 'uniform', metrics ='cosine', num_runs = 1)
d

[0.7964817320703653,
 0.7079837618403247,
 0.6316644113667118,
 0.595489400090212,
 0.5599458728010825,
 0.5351375732972485,
 0.5175462336490753,
 0.5065403698691926]

In [27]:
display_pdf

NameError: name 'display_pdf' is not defined