# Utility Evaluation
This notebook presents reproducible results on the effects of fingerprinting on ML performance.
We use a classification task and a range of different classifiers to evaluate the utility.

1. Adult Census dataset
    - 1.1. Baseline performance    
    - 1.2. Demo utility evaluation process
    - 1.3. Full utility evaluation
2. German Credit dataset
    - 2.1. Baseline performance
    - 2.2. Demo utility evaluation process
    - 2.3. Full utility evaluation


In [1]:
from sklearn import metrics, preprocessing, model_selection
import pandas as pd
import os
import json
from matplotlib import pyplot as plt
import random
import scipy.stats as stats
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.neural_network import MLPClassifier

In [3]:
import warnings

# Silence every warning category so the cell outputs stay readable.
# NOTE(review): this also hides any warnings raised by the libraries used below.
warnings.filterwarnings('ignore')

# Switch to the fingerprinting-toolbox checkout; relative paths used later in the
# notebook resolve against this directory (and presumably the project-local
# imports in the next cell do too -- confirm).
# Set the FP_TOOLBOX_ROOT environment variable to point at your own checkout;
# when it is unset, the original author's location is used as before.
PROJECT_ROOT = os.environ.get(
    'FP_TOOLBOX_ROOT',
    'C:/Users/tsarcevic/PycharmProjects/fingerprinting-toolbox',
)
os.chdir(PROJECT_ROOT)

In [4]:
from datasets import Adult, GermanCredit, Dataset
from scheme import Universal
from utils import fp_cross_val_score

## 1. Adult census dataset
### 1.1. Baseline performance
Baseline accuracy and F1 score of each classifier on the original (clean, non-fingerprinted) dataset.

In [5]:
# Load the Adult Census data through the project's `Adult` dataset wrapper.
original_data = Adult()
# Clean the data. The return value is not reassigned, so this presumably
# modifies `original_data` in place -- TODO confirm against datasets.Adult.
# As the last expression of the cell, the returned object is shown as output.
original_data.dropna()

<datasets._dataset.Adult at 0x290040166e0>

In [6]:
# Encode categorical features as numbers. The result is not reassigned, so the
# call presumably works in place -- TODO confirm.
original_data.number_encode_categorical()
# Drop the columns treated as redundant and rebind `original_data` to the result.
# NOTE(review): the name is rebound to its own transform, so re-running this
# cell on its own operates on already-reduced data and may fail.
original_data = original_data.drop(['fnlwgt','education'], axis=1)

In [7]:
# Separate the predictors from the target column.
X, y = original_data.get_features(), original_data.get_target()

# Scale the predictors with a freshly fitted StandardScaler. The frame is
# rebuilt from the scaled values, so the column names are kept while the row
# index is the default one (the original index is deliberately not passed on).
scaler = preprocessing.StandardScaler()
feature_names = X.columns
X = pd.DataFrame(scaler.fit_transform(X), columns=feature_names)

# Show the dimensions of the scaled feature table as the cell output.
X.shape

(45222, 12)

In [8]:
# Fixed seed for the estimators so that repeated runs of the notebook yield the
# same scores; several of these models are stochastic when left unseeded.
RANDOM_STATE = 0

# Classifiers compared throughout the notebook. The order matters for readers:
# results are printed and stored in this order.
classifiers = [
    GradientBoostingClassifier(random_state=RANDOM_STATE),
    LinearSVC(random_state=RANDOM_STATE),
    MLPClassifier(random_state=RANDOM_STATE),
    RandomForestClassifier(random_state=RANDOM_STATE),
    LogisticRegression(random_state=RANDOM_STATE),
]

In [10]:
# Accumulator for the evaluation rows of the Adult dataset. Each row holds
# [classifier, gamma, accuracy_mean, accuracy_std, f1_mean, f1_std];
# gamma is 0 for rows computed on the original (non-fingerprinted) data.
results_adult = list()

In [None]:
# Baseline: 10-fold cross-validation of every classifier on the original data.
report_template = "\tAccuracy: %0.3f (+/- %0.3f)\n\tF1 score: %0.3f (+/- %0.3f)"
for clf in classifiers:
    scores = model_selection.cross_validate(clf, X, y, scoring=['accuracy', 'f1'], cv=10)
    acc_folds, f1_folds = scores['test_accuracy'], scores['test_f1']

    # report the mean and two standard deviations across the folds
    print(clf)
    print(report_template % (acc_folds.mean(), acc_folds.std() * 2,
                             f1_folds.mean(), f1_folds.std() * 2))

    # store the plain standard deviation (not doubled); gamma is 0 for original data
    results_adult.append([clf, 0,
                          acc_folds.mean(), acc_folds.std(),
                          f1_folds.mean(), f1_folds.std()])
    

GradientBoostingClassifier()
Accuracy: 0.861 (+/- 0.008)
F1 score: 0.683 (+/- 0.019)
LinearSVC()
Accuracy: 0.820 (+/- 0.005)
F1 score: 0.538 (+/- 0.018)


### 1.2. Demo utility evaluation process

In [None]:
# Configure the fingerprinting scheme for the demo, then embed a fingerprint
# for recipient 0 into the preprocessed dataset.
# NOTE(review): the exact meaning of `gamma` is defined by scheme.Universal --
# presumably it controls how densely the data is marked; confirm.
scheme = Universal(
    gamma=1,
    fingerprint_bit_length=64,
)
fp_dataset = scheme.insertion(
    original_data,
    secret_key=4370315727,
    recipient_id=0,
)

In [None]:
# Sanity check: run fingerprint detection on the fingerprinted dataset, using
# the same secret key literal that was passed to the insertion step.
# NOTE(review): `suspect` is stored but not displayed or asserted in this cell;
# presumably it should identify recipient 0 -- confirm the return type and
# consider checking it explicitly.
suspect = scheme.detection(fp_dataset, secret_key=4370315727)

In [None]:
# Apply the same preprocessing steps to the fingerprinted data as were applied
# to the original data: separate predictors and target, then scale.
X_fp, y_fp = fp_dataset.get_features(), fp_dataset.get_target()

# A new scaler is fitted on the fingerprinted predictors; the frame is rebuilt
# from the scaled values with the column names kept and a default row index.
scaler = preprocessing.StandardScaler()
fp_feature_names = X_fp.columns
X_fp = pd.DataFrame(scaler.fit_transform(X_fp), columns=fp_feature_names)

# Show the dimensions of the scaled fingerprinted feature table.
X_fp.shape

In [None]:
# Demo: cross-validate one classifier with both the original data (X, y) and
# the fingerprinted data (X_fp, y_fp).
# NOTE(review): how the two datasets are combined is decided inside the
# project's fp_cross_val_score -- presumably training on fingerprinted folds and
# testing on original ones; confirm against utils.

# Bind the estimator explicitly instead of relying on a `clf` variable leaking
# out of an earlier loop. The last entry of `classifiers` is the estimator that
# loop variable ends on, so a top-to-bottom run gives the same result as before.
clf = classifiers[-1]

fp_scores = fp_cross_val_score(clf, X, y, X_fp, y_fp, cv=10, scoring=['accuracy', 'f1'])

# report the mean and two standard deviations across the folds
print("Accuracy: %0.3f (+/- %0.3f)\nF1 score: %0.3f (+/- %0.3f)"
      % (fp_scores['test_accuracy'].mean(), fp_scores['test_accuracy'].std() * 2,
         fp_scores['test_f1'].mean(), fp_scores['test_f1'].std() * 2))

### 1.3. Full utility evaluation

1. Define gammas
2. Define classifiers

In [None]:
# 1. gamma values to evaluate
gammas = [1, 1.5, 2]

# 2. classifiers: the `classifiers` list defined for the baseline is reused as-is

# secret key passed to every fingerprint insertion in the full evaluation
secret_key = 4_370_315_727

In [None]:
# Full evaluation: for every gamma, fingerprint the data and score each
# classifier, appending one result row per (classifier, gamma) pair.
for g in gammas:
    # embed a fingerprint for recipient 0 using the current gamma
    scheme = Universal(gamma=g, fingerprint_bit_length=64)
    fp_dataset = scheme.insertion(original_data, secret_key=secret_key, recipient_id=0)

    # separate predictors and target, then re-fit the scaler on the fingerprinted predictors
    X_fp, y_fp = fp_dataset.get_features(), fp_dataset.get_target()
    fp_feature_names = X_fp.columns
    X_fp = pd.DataFrame(scaler.fit_transform(X_fp), columns=fp_feature_names)

    for clf in classifiers:
        print(clf)
        fp_scores = fp_cross_val_score(clf, X, y, X_fp, y_fp, cv=10, scoring=['accuracy', 'f1'])
        acc_folds, f1_folds = fp_scores['test_accuracy'], fp_scores['test_f1']

        # report the mean and two standard deviations across the folds
        print("Accuracy: %0.3f (+/- %0.3f)\nF1 score: %0.3f (+/- %0.3f)"
              % (acc_folds.mean(), acc_folds.std() * 2,
                 f1_folds.mean(), f1_folds.std() * 2))

        # store the plain standard deviation (not doubled) together with the gamma used
        results_adult.append([clf, g,
                              acc_folds.mean(), acc_folds.std(),
                              f1_folds.mean(), f1_folds.std()])

In [None]:
# Turn the collected result rows into a labelled table.
# NOTE(review): this rebinds `results_adult` from a list to a DataFrame, so the
# cells that append rows to it should not be re-run after this one.
result_columns = ['classifier', 'gamma', 'accuracy_mean', 'accuracy_std', 'f1_mean', 'f1_std']
results_adult = pd.DataFrame(results_adult, columns=result_columns)

In [None]:
# Persist the results table as CSV. The relative path resolves against the
# current working directory, which an earlier cell sets with os.chdir.
# With pandas' default index=True the row index is written as the first column.
results_adult.to_csv('ML_utility_results.csv')

## 2. German Credit data