In [13]:
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis


def build_confusion_matrix(dataset, model):
    """Fit ``model`` on a random split of ``dataset`` and count test-set outcomes.

    The last column of ``dataset`` is used as the target and every preceding
    column as a feature. 25% of the rows are held out for testing. ``model``
    is re-fitted on each call.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Feature columns followed by one target column holding two classes.
    model : estimator
        Object whose ``fit(X, y)`` returns the fitted estimator and which
        exposes ``predict(X)``.

    Returns
    -------
    dict
        Counts keyed by 'true_negative', 'false_negative', 'false_positive'
        and 'true_positive'. The larger of the two sorted labels is treated
        as the positive class.
    """
    features = dataset.iloc[:, :-1]
    target = dataset.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.25)
    predictions = model.fit(X_train, y_train).predict(X_test)

    # Pin the label set to the classes of the whole dataset so the matrix is
    # always 2x2, even when a split/prediction happens to contain one class only.
    labels = sorted(target.unique())

    # scikit-learn's signature is confusion_matrix(y_true, y_pred); with that
    # order the flattened binary matrix reads tn, fp, fn, tp.
    tn, fp, fn, tp = confusion_matrix(y_test.values, predictions, labels=labels).ravel()
    return {'true_negative': tn, 'false_negative': fn, 'false_positive': fp, 'true_positive': tp}

def sensitivity(cm):
    """Return the true-positive rate, TP / (TP + FN), from a dict of confusion counts."""
    hits = cm['true_positive']
    misses = cm['false_negative']
    return hits / (hits + misses)

def specificity(cm):
    """Return the true-negative rate, TN / (TN + FP), from a dict of confusion counts."""
    correct_rejections = cm['true_negative']
    false_alarms = cm['false_positive']
    return correct_rejections / (correct_rejections + false_alarms)

def false_positive_rate(cm):
    """Return the false-positive rate, computed as the complement of ``specificity(cm)``."""
    true_negative_rate = specificity(cm)
    return 1.0 - true_negative_rate

def generate_scores(dataset, model, n_iterations=1000):
    """Average specificity and sensitivity of ``model`` over repeated random splits.

    Each iteration calls ``build_confusion_matrix(dataset, model)`` and records
    the specificity and sensitivity derived from the resulting counts.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Passed unchanged to ``build_confusion_matrix``.
    model : estimator
        Passed unchanged to ``build_confusion_matrix``.
    n_iterations : int, optional
        Number of splits to evaluate. Defaults to 1000.

    Returns
    -------
    dict
        ``{'specificity': mean specificity, 'sensitivity': mean sensitivity}``.

    Raises
    ------
    ValueError
        If ``n_iterations`` is smaller than 1.
    """
    if n_iterations < 1:
        raise ValueError('n_iterations must be at least 1')

    specificity_scores = []
    sensitivity_scores = []

    for _ in range(n_iterations):
        cm = build_confusion_matrix(dataset, model)
        specificity_scores.append(specificity(cm))
        sensitivity_scores.append(sensitivity(cm))

    # pandas' Series.mean() is kept (rather than a plain sum/len) so that the
    # averaging behaves exactly as before, including its default NaN handling.
    spec_avg = pd.Series(specificity_scores).mean()
    sens_avg = pd.Series(sensitivity_scores).mean()

    return {'specificity': spec_avg, 'sensitivity': sens_avg}

def summarise_test(test):
    """Print the dataset name, model and averaged per-class scores held in ``test``.

    ``test`` must provide the keys 'data_name', 'model', 'specificity' and
    'sensitivity'.
    """
    template = 'For {}, in model {}\nAverage NO SIGFALL accuracy is {}\nAverage SIGFALL accuracy is {}\n---\n'
    fields = ('data_name', 'model', 'specificity', 'sensitivity')
    print(template.format(*(test[key] for key in fields)))
    
def balance_data_set(dataset, classifier):
    """Return a randomly downsampled copy of ``dataset`` with equally sized classes.

    Rows whose ``classifier`` value equals 0 form one class and all remaining
    rows form the other. Randomly chosen rows are dropped from whichever class
    is larger until both have the same number of rows. ``dataset`` itself is
    left unmodified.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Data containing the ``classifier`` column.
    classifier : str
        Name of the column holding the class label.

    Returns
    -------
    pandas.DataFrame
        ``dataset`` without the randomly selected surplus rows.
    """
    negative_indexes = dataset.index[dataset[classifier] == 0].to_list()
    positive_indexes = dataset.index[dataset[classifier] != 0].to_list()
    surplus = len(negative_indexes) - len(positive_indexes)

    # A negative surplus must never be used as a slice bound: list[0:-k] would
    # select rows of the *smaller* class and make the imbalance worse. Pick the
    # larger class explicitly and drop |surplus| of its rows instead.
    majority_indexes = negative_indexes if surplus >= 0 else positive_indexes
    random.shuffle(majority_indexes)
    drop_indexes = majority_indexes[:abs(surplus)]
    return dataset.drop(drop_indexes)

# balance_data_generate_scores() is a separate function so that a new randomly balanced dataset is drawn in each iteration.
# If a dataset were balanced once and then run through generate_scores(), the scores would overfit to that specific balanced subset.

def balance_data_generate_scores(dataset, classifier, model, n_iterations=1000):
    """Average specificity and sensitivity of ``model`` over freshly balanced samples.

    Every iteration draws a new balanced subset with
    ``balance_data_set(dataset, classifier)``, evaluates it through
    ``build_confusion_matrix`` and records the resulting specificity and
    sensitivity.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Unbalanced data passed to ``balance_data_set``.
    classifier : str
        Name of the class-label column, passed to ``balance_data_set``.
    model : estimator
        Passed unchanged to ``build_confusion_matrix``.
    n_iterations : int, optional
        Number of balanced samples to evaluate. Defaults to 1000.

    Returns
    -------
    dict
        ``{'specificity': mean specificity, 'sensitivity': mean sensitivity}``.

    Raises
    ------
    ValueError
        If ``n_iterations`` is smaller than 1.
    """
    if n_iterations < 1:
        raise ValueError('n_iterations must be at least 1')

    specificity_scores = []
    sensitivity_scores = []

    for _ in range(n_iterations):
        # Re-balance inside the loop so no single random subset dominates the average.
        b_dataset = balance_data_set(dataset, classifier)
        cm = build_confusion_matrix(b_dataset, model)
        specificity_scores.append(specificity(cm))
        sensitivity_scores.append(sensitivity(cm))

    spec_avg = pd.Series(specificity_scores).mean()
    sens_avg = pd.Series(sensitivity_scores).mean()

    return {'specificity': spec_avg, 'sensitivity': sens_avg}

In [16]:
# Read the normalised and raw variant of each working dataset from disk
inc_updrs_sigfall = pd.read_csv('./working_data/normalised_increase_updrs_sigfall.csv')
inc_updrs_sigfall_raw = pd.read_csv('./working_data/increase_updrs_sigfall_raw.csv')
delta_updrs_sigfall = pd.read_csv('./working_data/normalised_delta_updrs_sigfall.csv')
delta_updrs_sigfall_raw = pd.read_csv('./working_data/delta_updrs_sigfall_raw.csv')
updrs_future_sigfall = pd.read_csv('./working_data/normalised_updrs_future_sigfall.csv')
updrs_future_sigfall_raw = pd.read_csv('./working_data/updrs_future_sigfall_raw.csv')

# Pair every frame with the label used when its results are reported
datasets = [
    {'data': frame, 'name': label}
    for label, frame in (
        ('inc_updrs_sigfall', inc_updrs_sigfall),
        ('inc_updrs_sigfall_raw', inc_updrs_sigfall_raw),
        ('delta_updrs_sigfall', delta_updrs_sigfall),
        ('delta_updrs_sigfall_raw', delta_updrs_sigfall_raw),
        ('updrs_future_sigfall', updrs_future_sigfall),
        ('updrs_future_sigfall_raw', updrs_future_sigfall_raw),
    )
]

In [17]:
# Score every model against every dataset and print the averaged results

models = [QuadraticDiscriminantAnalysis(store_covariance=True)]

# One record per (model, dataset) pairing; datasets vary fastest
tests = [
    {'data_name': entry['name'], 'dataset': entry['data'], 'model': estimator}
    for estimator in models
    for entry in datasets
]

for test in tests:
    scores = generate_scores(dataset=test['dataset'], model=test['model'])
    test['specificity'] = scores['specificity']
    test['sensitivity'] = scores['sensitivity']
    summarise_test(test)

For inc_updrs_sigfall, in model QuadraticDiscriminantAnalysis(store_covariance=True)
Average NO SIGFALL accuracy is 0.8489229265670296
Average SIGFALL accuracy is 0.39507019831690754
---

For inc_updrs_sigfall_raw, in model QuadraticDiscriminantAnalysis(store_covariance=True)
Average NO SIGFALL accuracy is 0.8562050715526961
Average SIGFALL accuracy is 0.3778802218838517
---

For delta_updrs_sigfall, in model QuadraticDiscriminantAnalysis(store_covariance=True)
Average NO SIGFALL accuracy is 0.8553113122457988
Average SIGFALL accuracy is 0.30775746840785906
---

For delta_updrs_sigfall_raw, in model QuadraticDiscriminantAnalysis(store_covariance=True)
Average NO SIGFALL accuracy is 0.861310400859974
Average SIGFALL accuracy is 0.292041472155738
---





































For updrs_future_sigfall, in model QuadraticDiscriminantAnalysis(store_covariance=True)
Average NO SIGFALL accuracy is 0.8648292906412027
Average SIGFALL accuracy is 0.3203814063783094
---

For updrs_future_sigfall_raw, in model QuadraticDiscriminantAnalysis(store_covariance=True)
Average NO SIGFALL accuracy is 0.8969379749652953
Average SIGFALL accuracy is 0.28349421535055125
---



In [8]:
# # Run tests to score each model with the randomly balanced version of the datasets

# balanced_datasets = [{'data':inc_updrs_sigfall, 'name':'balanced_inc_updrs_sigfall', 'classifier':'SIGFALL'}, 
#                      {'data':inc_updrs_sigfall_raw, 'name':'balanced_inc_updrs_sigfall_raw', 'classifier':'SIGFALL'}]

# b_tests = []
# for m in models:
#     for d in balanced_datasets:
#         b_tests.append({
#             'data_name': d['name'],
#             'dataset': d['data'],
#             'classifier': d['classifier'],
#             'model': m
#         })

# for t in b_tests:
#     s = balance_data_generate_scores(dataset=t['dataset'], classifier=t['classifier'], model=t['model'])
#     t['specificity'] = s['specificity']
#     t['sensitivity'] = s['sensitivity']
#     summarise_test(t)       



Unnamed: 0,I_NP2SPCH,I_NP2SALV,I_NP2SWAL,I_NP2EAT,I_NP2DRES,I_NP2HYGN,I_NP2HWRT,I_NP2HOBB,I_NP2TURN,I_NP2TRMR,I_NP2RISE,I_NP2WALK,I_NP2FREZ,I_NP2PTOT,SIGFALL
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.25,0.5,0.25,0.0,0.25,0.25,0.25,0.0,0.0,0.0,0.134615,0.0
3,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.057692,1.0
4,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.019231,0.0
