In [112]:
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis


def build_confusion_matrix(dataset, model, test_size=0.25, random_state=None):
    """Fit ``model`` on one random train/test split and count binary outcomes.

    The last column of ``dataset`` is used as the target; all preceding
    columns are used as features. The smaller of the two sorted target values
    is treated as the negative class and the larger as the positive class.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Feature columns followed by a single two-valued target column.
    model : object
        Estimator whose ``fit(X, y)`` returns an object exposing ``predict(X)``.
        It is refitted in place on every call.
    test_size : float or int, optional
        Forwarded to ``train_test_split``. Defaults to 0.25.
    random_state : int, RandomState or None, optional
        Forwarded to ``train_test_split``; pass an int for a repeatable split.

    Returns
    -------
    dict
        Counts under the keys ``'true_negative'``, ``'false_negative'``,
        ``'false_positive'`` and ``'true_positive'``.

    Raises
    ------
    ValueError
        If the target column does not hold exactly two distinct values.
    """
    features = dataset.iloc[:, :-1]
    target = dataset.iloc[:, -1]

    # Take the label set from the whole target column so the matrix is always
    # 2x2, even when one split (and its predictions) contains a single class.
    labels = np.unique(target)
    if len(labels) != 2:
        raise ValueError(
            "expected exactly two classes in the target column, found %d" % len(labels)
        )

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )
    predictions = model.fit(X_train, y_train).predict(X_test)

    # Conventional (y_true, y_pred) order: rows are actual classes, columns are
    # predicted classes, so the flattened matrix reads tn, fp, fn, tp.
    tn, fp, fn, tp = confusion_matrix(y_test.values, predictions, labels=labels).ravel()
    return {'true_negative': tn, 'false_negative': fn, 'false_positive': fp, 'true_positive': tp}

def sensitivity(cm):
    """Return the true-positive rate, TP / (TP + FN).

    Parameters
    ----------
    cm : dict
        Mapping with ``'true_positive'`` and ``'false_negative'`` counts.

    Returns
    -------
    float
        The sensitivity, or ``nan`` when there are no actual positives
        (TP + FN == 0) and the rate is therefore undefined.
    """
    actual_positives = cm['true_positive'] + cm['false_negative']
    # Guard the 0/0 case explicitly so plain ints and numpy ints behave alike.
    if actual_positives == 0:
        return float('nan')
    return cm['true_positive'] / actual_positives

def specificity(cm):
    """Return the true-negative rate, TN / (TN + FP).

    Parameters
    ----------
    cm : dict
        Mapping with ``'true_negative'`` and ``'false_positive'`` counts.

    Returns
    -------
    float
        The specificity, or ``nan`` when there are no actual negatives
        (TN + FP == 0) and the rate is therefore undefined.
    """
    actual_negatives = cm['true_negative'] + cm['false_positive']
    # Guard the 0/0 case explicitly so plain ints and numpy ints behave alike.
    if actual_negatives == 0:
        return float('nan')
    return cm['true_negative'] / actual_negatives

def false_positive_rate(cm):
    """Return the complement of the specificity computed from ``cm``."""
    true_negative_rate = specificity(cm)
    return 1.0 - true_negative_rate

def generate_scores(dataset, model, n_iterations=1000):
    """Average specificity and sensitivity over repeated random splits.

    Each iteration calls ``build_confusion_matrix(dataset, model)``, which
    refits ``model`` on a fresh split, and records both rates.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Passed through unchanged to ``build_confusion_matrix``.
    model : object
        Estimator passed through to ``build_confusion_matrix``.
    n_iterations : int, optional
        Number of splits to evaluate. Defaults to 1000.

    Returns
    -------
    dict
        ``{'specificity': mean, 'sensitivity': mean}``. The means are taken
        with ``pandas.Series.mean``, which ignores NaN entries by default.

    Raises
    ------
    ValueError
        If ``n_iterations`` is less than 1.
    """
    if n_iterations < 1:
        raise ValueError("n_iterations must be at least 1")

    specificity_scores = []
    sensitivity_scores = []

    for _ in range(n_iterations):
        cm = build_confusion_matrix(dataset, model)
        specificity_scores.append(specificity(cm))
        sensitivity_scores.append(sensitivity(cm))

    spec_avg = pd.Series(specificity_scores).mean()
    sens_avg = pd.Series(sensitivity_scores).mean()

    return {'specificity': spec_avg, 'sensitivity': sens_avg}

def balance_data_set(dataset, classifier):
    """Randomly downsample the larger class until both classes are equal in size.

    Rows are split into two groups: those where ``dataset[classifier] == 0``
    and all remaining rows. Surplus rows are removed at random (using the
    ``random`` module) from whichever group is larger; the smaller group is
    left untouched.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Data to balance. It is not modified.
    classifier : str
        Name of the column holding the class label.

    Returns
    -------
    pandas.DataFrame
        A new frame with the surviving rows in their original order and with
        their original index labels.
    """
    is_zero_class = (dataset[classifier] == 0).to_numpy()
    zero_positions = np.flatnonzero(is_zero_class).tolist()
    other_positions = np.flatnonzero(~is_zero_class).tolist()

    # Trim whichever group is larger. Trimming only the zero group would give
    # a negative slice bound when it is the minority and drop the wrong rows.
    if len(zero_positions) >= len(other_positions):
        majority_positions = zero_positions
    else:
        majority_positions = other_positions
    surplus = abs(len(zero_positions) - len(other_positions))

    random.shuffle(majority_positions)

    # Select by position rather than by index label so that repeated index
    # labels cannot cause extra rows to be removed.
    keep = np.ones(len(dataset), dtype=bool)
    keep[np.asarray(majority_positions[:surplus], dtype=int)] = False
    return dataset[keep]


In [114]:
# Load data sets
# Seed both random sources before anything stochastic runs, so a fresh
# Restart & Run All reproduces the same numbers: balance_data_set shuffles with
# the `random` module, and train_test_split is called without a random_state,
# which makes it draw from numpy's global generator.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

inc_updrs_sigfall = pd.read_csv('./working_data/normalised_increase_updrs_sigfall.csv')
# Class-balanced variant of the frame above, keyed on the 'SIGFALL' column.
inc_updrs_sigfall_b = balance_data_set(inc_updrs_sigfall, 'SIGFALL')
inc_updrs_sigfall_raw = pd.read_csv('./working_data/normalised_increase_updrs_sigfall_raw.csv')

datasets = [inc_updrs_sigfall, inc_updrs_sigfall_b, inc_updrs_sigfall_raw]

In [120]:
# Define models and establish tests
# One LDA estimator per solver, in the order svd, lsqr, eigen.
models = [
    LinearDiscriminantAnalysis(solver=solver_name)
    for solver_name in ("svd", "lsqr", "eigen")
]

# Pair every model with every data set (model-major order). Each entry is a
# dict that later cells extend with the computed scores.
tests = []
for m in models:
    for d in datasets:
        tests.append(dict(dataset=d, model=m))

In [121]:
# Score every (dataset, model) pairing and store both averages on its entry.
for t in tests:
    scores = generate_scores(dataset=t['dataset'], model=t['model'])
    t.update(specificity=scores['specificity'], sensitivity=scores['sensitivity'])

In [122]:

# Display the collected results. Every entry still carries its full DataFrame
# under 'dataset', so the printed repr is dominated by the data itself rather
# than by the specificity/sensitivity values.
tests

[{'dataset':       I_NP2SPCH  I_NP2SALV  I_NP2SWAL  I_NP2EAT  I_NP2DRES  I_NP2HYGN  \
  0          0.00        0.0       0.00      0.00        0.0       0.00   
  1          0.00        0.0       0.00      0.00        0.0       0.00   
  2          0.00        0.0       0.00      0.25        0.5       0.25   
  3          0.25        0.0       0.25      0.00        0.0       0.00   
  4          0.00        0.0       0.00      0.25        0.0       0.00   
  ...         ...        ...        ...       ...        ...        ...   
  1620       0.00        0.0       0.00      0.00        0.0       0.00   
  1621       0.00        0.0       0.00      0.00        0.0       0.00   
  1622       0.00        0.0       0.00      0.00        0.0       0.00   
  1623       0.00        0.0       0.00      0.00        0.0       0.00   
  1624       0.00        0.0       0.00      0.00        0.0       0.00   
  
        I_NP2HWRT  I_NP2HOBB  I_NP2TURN  I_NP2TRMR  I_NP2RISE  I_NP2WALK  \
  0       