# Tester for Sebastien's classifier

**Please use python3 to run this code, and install all packages that the classifier depends on.**

In [None]:
__authoroftheclassifier__ = 'Sebastien Levy'
__authorofthetesterfile__ = 'Khaled Jedoui'

from processing import ADOS_Data
from cross_validation import CVP_Set
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge
from sklearn.svm import LinearSVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from classifiers import RegClassifier, BinClassifier
from sklearn.metrics import roc_auc_score, average_precision_score, classification_report, confusion_matrix, precision_recall_curve, auc

import matplotlib.pyplot as plt
import pandas as pd

In [None]:
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator`` as a float, or NaN when the denominator is 0."""
    if denominator == 0:
        return float('nan')
    return float(numerator) / float(denominator)


def test_func(pred_score, predlabels):
    """Print a confusion matrix and summary metrics for binary predictions.

    Args:
        pred_score: iterable of scores or hard predictions; each value is
            turned into a 0/1 prediction with the ``> 0.5`` threshold.
        predlabels: iterable of true 0/1 labels, in the same order.

    Prints the confusion matrix, precision, recall/sensitivity, specificity
    and sklearn's classification report. Returns nothing.
    """
    # Both vectors are flipped (1 - x) so that original label 1 becomes class 0
    # and therefore occupies the first row/column of the confusion matrix.
    flipped_true = [1 - x for x in predlabels]
    flipped_pred = [1 - int(x > 0.5) for x in pred_score]

    print('Confusion matrix:')
    # labels=[0, 1] forces a 2x2 matrix even when only one class is present;
    # without it the matrix can be 1x1 and the indexing below raises IndexError.
    cm = confusion_matrix(flipped_true, flipped_pred, labels=[0, 1])
    print(cm)

    # Rows are true classes, columns are predicted classes (flipped encoding).
    tp, fn = cm[0][0], cm[0][1]
    fp, tn = cm[1][0], cm[1][1]

    # A zero denominator yields NaN instead of a numpy divide warning.
    print('Precision: {}'.format(_safe_ratio(tp, tp + fp)))
    print('Recall/Sensitivity: {}'.format(_safe_ratio(tp, tp + fn)))
    print('Specificity: {}'.format(_safe_ratio(tn, tn + fp)))
    print ('class report')
    print (classification_report(flipped_true, flipped_pred))

# M3 training

In [None]:
# --- M3 experiment configuration ---
MODULE = 'm3'

# Columns kept as model inputs after preprocessing
# (the selection step is skipped when this list is empty).
FEATURE_SET = [
    'A2', 'A4', 'A8', 'B3_miss', 'B2',
    'B7', 'B8', 'D4', 'D3_miss', 'male',
]

# N_FOLD and PRED_RATIO are passed to CVP_Set when the split is built.
N_FOLD = 10
PRED_RATIO = 0.2
# SCALING_PARAM is not referenced anywhere else in the visible notebook.
SCALING_PARAM = 4

# Missing-value handling: 'Binary' or 'Replacement'.
MISSING_VALUE_STRATEGY = 'Binary'
# Feature processing: 'poly', 'linear', 'indicator', 'interaction_ind' or 'pca_comp'.
PROCESSING_STRATEGY = 'linear'

# POLY_DEGREE is not referenced anywhere else in the visible notebook.
POLY_DEGREE = 2
NORMALIZE = True

# Training file location and the names of its special columns.
ADOS_FILE = "{0}/data/ados_{0}_allData.csv".format(MODULE)
label_id = "ASD"
label_age = "age_months"
label_gender = "male"
columns_to_delete = ["Subject.Id", "Diagnosis"]
sub_diagnosis_id = [
    "social_affect_calc",
    "restricted_repetitive_calc",
    "SA_RRI_total_calc",
    "severity_calc",
]

In [None]:
# Load the M3 training data and keep a copy of the sub-diagnosis columns.
data = ADOS_Data.read_csv(ADOS_FILE)
sub_diagnosis = data[sub_diagnosis_id]

# Remove the identifier and sub-diagnosis columns
# (per the original note, this also drops the rows with no label).
data.select_good_columns(columns_to_delete + sub_diagnosis_id)

# Sum of the 'male' column over the rows where ASD == 1.
print('gendering')
asd_rows = data[data['ASD'] == 1]
print(asd_rows[['male']].sum())

data.full_preprocessing(
    NORMALIZE,
    MISSING_VALUE_STRATEGY,
    PROCESSING_STRATEGY,
    [label_age],
    label_gender,
    label_id,
)
# Restrict to the chosen features only when a feature set was configured.
if FEATURE_SET:
    data.select_good_columns(FEATURE_SET, keep_the_column=True)

cv_set = CVP_Set(data, data.labels, N_FOLD, PRED_RATIO)

## LR training

In [None]:
# L2-penalised, class-balanced logistic regression wrapped in BinClassifier.
lr = BinClassifier(
    proc=LogisticRegression(penalty='l2', C=1, class_weight='balanced'),
    severity=False,
)
lr.fit(cv_set.cv_feat, cv_set.cv_labels)

## SVM training

In [None]:
# L1-penalised linear SVM (primal formulation) wrapped in BinClassifier.
svc = BinClassifier(
    proc=LinearSVC(C=0.5, penalty='l1', dual=False),
    severity=False,
)
svc.fit(cv_set.cv_feat, cv_set.cv_labels)

## M3 testing

**For the m3 testing:**
 - Note that there are 4 datasets:
     - for_new_classifier_19.csv : Where B8 is question 19.
     - for_new_classifier_21.csv : Where B8 is question 21.
     - for_new_classifier_22.csv : Where B8 is question 22.
     - for_new_classifier_average.csv : Where B8 is the average (rounded) for questions 19, 21 and 22.

In [None]:
# Load the scored-video file used to test the M3 models.
FILENAME = "M3_videos_LR3.csv"
columns_to_delete = ['child_id', 'scorer_id', 'video_file']
pred_feat = ADOS_Data.read_csv(FILENAME)

# NOTE(review): ytrue is captured before the column/row filtering below; if
# those steps drop rows, ytrue may no longer line up with pred_feat — confirm.
ytrue = pred_feat["ASD"]

# Remove the identifier columns
# (per the original note, this also drops the rows with no label).
pred_feat.select_good_columns(columns_to_delete)

pred_feat.full_preprocessing(
    NORMALIZE,
    MISSING_VALUE_STRATEGY,
    PROCESSING_STRATEGY,
    [label_age],
    label_gender,
    label_id,
)

# NOTE(review): the training feature set uses 'B3_miss'/'D3_miss' where this
# one uses 'B3'/'D3' — confirm the two column sets are meant to correspond.
FEATURE_SET = [
    'A2', 'A4', 'A8', 'B3', 'B2',
    'B7', 'B8', 'D4', 'D3', 'male',
]
if FEATURE_SET:
    pred_feat.select_good_columns(FEATURE_SET, keep_the_column=True)

### Logistic regression

In [None]:
# Second entry of each probability row; kept as a global, not passed to test_func.
lr_score = [proba[1] for proba in lr.predict_proba(pred_feat)]
# Report metrics for the hard predictions against the labels read from the file.
test_func(pred_score=list(lr.predict(pred_feat)), predlabels=ytrue)

In [None]:
# Print one predict_proba row per sample for the logistic-regression model.
print("---Printing probabilities---")
LogisticProbabilities = lr.predict_proba(pred_feat)
for proba_row in LogisticProbabilities:
    print(proba_row)
print("---End of Printing Probabilities---")

In [None]:
# Show the logistic-regression predictions as a plain list.
print([*lr.predict(pred_feat)])

In [None]:
# Pair each probability row with its predicted label and write one pair per line.
# NOTE(review): despite the .csv extension, each line is the str() of a Python
# tuple, not comma-separated fields — confirm downstream readers expect that.
l = list(zip(LogisticProbabilities, lr.predict(pred_feat)))
with open('M3_videos_LR_L2_3.csv', 'w') as f:
    # Iterate the pairs directly instead of indexing through range(len(l)).
    f.writelines(str(pair) + '\n' for pair in l)

### SVM

In [None]:
# Report metrics for the SVM's hard predictions against the file's labels.
test_func(pred_score=list(svc.predict(pred_feat)), predlabels=ytrue)

In [None]:
# Print one predict_proba row per sample for the SVM model.
# NOTE(review): sklearn's LinearSVC has no predict_proba, so this relies on
# BinClassifier supplying one — confirm. The global keeps the name
# LogisticProbabilities even though the values come from the SVM.
print("---Printing probabilities---")
LogisticProbabilities = svc.predict_proba(pred_feat)
for proba_row in LogisticProbabilities:
    print(proba_row)
print("---End of Printing Probabilities---")

In [None]:
# Show the SVM predictions as a plain list.
print([*svc.predict(pred_feat)])

In [None]:
# Pair each probability row with its predicted label and write one pair per line.
# NOTE(review): despite the .csv extension, each line is the str() of a Python
# tuple, not comma-separated fields — confirm downstream readers expect that.
l = list(zip(LogisticProbabilities, svc.predict(pred_feat)))
with open('M3_videos_SVM_3.csv', 'w') as f:
    # Iterate the pairs directly instead of indexing through range(len(l)).
    f.writelines(str(pair) + '\n' for pair in l)

# M2 training

In [None]:
# --- M2 experiment configuration ---
MODULE = 'm2'

# Columns kept as model inputs after preprocessing
# (the selection step is skipped when this list is empty).
FEATURE_SET = ['A3', 'A5', 'B1', 'B2', 'B10']

# N_FOLD and PRED_RATIO are passed to CVP_Set when the split is built.
N_FOLD = 10
PRED_RATIO = 0.2
# SCALING_PARAM is not referenced anywhere else in the visible notebook.
SCALING_PARAM = 4

# Missing-value handling: 'Binary' or 'Replacement'.
MISSING_VALUE_STRATEGY = 'Binary'
# Feature processing: 'poly', 'linear', 'indicator', 'interaction_ind' or 'pca_comp'.
PROCESSING_STRATEGY = 'linear'

# POLY_DEGREE is not referenced anywhere else in the visible notebook.
POLY_DEGREE = 2
NORMALIZE = True

# Training file location and the names of its special columns.
ADOS_FILE = "{0}/data/ados_{0}_allData.csv".format(MODULE)
label_id = "ASD"
label_age = "age_months"
label_gender = "male"
columns_to_delete = ["Subject.Id", "Diagnosis"]
sub_diagnosis_id = [
    "social_affect_calc",
    "restricted_repetitive_calc",
    "SA_RRI_total_calc",
    "severity_calc",
]

In [None]:
# Load the M2 training data and keep a copy of the sub-diagnosis columns.
data = ADOS_Data.read_csv(ADOS_FILE)
sub_diagnosis = data[sub_diagnosis_id]

# Remove the identifier and sub-diagnosis columns
# (per the original note, this also drops the rows with no label).
data.select_good_columns(columns_to_delete + sub_diagnosis_id)

# Sum of the 'male' column over the rows where ASD == 1.
print('gendering')
asd_rows = data[data['ASD'] == 1]
print(asd_rows[['male']].sum())

data.full_preprocessing(
    NORMALIZE,
    MISSING_VALUE_STRATEGY,
    PROCESSING_STRATEGY,
    [label_age],
    label_gender,
    label_id,
)
# Restrict to the chosen features only when a feature set was configured.
if FEATURE_SET:
    data.select_good_columns(FEATURE_SET, keep_the_column=True)

cv_set = CVP_Set(data, data.labels, N_FOLD, PRED_RATIO)

## LDA Training 

In [None]:
# Shrinkage LDA (lsqr solver) with fixed class priors, wrapped in BinClassifier.
# NOTE(review): the priors (0.029, 0.931) sum to 0.96 rather than 1 — this
# looks like a typo; confirm the intended values.
ld = BinClassifier(
    proc=LDA(solver="lsqr", shrinkage=0.8, priors=(0.029, 0.931)),
    severity=False,
)
ld.fit(cv_set.cv_feat, cv_set.cv_labels)
# Second entry of each predict_proba row for cv_set.pred_feat.
ld_score = [proba[1] for proba in ld.predict_proba(cv_set.pred_feat)]

## LR training

In [None]:
# L2-penalised, class-balanced logistic regression (C=0.05) wrapped in BinClassifier.
lr = BinClassifier(
    proc=LogisticRegression(penalty='l2', C=0.05, class_weight='balanced'),
    severity=False,
)
lr.fit(cv_set.cv_feat, cv_set.cv_labels)

## SVM training

In [None]:
# L1-penalised linear SVM (primal formulation) wrapped in BinClassifier.
svc = BinClassifier(
    proc=LinearSVC(C=0.5, penalty='l1', dual=False),
    severity=False,
)
svc.fit(cv_set.cv_feat, cv_set.cv_labels)

# M2 testing

In [None]:
# Load the scored-video file used to test the M2 models.
# NOTE(review): the file name starts with "M3_" although this is the M2
# section — confirm this is the intended input.
FILENAME = "M3_videos_LR2.csv"
columns_to_delete = ['child_id', 'scorer_id', 'video_file']
pred_feat = ADOS_Data.read_csv(FILENAME)

# NOTE(review): ytrue is captured before the column/row filtering below; if
# those steps drop rows, ytrue may no longer line up with pred_feat — confirm.
ytrue = pred_feat["ASD"]

# Remove the identifier columns
# (per the original note, this also drops the rows with no label).
pred_feat.select_good_columns(columns_to_delete)

pred_feat.full_preprocessing(
    NORMALIZE,
    MISSING_VALUE_STRATEGY,
    PROCESSING_STRATEGY,
    [label_age],
    label_gender,
    label_id,
)
# Restrict to the chosen features only when a feature set was configured.
if FEATURE_SET:
    pred_feat.select_good_columns(FEATURE_SET, keep_the_column=True)

## Logistic Regression

In [None]:
# Report metrics for the logistic-regression predictions against the file's labels.
test_func(pred_score=list(lr.predict(pred_feat)), predlabels=ytrue)

In [None]:
# Print one predict_proba row per sample for the logistic-regression model.
print("---Printing probabilities---")
LogisticProbabilities = lr.predict_proba(pred_feat)
for proba_row in LogisticProbabilities:
    print(proba_row)
print("---End of Printing Probabilities---")

In [None]:
# Show the logistic-regression predictions as a plain list.
print([*lr.predict(pred_feat)])

In [None]:
# Pair each probability row with its predicted label and write one pair per line.
# NOTE(review): despite the .csv extension, each line is the str() of a Python
# tuple, not comma-separated fields — confirm downstream readers expect that.
l = list(zip(LogisticProbabilities, lr.predict(pred_feat)))
with open('M3_videos_LR_L2_2.csv', 'w') as f:
    # Iterate the pairs directly instead of indexing through range(len(l)).
    f.writelines(str(pair) + '\n' for pair in l)

## SVM

In [None]:
# Report metrics for the SVM's hard predictions against the file's labels.
test_func(pred_score=list(svc.predict(pred_feat)), predlabels=ytrue)

In [None]:
# Print one predict_proba row per sample for the SVM model.
# NOTE(review): sklearn's LinearSVC has no predict_proba, so this relies on
# BinClassifier supplying one — confirm. The global keeps the name
# LogisticProbabilities even though the values come from the SVM.
print("---Printing probabilities---")
LogisticProbabilities = svc.predict_proba(pred_feat)
for proba_row in LogisticProbabilities:
    print(proba_row)
print("---End of Printing Probabilities---")

In [None]:
# Show the SVM predictions as a plain list.
print([*svc.predict(pred_feat)])

In [None]:
# Pair each probability row with its predicted label and write one pair per line.
# NOTE(review): despite the .csv extension, each line is the str() of a Python
# tuple, not comma-separated fields — confirm downstream readers expect that.
l = list(zip(LogisticProbabilities, svc.predict(pred_feat)))
with open('M3_videos_SVM_2.csv', 'w') as f:
    # Iterate the pairs directly instead of indexing through range(len(l)).
    f.writelines(str(pair) + '\n' for pair in l)

## LDA

In [None]:
# Report metrics for the LDA predictions against the file's labels.
test_func(pred_score=list(ld.predict(pred_feat)), predlabels=ytrue)

In [None]:
# Print one predict_proba row per sample for the LDA model.
# The global keeps the name LogisticProbabilities even though the values
# come from the LDA classifier.
print("---Printing probabilities---")
LogisticProbabilities = ld.predict_proba(pred_feat)
for proba_row in LogisticProbabilities:
    print(proba_row)
print("---End of Printing Probabilities---")

In [None]:
# Show the LDA predictions as a plain list.
print([*ld.predict(pred_feat)])