# Tester for Sebastien's classifier

**Please use Python 3 to run this code, and install all packages that the classifier depends on.**

In [1]:
# Authorship metadata: one name for the classifier code, one for this tester notebook.
__authoroftheclassifier__ = 'Sebastien Levy'
__authorofthetesterfile__ = 'Khaled Jedoui'

from processing import ADOS_Data
from cross_validation import CVP_Set
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge
from sklearn.svm import LinearSVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from classifiers import RegClassifier, BinClassifier
from sklearn.metrics import roc_auc_score, average_precision_score, classification_report, confusion_matrix, precision_recall_curve, auc

import matplotlib.pyplot as plt
import pandas as pd



In [2]:
def test_func(pred_score, predlabels):
    """Print a confusion matrix and summary metrics for binary predictions.

    Both label vectors are flipped (``1 - x``) before being handed to
    scikit-learn, so the original class 1 ends up in row/column 0 of the
    confusion matrix and is the class the precision/recall lines refer to.

    Args:
        pred_score: iterable of predicted scores or hard 0/1 predictions;
            values strictly greater than 0.5 are counted as class 1.
        predlabels: iterable of ground-truth 0/1 labels.

    Returns:
        None. All results are written to stdout.
    """
    # Build the flipped vectors once and reuse them for both reports.
    flipped_true = [1 - x for x in predlabels]
    flipped_pred = [1 - int(x > 0.5) for x in pred_score]

    def _ratio(numerator, denominator):
        # An undefined metric (empty denominator) is reported as NaN.
        if denominator == 0:
            return float('nan')
        return float(numerator) / float(denominator)

    print('Confusion matrix:')
    # Pinning the labels keeps the matrix 2x2 even when only one class is
    # present in the inputs; otherwise the cm[1][...] lookups below would fail.
    cm = confusion_matrix(flipped_true, flipped_pred, labels=[0, 1])
    print(cm)
    print('Precision: {}'.format(_ratio(cm[0][0], cm[0][0] + cm[1][0])))
    print('Recall/Sensitivity: {}'.format(_ratio(cm[0][0], cm[0][0] + cm[0][1])))
    print('Specificity: {}'.format(_ratio(cm[1][1], cm[1][1] + cm[1][0])))
    print ('class report')
    print (classification_report(flipped_true, flipped_pred))

# M3 training

In [3]:
# ---- M3 experiment configuration ----
MODULE = 'm3'
# Columns passed to select_good_columns(..., keep_the_column=True) after preprocessing.
FEATURE_SET = [
    'A2', 'A4', 'A8', 'B3_miss', 'B2',
    'B7', 'B8', 'D4', 'D3_miss', 'male',
]
# Both values are handed to CVP_Set when the split is built.
N_FOLD = 10
PRED_RATIO = 0.2
SCALING_PARAM = 4

# Options: Binary or Replacement
MISSING_VALUE_STRATEGY = 'Binary'
# Options: poly, linear, indicator, interaction_ind, pca_comp
PROCESSING_STRATEGY = 'linear'
POLY_DEGREE = 2
NORMALIZE = True

# Input file location is derived from the module name.
ADOS_FILE = "{0}/data/ados_{0}_allData.csv".format(MODULE)

# Column names used by the preprocessing calls.
label_id, label_age, label_gender = "ASD", "age_months", "male"
columns_to_delete = ["Subject.Id", "Diagnosis"]
sub_diagnosis_id = [
    "social_affect_calc",
    "restricted_repetitive_calc",
    "SA_RRI_total_calc",
    "severity_calc",
]

In [4]:
# Read the ADOS sheet and keep a copy of the sub-diagnosis columns before
# they are removed from the table.
data = ADOS_Data.read_csv(ADOS_FILE)
sub_diagnosis = data[sub_diagnosis_id]

# Drop the columns we do not need (original note: this also drops rows with no label).
data.select_good_columns(columns_to_delete + sub_diagnosis_id)

# Sum of the gender column over the rows whose label equals 1.
print('gendering')
print(data[data[label_id] == 1][[label_gender]].sum())

data.full_preprocessing(
    NORMALIZE,
    MISSING_VALUE_STRATEGY,
    PROCESSING_STRATEGY,
    [label_age],
    label_gender,
    label_id,
)
# An empty FEATURE_SET means "keep every column".
if FEATURE_SET:
    data.select_good_columns(FEATURE_SET, keep_the_column=True)

cv_set = CVP_Set(data, data.labels, N_FOLD, PRED_RATIO)

hello!
      A1  A2  A3  A4   A5  A6  A7  A8  A9  B1 ...   D2  D3  D4   D5   E1   E2  \
0      1   0   0   1  0.0   2   0   1   0   0 ...    1   8   0  1.0  0.0  1.0   
1      0   2   0   2  0.0   1   1   1   0   0 ...    1   8   0  2.0  0.0  0.0   
2      1   1   0   1  0.0   0   0   1   0   2 ...    2   8   0  2.0  1.0  0.0   
3      1   2   1   2  0.0   3   2   2   0   2 ...    2   8   0  2.0  1.0  0.0   
4      0   1   0   1  1.0   1   2   1   0   2 ...    0   8   0  2.0  1.0  0.0   
5      0   2   0   2  1.0   1   1   2   1   2 ...    2   8   0  2.0  1.0  0.0   
6      0   0   0   1  0.0   0   2   0   0   0 ...    0   8   0  2.0  1.0  0.0   
7      1   1   0   2  0.0   1   0   1   0   2 ...    0   8   0  2.0  0.0  0.0   
8      1   2   0   2  0.0   2   1   1   0   2 ...    1   8   0  2.0  1.0  0.0   
9      1   1   0   1  0.0   0   1   1   0   2 ...    0   8   0  0.0  0.0  0.0   
10     1   2   0   1  0.0   0   0   1   0   2 ...    1   8   0  3.0  0.0  1.0   
11     0   2   0   2 

## LR training

In [5]:
# Class-balanced, L2-penalised logistic regression fitted on the CV features.
lr = BinClassifier(
    proc=LogisticRegression(penalty='l2', C=1, class_weight='balanced'),
    severity=False,
)
lr.fit(cv_set.cv_feat, cv_set.cv_labels)

## SVM training

In [6]:
# L1-penalised linear SVM (primal formulation) fitted on the CV features.
svc = BinClassifier(
    proc=LinearSVC(C=.5, penalty='l1', dual=False),
    severity=False,
)
svc.fit(cv_set.cv_feat, cv_set.cv_labels)

## M3 testing

**For the m3 testing:**
 - Note that there are 4 datasets:
     - for_new_classifier_19.csv : Where B8 is question 19.
     - for_new_classifier_21.csv : Where B8 is question 21.
     - for_new_classifier_22.csv : Where B8 is question 22.
     - for_new_classifier_average.csv : Where B8 is the average (rounded) for questions 19, 21 and 22.

In [7]:
# Load the scored-video sheet that the fitted M3 models are evaluated on.
FILENAME = "M3_videos_LR3.csv"
columns_to_delete = ['child_id', 'scorer_id', 'video_file']
pred_feat = ADOS_Data.read_csv(FILENAME)
# Ground-truth labels are captured before any column/row filtering.
ytrue = pred_feat[label_id]

# Drop the columns we do not need (original note: this also drops rows with no label).
pred_feat.select_good_columns(columns_to_delete)

pred_feat.full_preprocessing(
    NORMALIZE,
    MISSING_VALUE_STRATEGY,
    PROCESSING_STRATEGY,
    [label_age],
    label_gender,
    label_id,
)
# NOTE(review): the M3 models were fitted with 'B3_miss'/'D3_miss' in
# FEATURE_SET, whereas this list selects 'B3'/'D3' -- confirm the prediction
# columns line up with the training columns.
FEATURE_SET = ['A2', 'A4', 'A8', 'B3', 'B2', 'B7', 'B8', 'D4', 'D3', 'male']
if FEATURE_SET:
    pred_feat.select_good_columns(FEATURE_SET, keep_the_column=True)

hello!
     ASD          original_file  age_months  male  unscorable   startTime  \
0      0                  MB161    5.170000     1           0  1526756954   
1      0                  MB161    5.166667     1           0  1524711363   
2      0                  MB161    5.166667     1           0  1524759595   
3      0                  MB171    2.420000     0           0  1526757822   
4      0                  MB171    2.416667     0           0  1524259778   
5      0                  MB171    2.416667     0           0  1524247314   
6      1                  MB049    3.420000     0           0  1526762265   
7      1                  MB049    3.416667     0           0  1524629728   
8      1                  MB049    3.416667     0           0  1524508356   
9      0                  MB082    2.420000     1           0  1526763384   
10     0                  MB082    2.416667     1           0  1524629282   
11     0                  MB082    2.416667     1           0  152450

  result = result.union(other)
  result = result.union(other)
  index = _union_indexes(indexes)
  index = _union_indexes(indexes)


                       ASD  original_file  age_months  male  unscorable  \
MB140                  NaN            6.0         NaN   NaN         NaN   
MB047                  NaN            6.0         NaN   NaN         NaN   
MB004                  NaN            6.0         NaN   NaN         NaN   
MB042                  NaN            6.0         NaN   NaN         NaN   
MB161                  NaN            6.0         NaN   NaN         NaN   
MB010                  NaN            5.0         NaN   NaN         NaN   
newyoutube_NT_female1  NaN            4.0         NaN   NaN         NaN   
newyoutube_ASD_male1   NaN            4.0         NaN   NaN         NaN   
newyoutube_NT_female3  NaN            4.0         NaN   NaN         NaN   
newyoutube_NT_male1    NaN            4.0         NaN   NaN         NaN   
newyoutube_ASD_male2   NaN            4.0         NaN   NaN         NaN   
newyoutube_NT_female4  NaN            4.0         NaN   NaN         NaN   
newyoutube_ASD_male3   Na

### Logistic regression

In [8]:
# Second entry of each predict_proba row, one value per row of pred_feat.
lr_score = [proba[1] for proba in lr.predict_proba(pred_feat)]
# Report metrics for the hard predictions against the labels read from the CSV.
test_func(
    list(lr.predict(pred_feat)),
    ytrue,
)

Confusion matrix:
[[94 10]
 [42 67]]
Precision: 0.6911764705882353
Recall/Sensitivity: 0.9038461538461539
Specificity: 0.6146788990825688
class report
             precision    recall  f1-score   support

          0       0.69      0.90      0.78       104
          1       0.87      0.61      0.72       109

avg / total       0.78      0.76      0.75       213



In [9]:
# Print one predict_proba row per line, framed by two banner lines.
print("---Printing probabilities---")
LogisticProbabilities = lr.predict_proba(pred_feat)
for proba_row in LogisticProbabilities:
    print(proba_row)
print("---End of Printing Probabilities---")

---Printing probabilities---
[0.97516396 0.02483604]
[0.70225485 0.29774515]
[0.97516396 0.02483604]
[0.07358682 0.92641318]
[0.0024001 0.9975999]
[0.00518025 0.99481975]
[0.04125871 0.95874129]
[0.08736494 0.91263506]
[0.21192338 0.78807662]
[0.00688276 0.99311724]
[0.13516881 0.86483119]
[0.86242769 0.13757231]
[0.97516396 0.02483604]
[0.91699779 0.08300221]
[0.06242816 0.93757184]
[0.0353423 0.9646577]
[0.0107047 0.9892953]
[0.5062352 0.4937648]
[0.83203789 0.16796211]
[0.02801782 0.97198218]
[0.19506574 0.80493426]
[5.74464880e-04 9.99425535e-01]
[0.09901758 0.90098242]
[0.25742025 0.74257975]
[0.90586703 0.09413297]
[0.97516396 0.02483604]
[0.97516396 0.02483604]
[3.17154856e-04 9.99682845e-01]
[0.05083182 0.94916818]
[0.00122693 0.99877307]
[4.29886731e-04 9.99570113e-01]
[0.00540468 0.99459532]
[0.02292693 0.97707307]
[3.59308900e-05 9.99964069e-01]
[0.00300951 0.99699049]
[0.13464923 0.86535077]
[0.85612719 0.14387281]
[0.98387895 0.01612105]
[0.9369435 0.0630565]
[0.7909918 0.

In [10]:
# Show the logistic-regression predictions for pred_feat as a plain list.
print(list(lr.predict(pred_feat)))

[0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1]


In [11]:
# Fetch the probabilities in this cell rather than reading the shared
# `LogisticProbabilities` global: the SVM cells rebind that name, so running
# cells out of order could pair SVM scores with logistic-regression labels.
lr_probabilities = lr.predict_proba(pred_feat)
l = list(zip(lr_probabilities, list(lr.predict(pred_feat))))
# Each output line is the repr of one (probabilities, predicted label) tuple.
with open('M3_videos_LR_L2_3.csv', 'w') as f:
    for pair in l:
        f.write(str(pair) + '\n')

### SVM

In [12]:
# Report confusion matrix and metrics for the SVM predictions on pred_feat.
test_func(list(svc.predict(pred_feat)), ytrue)

Confusion matrix:
[[101   3]
 [ 64  45]]
Precision: 0.6121212121212121
Recall/Sensitivity: 0.9711538461538461
Specificity: 0.41284403669724773
class report
             precision    recall  f1-score   support

          0       0.61      0.97      0.75       104
          1       0.94      0.41      0.57       109

avg / total       0.78      0.69      0.66       213



In [13]:
# NOTE(review): this rebinds `LogisticProbabilities` to the SVM output, and
# the recorded values fall outside [0, 1] -- presumably decision scores rather
# than probabilities; confirm what BinClassifier.predict_proba returns here.
print("---Printing probabilities---")
LogisticProbabilities = svc.predict_proba(pred_feat)
for score in LogisticProbabilities:
    print(score)
print("---End of Printing Probabilities---")

---Printing probabilities---
-0.42622966796653483
0.38851902040657005
-0.42622966796653483
1.5388492233110802
2.480484939573225
2.1786974677579733
1.7743997750503624
1.1643590362988865
0.7824906169923695
2.3380950143747516
1.2520442168713761
0.09896429294012354
-0.42622966796653483
0.008370954039282785
1.4860162364422767
1.8237100103386514
2.0923519691944343
0.6590766234522596
0.05249664131309606
1.6106605837684342
0.9227631801632807
2.9101008810468763
1.1915853141181516
0.9359791860133395
-0.018855323779982447
-0.42622966796653483
-0.42622966796653483
3.3742118734790254
1.520690356660749
2.768510808537554
3.377502510630611
2.3846975871658813
1.9762791409031726
3.9276974070434982
2.4400706975200057
1.1883209865252156
0.12449667015735
-0.7356959434930128
-0.1839178374032544
0.1590173020094625
-0.11550207225463105
1.4611731469199976
1.8982442696873694
0.4973485375676103
-0.384324206209502
1.2188327846998108
1.7573546713341073
1.236279857856056
0.17245554148819053
0.8526077142372133
-0.51

In [14]:
# Show the SVM predictions for pred_feat as a plain list.
print(list(svc.predict(pred_feat)))

[0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1]


In [15]:
# Fetch the SVM scores in this cell rather than reading the shared
# `LogisticProbabilities` global, which the logistic-regression cells also
# assign; otherwise out-of-order execution could pair LR probabilities with
# SVM predictions in the exported file.
svc_scores = svc.predict_proba(pred_feat)
l = list(zip(svc_scores, list(svc.predict(pred_feat))))
# Each output line is the repr of one (score, predicted label) tuple.
with open('M3_videos_SVM_3.csv', 'w') as f:
    for pair in l:
        f.write(str(pair) + '\n')

# M2 training

In [16]:
# ---- M2 experiment configuration ----
MODULE = 'm2'

# Columns passed to select_good_columns(..., keep_the_column=True) after preprocessing.
FEATURE_SET = [
    'A3',
    'A5',
    'B1',
    'B2',
    'B10',
]

# Both values are handed to CVP_Set when the split is built.
N_FOLD = 10
PRED_RATIO = 0.2
SCALING_PARAM = 4

# Options: Binary or Replacement
MISSING_VALUE_STRATEGY = 'Binary'
# Options: poly, linear, indicator, interaction_ind, pca_comp
PROCESSING_STRATEGY = 'linear'
POLY_DEGREE = 2
NORMALIZE = True

# Input file location is derived from the module name.
ADOS_FILE = "{0}/data/ados_{0}_allData.csv".format(MODULE)

# Column names used by the preprocessing calls.
label_id, label_age, label_gender = "ASD", "age_months", "male"
columns_to_delete = ["Subject.Id", "Diagnosis"]
sub_diagnosis_id = [
    "social_affect_calc",
    "restricted_repetitive_calc",
    "SA_RRI_total_calc",
    "severity_calc",
]

In [17]:
# Read the ADOS sheet and keep a copy of the sub-diagnosis columns before
# they are removed from the table.
data = ADOS_Data.read_csv(ADOS_FILE)
sub_diagnosis = data[sub_diagnosis_id]

# Drop the columns we do not need (original note: this also drops rows with no label).
data.select_good_columns(columns_to_delete + sub_diagnosis_id)

# Sum of the gender column over the rows whose label equals 1.
print('gendering')
print(data[data[label_id] == 1][[label_gender]].sum())

data.full_preprocessing(
    NORMALIZE,
    MISSING_VALUE_STRATEGY,
    PROCESSING_STRATEGY,
    [label_age],
    label_gender,
    label_id,
)
# An empty FEATURE_SET means "keep every column".
if FEATURE_SET:
    data.select_good_columns(FEATURE_SET, keep_the_column=True)

cv_set = CVP_Set(data, data.labels, N_FOLD, PRED_RATIO)

hello!
      A1   A2  A3  A4  A5  A6  A7  A8  B1  B2 ...   D1  D2  D3  D4  E1  E2  \
0      0  1.0   2   0   1   2   0   2   2   1 ...    2   2   0   3   0   1   
1      1  0.0   1   2   1   1   0   1   2   1 ...    0   0   0   1   1   0   
2      1  2.0   2   1   2   2   2   2   2   2 ...    2   0   0   2   1   2   
3      0  1.0   1   0   1   2   0   0   2   2 ...    2   0   0   1   0   1   
4      1  0.0   1   2   2   2   0   2   2   1 ...    2   2   0   1   1   0   
5      0  1.0   2   1   2   2   0   1   2   1 ...    2   2   0   2   0   1   
6      0  1.0   1   2   1   2   2   3   2   2 ...    2   2   0   1   0   1   
7      0  2.0   2   0   0   2   3   1   2   2 ...    2   0   0   1   0   2   
8      1  1.0   2   0   0   1   1   1   2   1 ...    2   0   0   3   1   1   
9      1  2.0   2   2   2   2   1   0   2   1 ...    2   1   0   2   1   2   
10     0  0.0   2   2   0   2   1   0   2   1 ...    2   1   0   1   0   0   
11     1  1.0   1   1   0   2   1   2   2   1 ...    2   

## LDA Training 

In [18]:
# Shrinkage LDA with explicit class priors.
# NOTE(review): the two priors add up to 0.96, not 1 -- confirm the intended values.
ld = BinClassifier(
    proc=LDA(solver="lsqr", shrinkage=0.8, priors=(0.029, 0.931)),
    severity=False,
)
ld.fit(cv_set.cv_feat, cv_set.cv_labels)
# Second entry of each predict_proba row, one value per row of cv_set.pred_feat.
ld_score = [proba[1] for proba in ld.predict_proba(cv_set.pred_feat)]



## LR training

In [19]:
# Class-balanced, L2-penalised logistic regression (C=0.05) fitted on the CV features.
lr = BinClassifier(
    proc=LogisticRegression(penalty='l2', C=0.05, class_weight='balanced'),
    severity=False,
)
lr.fit(cv_set.cv_feat, cv_set.cv_labels)

## SVM training

In [20]:
# L1-penalised linear SVM (primal formulation) fitted on the CV features.
svc = BinClassifier(
    proc=LinearSVC(C=.5, penalty='l1', dual=False),
    severity=False,
)
svc.fit(cv_set.cv_feat, cv_set.cv_labels)

# M2 testing

In [21]:
# Load the scored-video sheet that the fitted M2 models are evaluated on.
# NOTE(review): the file name starts with "M3_" although this is the M2
# section -- confirm this is the intended input.
FILENAME = "M3_videos_LR2.csv"
columns_to_delete = ['child_id', 'scorer_id', 'video_file']
pred_feat = ADOS_Data.read_csv(FILENAME)
# Ground-truth labels are captured before any column/row filtering.
ytrue = pred_feat[label_id]

# Drop the columns we do not need (original note: this also drops rows with no label).
pred_feat.select_good_columns(columns_to_delete)

pred_feat.full_preprocessing(
    NORMALIZE,
    MISSING_VALUE_STRATEGY,
    PROCESSING_STRATEGY,
    [label_age],
    label_gender,
    label_id,
)
# An empty FEATURE_SET means "keep every column".
if FEATURE_SET:
    pred_feat.select_good_columns(FEATURE_SET, keep_the_column=True)

hello!
     ASD          original_file  age_months  male  unscorable   startTime  \
0      0                  MB161    5.170000     1           0  1526756954   
1      0                  MB161    5.166667     1           0  1524711363   
2      0                  MB161    5.166667     1           0  1524759595   
3      0                  MB171    2.420000     0           0  1526757822   
4      0                  MB171    2.416667     0           0  1524259778   
5      0                  MB171    2.416667     0           0  1524247314   
6      1                  MB049    3.420000     0           0  1526762265   
7      1                  MB049    3.416667     0           0  1524629728   
8      1                  MB049    3.416667     0           0  1524508356   
9      0                  MB082    2.420000     1           0  1526763384   
10     0                  MB082    2.416667     1           0  1524629282   
11     0                  MB082    2.416667     1           0  152450

                       ASD  original_file  age_months  male  unscorable  \
MB140                  NaN            6.0         NaN   NaN         NaN   
MB047                  NaN            6.0         NaN   NaN         NaN   
MB004                  NaN            6.0         NaN   NaN         NaN   
MB042                  NaN            6.0         NaN   NaN         NaN   
MB161                  NaN            6.0         NaN   NaN         NaN   
MB010                  NaN            5.0         NaN   NaN         NaN   
newyoutube_NT_female1  NaN            4.0         NaN   NaN         NaN   
newyoutube_ASD_male1   NaN            4.0         NaN   NaN         NaN   
newyoutube_NT_female3  NaN            4.0         NaN   NaN         NaN   
newyoutube_NT_male1    NaN            4.0         NaN   NaN         NaN   
newyoutube_ASD_male2   NaN            4.0         NaN   NaN         NaN   
newyoutube_NT_female4  NaN            4.0         NaN   NaN         NaN   
newyoutube_ASD_male3   Na

  result = result.union(other)
  result = result.union(other)
  index = _union_indexes(indexes)
  index = _union_indexes(indexes)


## Logistic Regression

In [22]:
# Report confusion matrix and metrics for the logistic-regression predictions on pred_feat.
test_func(list(lr.predict(pred_feat)), ytrue)

Confusion matrix:
[[88 16]
 [42 67]]
Precision: 0.676923076923077
Recall/Sensitivity: 0.8461538461538461
Specificity: 0.6146788990825688
class report
             precision    recall  f1-score   support

          0       0.68      0.85      0.75       104
          1       0.81      0.61      0.70       109

avg / total       0.74      0.73      0.72       213



In [23]:
# Print one predict_proba row per line, framed by two banner lines.
print("---Printing probabilities---")
LogisticProbabilities = lr.predict_proba(pred_feat)
for proba_row in LogisticProbabilities:
    print(proba_row)
print("---End of Printing Probabilities---")

---Printing probabilities---
[0.86087581 0.13912419]
[0.75458106 0.24541894]
[0.86087581 0.13912419]
[0.03915991 0.96084009]
[0.04666105 0.95333895]
[0.04977209 0.95022791]
[0.053079 0.946921]
[0.15712655 0.84287345]
[0.43659334 0.56340666]
[0.02576077 0.97423923]
[0.34377529 0.65622471]
[0.59930399 0.40069601]
[0.86087581 0.13912419]
[0.68530299 0.31469701]
[0.28196619 0.71803381]
[0.49129284 0.50870716]
[0.21162422 0.78837578]
[0.51321498 0.48678502]
[0.34036728 0.65963272]
[0.21347909 0.78652091]
[0.22058535 0.77941465]
[0.03819358 0.96180642]
[0.27578453 0.72421547]
[0.43659334 0.56340666]
[0.8134962 0.1865038]
[0.86087581 0.13912419]
[0.86087581 0.13912419]
[0.02722971 0.97277029]
[0.26459911 0.73540089]
[0.20732036 0.79267964]
[0.02576077 0.97423923]
[0.11817765 0.88182235]
[0.28196619 0.71803381]
[0.03801098 0.96198902]
[0.15566441 0.84433559]
[0.35074287 0.64925713]
[0.36523573 0.63476427]
[0.86087581 0.13912419]
[0.86087581 0.13912419]
[0.53660355 0.46339645]
[0.69088114 0.309

In [24]:
# Show the logistic-regression predictions for pred_feat as a plain list.
print(list(lr.predict(pred_feat)))

[0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1]


In [25]:
# Fetch the probabilities in this cell rather than reading the shared
# `LogisticProbabilities` global: the SVM cells rebind that name, so running
# cells out of order could pair SVM scores with logistic-regression labels.
lr_probabilities = lr.predict_proba(pred_feat)
l = list(zip(lr_probabilities, list(lr.predict(pred_feat))))
# Each output line is the repr of one (probabilities, predicted label) tuple.
with open('M3_videos_LR_L2_2.csv', 'w') as f:
    for pair in l:
        f.write(str(pair) + '\n')

## SVM

In [26]:
# Report confusion matrix and metrics for the SVM predictions on pred_feat.
# NOTE(review): the recorded output shows every row predicted as the same
# class (second confusion-matrix column is all zeros) -- worth investigating.
test_func(list(svc.predict(pred_feat)), ytrue)

Confusion matrix:
[[104   0]
 [109   0]]
Precision: 0.48826291079812206
Recall/Sensitivity: 1.0
Specificity: 0.0
class report
             precision    recall  f1-score   support

          0       0.49      1.00      0.66       104
          1       0.00      0.00      0.00       109

avg / total       0.24      0.49      0.32       213



  'precision', 'predicted', average, warn_for)


In [27]:
# NOTE(review): this rebinds `LogisticProbabilities` to the SVM output, and
# some recorded values exceed 1 -- presumably decision scores rather than
# probabilities; confirm what BinClassifier.predict_proba returns here.
print("---Printing probabilities---")
LogisticProbabilities = svc.predict_proba(pred_feat)
for score in LogisticProbabilities:
    print(score)
print("---End of Printing Probabilities---")

---Printing probabilities---
0.10749858699153973
0.3267972124011773
0.10749858699153973
1.8082417498689944
1.8875514847257537
1.7782863087914633
1.6690211328571731
1.359487125907573
0.8658831166376271
1.998333275168152
1.0297908468260657
0.7008429087116091
0.10749858699153973
0.5361868375561196
1.2289072675297898
0.7911941215606433
1.293823026235156
0.810492221416428
1.000967883486114
1.1948310547520835
1.1405726372684637
1.863248508319665
1.1841737135303372
0.8658831166376271
0.21714789969635853
0.10749858699153973
0.10749858699153973
1.9728978210244839
1.1944469179815551
1.3488297846858266
1.998333275168152
1.612861964094917
1.2289072675297898
1.778670445561992
1.4584790973906454
1.0745244008255184
0.8655189124411045
0.10749858699153973
0.10749858699153973
0.6462202870314668
0.48118007910544897
0.9648750881206996
1.2399487455220646
0.9201415341212468
0.27215465814702927
0.840447662493959
1.0297908468260657
0.9751482925719175
0.43644652510599613
0.8651347756705761
0.10749858699153973


In [28]:
# Show the SVM predictions for pred_feat as a plain list.
print(list(svc.predict(pred_feat)))

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [29]:
# Fetch the SVM scores in this cell rather than reading the shared
# `LogisticProbabilities` global, which the logistic-regression cells also
# assign; otherwise out-of-order execution could pair LR probabilities with
# SVM predictions in the exported file.
svc_scores = svc.predict_proba(pred_feat)
l = list(zip(svc_scores, list(svc.predict(pred_feat))))
# Each output line is the repr of one (score, predicted label) tuple.
with open('M3_videos_SVM_2.csv', 'w') as f:
    for pair in l:
        f.write(str(pair) + '\n')

## LDA

In [None]:
# Report confusion matrix and metrics for the LDA predictions on pred_feat.
# NOTE: this cell has no recorded execution count or output in the notebook.
test_func(list(ld.predict(pred_feat)), ytrue)

In [None]:
# Print one LDA predict_proba row per line, framed by two banner lines.
# NOTE(review): the result is stored under the shared name `LogisticProbabilities`.
print("---Printing probabilities---")
LogisticProbabilities = ld.predict_proba(pred_feat)
for proba_row in LogisticProbabilities:
    print(proba_row)
print("---End of Printing Probabilities---")

In [None]:
# Show the LDA predictions for pred_feat as a plain list.
print(list(ld.predict(pred_feat)))