In [1]:
import numpy as np
import math
import pandas as pd
from pandas import crosstab
import statsmodels.api as sm
import scipy.stats as stats
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import roc_curve, roc_auc_score, auc, classification_report, confusion_matrix, accuracy_score, f1_score, recall_score, precision_score, precision_recall_curve
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, cross_val_score, KFold, StratifiedKFold, cross_validate, train_test_split
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier, OutputCodeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from xgboost.sklearn import XGBClassifier
import xgboost as xgb
from collections import Counter
from imblearn.over_sampling import SMOTE

pd.set_option('display.max_columns', 60)
pd.set_option('display.max_rows', 70)

Using TensorFlow backend.


In [2]:
df = pd.read_csv(r'C:\Users\1130j\Desktop\Yale\Data\all_recent_tweets_text_score_8_parsed_noDup.csv')

In [3]:
df = df.loc[df['classname'] != 'awareness']
df.loc[df['classname']=='None', 'class'] = 0
df.loc[df['classname']=='pain', 'class'] = 1
df.loc[df['classname'].isin(['pleasure', 'addiction']), 'class'] = 2

### Group Pleasure and Addiction together

In [4]:
crosstab(df['class'], df['classname'])

classname,None,addiction,pain,pleasure
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,166,0,0,0
1,0,0,229,0
2,0,197,0,572


In [14]:
corr(df[X], df[Y])

NameError: name 'corr' is not defined

### Define X and Y

In [5]:
X = df[df.columns.difference(['screen_name', 'user_id', 'classname', 'class'])]
y = df['class']

### Oversampling

In [6]:
print('Original dataset shape %s' % Counter(y))

Original dataset shape Counter({2: 769, 1: 229, 0: 166})


In [7]:
oversampling = SMOTE(random_state=0)
X_res, y_res = oversampling.fit_resample(X, y)

In [8]:
print('Resampled dataset shape %s' % Counter(y_res))

Resampled dataset shape Counter({2: 769, 1: 769, 0: 769})


### Train Test Split

In [9]:
train, test = train_test_split(pd.concat([X_res, y_res], axis=1), test_size=0.3, random_state=0, stratify=y_res)

In [10]:
X_train = train[train.columns.difference(['screen_name', 'user_id', 'classname', 'class'])]
y_train = train['class']
X_test = test[test.columns.difference(['screen_name', 'user_id', 'classname', 'class'])]
y_test = test['class']

In [11]:
y_train.value_counts()

2    538
1    538
0    538
Name: class, dtype: int64

In [12]:
y_test.value_counts()

2    231
1    231
0    231
Name: class, dtype: int64

### Baseline Model

In [13]:
lr = LogisticRegression(solver='newton-cg', random_state=0)
svc = SVC(random_state=0)
rf = RandomForestClassifier(random_state=0)
xgb = XGBClassifier(objective='multi:softmax', num_class=4, random_state=0)
nb = GaussianNB()

onevsone_lr = OneVsOneClassifier(lr).fit(X_train, y_train)
onevsone_svc = OneVsOneClassifier(svc).fit(X_train, y_train)
onevsone_rf = OneVsOneClassifier(rf).fit(X_train, y_train)
# onevsone_xgb = OneVsOneClassifier(xgb).fit(X_train, y_train)
onevsone_nb = OneVsOneClassifier(nb).fit(X_train, y_train)

onevsrest_lr = OneVsRestClassifier(lr).fit(X_train, y_train)
onevsrest_svc = OneVsRestClassifier(svc).fit(X_train, y_train)
onevsrest_rf = OneVsRestClassifier(rf).fit(X_train, y_train)
onevsrest_nb = OneVsRestClassifier(nb).fit(X_train, y_train)

In [22]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.6270186335403726.
ovo_lr: Test f1 micro is 0.6349206349206349
ovo_svc: Cross validation f1 micro is 0.7973698336017175.
ovo_svc: Test f1 micro is 0.7792207792207793
ovo_rf: Cross validation f1 micro is 0.846330802852542.
ovo_rf: Test f1 micro is 0.8225108225108225
ovo_nb: Cross validation f1 micro is 0.5749789126600721.
ovo_nb: Test f1 micro is 0.5440115440115441
ovr_lr: Cross validation f1 micro is 0.6331838049229355.
ovr_lr: Test f1 micro is 0.6219336219336219
ovr_svc: Cross validation f1 micro is 0.786837665823173.
ovr_svc: Test f1 micro is 0.7907647907647908
ovr_rf: Cross validation f1 micro is 0.8605896787056206.
ovr_rf: Test f1 micro is 0.8311688311688312
ovr_nb: Cross validation f1 micro is 0.5594778007821486.
ovr_nb: Test f1 micro is 0.5281385281385281
xgb: Cross validation f1 micro is 0.7831569664902999.
xgb: Test f1 micro is 0.759018759018759


In [26]:
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       0.87      0.87      0.87       231
           1       0.83      0.85      0.84       231
           2       0.78      0.77      0.77       231

    accuracy                           0.83       693
   macro avg       0.83      0.83      0.83       693
weighted avg       0.83      0.83      0.83       693

micro avg                              0.83


### BIC 6

In [27]:
BIC_features = ['netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven', 'health_oriented']

X_train = train[BIC_features]
X_test = test[BIC_features]

In [28]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.5619507706464228.
ovo_lr: Test f1 micro is 0.5526695526695526
ovo_svc: Cross validation f1 micro is 0.6455869948623573.
ovo_svc: Test f1 micro is 0.6248196248196248
ovo_rf: Cross validation f1 micro is 0.7317230273752013.
ovo_rf: Test f1 micro is 0.7113997113997114
ovo_nb: Cross validation f1 micro is 0.5495820872632466.
ovo_nb: Test f1 micro is 0.5108225108225108
ovr_lr: Cross validation f1 micro is 0.5619431025228127.
ovr_lr: Test f1 micro is 0.5497835497835498
ovr_svc: Cross validation f1 micro is 0.6598343685300208.
ovr_svc: Test f1 micro is 0.6233766233766234
ovr_rf: Cross validation f1 micro is 0.7316923548807607.
ovr_rf: Test f1 micro is 0.7085137085137087
ovr_nb: Cross validation f1 micro is 0.5446361475346982.
ovr_nb: Test f1 micro is 0.5021645021645021
xgb: Cross validation f1 micro is 0.6635457403573346.
xgb: Test f1 micro is 0.6392496392496393


In [29]:
mod = onevsone_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       0.73      0.71      0.72       231
           1       0.75      0.76      0.76       231
           2       0.66      0.67      0.66       231

    accuracy                           0.71       693
   macro avg       0.71      0.71      0.71       693
weighted avg       0.71      0.71      0.71       693

micro avg                              0.71


### BIC 10

In [30]:
BIC_features = ['netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven',
                'health_oriented', 'adjustment', 'reward_bias', 'cheerful', 'leisure_oriented']

X_train = train[BIC_features]
X_test = test[BIC_features]

In [31]:
# import math
# print('odds ratio    {}'.format(math.exp()) )
# print('odds ratio    {}'.format(math.exp())  )
# print('odds ratio    {}'.format(math.exp()) )
# print('odds ratio    {}'.format(math.exp()) )
# print('odds ratio    {}'.format(math.exp())  )

In [32]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.5799210183268155.
ovo_lr: Test f1 micro is 0.5901875901875901
ovo_svc: Cross validation f1 micro is 0.6555018786902845.
ovo_svc: Test f1 micro is 0.670995670995671
ovo_rf: Cross validation f1 micro is 0.7756728778467908.
ovo_rf: Test f1 micro is 0.7474747474747475
ovo_nb: Cross validation f1 micro is 0.5266697339161107.
ovo_nb: Test f1 micro is 0.5238095238095238




ovr_lr: Cross validation f1 micro is 0.5861091940802086.
ovr_lr: Test f1 micro is 0.5959595959595959
ovr_svc: Cross validation f1 micro is 0.6610727704930602.
ovr_svc: Test f1 micro is 0.6695526695526696
ovr_rf: Cross validation f1 micro is 0.7868223295759528.
ovr_rf: Test f1 micro is 0.7503607503607503
ovr_nb: Cross validation f1 micro is 0.5372134038800706.
ovr_nb: Test f1 micro is 0.5411255411255411
xgb: Cross validation f1 micro is 0.6883214477417376.
xgb: Test f1 micro is 0.6738816738816739


In [33]:
mod = onevsone_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       0.78      0.78      0.78       231
           1       0.79      0.75      0.77       231
           2       0.67      0.71      0.69       231

    accuracy                           0.75       693
   macro avg       0.75      0.75      0.75       693
weighted avg       0.75      0.75      0.75       693

micro avg                              0.75


### BIC 15

In [34]:
BIC_features = ['netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven',
                'health_oriented', 'adjustment', 'reward_bias', 'cheerful', 'leisure_oriented',
                'thinking_style', 'aggressive', 'organized', 'self_assured', 'religion_oriented']

X_train = train[BIC_features]
X_test = test[BIC_features]

In [35]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.5941837282416993.
ovo_lr: Test f1 micro is 0.5930735930735931
ovo_svc: Cross validation f1 micro is 0.7137451115711986.
ovo_svc: Test f1 micro is 0.7113997113997114
ovo_rf: Cross validation f1 micro is 0.8092094164557933.
ovo_rf: Test f1 micro is 0.7777777777777778
ovo_nb: Cross validation f1 micro is 0.5626255655241162.
ovo_nb: Test f1 micro is 0.5483405483405484
ovr_lr: Cross validation f1 micro is 0.5811785905988804.
ovr_lr: Test f1 micro is 0.5829725829725829
ovr_svc: Cross validation f1 micro is 0.716237251744498.
ovr_svc: Test f1 micro is 0.7113997113997114
ovr_rf: Cross validation f1 micro is 0.8271374894563301.
ovr_rf: Test f1 micro is 0.7994227994227994
ovr_nb: Cross validation f1 micro is 0.5533356337704163.
ovr_nb: Test f1 micro is 0.5353535353535354
xgb: Cross validation f1 micro is 0.7094547964113181.
xgb: Test f1 micro is 0.6897546897546898


In [36]:
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       0.82      0.83      0.83       231
           1       0.84      0.81      0.82       231
           2       0.74      0.76      0.75       231

    accuracy                           0.80       693
   macro avg       0.80      0.80      0.80       693
weighted avg       0.80      0.80      0.80       693

micro avg                              0.8


### BIC 20

In [37]:
BIC_features = ['netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven',
                'health_oriented', 'adjustment', 'reward_bias', 'cheerful', 'leisure_oriented',
                'thinking_style', 'aggressive', 'organized', 'self_assured', 'religion_oriented',
                'cautious', 'impulsive', 'disciplined', 'type_a', 'liberal']

X_train = train[BIC_features]
X_test = test[BIC_features]

In [38]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.5966451959205583.
ovo_lr: Test f1 micro is 0.5930735930735931
ovo_svc: Cross validation f1 micro is 0.7527605244996549.
ovo_svc: Test f1 micro is 0.7344877344877345
ovo_rf: Cross validation f1 micro is 0.8178437236408251.
ovo_rf: Test f1 micro is 0.7835497835497836
ovo_nb: Cross validation f1 micro is 0.563216011042098.
ovo_nb: Test f1 micro is 0.5454545454545454
ovr_lr: Cross validation f1 micro is 0.5960125757227207.
ovr_lr: Test f1 micro is 0.5858585858585859
ovr_svc: Cross validation f1 micro is 0.7385323211410167.
ovr_svc: Test f1 micro is 0.7359307359307359
ovr_rf: Cross validation f1 micro is 0.8277662755923625.
ovr_rf: Test f1 micro is 0.8037518037518038
ovr_nb: Cross validation f1 micro is 0.560746875239629.
ovr_nb: Test f1 micro is 0.5252525252525253
xgb: Cross validation f1 micro is 0.7286442757457251.
xgb: Test f1 micro is 0.7215007215007215


In [39]:
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       0.85      0.83      0.84       231
           1       0.82      0.83      0.83       231
           2       0.74      0.76      0.75       231

    accuracy                           0.80       693
   macro avg       0.80      0.80      0.80       693
weighted avg       0.80      0.80      0.80       693

micro avg                              0.8


### BIC 25

In [40]:
BIC_features = ['netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven',
                'health_oriented', 'adjustment', 'reward_bias', 'cheerful', 'leisure_oriented',
                'thinking_style', 'aggressive', 'organized', 'self_assured', 'religion_oriented',
                'cautious', 'impulsive', 'disciplined', 'type_a', 'liberal',
                'melancholy', 'neuroticism', 'sexual_focus', 'food_focus', 'cold']

X_train = train[BIC_features]
X_test = test[BIC_features]

In [41]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.6022237558469443.
ovo_lr: Test f1 micro is 0.6204906204906205
ovo_svc: Cross validation f1 micro is 0.7769266160570508.
ovo_svc: Test f1 micro is 0.7604617604617605
ovo_rf: Cross validation f1 micro is 0.8209071390230811.
ovo_rf: Test f1 micro is 0.8095238095238095
ovo_nb: Cross validation f1 micro is 0.568786902844874.
ovo_nb: Test f1 micro is 0.5339105339105339
ovr_lr: Cross validation f1 micro is 0.6009853538839046.
ovr_lr: Test f1 micro is 0.6190476190476191
ovr_svc: Cross validation f1 micro is 0.7676520205505712.
ovr_svc: Test f1 micro is 0.7691197691197691
ovr_rf: Cross validation f1 micro is 0.84447895100069.
ovr_rf: Test f1 micro is 0.8210678210678211
ovr_nb: Cross validation f1 micro is 0.5694003527336859.
ovr_nb: Test f1 micro is 0.5093795093795094
xgb: Cross validation f1 micro is 0.7360478490913274.
xgb: Test f1 micro is 0.7229437229437228


In [42]:
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       0.85      0.86      0.86       231
           1       0.84      0.83      0.84       231
           2       0.77      0.77      0.77       231

    accuracy                           0.82       693
   macro avg       0.82      0.82      0.82       693
weighted avg       0.82      0.82      0.82       693

micro avg                              0.82


### BIC 30

In [43]:
BIC_features = ['netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven',
                'health_oriented', 'adjustment', 'reward_bias', 'cheerful', 'leisure_oriented',
                'thinking_style', 'aggressive', 'organized', 'self_assured', 'religion_oriented',
                'cautious', 'impulsive', 'disciplined', 'type_a', 'liberal',
                'melancholy', 'neuroticism', 'sexual_focus', 'food_focus', 'cold',
                'friend_focus', 'agreeableness', 'generous', 'genuine', 'self_conscious']

X_train = train[BIC_features]
X_test = test[BIC_features]

In [44]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.6232995935894486.
ovo_lr: Test f1 micro is 0.6204906204906205
ovo_svc: Cross validation f1 micro is 0.7855954297983283.
ovo_svc: Test f1 micro is 0.7748917748917749
ovo_rf: Cross validation f1 micro is 0.8227666589985431.
ovo_rf: Test f1 micro is 0.8225108225108225
ovo_nb: Cross validation f1 micro is 0.5626217314623111.
ovo_nb: Test f1 micro is 0.5468975468975469
ovr_lr: Cross validation f1 micro is 0.6214209033049612.
ovr_lr: Test f1 micro is 0.6291486291486291
ovr_svc: Cross validation f1 micro is 0.7738171919331339.
ovr_svc: Test f1 micro is 0.7705627705627706
ovr_rf: Cross validation f1 micro is 0.8388812207652787.
ovr_rf: Test f1 micro is 0.8311688311688312
ovr_nb: Cross validation f1 micro is 0.5588835212023618.
ovr_nb: Test f1 micro is 0.5310245310245311
xgb: Cross validation f1 micro is 0.7540104286481097.
xgb: Test f1 micro is 0.7518037518037518


In [45]:
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       0.88      0.87      0.87       231
           1       0.83      0.84      0.83       231
           2       0.78      0.79      0.79       231

    accuracy                           0.83       693
   macro avg       0.83      0.83      0.83       693
weighted avg       0.83      0.83      0.83       693

micro avg                              0.83


### BIC 35

In [46]:
BIC_features = ['netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven',
                'health_oriented', 'adjustment', 'reward_bias', 'cheerful', 'leisure_oriented',
                'thinking_style', 'aggressive', 'organized', 'self_assured', 'religion_oriented',
                'cautious', 'impulsive', 'disciplined', 'type_a', 'liberal',
                'melancholy', 'neuroticism', 'sexual_focus', 'food_focus', 'cold',
                'friend_focus', 'agreeableness', 'generous', 'genuine', 'self_conscious',
                'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious']

X_train = train[BIC_features]
X_test = test[BIC_features]

In [47]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.629495437466452.
ovo_lr: Test f1 micro is 0.6507936507936508
ovo_svc: Cross validation f1 micro is 0.7992331876389847.
ovo_svc: Test f1 micro is 0.7878787878787878
ovo_rf: Cross validation f1 micro is 0.833317997086113.
ovo_rf: Test f1 micro is 0.8023088023088023
ovo_nb: Cross validation f1 micro is 0.5632236791657081.
ovo_nb: Test f1 micro is 0.5454545454545454
ovr_lr: Cross validation f1 micro is 0.6325818572195383.
ovr_lr: Test f1 micro is 0.6493506493506493
ovr_svc: Cross validation f1 micro is 0.7849551414768806.
ovr_svc: Test f1 micro is 0.7922077922077922
ovr_rf: Cross validation f1 micro is 0.8500498428034661.
ovr_rf: Test f1 micro is 0.834054834054834
ovr_nb: Cross validation f1 micro is 0.5619967793880838.
ovr_nb: Test f1 micro is 0.5339105339105339
xgb: Cross validation f1 micro is 0.7608427267847557.
xgb: Test f1 micro is 0.7518037518037518


In [48]:
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       0.88      0.87      0.88       231
           1       0.85      0.84      0.84       231
           2       0.78      0.79      0.78       231

    accuracy                           0.83       693
   macro avg       0.83      0.83      0.83       693
weighted avg       0.83      0.83      0.83       693

micro avg                              0.83


### BIC 40

In [49]:
BIC_features = ['netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven',
                'health_oriented', 'adjustment', 'reward_bias', 'cheerful', 'leisure_oriented',
                'thinking_style', 'aggressive', 'organized', 'self_assured', 'religion_oriented',
                'cautious', 'impulsive', 'disciplined', 'type_a', 'liberal',
                'melancholy', 'neuroticism', 'sexual_focus', 'food_focus', 'cold',
                'friend_focus', 'agreeableness', 'generous', 'genuine', 'self_conscious',
                'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious',
                'stressed', 'work_oriented', 'depression', 'family_oriented', 'insecure']

X_train = train[BIC_features]
X_test = test[BIC_features]

In [50]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.6424852388620504.
ovo_lr: Test f1 micro is 0.6421356421356421
ovo_svc: Cross validation f1 micro is 0.796733379342075.
ovo_svc: Test f1 micro is 0.7777777777777778
ovo_rf: Cross validation f1 micro is 0.8358101372594126.
ovo_rf: Test f1 micro is 0.8124098124098124
ovo_nb: Cross validation f1 micro is 0.5588796871405568.
ovo_nb: Test f1 micro is 0.5468975468975469
ovr_lr: Cross validation f1 micro is 0.6313434552564987.
ovr_lr: Test f1 micro is 0.6378066378066378
ovr_svc: Cross validation f1 micro is 0.7861935434399202.
ovr_svc: Test f1 micro is 0.772005772005772
ovr_rf: Cross validation f1 micro is 0.8463154666053218.
ovr_rf: Test f1 micro is 0.8383838383838383
ovr_nb: Cross validation f1 micro is 0.5601180891035964.
ovr_nb: Test f1 micro is 0.5483405483405484
xgb: Cross validation f1 micro is 0.7651675485008819.
xgb: Test f1 micro is 0.7518037518037518


In [51]:
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))
print('macro avg                              {}'.format(round(f1_score(y_test, test_pred, average='macro'),2)))

              precision    recall  f1-score   support

           0       0.88      0.89      0.88       231
           1       0.84      0.84      0.84       231
           2       0.80      0.79      0.79       231

    accuracy                           0.84       693
   macro avg       0.84      0.84      0.84       693
weighted avg       0.84      0.84      0.84       693

micro avg                              0.84
macro avg                              0.84


#### Odds Ratio for BIC 40

In [None]:
# Coefficients:
#      (Intercept) netspeak_focus money_oriented    workhorse       active
#    1   -0.728999   -0.023476272    0.010378129 -0.009522501 -0.015349323
#    2    2.128925   -0.006137466   -0.003026801 -0.031658883  0.003269262
#      power_driven health_oriented  adjustment  reward_bias     cheerful
#    1  -0.02553222     0.016059395 -0.04908470 -0.006461065  0.008260045
#    2  -0.01451243     0.008371385 -0.01654196  0.006186548 -0.010192141
#      leisure_oriented thinking_style  aggressive    organized self_assured
#    1      -0.01137100   -0.001625862 -0.02968211 -0.008331101  0.006260128
#    2      -0.01194271    0.014451886 -0.03226892 -0.016988143  0.017833144
#      religion_oriented      cautious   impulsive  disciplined     type_a
#    1      -0.004719442  0.0127061385 0.027234667  0.003919399 0.01376712
#    2       0.006997811 -0.0005605821 0.003561902 -0.007219051 0.02124003
#          liberal melancholy neuroticism sexual_focus  food_focus       cold
#    1 -0.01522392 0.02295983 -0.04673314   0.01817473 0.004430683 0.02393643
#    2 -0.02271685 0.02128984 -0.04686367   0.01612098 0.007613981 0.01435010
#      friend_focus agreeableness     generous    genuine self_conscious
#    1  0.004076875  -0.001436249 -0.002730232 0.01069174     0.01880966
#    2  0.008815973  -0.023137742  0.019466417 0.02244011     0.01946617
#      emotionally_aware   artistic   ambitious  adventurous      anxious
#    1      -0.013205394 0.01079969 0.008774397 -0.008834482  0.003593158
#    2      -0.007231397 0.01276577 0.017758021 -0.016618487 -0.005953701
#         stressed work_oriented  depression family_oriented    insecure
#    1 0.002260092   0.001703292 0.014276417     0.009998929 -0.01661206
#    2 0.011207941  -0.004048210 0.001663619     0.006171349 -0.01342986

In [128]:
coefficients_pain_vs_none = [-0.023476272, 0.010378129, -0.009522501, -0.015349323,
                            -0.02553222, 0.016059395, -0.04908470, -0.006461065, 0.008260045,
                            -0.01137100, -0.001625862, -0.02968211, -0.008331101, 0.006260128,
                            -0.004719442, 0.0127061385, 0.027234667, 0.003919399, 0.01376712,
                            -0.01522392, 0.02295983, -0.04673314, 0.01817473, 0.004430683, 0.02393643,
                             0.004076875, -0.001436249, -0.002730232, 0.01069174, 0.01880966,
                            -0.013205394, 0.01079969, 0.008774397, -0.008834482, 0.003593158,
                            0.002260092, 0.001703292, 0.014276417, 0.009998929, -0.01661206]

coefficients_PleasureAddiction_vs_none = [-0.006137466, -0.003026801, -0.031658883, 0.003269262,
                                         -0.01451243, 0.008371385, -0.01654196, 0.006186548, -0.010192141,
                                         -0.01194271, 0.014451886, -0.03226892, -0.016988143, 0.017833144,
                                         0.006997811, -0.0005605821, 0.003561902, -0.007219051, 0.02124003,
                                         -0.02271685, 0.02128984, -0.04686367, 0.01612098, 0.007613981, 0.01435010,
                                         0.008815973, -0.023137742, 0.019466417, 0.02244011, 0.01946617,
                                          -0.007231397, 0.01276577, 0.017758021, -0.016618487, -0.005953701,
                                         0.011207941, -0.004048210, 0.001663619, 0.006171349, -0.01342986]

Feature_nm = ['netspeak_focus', 'money_oriented', 'workhorse', 'active',
             'power_driven', 'health_oriented', 'adjustment', 'reward_bias', 'cheerful',
             'leisure_oriented', 'thinking_style', 'aggressive', 'organized', 'self_assured',
              'religion_oriented', 'cautious', 'impulsive', 'disciplined', 'type_a',
             'liberal', 'melancholy', 'neuroticism', 'sexual_focus', 'food_focus', 'cold',
             'friend_focus', 'agreeableness', 'generous', 'genuine', 'self_conscious',
              'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious',
             'stressed', 'work_oriented', 'depression', 'family_oriented', 'insecure']

In [129]:
print('Odds Ratio')
for coef_pain, coef_pleasreaddiction, feature_name in zip(coefficients_pain_vs_none,
                                                          coefficients_PleasureAddiction_vs_none,
                                                          Feature_nm):
    print(feature_name.upper())
    print('ref: None')
    print('     Pain {}'.format(round(math.exp(coef_pain), 3)))
    print('     Pleasure & Addiction {}'.format(round(math.exp(coef_pleasreaddiction), 3)))

Odds Ratio
NETSPEAK_FOCUS
ref: None
     Pain 0.977
     Pleasure & Addiction 0.994
MONEY_ORIENTED
ref: None
     Pain 1.01
     Pleasure & Addiction 0.997
WORKHORSE
ref: None
     Pain 0.991
     Pleasure & Addiction 0.969
ACTIVE
ref: None
     Pain 0.985
     Pleasure & Addiction 1.003
POWER_DRIVEN
ref: None
     Pain 0.975
     Pleasure & Addiction 0.986
HEALTH_ORIENTED
ref: None
     Pain 1.016
     Pleasure & Addiction 1.008
ADJUSTMENT
ref: None
     Pain 0.952
     Pleasure & Addiction 0.984
REWARD_BIAS
ref: None
     Pain 0.994
     Pleasure & Addiction 1.006
CHEERFUL
ref: None
     Pain 1.008
     Pleasure & Addiction 0.99
LEISURE_ORIENTED
ref: None
     Pain 0.989
     Pleasure & Addiction 0.988
THINKING_STYLE
ref: None
     Pain 0.998
     Pleasure & Addiction 1.015
AGGRESSIVE
ref: None
     Pain 0.971
     Pleasure & Addiction 0.968
ORGANIZED
ref: None
     Pain 0.992
     Pleasure & Addiction 0.983
SELF_ASSURED
ref: None
     Pain 1.006
     Pleasure & Addiction 1.018
RELIG

### BIC 45

In [52]:
BIC_features = ['netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven',
                'health_oriented', 'adjustment', 'reward_bias', 'cheerful', 'leisure_oriented',
                'thinking_style', 'aggressive', 'organized', 'self_assured', 'religion_oriented',
                'cautious', 'impulsive', 'disciplined', 'type_a', 'liberal',
                'melancholy', 'neuroticism', 'sexual_focus', 'food_focus', 'cold',
                'friend_focus', 'agreeableness', 'generous', 'genuine', 'self_conscious',
                'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious',
                'stressed', 'work_oriented', 'depression', 'family_oriented', 'insecure',
                'happiness', 'conscientiousness', 'dutiful', 'persuasive', 'cooperative']

X_train = train[BIC_features]
X_test = test[BIC_features]

In [53]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.6307031669350509.
ovo_lr: Test f1 micro is 0.6305916305916306
ovo_svc: Cross validation f1 micro is 0.794873859366613.
ovo_svc: Test f1 micro is 0.7705627705627706
ovo_rf: Cross validation f1 micro is 0.8358063031976076.
ovo_rf: Test f1 micro is 0.8225108225108225
ovo_nb: Cross validation f1 micro is 0.5675638371290546.
ovo_nb: Test f1 micro is 0.5411255411255411
ovr_lr: Cross validation f1 micro is 0.6356376044781842.
ovr_lr: Test f1 micro is 0.6363636363636364
ovr_svc: Cross validation f1 micro is 0.7905337014032666.
ovr_svc: Test f1 micro is 0.7777777777777778
ovr_rf: Cross validation f1 micro is 0.851909362778928.
ovr_rf: Test f1 micro is 0.8326118326118326
ovr_nb: Cross validation f1 micro is 0.5663024307951844.
ovr_nb: Test f1 micro is 0.5180375180375181
xgb: Cross validation f1 micro is 0.7670539069089793.
xgb: Test f1 micro is 0.7518037518037518


In [54]:
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       0.88      0.87      0.87       231
           1       0.85      0.84      0.84       231
           2       0.78      0.79      0.78       231

    accuracy                           0.83       693
   macro avg       0.83      0.83      0.83       693
weighted avg       0.83      0.83      0.83       693

micro avg                              0.83


### BIC 50

In [55]:
BIC_features = ['netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven',
                'health_oriented', 'adjustment', 'reward_bias', 'cheerful', 'leisure_oriented',
                'thinking_style', 'aggressive', 'organized', 'self_assured', 'religion_oriented',
                'cautious', 'impulsive', 'disciplined', 'type_a', 'liberal',
                'melancholy', 'neuroticism', 'sexual_focus', 'food_focus', 'cold',
                'friend_focus', 'agreeableness', 'generous', 'genuine', 'self_conscious',
                'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious',
                'stressed', 'work_oriented', 'depression', 'family_oriented', 'insecure',
                'happiness', 'conscientiousness', 'dutiful', 'persuasive', 'cooperative',
                'intellectual', 'humble', 'assertive', 'body_focus', 'friendly']

X_train = train[BIC_features]
X_test = test[BIC_features]

In [56]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.6331953071083507.
ovo_lr: Test f1 micro is 0.6392496392496393
ovo_svc: Cross validation f1 micro is 0.7986120696265624.
ovo_svc: Test f1 micro is 0.7734487734487735
ovo_rf: Cross validation f1 micro is 0.8419944789510007.
ovo_rf: Test f1 micro is 0.8268398268398268
ovo_nb: Cross validation f1 micro is 0.5694041867954911.
ovo_nb: Test f1 micro is 0.5367965367965368
ovr_lr: Cross validation f1 micro is 0.6325626869105131.
ovr_lr: Test f1 micro is 0.6378066378066378
ovr_svc: Cross validation f1 micro is 0.7899317536998697.
ovr_svc: Test f1 micro is 0.7835497835497836
ovr_rf: Cross validation f1 micro is 0.8519323671497585.
ovr_rf: Test f1 micro is 0.8412698412698413
ovr_nb: Cross validation f1 micro is 0.5607238708687985.
ovr_nb: Test f1 micro is 0.5223665223665224
xgb: Cross validation f1 micro is 0.7732574189095928.
xgb: Test f1 micro is 0.7575757575757576


In [57]:
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       0.88      0.88      0.88       231
           1       0.85      0.87      0.86       231
           2       0.80      0.77      0.79       231

    accuracy                           0.84       693
   macro avg       0.84      0.84      0.84       693
weighted avg       0.84      0.84      0.84       693

micro avg                              0.84


### Model with Backward Feaure Selection using all 29 AIC features

In [58]:
AIC_features = ['netspeak_focus', 'liberal', 'self_assured', 'organized', 'type_a',
                'cheerful', 'disciplined', 'neuroticism', 'aggressive', 'adjustment',
                'religion_oriented', 'food_focus', 'generous', 'sexual_focus', 'thinking_style',
                'agreeableness', 'money_oriented', 'melancholy', 'workhorse', 'reward_bias',
                'self_conscious', 'leisure_oriented', 'impulsive', 'cautious', 'active',
                'power_driven', 'genuine', 'cold', 'health_oriented']

X_train = train[AIC_features]
X_test = test[AIC_features]

In [59]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.6251629476267156.
ovo_lr: Test f1 micro is 0.6204906204906205
ovo_svc: Cross validation f1 micro is 0.7843685300207038.
ovo_svc: Test f1 micro is 0.7705627705627706
ovo_rf: Cross validation f1 micro is 0.8227819952457633.
ovo_rf: Test f1 micro is 0.8066378066378066
ovo_nb: Cross validation f1 micro is 0.5619967793880838.
ovo_nb: Test f1 micro is 0.5425685425685426
ovr_lr: Cross validation f1 micro is 0.6232765892186182.
ovr_lr: Test f1 micro is 0.6305916305916306
ovr_svc: Cross validation f1 micro is 0.7725864580937044.
ovr_svc: Test f1 micro is 0.7792207792207793
ovr_rf: Cross validation f1 micro is 0.8463154666053218.
ovr_rf: Test f1 micro is 0.8326118326118326
ovr_nb: Cross validation f1 micro is 0.5588835212023617.
ovr_nb: Test f1 micro is 0.5281385281385281
xgb: Cross validation f1 micro is 0.7533893106356875.
xgb: Test f1 micro is 0.7445887445887447


In [60]:
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       0.88      0.87      0.88       231
           1       0.85      0.84      0.84       231
           2       0.77      0.79      0.78       231

    accuracy                           0.83       693
   macro avg       0.83      0.83      0.83       693
weighted avg       0.83      0.83      0.83       693

micro avg                              0.83


### AIC 30

In [61]:
AIC_features = ['netspeak_focus', 'liberal', 'self_assured', 'organized', 'type_a',
                'cheerful', 'disciplined', 'neuroticism', 'aggressive', 'adjustment',
                'religion_oriented', 'food_focus', 'generous', 'sexual_focus', 'thinking_style',
                'agreeableness', 'money_oriented', 'melancholy', 'workhorse', 'reward_bias',
                'self_conscious', 'leisure_oriented', 'impulsive', 'cautious', 'active',
                'power_driven', 'genuine', 'cold', 'health_oriented', 'friend_focus']

X_train = train[AIC_features]
X_test = test[AIC_features]

In [62]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.6232995935894486.
ovo_lr: Test f1 micro is 0.6204906204906205
ovo_svc: Cross validation f1 micro is 0.7855954297983283.
ovo_svc: Test f1 micro is 0.7748917748917749
ovo_rf: Cross validation f1 micro is 0.8252664672954527.
ovo_rf: Test f1 micro is 0.8124098124098124
ovo_nb: Cross validation f1 micro is 0.5626217314623111.
ovo_nb: Test f1 micro is 0.5468975468975469
ovr_lr: Cross validation f1 micro is 0.6214209033049612.
ovr_lr: Test f1 micro is 0.6291486291486291
ovr_svc: Cross validation f1 micro is 0.7738171919331339.
ovr_svc: Test f1 micro is 0.7705627705627706
ovr_rf: Cross validation f1 micro is 0.8475692048155816.
ovr_rf: Test f1 micro is 0.8268398268398268
ovr_nb: Cross validation f1 micro is 0.5588835212023618.
ovr_nb: Test f1 micro is 0.5310245310245311
xgb: Cross validation f1 micro is 0.7552526646729545.
xgb: Test f1 micro is 0.7518037518037518


In [63]:
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       0.87      0.87      0.87       231
           1       0.85      0.83      0.84       231
           2       0.77      0.78      0.78       231

    accuracy                           0.83       693
   macro avg       0.83      0.83      0.83       693
weighted avg       0.83      0.83      0.83       693

micro avg                              0.83


### AIC 35

In [84]:
AIC_features = ['netspeak_focus', 'liberal', 'self_assured', 'organized', 'type_a',
                'cheerful', 'disciplined', 'neuroticism', 'aggressive', 'adjustment',
                'religion_oriented', 'food_focus', 'generous', 'sexual_focus', 'thinking_style',
                'agreeableness', 'money_oriented', 'melancholy', 'workhorse', 'reward_bias',
                'self_conscious', 'leisure_oriented', 'impulsive', 'cautious', 'active',
                'power_driven', 'genuine', 'cold', 'health_oriented', 'friend_focus',
                'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious']

X_train = train[AIC_features]
X_test = test[AIC_features]

In [85]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.629495437466452.
ovo_lr: Test f1 micro is 0.6507936507936508
ovo_svc: Cross validation f1 micro is 0.7992331876389847.
ovo_svc: Test f1 micro is 0.7878787878787878
ovo_rf: Cross validation f1 micro is 0.8376811594202899.
ovo_rf: Test f1 micro is 0.8080808080808081
ovo_nb: Cross validation f1 micro is 0.5632236791657081.
ovo_nb: Test f1 micro is 0.5454545454545454
ovr_lr: Cross validation f1 micro is 0.6325818572195383.
ovr_lr: Test f1 micro is 0.6493506493506493
ovr_svc: Cross validation f1 micro is 0.7849551414768806.
ovr_svc: Test f1 micro is 0.7922077922077922
ovr_rf: Cross validation f1 micro is 0.8382946093091019.
ovr_rf: Test f1 micro is 0.8354978354978355
ovr_nb: Cross validation f1 micro is 0.5619967793880838.
ovr_nb: Test f1 micro is 0.5339105339105339
xgb: Cross validation f1 micro is 0.7627060808220228.
xgb: Test f1 micro is 0.7518037518037518


In [86]:
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       0.87      0.89      0.88       231
           1       0.86      0.83      0.84       231
           2       0.78      0.79      0.79       231

    accuracy                           0.84       693
   macro avg       0.84      0.84      0.84       693
weighted avg       0.84      0.84      0.84       693

micro avg                              0.84


#### Odds Ratio for AIC 35

In [None]:
# Coefficients from R
#          cheerful  disciplined neuroticism  aggressive  adjustment
#    1  0.006926225  0.009932597 -0.04574989 -0.02293918 -0.06273459
#    2 -0.006857462 -0.004668864 -0.04206451 -0.03191554 -0.01608389
#      religion_oriented  food_focus     generous sexual_focus thinking_style
#    1      -0.004750237 0.005284733 -0.005943655   0.01673024    -0.00374206
#    2       0.006412098 0.007953333  0.019830291   0.01599192     0.01466021
#      agreeableness money_oriented melancholy    workhorse  reward_bias
#    1   0.001250636     0.01451598 0.02516431 -0.007254736 -0.007573091
#    2  -0.021897528     0.00103292 0.02203398 -0.031393984  0.007016677
#      self_conscious leisure_oriented   impulsive    cautious       active
#    1     0.01828672     -0.009135593 0.028912001 0.013718917 -0.014244353
#    2     0.01965547     -0.010970023 0.002486877 0.001679266  0.003941147
#      power_driven    genuine        cold health_oriented friend_focus
#    1  -0.02798047 0.01338628 0.017041520     0.015940930  0.004454851
#    2  -0.01633463 0.02384384 0.009642887     0.008663801  0.009402999
#      emotionally_aware    artistic  ambitious  adventurous      anxious
#    1      -0.011733478 0.009289661 0.01134012 -0.005573153  0.002557516
#    2      -0.007777114 0.012479756 0.01476101 -0.014021436 -0.007567554

In [100]:
coefficients_pain_vs_none = [0.006926225, 0.009932597, -0.04574989, -0.02293918, -0.06273459,
                            -0.004750237, 0.005284733, -0.005943655, 0.01673024, -0.00374206,
                            0.001250636, 0.01451598, 0.02516431, -0.007254736, -0.007573091,
                            0.01828672, -0.009135593, 0.028912001, 0.013718917, -0.014244353,
                            -0.02798047, 0.01338628, 0.017041520, 0.015940930, 0.004454851,
                            -0.011733478, 0.009289661, 0.01134012, -0.005573153, 0.002557516]

coefficients_PleasureAddiction_vs_none = [-0.006857462, -0.004668864, -0.04206451, -0.03191554, -0.01608389,
                                         0.006412098, 0.007953333, 0.019830291, 0.01599192, 0.01466021,
                                         -0.021897528, 0.00103292, 0.02203398, -0.031393984, 0.007016677,
                                         0.01965547, -0.010970023, 0.002486877, 0.001679266, 0.003941147,
                                         -0.01633463, 0.02384384, 0.009642887, 0.008663801, 0.009402999,
                                         -0.007777114, 0.012479756, 0.01476101, -0.014021436, -0.007567554]

Feature_nm = ['cheerful', 'disciplined', 'neuroticism',  'aggressive',  'adjustment',
             'religion_oriented', 'food_focus', 'generous', 'sexual_focus', 'thinking_style',
             'agreeableness', 'money_oriented', 'melancholy', 'workhorse', 'reward_bias',
             'self_conscious', 'leisure_oriented', 'impulsive', 'cautious', 'active',
             'power_driven', 'genuine', 'cold', 'health_oriented', 'friend_focus',
             'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious']

In [127]:
print('Odds Ratio')
for coef_pain, coef_pleasreaddiction, feature_name in zip(coefficients_pain_vs_none,
                                                          coefficients_PleasureAddiction_vs_none,
                                                          Feature_nm):
    print(feature_name.upper())
    print('ref: None')
    print('     Pain {}'.format(round(math.exp(coef_pain), 3)))
    print('     Pleasure & Addiction {}'.format(round(math.exp(coef_pleasreaddiction), 3)))

Odds Ratio
CHEERFUL
ref: None
     Pain 1.007
     Pleasure & Addiction 0.993
DISCIPLINED
ref: None
     Pain 1.01
     Pleasure & Addiction 0.995
NEUROTICISM
ref: None
     Pain 0.955
     Pleasure & Addiction 0.959
AGGRESSIVE
ref: None
     Pain 0.977
     Pleasure & Addiction 0.969
ADJUSTMENT
ref: None
     Pain 0.939
     Pleasure & Addiction 0.984
RELIGION_ORIENTED
ref: None
     Pain 0.995
     Pleasure & Addiction 1.006
FOOD_FOCUS
ref: None
     Pain 1.005
     Pleasure & Addiction 1.008
GENEROUS
ref: None
     Pain 0.994
     Pleasure & Addiction 1.02
SEXUAL_FOCUS
ref: None
     Pain 1.017
     Pleasure & Addiction 1.016
THINKING_STYLE
ref: None
     Pain 0.996
     Pleasure & Addiction 1.015
AGREEABLENESS
ref: None
     Pain 1.001
     Pleasure & Addiction 0.978
MONEY_ORIENTED
ref: None
     Pain 1.015
     Pleasure & Addiction 1.001
MELANCHOLY
ref: None
     Pain 1.025
     Pleasure & Addiction 1.022
WORKHORSE
ref: None
     Pain 0.993
     Pleasure & Addiction 0.969
REWARD_B

### AIC 40

In [67]:
AIC_features = ['netspeak_focus', 'liberal', 'self_assured', 'organized', 'type_a',
                'cheerful', 'disciplined', 'neuroticism', 'aggressive', 'adjustment',
                'religion_oriented', 'food_focus', 'generous', 'sexual_focus', 'thinking_style',
                'agreeableness', 'money_oriented', 'melancholy', 'workhorse', 'reward_bias',
                'self_conscious', 'leisure_oriented', 'impulsive', 'cautious', 'active',
                'power_driven', 'genuine', 'cold', 'health_oriented', 'friend_focus',
                'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious',
                'stressed', 'work_oriented', 'depression', 'family_oriented', 'insecure']

X_train = train[AIC_features]
X_test = test[AIC_features]

In [68]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.6424852388620504.
ovo_lr: Test f1 micro is 0.6421356421356421
ovo_svc: Cross validation f1 micro is 0.796733379342075.
ovo_svc: Test f1 micro is 0.7777777777777778
ovo_rf: Cross validation f1 micro is 0.8364350893336401.
ovo_rf: Test f1 micro is 0.8225108225108225
ovo_nb: Cross validation f1 micro is 0.5588796871405568.
ovo_nb: Test f1 micro is 0.5468975468975469
ovr_lr: Cross validation f1 micro is 0.6313434552564987.
ovr_lr: Test f1 micro is 0.6378066378066378
ovr_svc: Cross validation f1 micro is 0.7861935434399202.
ovr_svc: Test f1 micro is 0.772005772005772
ovr_rf: Cross validation f1 micro is 0.8469519208649643.
ovr_rf: Test f1 micro is 0.8326118326118326
ovr_nb: Cross validation f1 micro is 0.5601180891035964.
ovr_nb: Test f1 micro is 0.5483405483405484
xgb: Cross validation f1 micro is 0.7651675485008819.
xgb: Test f1 micro is 0.7474747474747475


In [69]:
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       0.88      0.87      0.88       231
           1       0.84      0.84      0.84       231
           2       0.77      0.78      0.78       231

    accuracy                           0.83       693
   macro avg       0.83      0.83      0.83       693
weighted avg       0.83      0.83      0.83       693

micro avg                              0.83


### AIC 45

In [70]:
AIC_features = ['netspeak_focus', 'liberal', 'self_assured', 'organized', 'type_a',
                'cheerful', 'disciplined', 'neuroticism', 'aggressive', 'adjustment',
                'religion_oriented', 'food_focus', 'generous', 'sexual_focus', 'thinking_style',
                'agreeableness', 'money_oriented', 'melancholy', 'workhorse', 'reward_bias',
                'self_conscious', 'leisure_oriented', 'impulsive', 'cautious', 'active',
                'power_driven', 'genuine', 'cold', 'health_oriented', 'friend_focus',
                'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious',
                'stressed', 'work_oriented', 'depression', 'family_oriented', 'insecure',
                'happiness', 'conscientiousness', 'dutiful', 'persuasive', 'cooperative']

X_train = train[AIC_features]
X_test = test[AIC_features]

In [71]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.6307031669350509.
ovo_lr: Test f1 micro is 0.6305916305916306
ovo_svc: Cross validation f1 micro is 0.794873859366613.
ovo_svc: Test f1 micro is 0.7705627705627706
ovo_rf: Cross validation f1 micro is 0.8314776474196763.
ovo_rf: Test f1 micro is 0.8253968253968254
ovo_nb: Cross validation f1 micro is 0.5675638371290546.
ovo_nb: Test f1 micro is 0.5411255411255411
ovr_lr: Cross validation f1 micro is 0.6356376044781842.
ovr_lr: Test f1 micro is 0.6363636363636364
ovr_svc: Cross validation f1 micro is 0.7905337014032666.
ovr_svc: Test f1 micro is 0.7777777777777778
ovr_rf: Cross validation f1 micro is 0.8562495207422742.
ovr_rf: Test f1 micro is 0.8282828282828283
ovr_nb: Cross validation f1 micro is 0.5663024307951844.
ovr_nb: Test f1 micro is 0.5180375180375181
xgb: Cross validation f1 micro is 0.7670539069089793.
xgb: Test f1 micro is 0.7503607503607503


In [72]:
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       0.87      0.88      0.88       231
           1       0.83      0.84      0.84       231
           2       0.78      0.77      0.77       231

    accuracy                           0.83       693
   macro avg       0.83      0.83      0.83       693
weighted avg       0.83      0.83      0.83       693

micro avg                              0.83


### AIC 50

In [73]:
AIC_features = ['netspeak_focus', 'liberal', 'self_assured', 'organized', 'type_a',
                'cheerful', 'disciplined', 'neuroticism', 'aggressive', 'adjustment',
                'religion_oriented', 'food_focus', 'generous', 'sexual_focus', 'thinking_style',
                'agreeableness', 'money_oriented', 'melancholy', 'workhorse', 'reward_bias',
                'self_conscious', 'leisure_oriented', 'impulsive', 'cautious', 'active',
                'power_driven', 'genuine', 'cold', 'health_oriented', 'friend_focus',
                'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious',
                'stressed', 'work_oriented', 'depression', 'family_oriented', 'insecure',
                'happiness', 'conscientiousness', 'dutiful', 'persuasive', 'cooperative',
                'intellectual', 'humble', 'assertive', 'body_focus', 'friendly']

X_train = train[AIC_features]
X_test = test[AIC_features]

In [74]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.6331953071083507.
ovo_lr: Test f1 micro is 0.6392496392496393
ovo_svc: Cross validation f1 micro is 0.7986120696265624.
ovo_svc: Test f1 micro is 0.7734487734487735
ovo_rf: Cross validation f1 micro is 0.8413810290621886.
ovo_rf: Test f1 micro is 0.8196248196248195
ovo_nb: Cross validation f1 micro is 0.5694041867954911.
ovo_nb: Test f1 micro is 0.5367965367965368
ovr_lr: Cross validation f1 micro is 0.6325626869105131.
ovr_lr: Test f1 micro is 0.6378066378066378
ovr_svc: Cross validation f1 micro is 0.7899317536998697.
ovr_svc: Test f1 micro is 0.7835497835497836
ovr_rf: Cross validation f1 micro is 0.8513035810137259.
ovr_rf: Test f1 micro is 0.8427128427128429
ovr_nb: Cross validation f1 micro is 0.5607238708687985.
ovr_nb: Test f1 micro is 0.5223665223665224
xgb: Cross validation f1 micro is 0.7726363008971704.
xgb: Test f1 micro is 0.7633477633477633


In [76]:
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       0.90      0.89      0.89       231
           1       0.83      0.86      0.84       231
           2       0.80      0.78      0.79       231

    accuracy                           0.84       693
   macro avg       0.84      0.84      0.84       693
weighted avg       0.84      0.84      0.84       693

micro avg                              0.84


### F Test at p <= 0.01 Cutoff

In [77]:
features = ['netspeak_focus', 'cheerful', 'cooperative', 'aggressive', 'religion_oriented',
            'depression', 'ambitious', 'self_conscious', 'active', 'power_driven',
            'genuine', 'anxious', 'health_oriented', 'work_oriented']

X_train = train[features]
X_test = test[features]

In [78]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.5929031515988037.
ovo_lr: Test f1 micro is 0.5873015873015873
ovo_svc: Cross validation f1 micro is 0.7143777317690362.
ovo_svc: Test f1 micro is 0.7085137085137087
ovo_rf: Cross validation f1 micro is 0.8023311095774865.
ovo_rf: Test f1 micro is 0.7950937950937951
ovo_nb: Cross validation f1 micro is 0.5564143853998927.
ovo_nb: Test f1 micro is 0.5440115440115441
ovr_lr: Cross validation f1 micro is 0.5997124453646192.
ovr_lr: Test f1 micro is 0.5800865800865801
ovr_svc: Cross validation f1 micro is 0.7131009891879456.
ovr_svc: Test f1 micro is 0.696969696969697
ovr_rf: Cross validation f1 micro is 0.8159880377271682.
ovr_rf: Test f1 micro is 0.7994227994227994
ovr_nb: Cross validation f1 micro is 0.5502070393374741.
ovr_nb: Test f1 micro is 0.5411255411255411
xgb: Cross validation f1 micro is 0.7100375738056897.
xgb: Test f1 micro is 0.7056277056277056


In [79]:
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       0.83      0.83      0.83       231
           1       0.81      0.82      0.81       231
           2       0.76      0.75      0.76       231

    accuracy                           0.80       693
   macro avg       0.80      0.80      0.80       693
weighted avg       0.80      0.80      0.80       693

micro avg                              0.8


In [80]:
# training set performance
train_pred = modfit.predict(X_train)
print(classification_report(y_train, train_pred))
print('micro avg                              {}'.format(round(f1_score(y_train, train_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       538
           1       1.00      1.00      1.00       538
           2       1.00      1.00      1.00       538

    accuracy                           1.00      1614
   macro avg       1.00      1.00      1.00      1614
weighted avg       1.00      1.00      1.00      1614

micro avg                              1.0


### F Test at p <= 0.001 Cutoff

In [81]:
features = ['netspeak_focus', 'active', 'power_driven', 'health_oriented']

X_train = train[features]
X_test = test[features]

In [82]:
for mod, modname in zip([onevsone_lr, onevsone_svc, onevsone_rf, onevsone_nb, onevsrest_lr, onevsrest_svc, onevsrest_rf,
                         onevsrest_nb, xgb],
                        ['ovo_lr', 'ovo_svc', 'ovo_rf', 'ovo_nb', 'ovr_lr', 'ovr_svc', 'ovr_rf', 'ovr_nb', 'xgb']):
    
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))
    
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.569396518671881.
ovo_lr: Test f1 micro is 0.5454545454545454
ovo_svc: Cross validation f1 micro is 0.5854957441913964.
ovo_svc: Test f1 micro is 0.5468975468975469
ovo_rf: Cross validation f1 micro is 0.6338240932443833.
ovo_rf: Test f1 micro is 0.6277056277056277
ovo_nb: Cross validation f1 micro is 0.5408672647803081.
ovo_nb: Test f1 micro is 0.4935064935064935
ovr_lr: Cross validation f1 micro is 0.5743347902768192.
ovr_lr: Test f1 micro is 0.5382395382395382
ovr_svc: Cross validation f1 micro is 0.58240932443831.
ovr_svc: Test f1 micro is 0.5512265512265512
ovr_rf: Cross validation f1 micro is 0.6406410551338088.
ovr_rf: Test f1 micro is 0.645021645021645
ovr_nb: Cross validation f1 micro is 0.5439728548424201.
ovr_nb: Test f1 micro is 0.4935064935064935
xgb: Cross validation f1 micro is 0.6078598267004065.
xgb: Test f1 micro is 0.5844155844155844


In [83]:
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
print('micro avg                              {}'.format(round(f1_score(y_test, test_pred, average='micro'),2)))

              precision    recall  f1-score   support

           0       0.64      0.71      0.67       231
           1       0.67      0.66      0.67       231
           2       0.63      0.56      0.59       231

    accuracy                           0.65       693
   macro avg       0.64      0.65      0.64       693
weighted avg       0.64      0.65      0.64       693

micro avg                              0.65
