In [2]:
import numpy as np
import math
import pandas as pd
from pandas import crosstab
import statsmodels.api as sm
import scipy.stats as stats
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import roc_curve, roc_auc_score, auc, classification_report, confusion_matrix, accuracy_score, f1_score, recall_score, precision_score, precision_recall_curve
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, cross_val_score, KFold, StratifiedKFold, cross_validate, train_test_split
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier, OutputCodeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from xgboost.sklearn import XGBClassifier
import xgboost as xgb
from collections import Counter
from imblearn.over_sampling import SMOTE

# Raise pandas' display limits so frames are shown with up to 60 columns and
# 70 rows before being truncated in cell output.
pd.set_option('display.max_columns', 60)
pd.set_option('display.max_rows', 70)

Using TensorFlow backend.


In [3]:
# Input CSV location. The path is machine-specific, so it lives in one named
# constant that is easy to spot and change.
DATA_PATH = r'C:\Users\1130j\Desktop\Yale\Data\all_recent_tweets_text_score_8_parsed_noDup.csv'
df = pd.read_csv(DATA_PATH)

In [4]:
# Drop the 'awareness' rows, then encode the remaining class names as integers:
#   0 = None, 1 = pain, 2 = pleasure or addiction (merged into a single class).
# .copy() detaches the filtered frame from the original, so the .loc
# assignments below write to an independent frame instead of a possible
# view/copy of the unfiltered data (avoids pandas' SettingWithCopyWarning).
df = df.loc[df['classname'] != 'awareness'].copy()
df.loc[df['classname'] == 'None', 'class'] = 0
df.loc[df['classname'] == 'pain', 'class'] = 1
df.loc[df['classname'].isin(['pleasure', 'addiction']), 'class'] = 2

### Group Pleasure and Addiction together

In [5]:
# Sanity check: each integer class should line up with the class names it was built from.
pd.crosstab(index=df['class'], columns=df['classname'])

classname,None,addiction,pain,pleasure
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,166,0,0,0
1,0,0,229,0
2,0,197,0,572


### Define X and Y

In [6]:
# Everything except the identifier and label columns is treated as a feature.
non_feature_cols = ['screen_name', 'user_id', 'classname', 'class']
feature_cols = df.columns.difference(non_feature_cols)
X = df[feature_cols]
y = df['class']

### Train Test Split

In [7]:
# 70/30 split of the full frame, stratified on the encoded class so both
# splits keep the same class proportions; seeded for reproducibility.
train, test = train_test_split(
    df,
    test_size=0.3,
    stratify=df['class'],
    random_state=0,
)

In [8]:
# Separate each split into its feature matrix (all columns except the
# identifier and label columns) and its target vector.
excluded_cols = ['screen_name', 'user_id', 'classname', 'class']
X_train, y_train = train[train.columns.difference(excluded_cols)], train['class']
X_test, y_test = test[test.columns.difference(excluded_cols)], test['class']

In [9]:
# Class distribution of the training target (the split was stratified on 'class').
y_train.value_counts()

2    538
1    160
0    116
Name: class, dtype: int64

In [10]:
# Class distribution of the test target (the split was stratified on 'class').
y_test.value_counts()

2    231
1     69
0     50
Name: class, dtype: int64

In [11]:
#### Apply weights

In [45]:
# Base estimators reused by every experiment below. Class imbalance is handled
# through class_weight='balanced' on the estimators that support it.
lr = LogisticRegression(solver='liblinear', random_state=0, class_weight='balanced')
svc = SVC(random_state=0, class_weight='balanced')
rf = RandomForestClassifier(random_state=0, class_weight='balanced')
# XGBClassifier has no `class_weight` parameter, so passing it had no weighting
# effect; it is omitted here. Class weighting for XGBoost has to be supplied as
# per-row `sample_weight` at fit time instead.
# NOTE: this name shadows the `xgboost as xgb` module alias from the import
# cell; it is kept because the later cells refer to the classifier as `xgb`.
xgb = XGBClassifier(objective='multi:softmax', random_state=0)
# GaussianNB is used with its defaults (no class weighting is passed).
nb = GaussianNB()

### Baseline Model

In [46]:
# Baseline: wrap each base estimator in a one-vs-one and a one-vs-rest
# meta-classifier and fit it on the full feature set.
# (An earlier variant, left disabled, used manual class weights
# {0: 3, 1: 3, 2: 1} with solver='newton-cg' for lr/svc, and an XGBoost model
# with num_class=4; XGBoost is not wrapped in one-vs-one here.)

# --- one-vs-one ---
onevsone_lr = OneVsOneClassifier(lr)
onevsone_lr.fit(X_train, y_train)
onevsone_svc = OneVsOneClassifier(svc)
onevsone_svc.fit(X_train, y_train)
onevsone_rf = OneVsOneClassifier(rf)
onevsone_rf.fit(X_train, y_train)
onevsone_nb = OneVsOneClassifier(nb)
onevsone_nb.fit(X_train, y_train)

# --- one-vs-rest ---
onevsrest_lr = OneVsRestClassifier(lr)
onevsrest_lr.fit(X_train, y_train)
onevsrest_svc = OneVsRestClassifier(svc)
onevsrest_svc.fit(X_train, y_train)
onevsrest_rf = OneVsRestClassifier(rf)
onevsrest_rf.fit(X_train, y_train)
onevsrest_nb = OneVsRestClassifier(nb)
onevsrest_nb.fit(X_train, y_train)

In [47]:
# Score every candidate on the current X_train / X_test: mean 10-fold
# cross-validated micro-F1 on the training split, then micro-F1 on the
# held-out test split after refitting on the whole training split.
candidates = [
    ('ovo_lr', onevsone_lr), ('ovo_svc', onevsone_svc),
    ('ovo_rf', onevsone_rf), ('ovo_nb', onevsone_nb),
    ('ovr_lr', onevsrest_lr), ('ovr_svc', onevsrest_svc),
    ('ovr_rf', onevsrest_rf), ('ovr_nb', onevsrest_nb),
    ('xgb', xgb),
]
for modname, mod in candidates:
    score = cross_val_score(mod, X_train, y_train, scoring='f1_micro', cv=10)
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    test_f1 = f1_score(y_test, test_pred, average='micro')
    print('{}: Test f1 micro is {}'.format(modname, test_f1))

ovo_lr: Cross validation f1 micro is 0.5564890093345378.
ovo_lr: Test f1 micro is 0.49714285714285716
ovo_svc: Cross validation f1 micro is 0.6165612767238784.
ovo_svc: Test f1 micro is 0.5714285714285714
ovo_rf: Cross validation f1 micro is 0.6732460102378802.
ovo_rf: Test f1 micro is 0.6742857142857143
ovo_nb: Cross validation f1 micro is 0.610328214393255.
ovo_nb: Test f1 micro is 0.6114285714285714
ovr_lr: Cross validation f1 micro is 0.5809846431797652.
ovr_lr: Test f1 micro is 0.5371428571428571
ovr_svc: Cross validation f1 micro is 0.6079795242396868.
ovr_svc: Test f1 micro is 0.6028571428571429
ovr_rf: Cross validation f1 micro is 0.6842667871123156.
ovr_rf: Test f1 micro is 0.6714285714285714
ovr_nb: Cross validation f1 micro is 0.6238482384823849.
ovr_nb: Test f1 micro is 0.6085714285714285
xgb: Cross validation f1 micro is 0.6682174043962663.
xgb: Test f1 micro is 0.6885714285714286


In [48]:
# Refit the chosen model (XGBoost) on the current training features and print
# its per-class report on the test split, followed by the micro-averaged F1.
mod = xgb
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
report = classification_report(y_test, test_pred)
micro_f1 = round(f1_score(y_test, test_pred, average='micro'), 2)
print(report)
print('micro avg                              {}'.format(micro_f1))

              precision    recall  f1-score   support

           0       0.44      0.24      0.31        50
           1       0.44      0.26      0.33        69
           2       0.75      0.91      0.82       231

    accuracy                           0.69       350
   macro avg       0.54      0.47      0.49       350
weighted avg       0.64      0.69      0.65       350

micro avg                              0.69


### BIC 6

In [49]:
# Restrict both splits to the 6-feature subset
# (presumably the features selected by BIC, per the section heading - confirm).
BIC_features = [
    'netspeak_focus', 'money_oriented', 'workhorse',
    'active', 'power_driven', 'health_oriented',
]

X_train, X_test = train[BIC_features], test[BIC_features]

In [50]:
# Score every candidate on the current X_train / X_test: mean 10-fold
# cross-validated micro-F1 on the training split, then micro-F1 on the
# held-out test split after refitting on the whole training split.
candidates = [
    ('ovo_lr', onevsone_lr), ('ovo_svc', onevsone_svc),
    ('ovo_rf', onevsone_rf), ('ovo_nb', onevsone_nb),
    ('ovr_lr', onevsrest_lr), ('ovr_svc', onevsrest_svc),
    ('ovr_rf', onevsrest_rf), ('ovr_nb', onevsrest_nb),
    ('xgb', xgb),
]
for modname, mod in candidates:
    score = cross_val_score(mod, X_train, y_train, scoring='f1_micro', cv=10)
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    test_f1 = f1_score(y_test, test_pred, average='micro')
    print('{}: Test f1 micro is {}'.format(modname, test_f1))

ovo_lr: Cross validation f1 micro is 0.5759861487503765.
ovo_lr: Test f1 micro is 0.5028571428571429
ovo_svc: Cross validation f1 micro is 0.5636404697380307.
ovo_svc: Test f1 micro is 0.5028571428571429
ovo_rf: Cross validation f1 micro is 0.6793134598012646.
ovo_rf: Test f1 micro is 0.6371428571428571
ovo_nb: Cross validation f1 micro is 0.6483739837398373.
ovo_nb: Test f1 micro is 0.6228571428571429
ovr_lr: Cross validation f1 micro is 0.5895362842517314.
ovr_lr: Test f1 micro is 0.5314285714285715
ovr_svc: Cross validation f1 micro is 0.5809545317675399.
ovr_svc: Test f1 micro is 0.5228571428571429
ovr_rf: Cross validation f1 micro is 0.6596356519120746.
ovr_rf: Test f1 micro is 0.6457142857142857
ovr_nb: Cross validation f1 micro is 0.6532670882264378.
ovr_nb: Test f1 micro is 0.62
xgb: Cross validation f1 micro is 0.6719662752183078.
xgb: Test f1 micro is 0.6571428571428571


In [51]:
# Refit the chosen model (XGBoost) on the current training features and print
# its per-class report on the test split, followed by the micro-averaged F1.
mod = xgb
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
report = classification_report(y_test, test_pred)
micro_f1 = round(f1_score(y_test, test_pred, average='micro'), 2)
print(report)
print('micro avg                              {}'.format(micro_f1))

              precision    recall  f1-score   support

           0       0.44      0.14      0.21        50
           1       0.38      0.28      0.32        69
           2       0.72      0.88      0.79       231

    accuracy                           0.66       350
   macro avg       0.51      0.43      0.44       350
weighted avg       0.61      0.66      0.62       350

micro avg                              0.66


### BIC 10

In [52]:
# Restrict both splits to the 10-feature subset
# (presumably the features selected by BIC, per the section heading - confirm).
BIC_features = [
    'netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven',
    'health_oriented', 'adjustment', 'reward_bias', 'cheerful', 'leisure_oriented',
]

X_train, X_test = train[BIC_features], test[BIC_features]

In [53]:
# import math
# print('odds ratio    {}'.format(math.exp()) )
# print('odds ratio    {}'.format(math.exp())  )
# print('odds ratio    {}'.format(math.exp()) )
# print('odds ratio    {}'.format(math.exp()) )
# print('odds ratio    {}'.format(math.exp())  )

In [54]:
# Score every candidate on the current X_train / X_test: mean 10-fold
# cross-validated micro-F1 on the training split, then micro-F1 on the
# held-out test split after refitting on the whole training split.
candidates = [
    ('ovo_lr', onevsone_lr), ('ovo_svc', onevsone_svc),
    ('ovo_rf', onevsone_rf), ('ovo_nb', onevsone_nb),
    ('ovr_lr', onevsrest_lr), ('ovr_svc', onevsrest_svc),
    ('ovr_rf', onevsrest_rf), ('ovr_nb', onevsrest_nb),
    ('xgb', xgb),
]
for modname, mod in candidates:
    score = cross_val_score(mod, X_train, y_train, scoring='f1_micro', cv=10)
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    test_f1 = f1_score(y_test, test_pred, average='micro')
    print('{}: Test f1 micro is {}'.format(modname, test_f1))

ovo_lr: Cross validation f1 micro is 0.5846281240590183.
ovo_lr: Test f1 micro is 0.5228571428571429
ovo_svc: Cross validation f1 micro is 0.5845829569406805.
ovo_svc: Test f1 micro is 0.5371428571428571
ovo_rf: Cross validation f1 micro is 0.6768593797049082.
ovo_rf: Test f1 micro is 0.6571428571428571
ovo_nb: Cross validation f1 micro is 0.571017765733213.
ovo_nb: Test f1 micro is 0.5171428571428571
ovr_lr: Cross validation f1 micro is 0.590770852152966.
ovr_lr: Test f1 micro is 0.5457142857142857
ovr_svc: Cross validation f1 micro is 0.6055103884372177.
ovr_svc: Test f1 micro is 0.5428571428571428
ovr_rf: Cross validation f1 micro is 0.6805329719963866.
ovr_rf: Test f1 micro is 0.6514285714285715
ovr_nb: Cross validation f1 micro is 0.6312556458897922.
ovr_nb: Test f1 micro is 0.5914285714285714
xgb: Cross validation f1 micro is 0.6792682926829269.
xgb: Test f1 micro is 0.64


In [55]:
# Refit the chosen model (one-vs-one random forest) on the current training
# features and print its per-class report on the test split, followed by the
# micro-averaged F1.
mod = onevsone_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
report = classification_report(y_test, test_pred)
micro_f1 = round(f1_score(y_test, test_pred, average='micro'), 2)
print(report)
print('micro avg                              {}'.format(micro_f1))

              precision    recall  f1-score   support

           0       0.30      0.06      0.10        50
           1       0.37      0.22      0.27        69
           2       0.71      0.92      0.80       231

    accuracy                           0.66       350
   macro avg       0.46      0.40      0.39       350
weighted avg       0.58      0.66      0.60       350

micro avg                              0.66


### BIC 15

In [56]:
# Restrict both splits to the 15-feature subset
# (presumably the features selected by BIC, per the section heading - confirm).
BIC_features = [
    'netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven',
    'health_oriented', 'adjustment', 'reward_bias', 'cheerful', 'leisure_oriented',
    'thinking_style', 'aggressive', 'organized', 'self_assured', 'religion_oriented',
]

X_train, X_test = train[BIC_features], test[BIC_features]

In [57]:
# Score every candidate on the current X_train / X_test: mean 10-fold
# cross-validated micro-F1 on the training split, then micro-F1 on the
# held-out test split after refitting on the whole training split.
candidates = [
    ('ovo_lr', onevsone_lr), ('ovo_svc', onevsone_svc),
    ('ovo_rf', onevsone_rf), ('ovo_nb', onevsone_nb),
    ('ovr_lr', onevsrest_lr), ('ovr_svc', onevsrest_svc),
    ('ovr_rf', onevsrest_rf), ('ovr_nb', onevsrest_nb),
    ('xgb', xgb),
]
for modname, mod in candidates:
    score = cross_val_score(mod, X_train, y_train, scoring='f1_micro', cv=10)
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    test_f1 = f1_score(y_test, test_pred, average='micro')
    print('{}: Test f1 micro is {}'.format(modname, test_f1))

ovo_lr: Cross validation f1 micro is 0.5809846431797651.
ovo_lr: Test f1 micro is 0.5342857142857143
ovo_svc: Cross validation f1 micro is 0.6190153568202349.
ovo_svc: Test f1 micro is 0.5742857142857143
ovo_rf: Cross validation f1 micro is 0.6719662752183078.
ovo_rf: Test f1 micro is 0.6685714285714286
ovo_nb: Cross validation f1 micro is 0.587037037037037.
ovo_nb: Test f1 micro is 0.5828571428571429
ovr_lr: Cross validation f1 micro is 0.5932550436615477.
ovr_lr: Test f1 micro is 0.5457142857142857
ovr_svc: Cross validation f1 micro is 0.6189701897018971.
ovr_svc: Test f1 micro is 0.5742857142857143
ovr_rf: Cross validation f1 micro is 0.6792833483890395.
ovr_rf: Test f1 micro is 0.66
ovr_nb: Cross validation f1 micro is 0.6324299909665763.
ovr_nb: Test f1 micro is 0.6057142857142858
xgb: Cross validation f1 micro is 0.6559620596205963.
xgb: Test f1 micro is 0.6514285714285715


In [61]:
# Refit the chosen model (one-vs-one random forest) on the current training
# features and print its per-class report on the test split, followed by the
# micro-averaged F1.
mod = onevsone_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
report = classification_report(y_test, test_pred)
micro_f1 = round(f1_score(y_test, test_pred, average='micro'), 2)
print(report)
print('micro avg                              {}'.format(micro_f1))

              precision    recall  f1-score   support

           0       0.43      0.12      0.19        50
           1       0.38      0.19      0.25        69
           2       0.71      0.93      0.81       231

    accuracy                           0.67       350
   macro avg       0.51      0.41      0.42       350
weighted avg       0.61      0.67      0.61       350

micro avg                              0.67


### BIC 20

In [62]:
# Restrict both splits to the 20-feature subset
# (presumably the features selected by BIC, per the section heading - confirm).
BIC_features = [
    'netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven',
    'health_oriented', 'adjustment', 'reward_bias', 'cheerful', 'leisure_oriented',
    'thinking_style', 'aggressive', 'organized', 'self_assured', 'religion_oriented',
    'cautious', 'impulsive', 'disciplined', 'type_a', 'liberal',
]

X_train, X_test = train[BIC_features], test[BIC_features]

In [63]:
# Score every candidate on the current X_train / X_test: mean 10-fold
# cross-validated micro-F1 on the training split, then micro-F1 on the
# held-out test split after refitting on the whole training split.
candidates = [
    ('ovo_lr', onevsone_lr), ('ovo_svc', onevsone_svc),
    ('ovo_rf', onevsone_rf), ('ovo_nb', onevsone_nb),
    ('ovr_lr', onevsrest_lr), ('ovr_svc', onevsrest_svc),
    ('ovr_rf', onevsrest_rf), ('ovr_nb', onevsrest_nb),
    ('xgb', xgb),
]
for modname, mod in candidates:
    score = cross_val_score(mod, X_train, y_train, scoring='f1_micro', cv=10)
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    test_f1 = f1_score(y_test, test_pred, average='micro')
    print('{}: Test f1 micro is {}'.format(modname, test_f1))

ovo_lr: Cross validation f1 micro is 0.5834989461005721.
ovo_lr: Test f1 micro is 0.54
ovo_svc: Cross validation f1 micro is 0.6152815417043059.
ovo_svc: Test f1 micro is 0.58
ovo_rf: Cross validation f1 micro is 0.6768292682926829.
ovo_rf: Test f1 micro is 0.6771428571428572
ovo_nb: Cross validation f1 micro is 0.6054049984944294.
ovo_nb: Test f1 micro is 0.6
ovr_lr: Cross validation f1 micro is 0.5982384823848238.
ovr_lr: Test f1 micro is 0.5628571428571428
ovr_svc: Cross validation f1 micro is 0.6423215898825655.
ovr_svc: Test f1 micro is 0.5885714285714285
ovr_rf: Cross validation f1 micro is 0.6817976513098463.
ovr_rf: Test f1 micro is 0.6657142857142857
ovr_nb: Cross validation f1 micro is 0.6349292381812707.
ovr_nb: Test f1 micro is 0.6257142857142857
xgb: Cross validation f1 micro is 0.6719060523938571.
xgb: Test f1 micro is 0.66


In [64]:
# Refit the chosen model (one-vs-one random forest) on the current training
# features and print its per-class report on the test split, followed by the
# micro-averaged F1.
mod = onevsone_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
report = classification_report(y_test, test_pred)
micro_f1 = round(f1_score(y_test, test_pred, average='micro'), 2)
print(report)
print('micro avg                              {}'.format(micro_f1))

              precision    recall  f1-score   support

           0       0.54      0.14      0.22        50
           1       0.41      0.19      0.26        69
           2       0.71      0.94      0.81       231

    accuracy                           0.68       350
   macro avg       0.55      0.42      0.43       350
weighted avg       0.63      0.68      0.62       350

micro avg                              0.68


### BIC 25

In [65]:
# Restrict both splits to the 25-feature subset
# (presumably the features selected by BIC, per the section heading - confirm).
BIC_features = [
    'netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven',
    'health_oriented', 'adjustment', 'reward_bias', 'cheerful', 'leisure_oriented',
    'thinking_style', 'aggressive', 'organized', 'self_assured', 'religion_oriented',
    'cautious', 'impulsive', 'disciplined', 'type_a', 'liberal',
    'melancholy', 'neuroticism', 'sexual_focus', 'food_focus', 'cold',
]

X_train, X_test = train[BIC_features], test[BIC_features]

In [66]:
# Score every candidate on the current X_train / X_test: mean 10-fold
# cross-validated micro-F1 on the training split, then micro-F1 on the
# held-out test split after refitting on the whole training split.
candidates = [
    ('ovo_lr', onevsone_lr), ('ovo_svc', onevsone_svc),
    ('ovo_rf', onevsone_rf), ('ovo_nb', onevsone_nb),
    ('ovr_lr', onevsrest_lr), ('ovr_svc', onevsrest_svc),
    ('ovr_rf', onevsrest_rf), ('ovr_nb', onevsrest_nb),
    ('xgb', xgb),
]
for modname, mod in candidates:
    score = cross_val_score(mod, X_train, y_train, scoring='f1_micro', cv=10)
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    test_f1 = f1_score(y_test, test_pred, average='micro')
    print('{}: Test f1 micro is {}'.format(modname, test_f1))

ovo_lr: Cross validation f1 micro is 0.572463113520024.
ovo_lr: Test f1 micro is 0.5342857142857143
ovo_svc: Cross validation f1 micro is 0.6128575730201746.
ovo_svc: Test f1 micro is 0.5828571428571429
ovo_rf: Cross validation f1 micro is 0.6830021077988556.
ovo_rf: Test f1 micro is 0.6714285714285714
ovo_nb: Cross validation f1 micro is 0.622583559168925.
ovo_nb: Test f1 micro is 0.6114285714285714
ovr_lr: Cross validation f1 micro is 0.5760614272809395.
ovr_lr: Test f1 micro is 0.5628571428571428
ovr_svc: Cross validation f1 micro is 0.6239084613068353.
ovr_svc: Test f1 micro is 0.6
ovr_rf: Cross validation f1 micro is 0.6805630834086118.
ovr_rf: Test f1 micro is 0.6714285714285714
ovr_nb: Cross validation f1 micro is 0.6349292381812707.
ovr_nb: Test f1 micro is 0.6285714285714286
xgb: Cross validation f1 micro is 0.6583860283047275.
xgb: Test f1 micro is 0.6628571428571428


In [68]:
# Refit the chosen model (one-vs-one random forest) on the current training
# features and print its per-class report on the test split, followed by the
# micro-averaged F1.
mod = onevsone_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
report = classification_report(y_test, test_pred)
micro_f1 = round(f1_score(y_test, test_pred, average='micro'), 2)
print(report)
print('micro avg                              {}'.format(micro_f1))

              precision    recall  f1-score   support

           0       0.42      0.10      0.16        50
           1       0.45      0.20      0.28        69
           2       0.70      0.94      0.80       231

    accuracy                           0.67       350
   macro avg       0.52      0.41      0.41       350
weighted avg       0.61      0.67      0.61       350

micro avg                              0.67


### BIC 30

In [69]:
# Restrict both splits to the 30-feature subset
# (presumably the features selected by BIC, per the section heading - confirm).
BIC_features = [
    'netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven',
    'health_oriented', 'adjustment', 'reward_bias', 'cheerful', 'leisure_oriented',
    'thinking_style', 'aggressive', 'organized', 'self_assured', 'religion_oriented',
    'cautious', 'impulsive', 'disciplined', 'type_a', 'liberal',
    'melancholy', 'neuroticism', 'sexual_focus', 'food_focus', 'cold',
    'friend_focus', 'agreeableness', 'generous', 'genuine', 'self_conscious',
]

X_train, X_test = train[BIC_features], test[BIC_features]

In [70]:
# Score every candidate on the current X_train / X_test: mean 10-fold
# cross-validated micro-F1 on the training split, then micro-F1 on the
# held-out test split after refitting on the whole training split.
candidates = [
    ('ovo_lr', onevsone_lr), ('ovo_svc', onevsone_svc),
    ('ovo_rf', onevsone_rf), ('ovo_nb', onevsone_nb),
    ('ovr_lr', onevsrest_lr), ('ovr_svc', onevsrest_svc),
    ('ovr_rf', onevsrest_rf), ('ovr_nb', onevsrest_nb),
    ('xgb', xgb),
]
for modname, mod in candidates:
    score = cross_val_score(mod, X_train, y_train, scoring='f1_micro', cv=10)
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    test_f1 = f1_score(y_test, test_pred, average='micro')
    print('{}: Test f1 micro is {}'.format(modname, test_f1))

ovo_lr: Cross validation f1 micro is 0.5748870822041554.
ovo_lr: Test f1 micro is 0.5428571428571428
ovo_svc: Cross validation f1 micro is 0.6140770852152967.
ovo_svc: Test f1 micro is 0.58
ovo_rf: Cross validation f1 micro is 0.6854110207768744.
ovo_rf: Test f1 micro is 0.6742857142857143
ovo_nb: Cross validation f1 micro is 0.6090484793736827.
ovo_nb: Test f1 micro is 0.6257142857142857
ovr_lr: Cross validation f1 micro is 0.5896567299006323.
ovr_lr: Test f1 micro is 0.5828571428571429
ovr_svc: Cross validation f1 micro is 0.6349744052996085.
ovr_svc: Test f1 micro is 0.6057142857142858
ovr_rf: Cross validation f1 micro is 0.6780638361939173.
ovr_rf: Test f1 micro is 0.6657142857142857
ovr_nb: Cross validation f1 micro is 0.642186088527552.
ovr_nb: Test f1 micro is 0.6257142857142857
xgb: Cross validation f1 micro is 0.6670280036133694.
xgb: Test f1 micro is 0.6542857142857142


In [71]:
# Refit the chosen model (one-vs-one random forest) on the current training
# features and print its per-class report on the test split, followed by the
# micro-averaged F1.
mod = onevsone_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
report = classification_report(y_test, test_pred)
micro_f1 = round(f1_score(y_test, test_pred, average='micro'), 2)
print(report)
print('micro avg                              {}'.format(micro_f1))

              precision    recall  f1-score   support

           0       0.54      0.14      0.22        50
           1       0.42      0.22      0.29        69
           2       0.71      0.93      0.80       231

    accuracy                           0.67       350
   macro avg       0.56      0.43      0.44       350
weighted avg       0.63      0.67      0.62       350

micro avg                              0.67


### BIC 35

In [72]:
# Restrict both splits to the 35-feature subset
# (presumably the features selected by BIC, per the section heading - confirm).
BIC_features = [
    'netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven',
    'health_oriented', 'adjustment', 'reward_bias', 'cheerful', 'leisure_oriented',
    'thinking_style', 'aggressive', 'organized', 'self_assured', 'religion_oriented',
    'cautious', 'impulsive', 'disciplined', 'type_a', 'liberal',
    'melancholy', 'neuroticism', 'sexual_focus', 'food_focus', 'cold',
    'friend_focus', 'agreeableness', 'generous', 'genuine', 'self_conscious',
    'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious',
]

X_train, X_test = train[BIC_features], test[BIC_features]

In [73]:
# Score every candidate on the current X_train / X_test: mean 10-fold
# cross-validated micro-F1 on the training split, then micro-F1 on the
# held-out test split after refitting on the whole training split.
candidates = [
    ('ovo_lr', onevsone_lr), ('ovo_svc', onevsone_svc),
    ('ovo_rf', onevsone_rf), ('ovo_nb', onevsone_nb),
    ('ovr_lr', onevsrest_lr), ('ovr_svc', onevsrest_svc),
    ('ovr_rf', onevsrest_rf), ('ovr_nb', onevsrest_nb),
    ('xgb', xgb),
]
for modname, mod in candidates:
    score = cross_val_score(mod, X_train, y_train, scoring='f1_micro', cv=10)
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    test_f1 = f1_score(y_test, test_pred, average='micro')
    print('{}: Test f1 micro is {}'.format(modname, test_f1))

ovo_lr: Cross validation f1 micro is 0.5845679012345679.
ovo_lr: Test f1 micro is 0.54
ovo_svc: Cross validation f1 micro is 0.6152815417043059.
ovo_svc: Test f1 micro is 0.5828571428571429
ovo_rf: Cross validation f1 micro is 0.6854712436013248.
ovo_rf: Test f1 micro is 0.6742857142857143
ovo_nb: Cross validation f1 micro is 0.6017163504968382.
ovo_nb: Test f1 micro is 0.6085714285714285
ovr_lr: Cross validation f1 micro is 0.5957392351701295.
ovr_lr: Test f1 micro is 0.56
ovr_svc: Cross validation f1 micro is 0.6373983739837399.
ovr_svc: Test f1 micro is 0.6
ovr_rf: Cross validation f1 micro is 0.6707618187292985.
ovr_rf: Test f1 micro is 0.6628571428571428
ovr_nb: Cross validation f1 micro is 0.6299457994579947.
ovr_nb: Test f1 micro is 0.62
xgb: Cross validation f1 micro is 0.6670129479072567.
xgb: Test f1 micro is 0.6657142857142857


In [74]:
# Refit the chosen model (one-vs-one random forest) on the current training
# features and print its per-class report on the test split, followed by the
# micro-averaged F1.
mod = onevsone_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
report = classification_report(y_test, test_pred)
micro_f1 = round(f1_score(y_test, test_pred, average='micro'), 2)
print(report)
print('micro avg                              {}'.format(micro_f1))

              precision    recall  f1-score   support

           0       0.62      0.10      0.17        50
           1       0.43      0.17      0.25        69
           2       0.70      0.95      0.80       231

    accuracy                           0.67       350
   macro avg       0.58      0.41      0.41       350
weighted avg       0.63      0.67      0.60       350

micro avg                              0.67


### BIC 40

In [75]:
# Restrict both splits to the 40-feature subset
# (presumably the features selected by BIC, per the section heading - confirm).
BIC_features = [
    'netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven',
    'health_oriented', 'adjustment', 'reward_bias', 'cheerful', 'leisure_oriented',
    'thinking_style', 'aggressive', 'organized', 'self_assured', 'religion_oriented',
    'cautious', 'impulsive', 'disciplined', 'type_a', 'liberal',
    'melancholy', 'neuroticism', 'sexual_focus', 'food_focus', 'cold',
    'friend_focus', 'agreeableness', 'generous', 'genuine', 'self_conscious',
    'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious',
    'stressed', 'work_oriented', 'depression', 'family_oriented', 'insecure',
]

X_train, X_test = train[BIC_features], test[BIC_features]

In [76]:
# Score every candidate on the current X_train / X_test: mean 10-fold
# cross-validated micro-F1 on the training split, then micro-F1 on the
# held-out test split after refitting on the whole training split.
candidates = [
    ('ovo_lr', onevsone_lr), ('ovo_svc', onevsone_svc),
    ('ovo_rf', onevsone_rf), ('ovo_nb', onevsone_nb),
    ('ovr_lr', onevsrest_lr), ('ovr_svc', onevsrest_svc),
    ('ovr_rf', onevsrest_rf), ('ovr_nb', onevsrest_nb),
    ('xgb', xgb),
]
for modname, mod in candidates:
    score = cross_val_score(mod, X_train, y_train, scoring='f1_micro', cv=10)
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    test_f1 = f1_score(y_test, test_pred, average='micro')
    print('{}: Test f1 micro is {}'.format(modname, test_f1))

ovo_lr: Cross validation f1 micro is 0.5810147545919904.
ovo_lr: Test f1 micro is 0.52
ovo_svc: Cross validation f1 micro is 0.6128876844323999.
ovo_svc: Test f1 micro is 0.5914285714285714
ovo_rf: Cross validation f1 micro is 0.6793134598012646.
ovo_rf: Test f1 micro is 0.6657142857142857
ovo_nb: Cross validation f1 micro is 0.578319783197832.
ovo_nb: Test f1 micro is 0.62
ovr_lr: Cross validation f1 micro is 0.5933303221921108.
ovr_lr: Test f1 micro is 0.5714285714285714
ovr_svc: Cross validation f1 micro is 0.62387834989461.
ovr_svc: Test f1 micro is 0.6171428571428571
ovr_rf: Cross validation f1 micro is 0.6682776272207167.
ovr_rf: Test f1 micro is 0.66
ovr_nb: Cross validation f1 micro is 0.6311803673592292.
ovr_nb: Test f1 micro is 0.6228571428571429
xgb: Cross validation f1 micro is 0.6767991568804577.
xgb: Test f1 micro is 0.6628571428571428


In [77]:
# Refit the chosen model (one-vs-one random forest) on the current training
# features and print its per-class report on the test split, followed by the
# micro- and macro-averaged F1.
mod = onevsone_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
report = classification_report(y_test, test_pred)
micro_f1 = round(f1_score(y_test, test_pred, average='micro'), 2)
macro_f1 = round(f1_score(y_test, test_pred, average='macro'), 2)
print(report)
print('micro avg                              {}'.format(micro_f1))
print('macro avg                              {}'.format(macro_f1))

              precision    recall  f1-score   support

           0       0.45      0.10      0.16        50
           1       0.42      0.19      0.26        69
           2       0.70      0.93      0.80       231

    accuracy                           0.67       350
   macro avg       0.52      0.41      0.41       350
weighted avg       0.61      0.67      0.60       350

micro avg                              0.67
macro avg                              0.41


#### Odds Ratio for BIC 40

In [None]:
# Coefficients (log-odds relative to reference class 0 = None; row 1 = pain, row 2 = pleasure & addiction):
#      (Intercept) netspeak_focus money_oriented    workhorse       active
#    1   -0.728999   -0.023476272    0.010378129 -0.009522501 -0.015349323
#    2    2.128925   -0.006137466   -0.003026801 -0.031658883  0.003269262
#      power_driven health_oriented  adjustment  reward_bias     cheerful
#    1  -0.02553222     0.016059395 -0.04908470 -0.006461065  0.008260045
#    2  -0.01451243     0.008371385 -0.01654196  0.006186548 -0.010192141
#      leisure_oriented thinking_style  aggressive    organized self_assured
#    1      -0.01137100   -0.001625862 -0.02968211 -0.008331101  0.006260128
#    2      -0.01194271    0.014451886 -0.03226892 -0.016988143  0.017833144
#      religion_oriented      cautious   impulsive  disciplined     type_a
#    1      -0.004719442  0.0127061385 0.027234667  0.003919399 0.01376712
#    2       0.006997811 -0.0005605821 0.003561902 -0.007219051 0.02124003
#          liberal melancholy neuroticism sexual_focus  food_focus       cold
#    1 -0.01522392 0.02295983 -0.04673314   0.01817473 0.004430683 0.02393643
#    2 -0.02271685 0.02128984 -0.04686367   0.01612098 0.007613981 0.01435010
#      friend_focus agreeableness     generous    genuine self_conscious
#    1  0.004076875  -0.001436249 -0.002730232 0.01069174     0.01880966
#    2  0.008815973  -0.023137742  0.019466417 0.02244011     0.01946617
#      emotionally_aware   artistic   ambitious  adventurous      anxious
#    1      -0.013205394 0.01079969 0.008774397 -0.008834482  0.003593158
#    2      -0.007231397 0.01276577 0.017758021 -0.016618487 -0.005953701
#         stressed work_oriented  depression family_oriented    insecure
#    1 0.002260092   0.001703292 0.014276417     0.009998929 -0.01661206
#    2 0.011207941  -0.004048210 0.001663619     0.006171349 -0.01342986

In [128]:
# Per-feature coefficients transcribed from the coefficient table in the cell
# above, in the same order as Feature_nm. They are exponentiated into odds
# ratios in the next cell, with class "None" as the reference.

# Row 1 of the table: pain vs. None.
coefficients_pain_vs_none = [
    -0.023476272, 0.010378129, -0.009522501, -0.015349323,
    -0.02553222, 0.016059395, -0.04908470, -0.006461065, 0.008260045,
    -0.01137100, -0.001625862, -0.02968211, -0.008331101, 0.006260128,
    -0.004719442, 0.0127061385, 0.027234667, 0.003919399, 0.01376712,
    -0.01522392, 0.02295983, -0.04673314, 0.01817473, 0.004430683, 0.02393643,
    0.004076875, -0.001436249, -0.002730232, 0.01069174, 0.01880966,
    -0.013205394, 0.01079969, 0.008774397, -0.008834482, 0.003593158,
    0.002260092, 0.001703292, 0.014276417, 0.009998929, -0.01661206,
]

# Row 2 of the table: pleasure & addiction vs. None.
coefficients_PleasureAddiction_vs_none = [
    -0.006137466, -0.003026801, -0.031658883, 0.003269262,
    -0.01451243, 0.008371385, -0.01654196, 0.006186548, -0.010192141,
    -0.01194271, 0.014451886, -0.03226892, -0.016988143, 0.017833144,
    0.006997811, -0.0005605821, 0.003561902, -0.007219051, 0.02124003,
    -0.02271685, 0.02128984, -0.04686367, 0.01612098, 0.007613981, 0.01435010,
    0.008815973, -0.023137742, 0.019466417, 0.02244011, 0.01946617,
    -0.007231397, 0.01276577, 0.017758021, -0.016618487, -0.005953701,
    0.011207941, -0.004048210, 0.001663619, 0.006171349, -0.01342986,
]

# Feature names, positionally aligned with the two coefficient lists.
Feature_nm = [
    'netspeak_focus', 'money_oriented', 'workhorse', 'active',
    'power_driven', 'health_oriented', 'adjustment', 'reward_bias', 'cheerful',
    'leisure_oriented', 'thinking_style', 'aggressive', 'organized', 'self_assured',
    'religion_oriented', 'cautious', 'impulsive', 'disciplined', 'type_a',
    'liberal', 'melancholy', 'neuroticism', 'sexual_focus', 'food_focus', 'cold',
    'friend_focus', 'agreeableness', 'generous', 'genuine', 'self_conscious',
    'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious',
    'stressed', 'work_oriented', 'depression', 'family_oriented', 'insecure',
]

# The lists are paired positionally with zip(), which silently truncates to the
# shortest input; fail loudly if a transcription slip left them misaligned.
assert len(coefficients_pain_vs_none) == len(Feature_nm), \
    'pain-vs-none coefficients do not match the number of feature names'
assert len(coefficients_PleasureAddiction_vs_none) == len(Feature_nm), \
    'pleasure/addiction-vs-none coefficients do not match the number of feature names'

In [129]:
# Print, for every feature, the odds ratio exp(coefficient) of each class
# relative to the reference class "None", rounded to three decimals.
print('Odds Ratio')
odds_rows = zip(Feature_nm, coefficients_pain_vs_none, coefficients_PleasureAddiction_vs_none)
for feature_name, coef_pain, coef_pleasure_addiction in odds_rows:
    pain_odds = round(math.exp(coef_pain), 3)
    pleasure_addiction_odds = round(math.exp(coef_pleasure_addiction), 3)
    print(feature_name.upper())
    print('ref: None')
    print('     Pain {}'.format(pain_odds))
    print('     Pleasure & Addiction {}'.format(pleasure_addiction_odds))

Odds Ratio
NETSPEAK_FOCUS
ref: None
     Pain 0.977
     Pleasure & Addiction 0.994
MONEY_ORIENTED
ref: None
     Pain 1.01
     Pleasure & Addiction 0.997
WORKHORSE
ref: None
     Pain 0.991
     Pleasure & Addiction 0.969
ACTIVE
ref: None
     Pain 0.985
     Pleasure & Addiction 1.003
POWER_DRIVEN
ref: None
     Pain 0.975
     Pleasure & Addiction 0.986
HEALTH_ORIENTED
ref: None
     Pain 1.016
     Pleasure & Addiction 1.008
ADJUSTMENT
ref: None
     Pain 0.952
     Pleasure & Addiction 0.984
REWARD_BIAS
ref: None
     Pain 0.994
     Pleasure & Addiction 1.006
CHEERFUL
ref: None
     Pain 1.008
     Pleasure & Addiction 0.99
LEISURE_ORIENTED
ref: None
     Pain 0.989
     Pleasure & Addiction 0.988
THINKING_STYLE
ref: None
     Pain 0.998
     Pleasure & Addiction 1.015
AGGRESSIVE
ref: None
     Pain 0.971
     Pleasure & Addiction 0.968
ORGANIZED
ref: None
     Pain 0.992
     Pleasure & Addiction 0.983
SELF_ASSURED
ref: None
     Pain 1.006
     Pleasure & Addiction 1.018
RELIG

### BIC 45

In [78]:
# 45-feature subset for the "BIC 45" experiment (presumably ordered by the BIC-based
# selection — confirm). X_train / X_test are rebound to just these columns.
BIC_features = [
    'netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven',
    'health_oriented', 'adjustment', 'reward_bias', 'cheerful', 'leisure_oriented',
    'thinking_style', 'aggressive', 'organized', 'self_assured', 'religion_oriented',
    'cautious', 'impulsive', 'disciplined', 'type_a', 'liberal',
    'melancholy', 'neuroticism', 'sexual_focus', 'food_focus', 'cold',
    'friend_focus', 'agreeableness', 'generous', 'genuine', 'self_conscious',
    'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious',
    'stressed', 'work_oriented', 'depression', 'family_oriented', 'insecure',
    'happiness', 'conscientiousness', 'dutiful', 'persuasive', 'cooperative',
]

X_train, X_test = train[BIC_features], test[BIC_features]

In [79]:
# Benchmark all nine candidate classifiers on the current X_train / X_test columns.
# For each: mean micro-F1 over 10 cross-validation folds of the training split, then
# micro-F1 on the test split after refitting on the whole training split.
# NOTE(review): `xgb` is used as an estimator here although the notebook imports the
# xgboost module under that same name — presumably rebound in an earlier cell; confirm.
for modname, mod in (('ovo_lr', onevsone_lr), ('ovo_svc', onevsone_svc),
                     ('ovo_rf', onevsone_rf), ('ovo_nb', onevsone_nb),
                     ('ovr_lr', onevsrest_lr), ('ovr_svc', onevsrest_svc),
                     ('ovr_rf', onevsrest_rf), ('ovr_nb', onevsrest_nb),
                     ('xgb', xgb)):
    score = cross_val_score(mod, X_train, y_train, scoring='f1_micro', cv=10)
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    # The object returned by fit() is what gets scored on the held-out rows.
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    test_f1 = f1_score(y_test, test_pred, average='micro')
    print('{}: Test f1 micro is {}'.format(modname, test_f1))

ovo_lr: Cross validation f1 micro is 0.5625715146040349.
ovo_lr: Test f1 micro is 0.5114285714285715
ovo_svc: Cross validation f1 micro is 0.6177807889190003.
ovo_svc: Test f1 micro is 0.5914285714285714
ovo_rf: Cross validation f1 micro is 0.6744203553146642.
ovo_rf: Test f1 micro is 0.6685714285714286
ovo_nb: Cross validation f1 micro is 0.6090785907859079.
ovo_nb: Test f1 micro is 0.6285714285714286
ovr_lr: Cross validation f1 micro is 0.5749322493224932.
ovr_lr: Test f1 micro is 0.5514285714285714
ovr_svc: Cross validation f1 micro is 0.6177356218006624.
ovr_svc: Test f1 micro is 0.6085714285714285
ovr_rf: Cross validation f1 micro is 0.6744203553146643.
ovr_rf: Test f1 micro is 0.6628571428571428
ovr_nb: Cross validation f1 micro is 0.6238331827762723.
ovr_nb: Test f1 micro is 0.6228571428571429
xgb: Cross validation f1 micro is 0.6707015959048478.
xgb: Test f1 micro is 0.68


In [80]:
# Refit `xgb` on the current feature subset, then print the per-class report for the
# test split followed by the micro-averaged F1 rounded to 2 decimals.
mod = xgb
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
micro_f1 = round(f1_score(y_test, test_pred, average='micro'), 2)
print('micro avg                              {}'.format(micro_f1))

              precision    recall  f1-score   support

           0       0.39      0.18      0.25        50
           1       0.46      0.30      0.37        69
           2       0.74      0.90      0.81       231

    accuracy                           0.68       350
   macro avg       0.53      0.46      0.47       350
weighted avg       0.63      0.68      0.64       350

micro avg                              0.68


### BIC 50

In [83]:
# 50-feature subset for the "BIC 50" experiment (presumably ordered by the BIC-based
# selection — confirm). X_train / X_test are rebound to just these columns.
BIC_features = [
    'netspeak_focus', 'money_oriented', 'workhorse', 'active', 'power_driven',
    'health_oriented', 'adjustment', 'reward_bias', 'cheerful', 'leisure_oriented',
    'thinking_style', 'aggressive', 'organized', 'self_assured', 'religion_oriented',
    'cautious', 'impulsive', 'disciplined', 'type_a', 'liberal',
    'melancholy', 'neuroticism', 'sexual_focus', 'food_focus', 'cold',
    'friend_focus', 'agreeableness', 'generous', 'genuine', 'self_conscious',
    'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious',
    'stressed', 'work_oriented', 'depression', 'family_oriented', 'insecure',
    'happiness', 'conscientiousness', 'dutiful', 'persuasive', 'cooperative',
    'intellectual', 'humble', 'assertive', 'body_focus', 'friendly',
]

X_train, X_test = train[BIC_features], test[BIC_features]

In [84]:
# Benchmark all nine candidate classifiers on the current X_train / X_test columns.
# For each: mean micro-F1 over 10 cross-validation folds of the training split, then
# micro-F1 on the test split after refitting on the whole training split.
# NOTE(review): `xgb` is used as an estimator here although the notebook imports the
# xgboost module under that same name — presumably rebound in an earlier cell; confirm.
for modname, mod in (('ovo_lr', onevsone_lr), ('ovo_svc', onevsone_svc),
                     ('ovo_rf', onevsone_rf), ('ovo_nb', onevsone_nb),
                     ('ovr_lr', onevsrest_lr), ('ovr_svc', onevsrest_svc),
                     ('ovr_rf', onevsrest_rf), ('ovr_nb', onevsrest_nb),
                     ('xgb', xgb)):
    score = cross_val_score(mod, X_train, y_train, scoring='f1_micro', cv=10)
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    # The object returned by fit() is what gets scored on the held-out rows.
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    test_f1 = f1_score(y_test, test_pred, average='micro')
    print('{}: Test f1 micro is {}'.format(modname, test_f1))

ovo_lr: Cross validation f1 micro is 0.5491117133393556.
ovo_lr: Test f1 micro is 0.5057142857142857
ovo_svc: Cross validation f1 micro is 0.6165161096055405.
ovo_svc: Test f1 micro is 0.5742857142857143
ovo_rf: Cross validation f1 micro is 0.679373682625715.
ovo_rf: Test f1 micro is 0.6828571428571428
ovo_nb: Cross validation f1 micro is 0.6078891900030111.
ovo_nb: Test f1 micro is 0.62
ovr_lr: Cross validation f1 micro is 0.5786208973200843.
ovr_lr: Test f1 micro is 0.5457142857142857
ovr_svc: Cross validation f1 micro is 0.6153267088226437.
ovr_svc: Test f1 micro is 0.6
ovr_rf: Cross validation f1 micro is 0.6793435712134899.
ovr_rf: Test f1 micro is 0.6742857142857143
ovr_nb: Cross validation f1 micro is 0.6189099668774466.
ovr_nb: Test f1 micro is 0.62
xgb: Cross validation f1 micro is 0.6730954531767539.
xgb: Test f1 micro is 0.6742857142857143


In [85]:
# Refit `onevsone_rf` on the current feature subset, then print the per-class report for
# the test split followed by the micro-averaged F1 rounded to 2 decimals.
mod = onevsone_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
micro_f1 = round(f1_score(y_test, test_pred, average='micro'), 2)
print('micro avg                              {}'.format(micro_f1))

              precision    recall  f1-score   support

           0       0.53      0.16      0.25        50
           1       0.49      0.25      0.33        69
           2       0.71      0.93      0.81       231

    accuracy                           0.68       350
   macro avg       0.58      0.44      0.46       350
weighted avg       0.64      0.68      0.63       350

micro avg                              0.68


### Model with Backward Feature Selection using all 29 AIC features

In [86]:
# The 29 features of the AIC experiment (per the section heading, the set kept by
# backward feature selection). X_train / X_test are rebound to just these columns.
AIC_features = [
    'netspeak_focus', 'liberal', 'self_assured', 'organized', 'type_a',
    'cheerful', 'disciplined', 'neuroticism', 'aggressive', 'adjustment',
    'religion_oriented', 'food_focus', 'generous', 'sexual_focus', 'thinking_style',
    'agreeableness', 'money_oriented', 'melancholy', 'workhorse', 'reward_bias',
    'self_conscious', 'leisure_oriented', 'impulsive', 'cautious', 'active',
    'power_driven', 'genuine', 'cold', 'health_oriented',
]

X_train, X_test = train[AIC_features], test[AIC_features]

In [87]:
# Benchmark all nine candidate classifiers on the current X_train / X_test columns.
# For each: mean micro-F1 over 10 cross-validation folds of the training split, then
# micro-F1 on the test split after refitting on the whole training split.
# NOTE(review): `xgb` is used as an estimator here although the notebook imports the
# xgboost module under that same name — presumably rebound in an earlier cell; confirm.
for modname, mod in (('ovo_lr', onevsone_lr), ('ovo_svc', onevsone_svc),
                     ('ovo_rf', onevsone_rf), ('ovo_nb', onevsone_nb),
                     ('ovr_lr', onevsrest_lr), ('ovr_svc', onevsrest_svc),
                     ('ovr_rf', onevsrest_rf), ('ovr_nb', onevsrest_nb),
                     ('xgb', xgb)):
    score = cross_val_score(mod, X_train, y_train, scoring='f1_micro', cv=10)
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    # The object returned by fit() is what gets scored on the held-out rows.
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    test_f1 = f1_score(y_test, test_pred, average='micro')
    print('{}: Test f1 micro is {}'.format(modname, test_f1))

ovo_lr: Cross validation f1 micro is 0.5761065943992774.
ovo_lr: Test f1 micro is 0.5542857142857143
ovo_svc: Cross validation f1 micro is 0.6177356218006624.
ovo_svc: Test f1 micro is 0.5714285714285714
ovo_rf: Cross validation f1 micro is 0.682987052092743.
ovo_rf: Test f1 micro is 0.6742857142857143
ovo_nb: Cross validation f1 micro is 0.602905751279735.
ovo_nb: Test f1 micro is 0.6228571428571429
ovr_lr: Cross validation f1 micro is 0.5957994579945799.
ovr_lr: Test f1 micro is 0.58
ovr_svc: Cross validation f1 micro is 0.6275971093044264.
ovr_svc: Test f1 micro is 0.5942857142857143
ovr_rf: Cross validation f1 micro is 0.6743902439024391.
ovr_rf: Test f1 micro is 0.68
ovr_nb: Cross validation f1 micro is 0.6446552243300211.
ovr_nb: Test f1 micro is 0.6257142857142857
xgb: Cross validation f1 micro is 0.669482083709726.
xgb: Test f1 micro is 0.6542857142857142


In [88]:
# Refit `onevsrest_rf` on the current feature subset, then print the per-class report for
# the test split followed by the micro-averaged F1 rounded to 2 decimals.
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
micro_f1 = round(f1_score(y_test, test_pred, average='micro'), 2)
print('micro avg                              {}'.format(micro_f1))

              precision    recall  f1-score   support

           0       0.45      0.10      0.16        50
           1       0.47      0.26      0.34        69
           2       0.71      0.93      0.81       231

    accuracy                           0.68       350
   macro avg       0.55      0.43      0.44       350
weighted avg       0.63      0.68      0.62       350

micro avg                              0.68


### AIC 30

In [89]:
# 30-feature subset for the "AIC 30" experiment; X_train / X_test are rebound to just
# these columns of the train / test splits.
AIC_features = [
    'netspeak_focus', 'liberal', 'self_assured', 'organized', 'type_a',
    'cheerful', 'disciplined', 'neuroticism', 'aggressive', 'adjustment',
    'religion_oriented', 'food_focus', 'generous', 'sexual_focus', 'thinking_style',
    'agreeableness', 'money_oriented', 'melancholy', 'workhorse', 'reward_bias',
    'self_conscious', 'leisure_oriented', 'impulsive', 'cautious', 'active',
    'power_driven', 'genuine', 'cold', 'health_oriented', 'friend_focus',
]

X_train, X_test = train[AIC_features], test[AIC_features]

In [90]:
# Benchmark all nine candidate classifiers on the current X_train / X_test columns.
# For each: mean micro-F1 over 10 cross-validation folds of the training split, then
# micro-F1 on the test split after refitting on the whole training split.
# NOTE(review): `xgb` is used as an estimator here although the notebook imports the
# xgboost module under that same name — presumably rebound in an earlier cell; confirm.
for modname, mod in (('ovo_lr', onevsone_lr), ('ovo_svc', onevsone_svc),
                     ('ovo_rf', onevsone_rf), ('ovo_nb', onevsone_nb),
                     ('ovr_lr', onevsrest_lr), ('ovr_svc', onevsrest_svc),
                     ('ovr_rf', onevsrest_rf), ('ovr_nb', onevsrest_nb),
                     ('xgb', xgb)):
    score = cross_val_score(mod, X_train, y_train, scoring='f1_micro', cv=10)
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    # The object returned by fit() is what gets scored on the held-out rows.
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    test_f1 = f1_score(y_test, test_pred, average='micro')
    print('{}: Test f1 micro is {}'.format(modname, test_f1))

ovo_lr: Cross validation f1 micro is 0.5748870822041554.
ovo_lr: Test f1 micro is 0.5428571428571428
ovo_svc: Cross validation f1 micro is 0.6140770852152967.
ovo_svc: Test f1 micro is 0.58
ovo_rf: Cross validation f1 micro is 0.6879253236976813.
ovo_rf: Test f1 micro is 0.6828571428571428
ovo_nb: Cross validation f1 micro is 0.6090484793736827.
ovo_nb: Test f1 micro is 0.6257142857142857
ovr_lr: Cross validation f1 micro is 0.5896567299006323.
ovr_lr: Test f1 micro is 0.5828571428571429
ovr_svc: Cross validation f1 micro is 0.6349744052996085.
ovr_svc: Test f1 micro is 0.6057142857142858
ovr_rf: Cross validation f1 micro is 0.6854862993074375.
ovr_rf: Test f1 micro is 0.6685714285714286
ovr_nb: Cross validation f1 micro is 0.642186088527552.
ovr_nb: Test f1 micro is 0.6257142857142857
xgb: Cross validation f1 micro is 0.6682625715146041.
xgb: Test f1 micro is 0.6542857142857142


In [91]:
# Refit `onevsone_rf` on the current feature subset, then print the per-class report for
# the test split followed by the micro-averaged F1 rounded to 2 decimals.
mod = onevsone_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
micro_f1 = round(f1_score(y_test, test_pred, average='micro'), 2)
print('micro avg                              {}'.format(micro_f1))

              precision    recall  f1-score   support

           0       0.38      0.10      0.16        50
           1       0.49      0.25      0.33        69
           2       0.72      0.94      0.81       231

    accuracy                           0.68       350
   macro avg       0.53      0.43      0.43       350
weighted avg       0.62      0.68      0.62       350

micro avg                              0.68


### AIC 35

In [92]:
# 35-feature subset for the "AIC 35" experiment; X_train / X_test are rebound to just
# these columns of the train / test splits.
AIC_features = [
    'netspeak_focus', 'liberal', 'self_assured', 'organized', 'type_a',
    'cheerful', 'disciplined', 'neuroticism', 'aggressive', 'adjustment',
    'religion_oriented', 'food_focus', 'generous', 'sexual_focus', 'thinking_style',
    'agreeableness', 'money_oriented', 'melancholy', 'workhorse', 'reward_bias',
    'self_conscious', 'leisure_oriented', 'impulsive', 'cautious', 'active',
    'power_driven', 'genuine', 'cold', 'health_oriented', 'friend_focus',
    'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious',
]

X_train, X_test = train[AIC_features], test[AIC_features]

In [93]:
# Benchmark all nine candidate classifiers on the current X_train / X_test columns.
# For each: mean micro-F1 over 10 cross-validation folds of the training split, then
# micro-F1 on the test split after refitting on the whole training split.
# NOTE(review): `xgb` is used as an estimator here although the notebook imports the
# xgboost module under that same name — presumably rebound in an earlier cell; confirm.
for modname, mod in (('ovo_lr', onevsone_lr), ('ovo_svc', onevsone_svc),
                     ('ovo_rf', onevsone_rf), ('ovo_nb', onevsone_nb),
                     ('ovr_lr', onevsrest_lr), ('ovr_svc', onevsrest_svc),
                     ('ovr_rf', onevsrest_rf), ('ovr_nb', onevsrest_nb),
                     ('xgb', xgb)):
    score = cross_val_score(mod, X_train, y_train, scoring='f1_micro', cv=10)
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    # The object returned by fit() is what gets scored on the held-out rows.
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    test_f1 = f1_score(y_test, test_pred, average='micro')
    print('{}: Test f1 micro is {}'.format(modname, test_f1))

ovo_lr: Cross validation f1 micro is 0.5845679012345679.
ovo_lr: Test f1 micro is 0.54
ovo_svc: Cross validation f1 micro is 0.6152815417043059.
ovo_svc: Test f1 micro is 0.5828571428571429
ovo_rf: Cross validation f1 micro is 0.6744354110207768.
ovo_rf: Test f1 micro is 0.66
ovo_nb: Cross validation f1 micro is 0.6017163504968382.
ovo_nb: Test f1 micro is 0.6085714285714285
ovr_lr: Cross validation f1 micro is 0.5957392351701295.
ovr_lr: Test f1 micro is 0.56
ovr_svc: Cross validation f1 micro is 0.6373983739837399.
ovr_svc: Test f1 micro is 0.6
ovr_rf: Cross validation f1 micro is 0.6732008431195422.
ovr_rf: Test f1 micro is 0.6771428571428572
ovr_nb: Cross validation f1 micro is 0.6299457994579947.
ovr_nb: Test f1 micro is 0.62
xgb: Cross validation f1 micro is 0.6682174043962661.
xgb: Test f1 micro is 0.6685714285714286


In [94]:
# Refit `onevsrest_rf` on the current feature subset, then print the per-class report for
# the test split followed by the micro-averaged F1 rounded to 2 decimals.
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
micro_f1 = round(f1_score(y_test, test_pred, average='micro'), 2)
print('micro avg                              {}'.format(micro_f1))

              precision    recall  f1-score   support

           0       0.36      0.08      0.13        50
           1       0.48      0.20      0.29        69
           2       0.71      0.95      0.81       231

    accuracy                           0.68       350
   macro avg       0.52      0.41      0.41       350
weighted avg       0.61      0.68      0.61       350

micro avg                              0.68


#### Odds Ratio for AIC 35

In [None]:
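# Coefficients from R (row 1 = class 1 / pain vs. None, row 2 = class 2 / pleasure & addiction vs. None).
# NOTE: only 30 of the 35 "AIC 35" features appear below; netspeak_focus, liberal,
# self_assured, organized and type_a are missing from this paste.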
#          cheerful  disciplined neuroticism  aggressive  adjustment
#    1  0.006926225  0.009932597 -0.04574989 -0.02293918 -0.06273459
#    2 -0.006857462 -0.004668864 -0.04206451 -0.03191554 -0.01608389
#      religion_oriented  food_focus     generous sexual_focus thinking_style
#    1      -0.004750237 0.005284733 -0.005943655   0.01673024    -0.00374206
#    2       0.006412098 0.007953333  0.019830291   0.01599192     0.01466021
#      agreeableness money_oriented melancholy    workhorse  reward_bias
#    1   0.001250636     0.01451598 0.02516431 -0.007254736 -0.007573091
#    2  -0.021897528     0.00103292 0.02203398 -0.031393984  0.007016677
#      self_conscious leisure_oriented   impulsive    cautious       active
#    1     0.01828672     -0.009135593 0.028912001 0.013718917 -0.014244353
#    2     0.01965547     -0.010970023 0.002486877 0.001679266  0.003941147
#      power_driven    genuine        cold health_oriented friend_focus
#    1  -0.02798047 0.01338628 0.017041520     0.015940930  0.004454851
#    2  -0.01633463 0.02384384 0.009642887     0.008663801  0.009402999
#      emotionally_aware    artistic  ambitious  adventurous      anxious
#    1      -0.011733478 0.009289661 0.01134012 -0.005573153  0.002557516
#    2      -0.007777114 0.012479756 0.01476101 -0.014021436 -0.007567554

In [100]:
# Coefficients for the "AIC 35" odds-ratio printout; the values match rows 1 and 2 of the
# R output pasted in the comment cell above. All three lists hold 30 entries and are
# paired by position.
# NOTE(review): the AIC 35 feature list has 35 names, but netspeak_focus, liberal,
# self_assured, organized and type_a have no coefficient here — confirm whether they were
# dropped on purpose or lost when the R output was copied.

# Row 1 of the R output: values printed as "Pain" (reference "None").
coefficients_pain_vs_none = [0.006926225, 0.009932597, -0.04574989, -0.02293918, -0.06273459,
                            -0.004750237, 0.005284733, -0.005943655, 0.01673024, -0.00374206,
                            0.001250636, 0.01451598, 0.02516431, -0.007254736, -0.007573091,
                            0.01828672, -0.009135593, 0.028912001, 0.013718917, -0.014244353,
                            -0.02798047, 0.01338628, 0.017041520, 0.015940930, 0.004454851,
                            -0.011733478, 0.009289661, 0.01134012, -0.005573153, 0.002557516]

# Row 2 of the R output: values printed as "Pleasure & Addiction" (reference "None").
coefficients_PleasureAddiction_vs_none = [-0.006857462, -0.004668864, -0.04206451, -0.03191554, -0.01608389,
                                         0.006412098, 0.007953333, 0.019830291, 0.01599192, 0.01466021,
                                         -0.021897528, 0.00103292, 0.02203398, -0.031393984, 0.007016677,
                                         0.01965547, -0.010970023, 0.002486877, 0.001679266, 0.003941147,
                                         -0.01633463, 0.02384384, 0.009642887, 0.008663801, 0.009402999,
                                         -0.007777114, 0.012479756, 0.01476101, -0.014021436, -0.007567554]

# Feature labels in the column order of the R output.
Feature_nm = ['cheerful', 'disciplined', 'neuroticism',  'aggressive',  'adjustment',
             'religion_oriented', 'food_focus', 'generous', 'sexual_focus', 'thinking_style',
             'agreeableness', 'money_oriented', 'melancholy', 'workhorse', 'reward_bias',
             'self_conscious', 'leisure_oriented', 'impulsive', 'cautious', 'active',
             'power_driven', 'genuine', 'cold', 'health_oriented', 'friend_focus',
             'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious']

In [127]:
# Print exp(coefficient), rounded to 3 decimals, for both coefficient vectors, one entry
# per feature, with class "None" as the stated reference.
# zip() stops at the shortest input, so the three lists are expected to have equal length.
print('Odds Ratio')
rows = zip(Feature_nm, coefficients_pain_vs_none, coefficients_PleasureAddiction_vs_none)
for feature_name, coef_pain, coef_pleasreaddiction in rows:
    print(feature_name.upper())
    print('ref: None')
    odds_pain = round(math.exp(coef_pain), 3)
    print('     Pain {}'.format(odds_pain))
    odds_pleasure_addiction = round(math.exp(coef_pleasreaddiction), 3)
    print('     Pleasure & Addiction {}'.format(odds_pleasure_addiction))

Odds Ratio
CHEERFUL
ref: None
     Pain 1.007
     Pleasure & Addiction 0.993
DISCIPLINED
ref: None
     Pain 1.01
     Pleasure & Addiction 0.995
NEUROTICISM
ref: None
     Pain 0.955
     Pleasure & Addiction 0.959
AGGRESSIVE
ref: None
     Pain 0.977
     Pleasure & Addiction 0.969
ADJUSTMENT
ref: None
     Pain 0.939
     Pleasure & Addiction 0.984
RELIGION_ORIENTED
ref: None
     Pain 0.995
     Pleasure & Addiction 1.006
FOOD_FOCUS
ref: None
     Pain 1.005
     Pleasure & Addiction 1.008
GENEROUS
ref: None
     Pain 0.994
     Pleasure & Addiction 1.02
SEXUAL_FOCUS
ref: None
     Pain 1.017
     Pleasure & Addiction 1.016
THINKING_STYLE
ref: None
     Pain 0.996
     Pleasure & Addiction 1.015
AGREEABLENESS
ref: None
     Pain 1.001
     Pleasure & Addiction 0.978
MONEY_ORIENTED
ref: None
     Pain 1.015
     Pleasure & Addiction 1.001
MELANCHOLY
ref: None
     Pain 1.025
     Pleasure & Addiction 1.022
WORKHORSE
ref: None
     Pain 0.993
     Pleasure & Addiction 0.969
REWARD_B

### AIC 40

In [95]:
# 40-feature subset for the "AIC 40" experiment; X_train / X_test are rebound to just
# these columns of the train / test splits.
AIC_features = [
    'netspeak_focus', 'liberal', 'self_assured', 'organized', 'type_a',
    'cheerful', 'disciplined', 'neuroticism', 'aggressive', 'adjustment',
    'religion_oriented', 'food_focus', 'generous', 'sexual_focus', 'thinking_style',
    'agreeableness', 'money_oriented', 'melancholy', 'workhorse', 'reward_bias',
    'self_conscious', 'leisure_oriented', 'impulsive', 'cautious', 'active',
    'power_driven', 'genuine', 'cold', 'health_oriented', 'friend_focus',
    'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious',
    'stressed', 'work_oriented', 'depression', 'family_oriented', 'insecure',
]

X_train, X_test = train[AIC_features], test[AIC_features]

In [96]:
# Benchmark all nine candidate classifiers on the current X_train / X_test columns.
# For each: mean micro-F1 over 10 cross-validation folds of the training split, then
# micro-F1 on the test split after refitting on the whole training split.
# NOTE(review): `xgb` is used as an estimator here although the notebook imports the
# xgboost module under that same name — presumably rebound in an earlier cell; confirm.
for modname, mod in (('ovo_lr', onevsone_lr), ('ovo_svc', onevsone_svc),
                     ('ovo_rf', onevsone_rf), ('ovo_nb', onevsone_nb),
                     ('ovr_lr', onevsrest_lr), ('ovr_svc', onevsrest_svc),
                     ('ovr_rf', onevsrest_rf), ('ovr_nb', onevsrest_nb),
                     ('xgb', xgb)):
    score = cross_val_score(mod, X_train, y_train, scoring='f1_micro', cv=10)
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    # The object returned by fit() is what gets scored on the held-out rows.
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    test_f1 = f1_score(y_test, test_pred, average='micro')
    print('{}: Test f1 micro is {}'.format(modname, test_f1))

ovo_lr: Cross validation f1 micro is 0.5810147545919904.
ovo_lr: Test f1 micro is 0.52
ovo_svc: Cross validation f1 micro is 0.6128876844323999.
ovo_svc: Test f1 micro is 0.5914285714285714
ovo_rf: Cross validation f1 micro is 0.6855164107196627.
ovo_rf: Test f1 micro is 0.6742857142857143
ovo_nb: Cross validation f1 micro is 0.578319783197832.
ovo_nb: Test f1 micro is 0.62
ovr_lr: Cross validation f1 micro is 0.5933303221921108.
ovr_lr: Test f1 micro is 0.5714285714285714
ovr_svc: Cross validation f1 micro is 0.62387834989461.
ovr_svc: Test f1 micro is 0.6171428571428571
ovr_rf: Cross validation f1 micro is 0.6719361638060825.
ovr_rf: Test f1 micro is 0.68
ovr_nb: Cross validation f1 micro is 0.6311803673592292.
ovr_nb: Test f1 micro is 0.6228571428571429
xgb: Cross validation f1 micro is 0.6755947003914483.
xgb: Test f1 micro is 0.6628571428571428


In [97]:
# Refit `onevsrest_rf` on the current feature subset, then print the per-class report for
# the test split followed by the micro-averaged F1 rounded to 2 decimals.
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
micro_f1 = round(f1_score(y_test, test_pred, average='micro'), 2)
print('micro avg                              {}'.format(micro_f1))

              precision    recall  f1-score   support

           0       0.50      0.10      0.17        50
           1       0.53      0.23      0.32        69
           2       0.70      0.94      0.80       231

    accuracy                           0.68       350
   macro avg       0.58      0.42      0.43       350
weighted avg       0.64      0.68      0.62       350

micro avg                              0.68


### AIC 45

In [98]:
# 45-feature subset for the "AIC 45" experiment; X_train / X_test are rebound to just
# these columns of the train / test splits.
AIC_features = [
    'netspeak_focus', 'liberal', 'self_assured', 'organized', 'type_a',
    'cheerful', 'disciplined', 'neuroticism', 'aggressive', 'adjustment',
    'religion_oriented', 'food_focus', 'generous', 'sexual_focus', 'thinking_style',
    'agreeableness', 'money_oriented', 'melancholy', 'workhorse', 'reward_bias',
    'self_conscious', 'leisure_oriented', 'impulsive', 'cautious', 'active',
    'power_driven', 'genuine', 'cold', 'health_oriented', 'friend_focus',
    'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious',
    'stressed', 'work_oriented', 'depression', 'family_oriented', 'insecure',
    'happiness', 'conscientiousness', 'dutiful', 'persuasive', 'cooperative',
]

X_train, X_test = train[AIC_features], test[AIC_features]

In [99]:
# Benchmark all nine candidate classifiers on the current X_train / X_test columns.
# For each: mean micro-F1 over 10 cross-validation folds of the training split, then
# micro-F1 on the test split after refitting on the whole training split.
# NOTE(review): `xgb` is used as an estimator here although the notebook imports the
# xgboost module under that same name — presumably rebound in an earlier cell; confirm.
for modname, mod in (('ovo_lr', onevsone_lr), ('ovo_svc', onevsone_svc),
                     ('ovo_rf', onevsone_rf), ('ovo_nb', onevsone_nb),
                     ('ovr_lr', onevsrest_lr), ('ovr_svc', onevsrest_svc),
                     ('ovr_rf', onevsrest_rf), ('ovr_nb', onevsrest_nb),
                     ('xgb', xgb)):
    score = cross_val_score(mod, X_train, y_train, scoring='f1_micro', cv=10)
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    # The object returned by fit() is what gets scored on the held-out rows.
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    test_f1 = f1_score(y_test, test_pred, average='micro')
    print('{}: Test f1 micro is {}'.format(modname, test_f1))

ovo_lr: Cross validation f1 micro is 0.5625715146040349.
ovo_lr: Test f1 micro is 0.5114285714285715
ovo_svc: Cross validation f1 micro is 0.6177807889190003.
ovo_svc: Test f1 micro is 0.5914285714285714
ovo_rf: Cross validation f1 micro is 0.6768443239987955.
ovo_rf: Test f1 micro is 0.6714285714285714
ovo_nb: Cross validation f1 micro is 0.6090785907859079.
ovo_nb: Test f1 micro is 0.6285714285714286
ovr_lr: Cross validation f1 micro is 0.5749322493224932.
ovr_lr: Test f1 micro is 0.5514285714285714
ovr_svc: Cross validation f1 micro is 0.6177356218006624.
ovr_svc: Test f1 micro is 0.6085714285714285
ovr_rf: Cross validation f1 micro is 0.6744052996085516.
ovr_rf: Test f1 micro is 0.6685714285714286
ovr_nb: Cross validation f1 micro is 0.6238331827762723.
ovr_nb: Test f1 micro is 0.6228571428571429
xgb: Cross validation f1 micro is 0.6669978922011441.
xgb: Test f1 micro is 0.6828571428571428


In [100]:
# Refit `xgb` on the current feature subset, then print the per-class report for the
# test split followed by the micro-averaged F1 rounded to 2 decimals.
mod = xgb
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
micro_f1 = round(f1_score(y_test, test_pred, average='micro'), 2)
print('micro avg                              {}'.format(micro_f1))

              precision    recall  f1-score   support

           0       0.39      0.18      0.25        50
           1       0.47      0.32      0.38        69
           2       0.74      0.90      0.81       231

    accuracy                           0.68       350
   macro avg       0.53      0.47      0.48       350
weighted avg       0.64      0.68      0.65       350

micro avg                              0.68


### AIC 50

In [101]:
# 50-feature subset for the "AIC 50" experiment; X_train / X_test are rebound to just
# these columns of the train / test splits.
AIC_features = [
    'netspeak_focus', 'liberal', 'self_assured', 'organized', 'type_a',
    'cheerful', 'disciplined', 'neuroticism', 'aggressive', 'adjustment',
    'religion_oriented', 'food_focus', 'generous', 'sexual_focus', 'thinking_style',
    'agreeableness', 'money_oriented', 'melancholy', 'workhorse', 'reward_bias',
    'self_conscious', 'leisure_oriented', 'impulsive', 'cautious', 'active',
    'power_driven', 'genuine', 'cold', 'health_oriented', 'friend_focus',
    'emotionally_aware', 'artistic', 'ambitious', 'adventurous', 'anxious',
    'stressed', 'work_oriented', 'depression', 'family_oriented', 'insecure',
    'happiness', 'conscientiousness', 'dutiful', 'persuasive', 'cooperative',
    'intellectual', 'humble', 'assertive', 'body_focus', 'friendly',
]

X_train, X_test = train[AIC_features], test[AIC_features]

In [102]:
# Benchmark all nine candidate classifiers on the current X_train / X_test columns.
# For each: mean micro-F1 over 10 cross-validation folds of the training split, then
# micro-F1 on the test split after refitting on the whole training split.
# NOTE(review): `xgb` is used as an estimator here although the notebook imports the
# xgboost module under that same name — presumably rebound in an earlier cell; confirm.
for modname, mod in (('ovo_lr', onevsone_lr), ('ovo_svc', onevsone_svc),
                     ('ovo_rf', onevsone_rf), ('ovo_nb', onevsone_nb),
                     ('ovr_lr', onevsrest_lr), ('ovr_svc', onevsrest_svc),
                     ('ovr_rf', onevsrest_rf), ('ovr_nb', onevsrest_nb),
                     ('xgb', xgb)):
    score = cross_val_score(mod, X_train, y_train, scoring='f1_micro', cv=10)
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    # The object returned by fit() is what gets scored on the held-out rows.
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    test_f1 = f1_score(y_test, test_pred, average='micro')
    print('{}: Test f1 micro is {}'.format(modname, test_f1))

ovo_lr: Cross validation f1 micro is 0.5491117133393556.
ovo_lr: Test f1 micro is 0.5057142857142857
ovo_svc: Cross validation f1 micro is 0.6165161096055405.
ovo_svc: Test f1 micro is 0.5742857142857143
ovo_rf: Cross validation f1 micro is 0.6830322192110809.
ovo_rf: Test f1 micro is 0.68
ovo_nb: Cross validation f1 micro is 0.6078891900030111.
ovo_nb: Test f1 micro is 0.62
ovr_lr: Cross validation f1 micro is 0.5786208973200843.
ovr_lr: Test f1 micro is 0.5457142857142857
ovr_svc: Cross validation f1 micro is 0.6153267088226437.
ovr_svc: Test f1 micro is 0.6
ovr_rf: Cross validation f1 micro is 0.6781240590183679.
ovr_rf: Test f1 micro is 0.6685714285714286
ovr_nb: Cross validation f1 micro is 0.6189099668774466.
ovr_nb: Test f1 micro is 0.62
xgb: Cross validation f1 micro is 0.6743149653718759.
xgb: Test f1 micro is 0.6742857142857143


In [103]:
# Refit `onevsone_rf` on the current feature subset, then print the per-class report for
# the test split followed by the micro-averaged F1 rounded to 2 decimals.
mod = onevsone_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
micro_f1 = round(f1_score(y_test, test_pred, average='micro'), 2)
print('micro avg                              {}'.format(micro_f1))

              precision    recall  f1-score   support

           0       0.50      0.16      0.24        50
           1       0.45      0.22      0.29        69
           2       0.71      0.93      0.81       231

    accuracy                           0.68       350
   macro avg       0.56      0.44      0.45       350
weighted avg       0.63      0.68      0.63       350

micro avg                              0.68


### F Test at p <= 0.01 Cutoff

In [104]:
# 14-feature subset for the "F Test at p <= 0.01 Cutoff" experiment; X_train / X_test are
# rebound to just these columns of the train / test splits.
features = [
    'netspeak_focus', 'cheerful', 'cooperative', 'aggressive', 'religion_oriented',
    'depression', 'ambitious', 'self_conscious', 'active', 'power_driven',
    'genuine', 'anxious', 'health_oriented', 'work_oriented',
]

X_train, X_test = train[features], test[features]

In [105]:
# Benchmark all nine candidate classifiers on the current X_train / X_test columns.
# For each: mean micro-F1 over 10 cross-validation folds of the training split, then
# micro-F1 on the test split after refitting on the whole training split.
# NOTE(review): `xgb` is used as an estimator here although the notebook imports the
# xgboost module under that same name — presumably rebound in an earlier cell; confirm.
for modname, mod in (('ovo_lr', onevsone_lr), ('ovo_svc', onevsone_svc),
                     ('ovo_rf', onevsone_rf), ('ovo_nb', onevsone_nb),
                     ('ovr_lr', onevsrest_lr), ('ovr_svc', onevsrest_svc),
                     ('ovr_rf', onevsrest_rf), ('ovr_nb', onevsrest_nb),
                     ('xgb', xgb)):
    score = cross_val_score(mod, X_train, y_train, scoring='f1_micro', cv=10)
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    # The object returned by fit() is what gets scored on the held-out rows.
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    test_f1 = f1_score(y_test, test_pred, average='micro')
    print('{}: Test f1 micro is {}'.format(modname, test_f1))

ovo_lr: Cross validation f1 micro is 0.5858325805480277.
ovo_lr: Test f1 micro is 0.5371428571428571
ovo_svc: Cross validation f1 micro is 0.579735019572418.
ovo_svc: Test f1 micro is 0.5228571428571429
ovo_rf: Cross validation f1 micro is 0.6842667871123156.
ovo_rf: Test f1 micro is 0.6542857142857142
ovo_nb: Cross validation f1 micro is 0.6214694369165914.
ovo_nb: Test f1 micro is 0.6314285714285715
ovr_lr: Cross validation f1 micro is 0.5895061728395061.
ovr_lr: Test f1 micro is 0.5457142857142857
ovr_svc: Cross validation f1 micro is 0.5931797651309847.
ovr_svc: Test f1 micro is 0.58
ovr_rf: Cross validation f1 micro is 0.6768894911171335.
ovr_rf: Test f1 micro is 0.6685714285714286
ovr_nb: Cross validation f1 micro is 0.6386329418849744.
ovr_nb: Test f1 micro is 0.64
xgb: Cross validation f1 micro is 0.6596055404998495.
xgb: Test f1 micro is 0.6428571428571429


In [106]:
# Refit `onevsrest_rf` on the current feature subset, then print the per-class report for
# the test split followed by the micro-averaged F1 rounded to 2 decimals.
# `modfit` is left in scope for the training-set check in the next cell.
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)
print(classification_report(y_test, test_pred))
micro_f1 = round(f1_score(y_test, test_pred, average='micro'), 2)
print('micro avg                              {}'.format(micro_f1))

              precision    recall  f1-score   support

           0       0.57      0.08      0.14        50
           1       0.41      0.16      0.23        69
           2       0.69      0.95      0.80       231

    accuracy                           0.67       350
   macro avg       0.56      0.40      0.39       350
weighted avg       0.62      0.67      0.59       350

micro avg                              0.67


In [107]:
# Training-set performance of the model fitted in the previous cell (`modfit`).
# NOTE(review): the saved output shows 1.00 on every training metric versus 0.67
# test accuracy for the same model, i.e. a large train/test gap.
train_pred = modfit.predict(X_train)

print(classification_report(y_train, train_pred))

micro_f1_train = f1_score(y_train, train_pred, average='micro')
print('micro avg                              {}'.format(round(micro_f1_train, 2)))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       116
           1       1.00      1.00      1.00       160
           2       1.00      1.00      1.00       538

    accuracy                           1.00       814
   macro avg       1.00      1.00      1.00       814
weighted avg       1.00      1.00      1.00       814

micro avg                              1.0


### F Test at p <= 0.001 Cutoff

In [108]:
# Feature subset for the "F Test at p <= 0.001 Cutoff" section.
features = [
    'netspeak_focus',
    'active',
    'power_driven',
    'health_oriented',
]

# Restrict both splits to the selected columns. This rebinds the X_train / X_test
# names that the following cells read.
X_train, X_test = train[features], test[features]

In [109]:
# Each estimator is paired with the short label used in the printed report.
for modname, mod in [
    ('ovo_lr', onevsone_lr),
    ('ovo_svc', onevsone_svc),
    ('ovo_rf', onevsone_rf),
    ('ovo_nb', onevsone_nb),
    ('ovr_lr', onevsrest_lr),
    ('ovr_svc', onevsrest_svc),
    ('ovr_rf', onevsrest_rf),
    ('ovr_nb', onevsrest_nb),
    ('xgb', xgb),
]:
    # Mean micro-averaged F1 over 10 cross-validation folds of the training split.
    score = cross_val_score(mod, X_train, y_train, cv=10, scoring='f1_micro')
    print('{}: Cross validation f1 micro is {}.'.format(modname, score.mean()))

    # Fit on the full training split, then evaluate on the held-out test split.
    modfit = mod.fit(X_train, y_train)
    test_pred = modfit.predict(X_test)
    print('{}: Test f1 micro is {}'.format(modname, f1_score(y_test, test_pred, average='micro')))

ovo_lr: Cross validation f1 micro is 0.5526046371574828.
ovo_lr: Test f1 micro is 0.49714285714285716
ovo_svc: Cross validation f1 micro is 0.5452423968684131.
ovo_svc: Test f1 micro is 0.46285714285714286
ovo_rf: Cross validation f1 micro is 0.6535230352303524.
ovo_rf: Test f1 micro is 0.6257142857142857
ovo_nb: Cross validation f1 micro is 0.6607648298705209.
ovo_nb: Test f1 micro is 0.6314285714285715
ovr_lr: Cross validation f1 micro is 0.5710930442637759.
ovr_lr: Test f1 micro is 0.5228571428571429
ovr_svc: Cross validation f1 micro is 0.563745859680819.
ovr_svc: Test f1 micro is 0.5057142857142857
ovr_rf: Cross validation f1 micro is 0.6425624811803674.
ovr_rf: Test f1 micro is 0.62
ovr_nb: Cross validation f1 micro is 0.6607798855766336.
ovr_nb: Test f1 micro is 0.6085714285714285
xgb: Cross validation f1 micro is 0.6645137006925625.
xgb: Test f1 micro is 0.62


In [83]:
# Per-class test report for the one-vs-rest random forest on the current feature subset.
# NOTE(review): this cell's execution count (83) is lower than the preceding cell's (109),
# and its saved report lists 231 samples per class (693 total), whereas the earlier test
# report for the same estimator lists 50/69/231 (350 total). The stored output appears to
# come from a different X_test / y_test; re-run the notebook top to bottom before relying on it.
mod = onevsrest_rf
modfit = mod.fit(X_train, y_train)
test_pred = modfit.predict(X_test)

print(classification_report(y_test, test_pred))

# Extra line with the micro-averaged F1, rounded to two decimals.
micro_f1_test = f1_score(y_test, test_pred, average='micro')
print('micro avg                              {}'.format(round(micro_f1_test, 2)))

              precision    recall  f1-score   support

           0       0.64      0.71      0.67       231
           1       0.67      0.66      0.67       231
           2       0.63      0.56      0.59       231

    accuracy                           0.65       693
   macro avg       0.64      0.65      0.64       693
weighted avg       0.64      0.65      0.64       693

micro avg                              0.65
