In [64]:
import pandas as pd
from efar import assessGeneralizability
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score, precision_score,recall_score, accuracy_score
from sklearn.metrics import confusion_matrix

## Loading dataset for modeling
First we will select features using domain knowledge and literature in the field of MMLA.
Features:
* mouth area 
* head pose
* basic emotion
* action units
* etherpad logs stats



In [41]:
X = pd.read_csv('X_proper_dataset_38_43_44_45.csv')
y = pd.read_csv('y_proper_dataset_38_43_44_45.csv')

In [46]:
ada = AdaBoostClassifier(n_estimators=150)
X_train,X_test,y_train,y_test = train_test_split(X,y['ARG_bi'])

In [47]:
ada.fit(X_train,y_train)

AdaBoostClassifier(n_estimators=150)

In [58]:
x = cohen_kappa_score(ada.predict(X_test),y_test)

In [59]:
x

0.46038454669920836

In [78]:
def tuneParameters(estimator,search_params,X,y):
    split = StratifiedShuffleSplit(n_splits=5).split(X_train,y_train)
    rcv = GridSearchCV(estimator,search_params,cv=split)
    rcv.fit(X,y)
    return rcv.best_estimator_,rcv.best_params_

def classificationMetrics(estimator,X,y):
    results = {}
    y_pred = estimator.predict(X)
    print(confusion_matrix(y,y_pred).ravel())
    
    tn, fp, fn, tp = confusion_matrix(y,y_pred).ravel()  
    
    results['precision'] = precision_score(y,y_pred)
    results['recall'] = recall_score(y,y_pred)
    results['cohen_kappa_score'] = cohen_kappa_score(y,y_pred)
    results['precision_neg'] = tn /(tn + fn)
    results['recall_neg'] = tn /(tn+fp) 
    results['accuracy'] = accuracy_score(y,y_pred)
    return results

def printClassificationMetrics(metrics):
    print('  Across folds performance')
    for metric in metrics.keys():
        print('  %s : %.2f (%.2f)' % (metric,np.mean(metrics[metric]),np.sd(metrics[metric])))

In [70]:
def assessGen(estimator,X,y,search_params,level,verbose,**kwargs):
    # to store metric score
    train_scores = {}
    test_scores={}
    
    fold = 1
    
    # metric to compute
    metrics = ['precision','recall','cohen_kappa_score','precision_neg','recall_neg','accuracy']
    
    # initialize with empty list
    for metric in metrics:
        train_scores[metric] = list()
        test_scores[metric] = list()
    
    if level == 'instance':
        splits = StratifiedShuffleSplit(n_splits=10).split(X,y)
    else:
        splits = LeaveOneGroupOut().split(X,y,groups=kwargs['groups'])
    
    for train,test in splits:
        X_train,y_train = X.iloc[train,:],y[train]
        X_test,y_test = X.iloc[test,:],y[test]

        print(X_train.shape,y_train.shape)
    
        estimator.fit(X_train,y_train)
    
        scores_1 = classificationMetrics(estimator,X_train,y_train)
        scores_2 = classificationMetrics(estimator,X_test,y_test)
        
        for metric in metrics:
            train_scores[metric].append(scores_1[metric])
            test_scores[metric].append(scores_2[metric])
            
            
        if verbose:
            print('  ------Upper Split No.%d -------' % (fold))
            print('  Train data: %s Test data: %s' % (len(train),len(test)))
            print('  Train kappa:', scores_1['kappa'],'recall_neg:',scores_1['recall_neg'])
            print('  Test kappa:', scores_2['kappa'],'recall_neg:',scores_2['recall_neg'])
            
        
        
        fold += 1
        
    
    printClassificationMetrics(train_scores)
    printClassificationMetrics(test_scores)
    
    return train_scores,test_scores
    
    

In [79]:
ada = AdaBoostClassifier(n_estimators=200)
assessGen(ada,X,y['ARG'],{},'instance',True)

[  3   0   0   0   0   0   7  18   4   0   0  16 250  34  44   0   4  51
  31  30   0   2  54  50  65]


ValueError: too many values to unpack (expected 4)