In [5]:
# load stuff we will need
import pandas as pd
import kinecture
import math


In [175]:
# When we make changes to modules, automatically reload the modules
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Load data and generate features
1. Load data, clean, make features.
2. Convert data to a form sklearn understands: X (observations), y (labels)

In [110]:
# load data, generate features
xls = pd.ExcelFile('data/allData-filtered.xlsx')
train_dataframe = xls.parse('filtered-noFormulas')
test09_dataframe = xls.parse('09test')
test10_dataframe = xls.parse('10test')
test11_dataframe = xls.parse('11test')

print("train instances:", len(train_dataframe))
print("test09 instances:", len(test09_dataframe))
print("test10 instances:", len(test10_dataframe))
print("test11 instances:", len(test11_dataframe))

train instances: 8906
test09 instances: 1108
test10 instances: 1068
test11 instances: 1149


In [223]:
# Generate features (determined from data_exploration)
train_features = kinecture.gen_features(train_dataframe)
test09_features = kinecture.gen_features(test09_dataframe)
test10_features = kinecture.gen_features(test10_dataframe)
test11_features = kinecture.gen_features(test11_dataframe)

test09_features['Truth'] = kinecture.remap_labels({"SI-FIX": "SI"}, 
                                                 test09_features['Truth'])
test10_features['Truth'] = kinecture.remap_labels({"SI-FIX": "SI"}, 
                                                 test10_features['Truth'])
test11_features['Truth'] = kinecture.remap_labels({"SI-FIX": "SI"}, 
                                                 test11_features['Truth'])

In [224]:
print(train_features.columns)
print("{} features".format(len(train_features.columns)))

Index(['duration', 'angleLeft', 'angleRight', 'confidenceLeft',
       'confidenceRight', 'loudnessLeft', 'loudnessRight', 'silenceLeft',
       'silenceRight', 'speakerX', 'speakerY', 'confLangle', 'confRangle',
       'pLeft', 'pRight', 'qLeft', 'qRight', 'loudnessAve', 'loudnessDiff',
       'pLeftZ', 'pRightZ', 'qLeftZ', 'qRightZ', 'logLoud', 'logLoudDiff',
       'logLoudZ', 'logLoudDiffZ', 'logLoudnessLeft', 'logLoudnessRight',
       'averageLogLoudness', 'silenceLeftAndRight', 'silenceLeftOrRight',
       'speakerXYNorm', 'speakerXIs0', 'sinAngleLeft', 'sinAngleRight',
       'Truth'],
      dtype='object')
37 features


In [225]:
# convert data into a form sklearn likes
train_sklearn = kinecture.convert_features_for_sklearn(train_features)
test09_sklearn = kinecture.convert_features_for_sklearn(test09_features)
test10_sklearn = kinecture.convert_features_for_sklearn(test10_features)
test11_sklearn = kinecture.convert_features_for_sklearn(test11_features)

testall_sklearn = test09_sklearn.append(test10_sklearn).append(test11_sklearn)

### Build a linear classifier
First I start by building a classifier using a simple linear decision boundary. 

I adjusted the importance of each class to be inversely proportional to the class frequency (`class_weight: auto`). This ensures that recall for all classes will be roughly equal, even though there are many more TA and SI instances than S instances. I could also set just the S class to be more important by setting `class_weights={0:3}`, this will make class 0 (S) 3 times more important than SI or TA.

Results for all test sessions are as follows:


             S   SI   TA
        S  669  232  147
       SI  300  624  195
       TA  139  397  622

             precision    recall  f1-score   support

          S       0.60      0.64      0.62      1048
         SI       0.50      0.56      0.53      1119
         TA       0.65      0.54      0.59      1158
      total       0.58      0.58      0.58      3325

We see an average recall of 58% which is not good, suggesting better features are needed.

In [226]:
from sklearn import svm
from sklearn.grid_search import GridSearchCV

# use grid search to find best hyperparameters
# Set the parameters by cross-validation
tuned_parameters = [{'C': [0.1, 1, 10]}]

# build LinearSVC classifier using training data
classifier = GridSearchCV(svm.LinearSVC(class_weight='auto'), tuned_parameters)
classifier.fit(train_sklearn.X, train_sklearn.y)

print("Best params for LinearSVC")
print(classifier.best_params_)
print()

train_predictions = classifier.predict(train_sklearn.X)
kinecture.report_accuracy(train_predictions, train_sklearn.y, header="results for train")

test09_predictions = classifier.predict(test09_sklearn.X)
kinecture.report_accuracy(test09_predictions, test09_sklearn.y, header="results for test09")

test10_predictions = classifier.predict(test10_sklearn.X)
kinecture.report_accuracy(test10_predictions, test10_sklearn.y, header="results for test10")

test11_predictions = classifier.predict(test11_sklearn.X)
kinecture.report_accuracy(test11_predictions, test11_sklearn.y, header="results for test11")

testall_predictions = classifier.predict(testall_sklearn.X)
kinecture.report_accuracy(testall_predictions, testall_sklearn.y, header="results for testall")



Best params for LinearSVC
{'C': 1}

results for train
             S   SI   TA
        S  627  457  362
       SI  392 1909  932
       TA  206  844 3177

             precision    recall  f1-score   support

          S       0.51      0.43      0.47      1446
         SI       0.59      0.59      0.59      3233
         TA       0.71      0.75      0.73      4227

avg / total       0.64      0.64      0.64      8906

results for test09
             S   SI   TA
        S  152   29   89
       SI   61  146  132
       TA   48  195  256

             precision    recall  f1-score   support

          S       0.58      0.56      0.57       270
         SI       0.39      0.43      0.41       339
         TA       0.54      0.51      0.52       499

avg / total       0.50      0.50      0.50      1108

results for test10
             S   SI   TA
        S  218   12    3
       SI  118  299   56
       TA   22  103  237

             precision    recall  f1-score   support

          S    

### Build a classifier with nonlinear kernels
Since we observed that, at least for pairs of variables, features were not linearly seperable, a more complex decision boundary may perform better.

Results for the test were as follows:

             S   SI   TA
        S  545  350  153
       SI  219  711  189
       TA  162  380  616

             precision    recall  f1-score   support

          S       0.59      0.52      0.55      1048
         SI       0.49      0.64      0.56      1119
         TA       0.64      0.53      0.58      1158

      total       0.58      0.56      0.56      3325
      
With the nonlinear classifier we see slightly lower recall, suggesting some overfitting to the training data.

Overall, my suggestion would be to focus on improving the features, since increasing the model complexity does not seem to be helping.

In [227]:
from sklearn import svm

# use grid search to find best hyperparameters
# Set the parameters by cross-validation
tuned_parameters = [{'C': [0.1, 0.5, 1], 'gamma': [10, 20, 30]}]

classifier = GridSearchCV(svm.SVC(class_weight='auto'), tuned_parameters)
classifier.fit(train_sklearn.X, train_sklearn.y)

print("Best params for SVC with rbf kernel")
print(classifier.best_params_)
print()

train_predictions = classifier.predict(train_sklearn.X)
kinecture.report_accuracy(train_predictions, train_sklearn.y, header="train")

test09_predictions = classifier.predict(test09_sklearn.X)
kinecture.report_accuracy(test09_predictions, test09_sklearn.y, header="test09")

test10_predictions = classifier.predict(test10_sklearn.X)
kinecture.report_accuracy(test10_predictions, test10_sklearn.y, header="test10")

test11_predictions = classifier.predict(test11_sklearn.X)
kinecture.report_accuracy(test11_predictions, test11_sklearn.y, header="test11")

testall_predictions = classifier.predict(testall_sklearn.X)
kinecture.report_accuracy(testall_predictions, testall_sklearn.y, header="testall")



Best params for LinearSVC
{'C': 0.1, 'gamma': 10}

train
             S   SI   TA
        S  676  460  310
       SI  450 1936  847
       TA  360  913 2954

             precision    recall  f1-score   support

          S       0.45      0.47      0.46      1446
         SI       0.59      0.60      0.59      3233
         TA       0.72      0.70      0.71      4227

avg / total       0.63      0.62      0.63      8906

test09
             S   SI   TA
        S  153   31   86
       SI   61  148  130
       TA   48  196  255

             precision    recall  f1-score   support

          S       0.58      0.57      0.58       270
         SI       0.39      0.44      0.41       339
         TA       0.54      0.51      0.53       499

avg / total       0.51      0.50      0.50      1108

test10
             S   SI   TA
        S  219   12    2
       SI  118  303   52
       TA   37   93  232

             precision    recall  f1-score   support

          S       0.59      0.94    