In [59]:
import numpy as np
import pandas as pd
import sklearn.metrics as metrics
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC

In [2]:
trainSet  = pd.read_csv('train.csv')
trainingEncoded = pd.get_dummies(trainSet)
x = trainingEncoded.drop(['hand'], axis=1)
y = trainingEncoded['hand']

xTrain, xVal, yTrain, yVal = train_test_split(x, 
                                              y,
                                              test_size=.1,
                                              random_state=12)

In [3]:
testY = pd.DataFrame(data=yVal, columns=['hand'])
print testY.hand.value_counts()

0    1220
1    1088
2     126
3      55
5       6
4       6
Name: hand, dtype: int64


In [39]:
clfRf = RandomForestClassifier(n_estimators=10, random_state=12)
clfRf.fit(xTrain, yTrain)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=12, verbose=0, warm_start=False)

In [40]:
rfPredicted = clfRf.predict(xVal)

In [41]:
print metrics.classification_report(yVal, rfPredicted)

             precision    recall  f1-score   support

          0       0.57      0.72      0.64      1220
          1       0.52      0.45      0.48      1088
          2       0.55      0.05      0.09       126
          3       1.00      0.05      0.10        55
          4       0.00      0.00      0.00         6
          5       0.00      0.00      0.00         6

avg / total       0.55      0.55      0.53      2501



In [12]:
mlpClf = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1, max_iter=2000)
mlpClf.fit(xTrain, yTrain)

MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(5, 2), learning_rate='constant',
       learning_rate_init=0.001, max_iter=2000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
       solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)

In [14]:
mlpPredicted = mlpClf.predict(xVal)

In [15]:
print metrics.classification_report(yVal, mlpPredicted)

             precision    recall  f1-score   support

          0       0.53      0.96      0.68      1220
          1       0.60      0.17      0.26      1088
          2       0.00      0.00      0.00       126
          3       0.00      0.00      0.00        55
          4       0.00      0.00      0.00         6
          5       0.00      0.00      0.00         6

avg / total       0.52      0.54      0.45      2501



In [25]:
svcClf = SVC()
svcClf.fit(xTrain,yTrain)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [26]:
svcPredicted = svcClf.predict(xVal)

In [28]:
print metrics.classification_report(yVal, svcPredicted)

             precision    recall  f1-score   support

          0       0.60      0.72      0.65      1220
          1       0.54      0.51      0.52      1088
          2       1.00      0.02      0.03       126
          3       0.00      0.00      0.00        55
          4       0.00      0.00      0.00         6
          5       0.00      0.00      0.00         6

avg / total       0.58      0.57      0.55      2501



In [32]:
abClf = AdaBoostClassifier()
abClf.fit(xTrain, yTrain)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=50, random_state=None)

In [33]:
abPredicted = abClf.predict(xVal)

In [35]:
print metrics.classification_report(yVal, abPredicted)

             precision    recall  f1-score   support

          0       0.49      0.98      0.65      1220
          1       0.00      0.00      0.00      1088
          2       0.00      0.00      0.00       126
          3       0.00      0.00      0.00        55
          4       0.00      0.00      0.00         6
          5       0.00      0.00      0.00         6
          9       0.00      0.00      0.00         0

avg / total       0.24      0.48      0.32      2501



  'recall', 'true', average, warn_for)


In [43]:
bgClf = BaggingClassifier()
bgClf.fit(xTrain, yTrain)

BaggingClassifier(base_estimator=None, bootstrap=True,
         bootstrap_features=False, max_features=1.0, max_samples=1.0,
         n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
         verbose=0, warm_start=False)

In [44]:
bgPredicted = bgClf.predict(xVal)

In [45]:
print metrics.classification_report(yVal, bgPredicted)

             precision    recall  f1-score   support

          0       0.60      0.75      0.66      1220
          1       0.54      0.48      0.51      1088
          2       0.25      0.02      0.04       126
          3       0.50      0.02      0.04        55
          4       0.00      0.00      0.00         6
          5       0.00      0.00      0.00         6

avg / total       0.55      0.57      0.55      2501



In [47]:
etClf = ExtraTreesClassifier()
etClf.fit(xTrain, yTrain)

ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
           oob_score=False, random_state=None, verbose=0, warm_start=False)

In [48]:
etPredicted = etClf.predict(xVal)

In [49]:
print metrics.classification_report(yVal, etPredicted)

             precision    recall  f1-score   support

          0       0.56      0.72      0.63      1220
          1       0.52      0.44      0.47      1088
          2       0.12      0.01      0.01       126
          3       0.50      0.02      0.04        55
          4       0.00      0.00      0.00         6
          5       1.00      0.17      0.29         6

avg / total       0.52      0.54      0.52      2501



In [58]:
gbcClf = GradientBoostingClassifier()
gbcClf.fit(xTrain, yTrain)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              presort='auto', random_state=None, subsample=1.0, verbose=0,
              warm_start=False)

In [60]:
gbcPredicted = gbcClf.predict(xVal)

In [62]:
print metrics.classification_report(yVal, gbcPredicted)

             precision    recall  f1-score   support

          0       0.61      0.84      0.71      1220
          1       0.59      0.45      0.51      1088
          2       0.00      0.00      0.00       126
          3       0.67      0.04      0.07        55
          4       0.00      0.00      0.00         6
          5       0.00      0.00      0.00         6

avg / total       0.57      0.60      0.57      2501

