In [2]:
import numpy as np
import pandas as pd
import sklearn.metrics as metrics
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import KFold
from sklearn.metrics import recall_score
from sklearn.feature_selection import RFE
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
trainSet  = pd.read_csv('train.csv')
trainingEncoded = pd.get_dummies(trainSet)
x = trainingEncoded.drop(['hand'], axis=1)
y = trainingEncoded['hand']
xTrain, xVal, yTrain, yVal = train_test_split(x, 
                                              y,
                                              test_size=.1,
                                              random_state=12)

In [4]:
pdTraining = pd.DataFrame(data=yTrain, columns=['hand'])
print pdTraining.hand.value_counts()

0    11273
1     9511
2     1080
3      458
4       87
5       48
6       36
7        6
9        5
8        5
Name: hand, dtype: int64


In [5]:
pdTest = pd.DataFrame(data=yVal, columns=['hand'])
print pdTest.hand.value_counts()

0    1220
1    1088
2     126
3      55
5       6
4       6
Name: hand, dtype: int64


In [6]:
rf = RandomForestClassifier(n_estimators=30, random_state=12)
rf.fit(xTrain, yTrain)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=30, n_jobs=1,
            oob_score=False, random_state=12, verbose=0, warm_start=False)

In [7]:
predicted = cross_val_predict(rf, xVal, yVal, cv=5)

pdPredicted = pd.DataFrame(data=predicted, columns=['hand'])
print pdPredicted.hand.value_counts()

0    1489
1    1010
2       2
Name: hand, dtype: int64


In [8]:
print metrics.classification_report(yVal, predicted) 

             precision    recall  f1-score   support

          0       0.53      0.65      0.59      1220
          1       0.48      0.45      0.46      1088
          2       0.00      0.00      0.00       126
          3       0.00      0.00      0.00        55
          4       0.00      0.00      0.00         6
          5       0.00      0.00      0.00         6

avg / total       0.47      0.51      0.49      2501



  'precision', 'predicted', average, warn_for)


In [35]:
model = RandomForestClassifier(n_estimators=30, random_state=12)

In [36]:
rfe = RFE(model, 2)
rfe = rfe.fit(xTrain, yTrain)
# print rfe.support_
# print rfe.ranking_
rfePredicted = rfe.predict(xVal)

pdRfePredicted = pd.DataFrame(data=rfePredicted, columns=['hand'])
print pdRfePredicted.hand.value_counts()

print metrics.classification_report(yVal, pdRfePredicted)

0    2170
1     331
Name: hand, dtype: int64
             precision    recall  f1-score   support

          0       0.51      0.91      0.66      1220
          1       0.56      0.17      0.26      1088
          2       0.00      0.00      0.00       126
          3       0.00      0.00      0.00        55
          4       0.00      0.00      0.00         6
          5       0.00      0.00      0.00         6

avg / total       0.49      0.52      0.43      2501



In [37]:
rfe = RFE(model, 3)
rfe = rfe.fit(xTrain, yTrain)
# print rfe.support_
# print rfe.ranking_
rfePredicted = rfe.predict(xVal)

pdRfePredicted = pd.DataFrame(data=rfePredicted, columns=['hand'])
print pdRfePredicted.hand.value_counts()

print metrics.classification_report(yVal, pdRfePredicted)

0    1628
1     846
2      22
3       5
Name: hand, dtype: int64
             precision    recall  f1-score   support

          0       0.60      0.80      0.68      1220
          1       0.56      0.44      0.49      1088
          2       0.14      0.02      0.04       126
          3       1.00      0.09      0.17        55
          4       0.00      0.00      0.00         6
          5       0.00      0.00      0.00         6

avg / total       0.56      0.58      0.55      2501



In [38]:
rfe = RFE(model, 4)
rfe = rfe.fit(xTrain, yTrain)
# print rfe.support_
# print rfe.ranking_
rfePredicted = rfe.predict(xVal)

pdRfePredicted = pd.DataFrame(data=rfePredicted, columns=['hand'])
print pdRfePredicted.hand.value_counts()

print metrics.classification_report(yVal, pdRfePredicted)

0    1357
1    1047
2      64
3      25
4       5
5       2
6       1
Name: hand, dtype: int64
             precision    recall  f1-score   support

          0       0.69      0.77      0.73      1220
          1       0.60      0.58      0.59      1088
          2       0.27      0.13      0.18       126
          3       0.44      0.20      0.28        55
          4       0.00      0.00      0.00         6
          5       0.00      0.00      0.00         6
          6       0.00      0.00      0.00         0

avg / total       0.62      0.64      0.63      2501



In [39]:
rfe = RFE(model, 5)
rfe = rfe.fit(xTrain, yTrain)
# print rfe.support_
# print rfe.ranking_
rfePredicted = rfe.predict(xVal)

pdRfePredicted = pd.DataFrame(data=rfePredicted, columns=['hand'])
print pdRfePredicted.hand.value_counts()

print metrics.classification_report(yVal, pdRfePredicted)

0    1450
1    1008
2      33
3       7
4       3
Name: hand, dtype: int64
             precision    recall  f1-score   support

          0       0.70      0.83      0.76      1220
          1       0.65      0.61      0.63      1088
          2       0.45      0.12      0.19       126
          3       0.71      0.09      0.16        55
          4       0.00      0.00      0.00         6
          5       0.00      0.00      0.00         6

avg / total       0.66      0.68      0.66      2501



In [40]:
rfe = RFE(model, 6)
rfe = rfe.fit(xTrain, yTrain)
# print rfe.support_
# print rfe.ranking_
rfePredicted = rfe.predict(xVal)

pdRfePredicted = pd.DataFrame(data=rfePredicted, columns=['hand'])
print pdRfePredicted.hand.value_counts()

print metrics.classification_report(yVal, pdRfePredicted)

0    1441
1    1038
2      18
3       3
4       1
Name: hand, dtype: int64
             precision    recall  f1-score   support

          0       0.68      0.80      0.73      1220
          1       0.62      0.59      0.60      1088
          2       0.44      0.06      0.11       126
          3       1.00      0.05      0.10        55
          4       0.00      0.00      0.00         6
          5       0.00      0.00      0.00         6

avg / total       0.64      0.65      0.63      2501



In [41]:
rfe = RFE(model, 7)
rfe = rfe.fit(xTrain, yTrain)
# print rfe.support_
# print rfe.ranking_
rfePredicted = rfe.predict(xVal)

pdRfePredicted = pd.DataFrame(data=rfePredicted, columns=['hand'])
print pdRfePredicted.hand.value_counts()

print metrics.classification_report(yVal, pdRfePredicted)

0    1475
1    1018
2       6
3       2
Name: hand, dtype: int64
             precision    recall  f1-score   support

          0       0.65      0.78      0.71      1220
          1       0.59      0.55      0.57      1088
          2       0.50      0.02      0.05       126
          3       1.00      0.04      0.07        55
          4       0.00      0.00      0.00         6
          5       0.00      0.00      0.00         6

avg / total       0.62      0.62      0.60      2501



In [42]:
rfe = RFE(model, 8)
rfe = rfe.fit(xTrain, yTrain)
# print rfe.support_
# print rfe.ranking_
rfePredicted = rfe.predict(xVal)

pdRfePredicted = pd.DataFrame(data=rfePredicted, columns=['hand'])
print pdRfePredicted.hand.value_counts()

print metrics.classification_report(yVal, pdRfePredicted)

0    1539
1     958
3       2
2       2
Name: hand, dtype: int64
             precision    recall  f1-score   support

          0       0.60      0.75      0.67      1220
          1       0.54      0.48      0.51      1088
          2       0.50      0.01      0.02       126
          3       0.50      0.02      0.04        55
          4       0.00      0.00      0.00         6
          5       0.00      0.00      0.00         6

avg / total       0.56      0.57      0.55      2501



In [43]:
rfe = RFE(model, 9)
rfe = rfe.fit(xTrain, yTrain)
# print rfe.support_
# print rfe.ranking_
rfePredicted = rfe.predict(xVal)

pdRfePredicted = pd.DataFrame(data=rfePredicted, columns=['hand'])
print pdRfePredicted.hand.value_counts()

print metrics.classification_report(yVal, pdRfePredicted)

0    1534
1     962
2       4
3       1
Name: hand, dtype: int64
             precision    recall  f1-score   support

          0       0.62      0.78      0.69      1220
          1       0.57      0.50      0.53      1088
          2       0.50      0.02      0.03       126
          3       1.00      0.02      0.04        55
          4       0.00      0.00      0.00         6
          5       0.00      0.00      0.00         6

avg / total       0.60      0.60      0.57      2501



In [44]:
rfe = RFE(model, 10)
rfe = rfe.fit(xTrain, yTrain)
# print rfe.support_
# print rfe.ranking_
rfePredicted = rfe.predict(xVal)

pdRfePredicted = pd.DataFrame(data=rfePredicted, columns=['hand'])
print pdRfePredicted.hand.value_counts()

print metrics.classification_report(yVal, pdRfePredicted)

0    1544
1     951
2       5
3       1
Name: hand, dtype: int64
             precision    recall  f1-score   support

          0       0.60      0.75      0.67      1220
          1       0.55      0.48      0.52      1088
          2       0.60      0.02      0.05       126
          3       1.00      0.02      0.04        55
          4       0.00      0.00      0.00         6
          5       0.00      0.00      0.00         6

avg / total       0.58      0.58      0.55      2501

