In [1]:
# Load the image dataset through the project-local helper module.
from image_data_process import load_image_data

# NOTE(review): `data` and `labels` are later indexed with integer index arrays
# (data[train_indices], labels[train_indices]), so they are presumably NumPy
# arrays of equal length -- confirm against load_image_data.
data, labels = load_image_data()

In [2]:
import numpy as np

# Random train/test split over sample indices.
#
# A dedicated, seeded RandomState is used instead of the global NumPy RNG so
# that the split (and therefore every metric computed below) is identical on
# each "Restart Kernel -> Run All", without touching global random state.
SPLIT_SEED = 42

# Fraction of the samples held out for testing.
# NOTE(review): 0.6 holds out MORE data than is used for training (40%), which
# is unusual -- confirm this is intentional.
TEST_RATIO = 0.6

# Both containers are indexed with the same indices below; a length mismatch
# would either raise later or silently misalign samples and labels.
assert len(data) == len(labels), "data and labels must have the same length"

split_rng = np.random.RandomState(SPLIT_SEED)
shuffled_indices = split_rng.permutation(len(data))

# The first `test_set_size` shuffled indices form the test set, the rest train.
test_set_size = int(len(data) * TEST_RATIO)
test_indices = shuffled_indices[:test_set_size]
train_indices = shuffled_indices[test_set_size:]

X_train = data[train_indices]
y_train = labels[train_indices]

X_test = data[test_indices]
y_test = labels[test_indices]

In [3]:
from sklearn.preprocessing import StandardScaler

# Standardise the training features. The scaler is fitted on the training
# split only and kept in `scaler` so the same statistics can be re-applied to
# other data later on.
scaler = StandardScaler()

# Cast once to float and reuse the result for both fitting and transforming.
X_train_float = X_train.astype(float)
X_train_prepared = scaler.fit(X_train_float).transform(X_train_float)

# Display the scaled feature matrix as the cell output.
X_train_prepared

array([[ 0.78806648,  0.87490206, -0.96712742, ...,  0.02893678,
         0.45976935, -0.56832841],
       [ 1.0256254 ,  0.44391721, -0.96712742, ...,  2.31019388,
        -1.29571362,  0.73252632],
       [-0.20176237, -1.20819139,  0.69562808, ...,  0.36075599,
         0.3134791 , -0.30815747],
       ..., 
       [ 0.80786306,  0.65940963, -0.13574967, ...,  0.15336898,
        -3.05119658,  3.98466314],
       [-1.07281177,  0.3720864 , -0.75928298, ..., -2.52192344,
         1.48380108, -0.95858483],
       [ 0.5109144 ,  1.01856368, -0.75928298, ...,  0.5266656 ,
        -0.12539164, -0.04798652]])

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict

# Pin the forest's random seed: without it, every run draws different
# bootstrap samples / feature subsets, so the confusion matrix and scores
# below would change between executions of the notebook.
model = RandomForestClassifier(random_state=42)

# 3-fold cross-validated predictions on the training set: each sample is
# predicted by a model that was trained on the other folds.
y_train_pred = cross_val_predict(model, X_train_prepared, y_train, cv=3)

# Confusion matrix of true training labels (rows) vs. cross-validated
# predictions (columns); shown as the cell output.
conf_mx = confusion_matrix(y_train, y_train_pred)
conf_mx

array([[337,   0,   2],
       [  1, 336,   3],
       [  0,   3, 340]])

In [5]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Per-class precision, recall and F1 (average=None keeps one value per class)
# for the cross-validated training predictions, printed one metric per line.
for metric in (precision_score, recall_score, f1_score):
    print(metric(y_train, y_train_pred, average=None))

[ 0.99704142  0.99115044  0.98550725]
[ 0.99410029  0.98823529  0.99125364]
[ 0.99556869  0.98969072  0.98837209]


In [6]:
# Train the classifier on the full (scaled) training set.
model.fit(X_train_prepared, y_train)

# Scale the held-out features with the already-fitted scaler (transform only,
# no re-fitting), then predict their labels.
X_test_prepared = scaler.transform(X_test.astype(float))
y_test_pred = model.predict(X_test_prepared)

# Confusion matrix on the test set; shown as the cell output.
test_conf_mx = confusion_matrix(y_test, y_test_pred)
test_conf_mx

array([[506,   0,   0],
       [  3, 530,   1],
       [  0,   2, 490]])

In [7]:
# Per-class precision, recall and F1 on the test-set predictions, each line
# prefixed with the metric's label.
labelled_metrics = (
    ('Precision:\t', precision_score),
    ('Recall:\t', recall_score),
    ('F1 score:\t', f1_score),
)
for label, metric in labelled_metrics:
    print(label, metric(y_test, y_test_pred, average=None))

Precision:	 [ 0.99410609  0.9962406   0.99796334]
Recall:	 [ 1.          0.99250936  0.99593496]
F1 score:	 [ 0.99704433  0.99437148  0.99694812]
