# Random Forest Classifier for Morris Louis Art

In [47]:
import pickle

import numpy as np
import pandas as pd
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

## Load the Morris Louis metrics

In [85]:
# Read the per-painting metrics. The first CSV column holds the row index.
# pd.DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0;
# pd.read_csv with index_col=0 is the supported replacement.
data = pd.read_csv('../../data/data.csv', index_col=0)
# Keep only the label column and the five metrics used as model features.
data = data[['period','shannon_entropy','mean_color_r','luminance','contrast', 'contour']]
data.head()

Unnamed: 0,period,shannon_entropy,mean_color_r,luminance,contrast,contour
1,themes_variations_59_60,7.577162,179.38099,147.490592,0.691603,5.1e-05
2,themes_variations_59_60,7.579554,162.274002,126.479898,0.834955,0.000693
3,themes_variations_59_60,7.387632,103.497806,113.420688,0.848749,5.4e-05
4,themes_variations_59_60,7.853511,154.675751,127.536419,0.904835,0.001158
5,themes_variations_59_60,7.246964,95.234639,84.107295,0.995782,0.005167


In [86]:
# Separate the predictors from the label, then hold out a test partition.
# random_state is fixed so the same rows land in each partition on every run.
features = data.loc[:, ['shannon_entropy', 'mean_color_r', 'luminance', 'contrast', 'contour']]
target = data['period'].astype(str)
features_train, features_test, target_train, target_test = train_test_split(
    features,
    target,
    random_state=41,
)

In [87]:
# Show the feature names in the column order the model will be trained on.
list(features.columns)

['shannon_entropy', 'mean_color_r', 'luminance', 'contrast', 'contour']

## Create the Random Forest Classifier and train the model

In [88]:
# Create a random forest classifier.
# random_state pins the bootstrap sampling and per-split feature sampling, so
# re-running this cell rebuilds the same forest and the scores reported further
# down stay consistent from run to run.
rf = RandomForestClassifier(n_estimators=200, random_state=41)
rf = rf.fit(features_train, target_train)
# Accuracy on the training data itself; compare with the held-out test score
# later in the notebook before drawing conclusions about model quality.
rf.score(features_train, target_train)

1.0

In [89]:
# A fitted sklearn random forest exposes per-feature importances via
# `feature_importances_`: one value per training column, in column order.
importances = rf.feature_importances_
importances

array([0.19582898, 0.23009118, 0.27016906, 0.13569902, 0.16821177])

In [90]:
# Pair each importance with its feature name and list them from most to least important.
sorted(
    zip(rf.feature_importances_, list(features.columns)),
    reverse=True,
)

[(0.2701690577099464, 'luminance'),
 (0.23009117665021467, 'mean_color_r'),
 (0.19582897950770853, 'shannon_entropy'),
 (0.16821176689517178, 'contour'),
 (0.135699019236959, 'contrast')]

## Predict and save the model

In [91]:
# Predict a period label for every held-out painting.
predicted = rf.predict(X=features_test)

In [92]:
# Mean accuracy of the fitted forest on the held-out test partition.
rf.score(X=features_test, y=target_test)

0.5515151515151515

In [56]:
# Cross-check the test accuracy with the standalone metric function, using the
# predictions computed earlier. `accuracy_score` is imported explicitly in the
# notebook's import cell rather than reached through `sklearn.metrics`, which
# only resolves if some other import happened to load that submodule.
accuracy_score(target_test, predicted)

0.593939393939394

In [57]:
# Build a side-by-side table of true vs. predicted periods, plus a boolean
# column flagging the rows where the model was right.
actuals = list(target_test)
data_test = pd.DataFrame({"actual": actuals, "predicted": predicted})
data_test = data_test.assign(
    compare=data_test["actual"].eq(data_test["predicted"])
)
data_test

Unnamed: 0,actual,predicted,compare
0,abstract_expressionist_55_57,early_paintings_34_53,False
1,unfurled_paintings_60_61,stripe_paintings_61_62,False
2,veil_paintings_54,stripe_paintings_61_62,False
3,veil_paintings_58_59,themes_variations_59_60,False
4,unfurled_paintings_60_61,unfurled_paintings_60_61,True
5,early_paintings_34_53,early_paintings_34_53,True
6,veil_paintings_54,themes_variations_59_60,False
7,veil_paintings_58_59,veil_paintings_58_59,True
8,veil_paintings_58_59,veil_paintings_58_59,True
9,stripe_paintings_61_62,stripe_paintings_61_62,True


In [58]:
# Save the fitted model to a file.
filename = "MorrisRandomForestModel.pkl"
# The context manager guarantees the file is flushed and closed even if
# pickling raises part-way through.
with open(filename, "wb") as model_file:
    pickle.dump(rf, model_file)

In [93]:
# Load the model back from disk.
# NOTE: unpickling can execute arbitrary code, so only load pickle files from a
# trusted source (here, the file written by this notebook's save step).
with open(filename, "rb") as model_file:
    loaded_rf = pickle.load(model_file)

In [94]:
# Confirm the reloaded model scores the held-out test set as expected.
loaded_rf.score(X=features_test, y=target_test)

0.593939393939394