# Random Forest Classifier for Morris Louis Art

In [33]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import sklearn
import pickle

## Load the Morris Louis metrics

In [34]:
data = pd.DataFrame.from_csv('../../data/data.csv')
data = data[['period','shannon_entropy','mean_color_r','luminance','contrast', 'contour']]
data.head()

Unnamed: 0,period,shannon_entropy,mean_color_r,luminance,contrast,contour
0,themes_variations_59_60,7.683989,111.635539,104.752119,0.913992,0.001029
1,themes_variations_59_60,7.577162,179.38099,147.490592,0.691603,5.1e-05
2,themes_variations_59_60,7.579554,162.274002,126.479898,0.834955,0.000693
3,themes_variations_59_60,7.387632,103.497807,113.420688,0.848749,5.4e-05
4,themes_variations_59_60,7.853511,154.675751,127.536419,0.904835,0.001158


In [35]:
# Split into train and test data
features = data[['shannon_entropy','mean_color_r','luminance','contrast', 'contour']]
target = data['period'].astype(str)
features_train, features_test, target_train, target_test = train_test_split(features, target, random_state=41)

In [36]:
features.columns.tolist()

['shannon_entropy', 'mean_color_r', 'luminance', 'contrast', 'contour']

## Create the Random Forest Classifier and train the model

In [37]:
# Create a random forest classifier
rf = RandomForestClassifier(n_estimators=200)
rf = rf.fit(features_train, target_train)
rf.score(features_train, target_train)

1.0

In [38]:
# Random Forests in sklearn will automatically calculate feature importance
importances = rf.feature_importances_
importances

array([0.18775708, 0.2312438 , 0.25766617, 0.15228853, 0.17104442])

In [39]:
# We can sort the features by their importance
sorted(zip(rf.feature_importances_, features.columns.tolist()), reverse=True)

[(0.25766617087076754, 'luminance'),
 (0.23124379682417862, 'mean_color_r'),
 (0.1877570826351552, 'shannon_entropy'),
 (0.17104442250653495, 'contour'),
 (0.15228852716336425, 'contrast')]

## Predict and save the model

In [40]:
predicted = rf.predict(features_test)

In [41]:
rf.score(features_test, target_test)

0.6204819277108434

In [42]:
import sklearn
sklearn.metrics.accuracy_score(target_test,predicted)

0.6204819277108434

In [43]:
actuals = target_test.tolist()
data_test = pd.DataFrame(data = {"actual": actuals, "predicted": predicted})
data_test["compare"] = data_test["actual"] == data_test["predicted"]
data_test

Unnamed: 0,actual,predicted,compare
0,early_paintings_34_53,stripe_paintings_61_62,False
1,abstract_expressionist_55_57,veil_paintings_58_59,False
2,themes_variations_59_60,stripe_paintings_61_62,False
3,veil_paintings_58_59,veil_paintings_58_59,True
4,stripe_paintings_61_62,unfurled_paintings_60_61,False
5,early_paintings_34_53,veil_paintings_58_59,False
6,stripe_paintings_61_62,stripe_paintings_61_62,True
7,unfurled_paintings_60_61,unfurled_paintings_60_61,True
8,unfurled_paintings_60_61,unfurled_paintings_60_61,True
9,themes_variations_59_60,themes_variations_59_60,True


In [44]:
# Save the model to a file
filename = "MorrisRandomForestModel.pkl"
pickle.dump(rf, open(filename, "wb"))

In [45]:
# load the model from disk
loaded_rf = pickle.load(open(filename, "rb"))

In [46]:
loaded_rf.score(features_test, target_test)

0.6204819277108434