# Setup

In [None]:
!pip install pydot

In [None]:
!pip install graphviz

In [None]:
import os
from skimage.measure import regionprops, label
from skimage.io import imread, imshow
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import export_graphviz
from six import StringIO
from IPython.display import Image
import seaborn as sns

In [None]:
# Mount Google Drive at /content/drive (Colab-only module).
# NOTE(review): the dataset path used further down is relative, so the
# working directory presumably has to point inside the mounted drive — confirm.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
def loadImages(path, subfolder, extension='.bmp'):
    """Return the sorted paths of the image files found in ``path/subfolder``.

    The directory is scanned non-recursively and only entries whose name ends
    with ``extension`` are kept.

    Args:
        path: Root directory of the dataset.
        subfolder: Name of the directory inside ``path`` to scan.
        extension: File-name suffix to keep (case-sensitive). A tuple of
            suffixes is accepted as well, as with ``str.endswith``.
            Defaults to ``'.bmp'``.

    Returns:
        A sorted list of ``os.path.join(path, subfolder, name)`` strings.

    Raises:
        OSError: If the directory does not exist or cannot be listed.
    """
    # Build the folder once with os.path.join so the directory that is listed
    # is always the same one used to build the returned paths (manual "/"
    # concatenation diverges from os.path.join when ``path`` is empty).
    folder = os.path.join(path, subfolder)
    return sorted(os.path.join(folder, name)
                  for name in os.listdir(folder)
                  if name.endswith(extension))

# Preparing the Data

In [None]:
# Root folder of the binarised dataset; it holds one sub-folder per class.
image_path = 'Dataset Filters/BIN/'

# Sorted .bmp paths per class folder: '0' -> fp_bin, '1' -> vp_bin.
fp_bin, vp_bin = (loadImages(image_path, class_dir) for class_dir in ('0', '1'))

In [None]:
def _extract_region_features(image_files, class_label, min_area=25, max_area=900):
    """Collect one feature row per qualifying connected region.

    Args:
        image_files: Iterable of image paths to process.
        class_label: Value stored in the last position of every row.
        min_area: Exclusive lower bound on the region area.
        max_area: Exclusive upper bound on the region area.

    Returns:
        A list of 12-element rows:
        [area, major axis length, minor axis length, eccentricity,
         hu_0 .. hu_6, class_label].
    """
    rows = []
    for image_file in image_files:
        # The previous code passed cv2.COLOR_BGR2GRAY (the integer 6) as the
        # second positional argument, which skimage's imread receives as its
        # `as_gray` flag. Spelling it out keeps the same behaviour without
        # suggesting an OpenCV conversion code is being applied.
        image = imread(image_file, as_gray=True)

        # 8-connected labelling of the image.
        labelled = label(image, connectivity=2)

        for props in regionprops(labelled):
            if min_area < props.area < max_area:
                rows.append([
                    props.area,
                    props.major_axis_length,
                    props.minor_axis_length,
                    props.eccentricity,
                    *props.moments_hu,  # the seven Hu moments
                    class_label,
                ])
    return rows


# Images from folder '0' get class 0, images from folder '1' get class 1.
# Rows are gathered in a plain list and the frame is built once:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
feature_rows = (_extract_region_features(fp_bin, 0)
                + _extract_region_features(vp_bin, 1))

# Integer column labels 0..11 are kept because the following cells select
# columns by position; passing them explicitly also keeps the 12 columns
# when no region qualifies.
df = pd.DataFrame(feature_rows, columns=range(12))

In [None]:
# Feature matrix: every column of df except column 11.
data = df.drop(labels=[11], axis=1)

In [None]:
# Target frame: drop the feature columns 0..10 and keep what remains.
labels = df.drop(columns=list(range(11)))

In [None]:
# Hold out 20% of the rows for testing; the seed is fixed for repeatability.
X_treino, X_teste, y_treino, y_teste = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=35,
)

# Flatten both target frames into 1-D arrays.
y_treino, y_teste = np.ravel(y_treino), np.ravel(y_teste)

# Random Forest

In [None]:
# Fit a depth- and leaf-limited random forest on the training split
# (fit() returns the estimator itself, so the call can be chained).
clf_rf = RandomForestClassifier(
    max_depth=10,
    max_leaf_nodes=100,
    random_state=42,
).fit(X_treino, y_treino)

# Score the held-out split.
y_pred = clf_rf.predict(X_teste)
accuracy = metrics.accuracy_score(y_teste, y_pred)
print("Accuracy:", accuracy)

In [None]:
# Display names for the 11 feature columns, in column order:
# four shape descriptors followed by the seven Hu moments.
feature_cols = ['area', 'major_eixo', 'minor_eixo', 'eccentricity']
feature_cols += [f'moment_hu_{order}' for order in range(7)]

# Importances reported by the fitted forest, most important first.
importances = pd.Series(clf_rf.feature_importances_, index=feature_cols)
feature_imp = importances.sort_values(ascending=False)

In [None]:
%matplotlib inline
sns.barplot(x=feature_imp, y=feature_imp.index, palette="Blues_d")
plt.xlabel('Feature Imposrtance Score')
plt.ylabel('Features')
plt.title("Visualizing Important Features")
plt.legend()
plt.tight_layout()
plt.savefig('rf-shape.tif', dpi=300)
plt.show()

In [None]:
import pickle

# Persist the fitted classifier to the current working directory.
filename = 'model_shape_filter.sav'

# The context manager flushes and closes the file even if pickling fails;
# the bare open() used before left the handle to the garbage collector.
with open(filename, 'wb') as model_file:
    pickle.dump(clf_rf, model_file)