# Shape Classifier

Train a scikit-learn `RandomForestClassifier` to classify images of four shapes: circle, square, star, and triangle.

## Import Libraries

In [1]:
from PIL import Image
import numpy as np
import os

## Import Dataset

In [2]:
import random

def import_dataset(data_dir="shapes"):
    """Load every shape image found under ``data_dir`` together with its label.

    Each of the four shape names is expected to be a sub-directory of
    ``data_dir``. Every regular file inside such a sub-directory is opened
    with PIL and converted to a NumPy array.

    Parameters
    ----------
    data_dir : str, optional
        Root directory holding one folder per shape. Defaults to ``"shapes"``,
        so calling ``import_dataset()`` reads the same location as before.

    Returns
    -------
    X : list of numpy.ndarray
        One array per image, in sorted filename order within each shape.
    y : list of str
        The shape name for the image at the same index in ``X``.
    """
    shapes = ["circle", "square", "triangle", "star"]
    X = []
    y = []

    for shape in shapes:
        shape_dir = os.path.join(data_dir, shape)

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore any seeded split) reproducible.
        for filename in sorted(os.listdir(shape_dir)):
            image_path = os.path.join(shape_dir, filename)

            # Skip nested folders (e.g. notebook checkpoint directories)
            # instead of failing when trying to open them as images.
            if not os.path.isfile(image_path):
                continue

            # The context manager closes the file handle once the pixel data
            # has been copied into the array.
            with Image.open(image_path) as image:
                X.append(np.asarray(image))
            y.append(shape)

    return X, y

# Load all image arrays (X) and their shape names (y) as parallel lists.
X, y = import_dataset()

## Flatten Images and Encode Labels

In [3]:
# Collapse each image array to one dimension so every sample is a single feature row.
X = list(map(lambda image: np.asarray(image).flatten(), X))

In [4]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the shape names, then replace them with their integer codes.
le = LabelEncoder().fit(y)
y = le.transform(y)

## Split Dataset

In [5]:
from sklearn.model_selection import train_test_split

# Hold out a quarter of the samples for evaluation; random_state seeds the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

## Train Model

In [6]:
from sklearn.ensemble import RandomForestClassifier

# Build a 100-tree forest with a fixed seed, then fit it on the training split.
classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
classifier.fit(X_train, y_train)

RandomForestClassifier(random_state=42)

## Make Predictions

In [7]:
# Predict encoded class labels for the held-out test split.
y_pred = classifier.predict(X_test)

In [8]:
# Open the sample inside a context manager so the file handle is released;
# convert() returns a new in-memory image that stays valid after the file is closed.
with Image.open("shapes/test_circle.png") as raw_image:
    pil_image = raw_image.convert('1').resize((200, 200))

# Map truthy pixels to 255 and falsy pixels to 0 in one vectorized step
# instead of looping over every pixel in Python.
test_image = np.where(np.asarray(pil_image).flatten(), 255, 0)

# Decode the predicted integer class back to its shape name.
le.inverse_transform(classifier.predict([test_image]))

array(['circle'], dtype='<U8')

## Make Confusion Matrix

In [9]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Compare the held-out predictions with the true labels.
matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print(matrix)
print(accuracy)

[[947   1   0   0]
 [  0 910   0   0]
 [  0   0 948   0]
 [  0   0   0 937]]
0.9997328346246327


## Save Model

In [10]:
from joblib import dump

# joblib's dump does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
os.makedirs("model", exist_ok=True)

# Persist the classifier together with the label encoder needed to decode its predictions.
dump(classifier, "model/shapes_classifier.joblib", compress=3)
dump(le, "model/shapes_label_encoder.joblib", compress=3)

['model/shapes_label_encoder.joblib']