In [1]:
import numpy as np

from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import cross_val_predict

from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

import pickle

from cv2 import imread, IMREAD_GRAYSCALE
import numpy as np

In [2]:
# Feature matrix and label vector produced by the preprocessing step.
# np.load accepts a path and takes care of opening/closing the file itself.
X = np.load('../data_processed/X.npy')
y = np.load('../data_processed/y.npy')


In [3]:
# Shared seed so both estimators are reproducible across kernel restarts.
random_state = 42

# Build both estimators with the same seed; order matches the unpacking targets.
sgd_clf, rf_clf = (
    estimator_cls(random_state=random_state)
    for estimator_cls in (SGDClassifier, RandomForestClassifier)
)

In [4]:
# 3-fold cross-validated predictions for each estimator (SGD first, then RF).
y_sgd_pred, y_rf_pred = (
    cross_val_predict(estimator, X, y, cv=3)
    for estimator in (sgd_clf, rf_clf)
)



# Getting Model Scores

In [5]:
def prediction_scores(y_test, y_pred):
  """Compute, print and return accuracy, precision, recall and F1.

  Both arguments are forwarded unchanged to the scikit-learn metric
  functions, which are called with their default settings.
  NOTE(review): the defaults presumably assume binary labels - confirm
  against the contents of `y`.

  Args:
    y_test: Ground-truth labels.
    y_pred: Predicted labels, aligned with `y_test`.

  Returns:
    dict with the keys "accuracy_score", "precision_score",
    "recall_score" and "f1_score".
  """
  # (printed label, key in the returned dict, metric function)
  metrics = (
    ("Accuracy", "accuracy_score", accuracy_score),
    ("Precision", "precision_score", precision_score),
    ("Recall", "recall_score", recall_score),
    ("F1", "f1_score", f1_score),
  )

  # Evaluate every metric before printing, so nothing is printed if one raises.
  scores = {key: metric(y_test, y_pred) for _, key, metric in metrics}

  for label, key, _ in metrics:
    print(f"{label}: {scores[key]}")

  return scores


In [6]:

print("SGD Scores:")
# Keep only the F1 value; it is used later to compare the two models.
sgd_scores = prediction_scores(y, y_sgd_pred)
sgd_f1 = sgd_scores['f1_score']

SGD Scores:
Accuracy: 0.9886000206334468
Precision: 0.9485875360009804
Recall: 0.9694995929103777
F1: 0.9589295669949823


In [7]:
print("Random Forest Scores:")
# Keep only the F1 value; it is used later to compare the two models.
rf_scores = prediction_scores(y, y_rf_pred)
rf_f1 = rf_scores['f1_score']


Random Forest Scores:
Accuracy: 0.9941366621960865
Precision: 0.9922705314009662
Recall: 0.9648024049602305
F1: 0.9783437063381176


In [8]:
# Pick the model with the strictly higher F1; a tie resolves to Random Forest.
if sgd_f1 > rf_f1:
    best_model = "SGD Classifier"
else:
    best_model = "Random Forest Classifier"

print(f"The best model is: {best_model}")

The best model is: Random Forest Classifier


# Fitting Models and Saving

In [9]:
# Refit both estimators on the full dataset (SGD first, then Random Forest).
for estimator in (sgd_clf, rf_clf):
    estimator.fit(X, y)

print("Models fitted!")

Models fitted!


In [10]:
def save_model(model, filename):
  """Pickle `model` to ../models/<filename>.pkl.

  The target directory is created when it does not exist yet, so the
  notebook also runs on a fresh checkout where ../models/ is missing.

  Args:
    model: Any picklable object (here: a fitted estimator).
    filename: File name without the ".pkl" extension.
  """
  import os  # local import keeps this cell self-contained

  path = f"../models/{filename}.pkl"
  # open(..., 'wb') does not create missing parent directories.
  os.makedirs(os.path.dirname(path), exist_ok=True)
  with open(path, 'wb') as f:
    pickle.dump(model, f)

# Persist both fitted estimators under ../models/.
save_model(model=sgd_clf, filename="sgd_classifier")
save_model(model=rf_clf, filename="random_forest_classifier")

# Using a Model to Classify an Image

In [11]:
def load_model(filename):
  """Load and return the object pickled at ../models/<filename>.pkl.

  Args:
    filename: File name without the ".pkl" extension.

  Returns:
    The unpickled object.
  """
  model_path = f"../models/{filename}.pkl"
  # NOTE: unpickling executes arbitrary code; only load files you trust.
  with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)
  return model
  
# Reload both persisted estimators (SGD first, then Random Forest).
loaded_sgd_clf, loaded_rf_clf = (
    load_model(model_name)
    for model_name in ("sgd_classifier", "random_forest_classifier")
)

In [12]:
def convert_image(image_path):
  """Read an image as grayscale and return it as a flat boolean vector.

  Args:
    image_path: Path of the image file to read.

  Returns:
    1-D boolean numpy array, True wherever the grayscale pixel value is
    greater than zero.

  Raises:
    OSError: If the image could not be read (missing file or a format
      OpenCV cannot decode).
  """
  image = imread(image_path, IMREAD_GRAYSCALE)
  if image is None:
    # cv2.imread reports failure by returning None instead of raising;
    # without this check the comparison below fails with an opaque TypeError.
    raise OSError(f"Could not read image: {image_path!r}")

  # The comparison already yields a boolean array, so no dtype cast is needed.
  return (image > 0).flatten()

In [13]:
def predict_using(model, image_path):
  """Return `model`'s prediction for the single image at `image_path`.

  The image is converted with `convert_image` and passed to
  `model.predict` as a one-sample batch; the first (only) prediction is
  returned.
  """
  features = convert_image(image_path)
  predictions = model.predict([features])
  return predictions[0]

# Compute the prediction outside the f-string: reusing double quotes inside a
# double-quoted f-string is a SyntaxError on Python versions before 3.12.
target_image_path = "../target/train_69_00000.png"
target_prediction = predict_using(loaded_sgd_clf, target_image_path)

print(f"Target image is i or I?\n{target_prediction}")

Target image is i or I?
True
