In [1]:
# necessary libraries
from sklearn.linear_model import LogisticRegression # ml model
from sklearn.svm import SVC # ml model
from sklearn.tree import DecisionTreeClassifier # ml model
from sklearn.ensemble import RandomForestClassifier # ml model
from sklearn.neighbors import KNeighborsClassifier # ml model
from sklearn.model_selection import train_test_split # split data for train & test
from sklearn.metrics import accuracy_score # test the accuracy
import pandas as pd # load csv
import numpy as np # convert image to np array
import joblib #  extract the model in pkl format
import os
from PIL import Image

In [2]:
cat_path = "Cat"
dog_path = "Dog"

cat_images = []
dog_images = []

for path in os.listdir(cat_path):
    img = Image.open(os.path.join(cat_path, path))
    img = img.resize((100,100))
    img = img.convert('L')
    img = np.array(img)
    img = img.flatten()
    cat_images.append(img)
cat_images = np.asarray(cat_images)

for path in os.listdir(dog_path):
    img = Image.open(os.path.join(dog_path, path))
    img = img.resize((100,100))
    img = img.convert('L')
    img = np.array(img)
    img = img.flatten()
    dog_images.append(img)
dog_images = np.asarray(dog_images)

In [3]:
cat_label = np.zeros((100,1))
dog_label = np.ones((100,1))

In [4]:
images = np.concatenate((cat_images, dog_images), axis=0)
labels = np.concatenate((cat_label, dog_label), axis=0)
labels = labels.ravel()

In [5]:
def shuffle_data(images, labels):
    indices = np.arange(images.shape[0])
    np.random.shuffle(indices)
    images = images[indices]
    labels = labels[indices]
    return images, labels

images, labels = shuffle_data(images, labels)

In [6]:
x_data = images
y_data = labels

In [7]:
X_train, X_test, y_train, y_test = train_test_split(x_data, y_data)

In [8]:
# logistic regression model
log_model = LogisticRegression(max_iter=100)
log_model.fit(X_train, y_train)
log_y_pred = log_model.predict(X_test)

log_acs = accuracy_score(y_test, log_y_pred)

print(f"Logistic Regression Accuracy Score: {log_acs * 100}%")

Logistic Regression Accuracy Score: 50.0%


In [9]:
# svm model
svm_model = SVC(kernel="linear")
svm_model.fit(X_train, y_train)

svm_y_pred = svm_model.predict(X_test)

svm_acs = accuracy_score(y_test, svm_y_pred)

print(f"SVM Accuracy Score: {svm_acs * 100}%")

SVM Accuracy Score: 54.0%


In [10]:
# decision tree model
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

dt_y_pred = dt_model.predict(X_test)

dt_acs = accuracy_score(y_test, dt_y_pred)

print(f"Decision Tree Accuracy Score: {dt_acs * 100}%")

Decision Tree Accuracy Score: 57.99999999999999%


In [11]:
# random forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

rf_y_pred = rf_model.predict(X_test)

rf_acs = accuracy_score(y_test, rf_y_pred)

print(f"RandomForestClassifier Accuracy Score: {rf_acs * 100}%")

RandomForestClassifier Accuracy Score: 64.0%


In [12]:
# kneighbors classifier model
knc_model = KNeighborsClassifier(n_neighbors=3)
knc_model.fit(X_train, y_train)

knc_y_pred = knc_model.predict(X_test)

knc_acs = accuracy_score(y_test, knc_y_pred)

print(f"KNeighborsClassifier Accuracy Score: {knc_acs * 100}%")

KNeighborsClassifier Accuracy Score: 52.0%


In [13]:
# extracting model and vectorize data
joblib.dump(rf_model, "cat_dog_randomforest_model.pkl")

['cat_dog_randomforest_model.pkl']

In [20]:
def predict_image(file_path) :
    img = Image.open(file_path)
    img = img.resize((100,100))
    img = img.convert('L')
    img = np.array(img)
    img = img.flatten().reshape(1, -1)
    img = np.asarray(img)
    predict = rf_model.predict(img)
    print(f"Prediction: {"Cat" if predict == 0 else "Dog"}")

In [25]:
from plyer import filechooser
file_path = filechooser.open_file(title="Select an Image", filters=[("Image Files", "*.jpg;*.jpeg;*.png;*.bmp;*.gif")])

if file_path:
    predict_image(file_path[0])
else:
    print("No file selected.")

Prediction: Dog
