## Create models and train

In [1]:
import cv2
import glob
import numpy as np
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
import joblib

# Data preprocessing
data = []
labels = []

for i, address in enumerate(glob.glob("train\\*\\*.jpg")):
    img = cv2.imread(address)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = cv2.resize(img, (64, 64))
    img = img/255
    img = img.flatten()

    data.append(img)
    
    label = address.split("\\")[-1].split(".")[0]
    labels.append(label)

    if i%200 == 0:
        print(f"[INFO] Processed {i} images...")

data = np.array(data)

X = data
y = labels

# Standardize
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Define classifiers
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Logistic Regression CV": LogisticRegressionCV(max_iter=1000),
    "SVM": SVC(kernel='linear'),
    "Random Forest": RandomForestClassifier(n_estimators=100),
    "KNN": KNeighborsClassifier(n_neighbors=5)
}

best_model = None
best_acc = 0

# Train and evaluate each model
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"\n{name} Accuracy: {acc:.4f}")
    print(classification_report(y_test, y_pred, target_names=["Cat", "Dog"]))
    
    if acc > best_acc:
        best_acc = acc
        best_model = model

# Save the best model and scaler
joblib.dump(best_model, "cat_dog_best_model.pkl")
joblib.dump(scaler, "scaler.pkl")
print(f"\nBest model saved with accuracy: {best_acc:.4f}")

[INFO] Processed 0 images...
[INFO] Processed 200 images...
[INFO] Processed 400 images...
[INFO] Processed 600 images...
[INFO] Processed 800 images...
[INFO] Processed 1000 images...
[INFO] Processed 1200 images...
[INFO] Processed 1400 images...
[INFO] Processed 1600 images...
[INFO] Processed 1800 images...

Logistic Regression Accuracy: 0.4850
              precision    recall  f1-score   support

         Cat       0.48      0.51      0.50       199
         Dog       0.49      0.46      0.47       201

    accuracy                           0.48       400
   macro avg       0.49      0.49      0.48       400
weighted avg       0.49      0.48      0.48       400


Logistic Regression CV Accuracy: 0.5650
              precision    recall  f1-score   support

         Cat       0.56      0.55      0.56       199
         Dog       0.57      0.58      0.57       201

    accuracy                           0.56       400
   macro avg       0.56      0.56      0.56       400
weighted 

## Load model and test

In [2]:
# Load model
model = joblib.load("cat_dog_best_model.pkl")
scaler = joblib.load("scaler.pkl")

def predict_image(address, image_size=(64, 64)):
    img = cv2.imread(address)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = cv2.resize(img, (64, 64))
    img = img/255
    img = img.flatten().reshape(1, -1)

    img_scaled = scaler.transform(img)

    pred = model.predict(img_scaled)[0]
    print(f"Prediction for {address}: {pred}")

    img = img.resize(image_size)

for i, address in enumerate(glob.glob("test\\*\\*.jpg")):
    predict_image(address)

Prediction for test\Cat\Cat (1).jpg: cat
Prediction for test\Cat\Cat (2).jpg: dog
Prediction for test\Cat\Cat (3).jpg: dog
Prediction for test\Cat\Cat (4).jpg: dog
Prediction for test\Cat\Cat (5).jpg: cat
Prediction for test\Dog\Dog (1).jpg: cat
Prediction for test\Dog\Dog (2).jpg: cat
Prediction for test\Dog\Dog (3).jpg: dog
Prediction for test\Dog\Dog (4).jpg: dog
Prediction for test\Dog\Dog (5).jpg: cat


## Results
The model was able to recognize some images correctly, but the average accuracy score of 0.6 is too low to rely on the model's results, because with a random choice of 2 options, the average accuracy score would be 0.5.