In [2]:
import os
import requests
from zipfile import ZipFile
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import SGDClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import joblib
from tensorflow.keras.models import load_model

In [2]:
url = "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip"
dataset_path = "cats_and_dogs.zip"

# Fetch the archive only when neither the extracted "dataset" folder nor a
# previously downloaded archive exists, so re-running the cell stays cheap.
if not os.path.exists("dataset") and not os.path.exists(dataset_path):
    print("Downloading dataset...")
    partial_path = dataset_path + ".part"
    # Stream to disk in chunks instead of holding the whole archive in memory.
    with requests.get(url, stream=True, timeout=60) as response:
        # Fail loudly on an HTTP error rather than saving an error page as a .zip.
        response.raise_for_status()
        with open(partial_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                file.write(chunk)
    # Publish the archive under its final name only once it is complete, so an
    # interrupted download is never mistaken for a finished one.
    os.replace(partial_path, dataset_path)

Downloading dataset...


In [3]:
# Unpack the archive only when the image folder is missing, so the cell can be
# re-run (or run after the archive was removed) without re-extracting or failing.
if not os.path.isdir(os.path.join("dataset", "PetImages")):
    with ZipFile(dataset_path, 'r') as zip_ref:
        zip_ref.extractall("dataset")

In [3]:
def preprocess_image(image_path, size=(16, 16)):  # Reduced size for faster processing
    """Read an image from disk, resize it and divide its pixel values by 255.

    Args:
        image_path: Path of the image file to read.
        size: Target size handed to ``cv2.resize``.

    Returns:
        The resized image divided by 255.0, or ``None`` when OpenCV cannot
        read or resize the file.
    """
    try:
        image = cv2.imread(image_path)
        # cv2.imread reports an unreadable file by returning None, not by raising.
        if image is None:
            return None
        image = cv2.resize(image, size)
    except cv2.error:
        # Only OpenCV failures mean "bad image"; KeyboardInterrupt and
        # programming errors are allowed to propagate.
        return None
    return image / 255.0  # Normalize

def load_data(data_dir, label_map, subset_size=None):
    """Load and preprocess the images of every class folder under ``data_dir``.

    Args:
        data_dir: Directory that contains one sub-folder per class.
        label_map: Mapping of label -> sub-folder name.
        subset_size: When truthy, only the first ``subset_size`` entries of
            each folder (in sorted filename order) are read; ``None`` or 0
            reads every entry.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Files for which
        ``preprocess_image`` returns ``None`` are skipped, so a class may
        contribute fewer than ``subset_size`` samples.
    """
    images, labels = [], []
    for label, folder in label_map.items():
        folder_path = os.path.join(data_dir, folder)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # selected subset identical on every run and machine.
        filenames = sorted(os.listdir(folder_path))
        if subset_size:
            filenames = filenames[:subset_size]
        for filename in filenames:
            image = preprocess_image(os.path.join(folder_path, filename))
            if image is not None:
                images.append(image)
                labels.append(label)
    return np.array(images), np.array(labels)


In [4]:
# Location of the extracted images and the label -> class-folder mapping.
data_dir = "dataset/PetImages"
label_map = dict(enumerate(("Cat", "Dog")))
# Cap on the number of folder entries read per class, to keep training fast.
subset_size = 5000
images, labels = load_data(data_dir, label_map, subset_size)

In [5]:
# One flat feature row per image, for the models fitted on X_train below.
flattened_images = images.reshape(images.shape[0], -1)

In [6]:
# Fit the encoder once, then derive both label layouts from it:
# class indices (encoded_labels) and one-hot rows (y_categorical).
label_encoder = LabelEncoder().fit(labels)
encoded_labels = label_encoder.transform(labels)
y_categorical = to_categorical(encoded_labels)

In [7]:
# A single call splits all four arrays with the same shuffled indices, so the
# flattened and image-shaped views of a sample always land on the same side.
(
    X_train, X_test,
    cnn_X_train, cnn_X_test,
    y_train, y_test,
    cnn_y_train, cnn_y_test,
) = train_test_split(
    flattened_images, images, encoded_labels, y_categorical,
    test_size=0.2, random_state=42,
)

In [None]:
# Fit a linear-kernel SVM on the flattened pixels and persist it to disk.
print("Training SVM...")
svm_model = SVC(C=0.1, kernel='linear', probability=True).fit(X_train, y_train)
joblib.dump(svm_model, "svm_model.pkl")
print("SVM training completed and saved.")

Training SVM...


In [None]:
# Fit a small, depth-limited random forest on the flattened pixels and persist it.
print("Training Random Forest...")
rf_model = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    random_state=42,
).fit(X_train, y_train)
joblib.dump(rf_model, "rf_model.pkl")
print("Random Forest training completed and saved.")

Training Random Forest...
Random Forest training completed and saved.


In [None]:
# --- Logistic regression (SGDClassifier with log loss) on the flattened pixels ---
print("Training Logistic Regression...")
sgd_model = SGDClassifier(loss='log_loss', max_iter=1000, random_state=42).fit(X_train, y_train)
joblib.dump(sgd_model, "sgd_model.pkl")
print("Logistic Regression training completed and saved.")


# --- Small CNN on the 16x16 three-channel images ---
print("Training CNN...")
cnn_model = Sequential()
cnn_model.add(Conv2D(16, (3, 3), activation='relu', input_shape=(16, 16, 3)))
cnn_model.add(MaxPooling2D((2, 2)))
cnn_model.add(Flatten())
cnn_model.add(Dense(64, activation='relu'))
cnn_model.add(Dense(2, activation='softmax'))  # one output per class

cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# NOTE: the held-out test split doubles as the validation set during training.
cnn_model.fit(
    cnn_X_train, cnn_y_train,
    epochs=30, batch_size=64,
    validation_data=(cnn_X_test, cnn_y_test),
)
cnn_model.save("cnn_model.h5")
print("CNN training completed and saved.")

Training Logistic Regression...
Logistic Regression training completed and saved.
Training CNN...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 34ms/step - accuracy: 0.5525 - loss: 0.6829 - val_accuracy: 0.6832 - val_loss: 0.6144
Epoch 2/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.6681 - loss: 0.6140 - val_accuracy: 0.6982 - val_loss: 0.5852
Epoch 3/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6949 - loss: 0.5757 - val_accuracy: 0.6752 - val_loss: 0.6123
Epoch 4/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7170 - loss: 0.5568 - val_accuracy: 0.7013 - val_loss: 0.5722
Epoch 5/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7214 - loss: 0.5489 - val_accuracy: 0.6912 - val_loss: 0.5969
Epoch 6/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7250 - loss: 0.5373 - val_accuracy: 0.7108 - val_loss: 0.5788
Epoch 7/30
[1m125/125[0m 



CNN training completed and saved.


In [9]:
# Reload every persisted model from disk before running inference.
print("Loading models for inference...")
svm_model, rf_model, sgd_model = (
    joblib.load(model_file)
    for model_file in ("svm_model.pkl", "rf_model.pkl", "sgd_model.pkl")
)
cnn_model = load_model("cnn_model.h5")

# Take the first held-out sample in both layouts.
sample_image = X_test[0].reshape(1, -1)  # flat row for the non-CNN models
cnn_sample_image = cnn_X_test[0].reshape(1, 16, 16, 3)  # batch of one image for the CNN

# Predict and print one model at a time so the output order stays the same.
svm_pred = svm_model.predict(sample_image)
print("SVM Prediction:", label_encoder.inverse_transform(svm_pred))
rf_pred = rf_model.predict(sample_image)
print("Random Forest Prediction:", label_encoder.inverse_transform(rf_pred))
sgd_pred = sgd_model.predict(sample_image)
print("Logistic Regression Prediction:", label_encoder.inverse_transform(sgd_pred))
cnn_pred = np.argmax(cnn_model.predict(cnn_sample_image), axis=1)
print("CNN Prediction:", label_encoder.inverse_transform(cnn_pred))

Loading models for inference...


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


SVM Prediction: [1]
Random Forest Prediction: [1]
Logistic Regression Prediction: [1]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 282ms/step
CNN Prediction: [1]


In [10]:
# Train K-Means
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score
import warnings

print("Training K-Means...")
kmeans_model = KMeans(n_clusters=2, random_state=42)
# Suppress warnings only while fitting, for clean output. A blanket
# warnings.filterwarnings('ignore') would keep hiding warnings in every later cell.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    kmeans_model.fit(X_train)  # Unsupervised training on flattened images
joblib.dump(kmeans_model, "kmeans_model.pkl")
print("K-Means training completed and saved.")

Training K-Means...
K-Means training completed and saved.


In [11]:
pip install flask-ngrok flask tensorflow scikit-learn pillow

Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Collecting flask
  Using cached flask-3.1.0-py3-none-any.whl.metadata (2.7 kB)
Collecting pillow
  Using cached pillow-11.1.0-cp312-cp312-win_amd64.whl.metadata (9.3 kB)
Collecting Jinja2>=3.1.2 (from flask)
  Using cached jinja2-3.1.5-py3-none-any.whl.metadata (2.6 kB)
Collecting itsdangerous>=2.2 (from flask)
  Using cached itsdangerous-2.2.0-py3-none-any.whl.metadata (1.9 kB)
Collecting click>=8.1.3 (from flask)
  Using cached click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Collecting blinker>=1.9 (from flask)
  Using cached blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Using cached flask-3.1.0-py3-none-any.whl (102 kB)
Using cached pillow-11.1.0-cp312-cp312-win_amd64.whl (2.6 MB)
Using cached blinker-1.9.0-py3-none-any.whl (8.5 kB)
Using cached click-8.1.8-py3-none-any.whl (98 kB)
Using cached itsdangerous-2.2.0-py3-none-any.whl (16 k


[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [12]:
pip install jupyter-dash

Collecting jupyter-dash
  Downloading jupyter_dash-0.4.2-py3-none-any.whl.metadata (3.6 kB)
Collecting dash (from jupyter-dash)
  Downloading dash-2.18.2-py3-none-any.whl.metadata (10 kB)
Collecting retrying (from jupyter-dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Collecting ansi2html (from jupyter-dash)
  Downloading ansi2html-1.9.2-py3-none-any.whl.metadata (3.7 kB)
Collecting flask (from jupyter-dash)
  Downloading flask-3.0.3-py3-none-any.whl.metadata (3.2 kB)
Collecting Werkzeug<3.1 (from dash->jupyter-dash)
  Downloading werkzeug-3.0.6-py3-none-any.whl.metadata (3.7 kB)
Collecting plotly>=5.0.0 (from dash->jupyter-dash)
  Downloading plotly-5.24.1-py3-none-any.whl.metadata (7.3 kB)
Collecting dash-html-components==2.0.0 (from dash->jupyter-dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-core-components==2.0.0 (from dash->jupyter-dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.


[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [13]:
import plotly.express as px
from jupyter_dash import JupyterDash   #3
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output# Load Data

ImportError: Plotly express requires pandas to be installed.

In [None]:
pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Downloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.3


In [None]:
from flask import Flask #5
from pyngrok import ngrok

In [None]:
# Read the ngrok auth token from the environment instead of embedding it in the notebook.
# NOTE(review): a token was previously hard-coded in this cell; treat it as
# leaked and revoke it in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN")
if not ngrok_token:
    raise RuntimeError("Set the NGROK_AUTHTOKEN environment variable before running this cell.")
ngrok.set_auth_token(ngrok_token)
public_url = ngrok.connect(5000).public_url
print(public_url) #6

https://d714-34-143-139-211.ngrok-free.app


In [None]:
from flask import Flask, request, jsonify, render_template
from tensorflow.keras.models import load_model
import joblib
import cv2
import numpy as np
from pyngrok import ngrok      #7

# Initialize Flask app
app = Flask(__name__)

# Set up ngrok. Keep only the tunnel's URL string, as the earlier ngrok cell
# does, so the printed line shows the bare URL rather than the tunnel repr.
public_url = ngrok.connect(5000).public_url
print(f"Public URL: {public_url}")

# Load models
# NOTE: joblib.load unpickles its input; only load files written by this notebook.
svm_model = joblib.load("svm_model.pkl")
rf_model = joblib.load("rf_model.pkl")
sgd_model = joblib.load("sgd_model.pkl")
cnn_model = load_model("cnn_model.h5")
kmeans_model = joblib.load("kmeans_model.pkl")  # Load KMeans model

# Label map
label_map = {0: "Cat", 1: "Dog"}

def inverse_label(label_idx):
    """Return the class name that ``label_map`` stores for ``label_idx``.

    Raises:
        KeyError: If ``label_idx`` is not a key of ``label_map``.
    """
    class_name = label_map[label_idx]
    return class_name

# Preprocess image
def preprocess_image(image_file, size=(16, 16)):
    """Decode an uploaded image, resize it and divide its pixel values by 255.

    NOTE: this redefines the path-based ``preprocess_image`` from earlier in
    the notebook; once this cell has run, the name refers to this version.

    Args:
        image_file: File-like object whose ``read()`` returns the encoded
            image bytes.
        size: Target size handed to ``cv2.resize``.

    Returns:
        The resized image divided by 255.0, or ``None`` when the upload is
        empty or cannot be decoded.
    """
    raw_bytes = image_file.read()
    # cv2.imdecode raises on an empty buffer, so reject an empty upload up
    # front and let the caller answer with its "invalid image" response.
    if not raw_bytes:
        return None
    image = cv2.imdecode(np.frombuffer(raw_bytes, np.uint8), cv2.IMREAD_COLOR)
    if image is None:
        return None
    image = cv2.resize(image, size)
    image = image / 255.0  # Normalize
    return image

# Root route
@app.route('/')
def home():
    """Serve the landing page whose form posts an image file to ``/predict``."""
    upload_page = """
    <html>
        <head><title>Cat and Dog Classifier</title></head>
        <body>
            <h1>Welcome to the Cat and Dog Classifier</h1>
            <form action="/predict" method="post" enctype="multipart/form-data">
                <label for="image">Upload an image:</label>
                <input type="file" name="image" accept="image/*" required>
                <button type="submit">Predict</button>
            </form>
        </body>
    </html>
    """
    return upload_page

# Prediction route
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the uploaded image with every loaded model and return JSON.

    Responds with HTTP 400 and an ``error`` message when the request has no
    ``image`` file part or when ``preprocess_image`` returns ``None``.
    """
    if 'image' not in request.files:
        return jsonify({'error': 'No image uploaded'}), 400

    pixels = preprocess_image(request.files['image'])
    if pixels is None:
        return jsonify({'error': 'Invalid image format'}), 400

    # The non-CNN models take one flat row; the CNN takes a batch of one
    # 16x16x3 image.
    flat_row = pixels.reshape(1, -1)
    cnn_batch = pixels.reshape(1, 16, 16, 3)

    results = {}
    classifiers = (
        ('svm_prediction', svm_model),
        ('rf_prediction', rf_model),
        ('sgd_prediction', sgd_model),
    )
    for key, model in classifiers:
        results[key] = inverse_label(model.predict(flat_row)[0])

    cnn_scores = cnn_model.predict(cnn_batch)
    results['cnn_prediction'] = inverse_label(np.argmax(cnn_scores, axis=1)[0])

    # K-Means was fitted without labels, so only the cluster number is reported.
    results['kmeans_prediction'] = f"Cluster {kmeans_model.predict(flat_row)[0]}"

    return jsonify(results)

# Start Flask's built-in server on port 5000 (the port passed to ngrok.connect)
# when this code runs as the main module.
if __name__ == '__main__':
    app.run(port=5000)



Public URL: NgrokTunnel: "https://e90a-34-143-139-211.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [22/Jan/2025 17:37:54] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [22/Jan/2025 17:37:55] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 183ms/step


INFO:werkzeug:127.0.0.1 - - [22/Jan/2025 17:38:06] "POST /predict HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [22/Jan/2025 17:39:04] "[31m[1mPOST /predict HTTP/1.1[0m" 400 -
INFO:werkzeug:127.0.0.1 - - [22/Jan/2025 17:39:56] "[31m[1mPOST /predict HTTP/1.1[0m" 400 -


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step


INFO:werkzeug:127.0.0.1 - - [22/Jan/2025 17:40:50] "POST /predict HTTP/1.1" 200 -
