# Cat and Dog Classification - ML Pipeline

This notebook demonstrates the complete machine learning pipeline for classifying images of cats and dogs.

## Objectives:
1. Data Acquisition and Preprocessing
2. Model Creation using Transfer Learning
3. Model Training with Optimization Techniques
4. Model Evaluation with Multiple Metrics
5. Model Testing and Prediction


In [1]:
# Import necessary libraries
# Standard library
import os
import sys

# Third-party
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers, callbacks
from tensorflow.keras.applications import MobileNetV2

# Add src to path (must run before the project-local imports below)
sys.path.append('../src')
from preprocessing import prepare_dataset, augment_data
from model import create_model, train_model, evaluate_model, save_model_metadata, load_model

# Set random seeds for reproducibility.
# keras.utils.set_random_seed seeds Python's `random`, NumPy and TensorFlow in
# a single call. Seeding only NumPy and TensorFlow leaves Python's `random`
# unseeded, so any helper that draws from it would differ between runs.
SEED = 42
keras.utils.set_random_seed(SEED)

# Record the runtime environment next to the results.
print("TensorFlow version:", tf.__version__)
print("GPU Available:", tf.config.list_physical_devices('GPU'))


TensorFlow version: 2.20.0
GPU Available: []


## 1. Data Acquisition and Preprocessing


In [2]:
# Locations of the per-class image folders, relative to this notebook.
train_cats_dir, train_dogs_dir = '../data/train/cats', '../data/train/dogs'
test_cats_dir, test_dogs_dir = '../data/test/cats', '../data/test/dogs'

# Loading options. The max_train / max_test caps keep the run fast by using a
# subset (1000 training / 200 test images per class); drop them for the full dataset.
subset_options = dict(
    target_size=(224, 224),
    max_train=1000,
    max_test=200,
)

print("Loading dataset...")
X_train, X_test, y_train, y_test = prepare_dataset(
    train_cats_dir, train_dogs_dir,
    test_cats_dir, test_dogs_dir,
    **subset_options
)

# Report the shape of every array that was returned.
shape_report = (
    ("\nTraining set shape", X_train),
    ("Test set shape", X_test),
    ("Training labels shape", y_train),
    ("Test labels shape", y_test),
)
for caption, array in shape_report:
    print(f"{caption}: {array.shape}")


Loading dataset...
Loading training images...
Loading test images...


KeyboardInterrupt: 

In [None]:
# Show the first four training images of each class in a 2 x 4 grid.
fig, axes = plt.subplots(2, 4, figsize=(15, 8))
fig.suptitle('Sample Training Images', fontsize=16)

# Row 0 holds label 0 (titled 'Cat'), row 1 holds label 1 (titled 'Dog').
for row, (label_value, label_name) in enumerate([(0, 'Cat'), (1, 'Dog')]):
    # Positions of every training sample carrying this label.
    class_indices = np.where(y_train == label_value)[0]
    for col in range(4):
        panel = axes[row, col]
        panel.imshow(X_train[class_indices[col]])
        panel.set_title(label_name)
        panel.axis('off')

plt.tight_layout()
plt.show()


In [None]:
# Hold out the validation set BEFORE augmenting. If augmentation ran first,
# augmented variants of one source image could land in both the training and
# the validation set (data leakage), and validation would be measured on
# augmented rather than original images.
X_train_split, X_val, y_train_split, y_val = train_test_split(
    X_train, y_train,
    test_size=0.2,
    random_state=42,
    stratify=y_train
)

# Data augmentation, applied to the training portion only
print("Augmenting training data...")
X_train_aug, y_train_aug = augment_data(X_train_split, y_train_split, augment_factor=1)
print(f"Augmented training set shape: {X_train_aug.shape}")

# Shuffle the augmented set deterministically so the final training arrays are
# in random order, as they were when train_test_split produced them.
# NOTE(review): assumes augment_data returns index-aligned NumPy arrays - confirm.
shuffle_order = np.random.default_rng(42).permutation(len(X_train_aug))
X_train_final = X_train_aug[shuffle_order]
y_train_final = np.asarray(y_train_aug)[shuffle_order]

print(f"\nFinal training set: {X_train_final.shape}")
print(f"Validation set: {X_val.shape}")


## 2. Model Creation with Transfer Learning


In [None]:
# Build the classifier through the project's create_model helper.
print("Creating model with MobileNetV2 (pretrained on ImageNet)...")

model_config = {
    'input_shape': (224, 224, 3),
    'num_classes': 2,
    'use_pretrained': True,  # request pretrained weights for transfer learning
}
model = create_model(**model_config)

# Print the layer-by-layer architecture.
model.summary()


## 3. Model Training with Optimization Techniques


In [None]:
# Training configuration
EPOCHS = 20
BATCH_SIZE = 32
MODEL_PATH = '../models/cat_dog_model.h5'

# Make sure the output directory exists so that saving the model to
# MODEL_PATH cannot fail on a fresh checkout where ../models is missing.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

# Train the model
print("Starting model training...")
print(f"Epochs: {EPOCHS}, Batch Size: {BATCH_SIZE}")

# `history` is consumed by the plotting cell below, which reads its
# `.history` dict.
history = train_model(
    model, X_train_final, y_train_final, X_val, y_val,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    model_save_path=MODEL_PATH
)


In [None]:
# Draw the training and validation curves side by side: accuracy on the left,
# loss on the right.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

curve_specs = [('accuracy', 'Accuracy'), ('loss', 'Loss')]
for panel, (history_key, display_name) in zip(axes, curve_specs):
    # Validation curves are stored under the same key with a 'val_' prefix.
    panel.plot(history.history[history_key], label=f'Training {display_name}')
    panel.plot(history.history[f'val_{history_key}'], label=f'Validation {display_name}')
    panel.set_title(f'Model {display_name}')
    panel.set_xlabel('Epoch')
    panel.set_ylabel(display_name)
    panel.legend()
    panel.grid(True)

plt.tight_layout()
plt.show()


## 4. Model Evaluation with Multiple Metrics


In [None]:
# Load the best model (saved during training).
# `load_model` comes from the project's `model` module and is imported in the
# notebook's top import cell, so all dependencies are declared in one place.
best_model = load_model(MODEL_PATH)

# Evaluate model on test set
print("Evaluating model on test set...")
metrics = evaluate_model(best_model, X_test, y_test)

# Display metrics
banner = "=" * 50
print("\n" + banner)
print("MODEL EVALUATION METRICS")
print(banner)

# Each of these is printed as a fraction and as a percentage; the label is
# padded to a fixed width so the values line up in one column.
ratio_metrics = [
    ("Accuracy:", 'accuracy'),
    ("Precision:", 'precision'),
    ("Recall:", 'recall'),
    ("F1 Score:", 'f1_score'),
]
for label, key in ratio_metrics:
    value = metrics[key]
    print(f"{label:<10} {value:.4f} ({value*100:.2f}%)")

print(f"Test Loss: {metrics['test_loss']:.4f}")
print(f"Test Accuracy: {metrics['test_accuracy']:.4f} ({metrics['test_accuracy']*100:.2f}%)")
print(banner)


In [None]:
# Render the confusion matrix stored in `metrics` as an annotated heatmap.
cm = np.array(metrics['confusion_matrix'])
class_labels = ['Cat', 'Dog']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title('Confusion Matrix')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
plt.show()

# Unpack the four cells in row-major order (the unpacking requires exactly
# four entries, i.e. a 2 x 2 matrix) and print each with its description.
tn, fp, fn, tp = cm.ravel()
print("\nPer-class metrics:")
cell_descriptions = (
    ("True Negatives (Cat correctly identified)", tn),
    ("False Positives (Cat misclassified as Dog)", fp),
    ("False Negatives (Dog misclassified as Cat)", fn),
    ("True Positives (Dog correctly identified)", tp),
)
for description, count in cell_descriptions:
    print(f"{description}: {count}")


In [None]:
# Per-class precision / recall / F1 over the whole test set.
test_probabilities = best_model.predict(X_test, verbose=0)
# The predicted class is the index of the highest score in each row.
y_pred = np.argmax(test_probabilities, axis=1)

print("\nClassification Report:")
report_text = classification_report(y_test, y_pred, target_names=['Cat', 'Dog'])
print(report_text)


In [None]:
# Visualize predictions on test set
fig, axes = plt.subplots(3, 6, figsize=(18, 9))
fig.suptitle('Sample Test Predictions', fontsize=16)

# Never index past the end of the test set: with fewer than 18 test images a
# fixed range(18) would raise IndexError.
n_preview = min(axes.size, len(X_test))

predictions = best_model.predict(X_test[:n_preview], verbose=0)
predicted_classes = np.argmax(predictions, axis=1)  # index of the highest score per row
confidences = np.max(predictions, axis=1)           # the highest score itself

class_names = ['Cat', 'Dog']

# Blank every panel first so unused ones stay empty when fewer than 18 images
# are available.
for panel in axes.flat:
    panel.axis('off')

for i, panel in enumerate(axes.ravel()[:n_preview]):
    panel.imshow(X_test[i])
    true_label = class_names[int(y_test[i])]
    pred_label = class_names[predicted_classes[i]]
    confidence = confidences[i]

    # Green title for a correct prediction, red for a miss.
    color = 'green' if true_label == pred_label else 'red'
    panel.set_title(f"True: {true_label}\nPred: {pred_label} ({confidence:.2f})",
                    color=color, fontsize=9)

plt.tight_layout()
plt.show()


## 5. Save Model Metadata


In [None]:
# Save model metadata
METADATA_PATH = '../models/model_metadata.json'

# Create the destination directory if it is missing so the write cannot fail
# on a fresh checkout.
os.makedirs(os.path.dirname(METADATA_PATH), exist_ok=True)

save_model_metadata(metrics, METADATA_PATH)
print("Model metadata saved successfully!")


## Summary

This notebook demonstrates:
1. ✅ Data preprocessing with augmentation
2. ✅ Model creation using transfer learning (MobileNetV2)
3. ✅ Training with optimization techniques (Early Stopping, Learning Rate Reduction, Model Checkpointing)
4. ✅ Comprehensive evaluation with multiple metrics (Accuracy, Precision, Recall, F1 Score, Loss, Confusion Matrix)
5. ✅ Model testing and visualization of predictions

The model is now ready for deployment!
