# All Models Training and Evaluation

This notebook trains and evaluates all CNN model architectures defined in `models.py`.

**Models included:**
- 1-block to 6-block architectures
- 3 variants per architecture: base, dropout, batchnorm
- Total: 18 models

**Usage:**
- Run all cells to train all models
- Or run the setup cells first (Sections 1–6 and the `results = []` cell), then only the training cells for the models you want
- Results are saved and compared at the end


## 1. Setup and Imports


In [None]:
import os
import sys
import pickle
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# TensorFlow and Keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

# Import models from models.py
from models import get_model

# Set TensorFlow to use float64 (matching original notebooks)
# NOTE(review): this makes float64 the Keras default float type, while the
# data-preparation cell casts the inputs to float32 — confirm that the
# mismatch (and the extra memory of float64 weights) is intended.
tf.keras.backend.set_floatx("float64")

# Log library and interpreter versions so a run can be reproduced later.
print(f"TensorFlow version: {tf.__version__}")
print(f"Python version: {sys.version}")


## 2. Environment Setup


In [None]:
# Detect if running in Google Colab.
# Only the imports are guarded: an ImportError raised later (e.g. from inside
# drive.mount) must surface as an error instead of being misread as
# "not running in Colab" and silently switching to a local directory.
try:
    import google.colab  # noqa: F401  (imported only to detect Colab)
    from google.colab import drive
except ImportError:
    IN_COLAB = False
    base_dir = os.path.join(os.getcwd(), 'CatsDogs')
else:
    IN_COLAB = True
    drive.mount('/content/drive', force_remount=False)
    base_dir = '/content/drive/MyDrive/CatsDogs'

print(f"Running in Colab: {IN_COLAB}")
print(f"Base directory: {base_dir}")

# Create results directory (plots and the CSV summary are written here)
results_dir = os.path.join(base_dir, 'Results')
os.makedirs(results_dir, exist_ok=True)
print(f"Results directory: {results_dir}")


## 3. Load Preprocessed Data


In [None]:
# Locate the pickled feature/label files under <base_dir>/Pickles.
pickles_dir = os.path.join(base_dir, 'Pickles')
x_pickle_path = os.path.join(pickles_dir, 'X.pickle')
y_pickle_path = os.path.join(pickles_dir, 'y.pickle')

print("Loading preprocessed data...")
# Unpickle features first, then labels, each inside its own context manager.
loaded_arrays = []
for pickle_path in (x_pickle_path, y_pickle_path):
    with open(pickle_path, 'rb') as handle:
        loaded_arrays.append(pickle.load(handle))
X, y = loaded_arrays

# Quick sanity report on what was loaded.
print(f"Data loaded: X shape = {X.shape}, y shape = {y.shape}")
print(f"X dtype: {X.dtype}, X range: [{X.min():.3f}, {X.max():.3f}]")


## 4. Prepare Data for Training


In [None]:
# Convert to float32 NumPy arrays and ensure normalization.
# train_test_split works on NumPy arrays, so staying in NumPy avoids the
# array -> tensor -> array round trips (and their copies of the whole dataset).
X = np.asarray(X, dtype=np.float32)
if X.max() > 1.0:
    X = X / 255.0
    print("Normalized pixel values to [0, 1]")

# Binary targets as a float column vector of shape (n_samples, 1).
# tf.one_hot(y, depth=1) must NOT be used here: with depth=1 it yields 1.0 for
# label 0 and 0.0 for every other label, i.e. it silently inverts 0/1 labels.
# The labels themselves are already the correct sigmoid targets.
x_size = X.shape[0]
depth = 1
# reshape also validates that there is exactly one label per sample
y = np.asarray(y, dtype=np.float32).reshape(x_size, depth)

# Split into train and test sets (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print(f"Train set: {X_train.shape[0]} samples")
print(f"Test set: {X_test.shape[0]} samples")
print(f"Input shape: {X_train.shape[1:]}")


## 5. Training Configuration


In [None]:
# Hyperparameters
EPOCHS = 50
BATCH_SIZE = 64
OPTIMIZER = 'adam'  # Options: 'adam', 'sgd', 'rmsprop'

# Loss function
# from_logits=False: the loss expects probabilities.
# NOTE(review): assumes the models from models.py end in a sigmoid — confirm.
loss = keras.losses.BinaryCrossentropy(from_logits=False)

# Optimizer factories.
# Optimizers are built lazily so that only the selected one is instantiated;
# building all of them eagerly lets an invalid argument of an unused optimizer
# break the whole cell.
# SGD: the legacy `decay=` keyword is rejected by newer Keras optimizers, so the
# same schedule lr / (1 + decay * step) is expressed with InverseTimeDecay.
optimizers = {
    'adam': lambda: keras.optimizers.Adam(),
    'sgd': lambda: keras.optimizers.SGD(
        learning_rate=keras.optimizers.schedules.InverseTimeDecay(
            initial_learning_rate=0.01,
            decay_steps=1,
            decay_rate=0.01 / EPOCHS,
        ),
        momentum=0.9,
    ),
    'rmsprop': lambda: keras.optimizers.RMSprop(learning_rate=1e-3),
}

# Raises KeyError if OPTIMIZER is not one of the keys above.
optimizer = optimizers[OPTIMIZER]()

print("Training configuration:")
print(f"  Epochs: {EPOCHS}")
print(f"  Batch size: {BATCH_SIZE}")
print(f"  Optimizer: {OPTIMIZER}")


## 6. Helper Functions


In [None]:
def train_model(model, model_name, X_train, y_train, X_test, y_test, epochs, batch_size, optimizer, loss):
    """
    Compile, train and evaluate a model; return history and evaluation results.

    Args:
        model: Model object exposing ``compile``, ``fit`` and ``evaluate``.
        model_name: Label used in the console output.
        X_train, y_train: Training inputs and targets passed to ``model.fit``.
        X_test, y_test: Held-out data, used both as the per-epoch validation
            data and for the final evaluation.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used by ``model.fit``.
        optimizer: Optimizer identifier (e.g. a string) or optimizer instance.
            An instance is never compiled into the model directly; a fresh
            copy is rebuilt from its config, so the same instance can safely
            be passed for many models.
        loss: Loss passed to ``model.compile``.

    Returns:
        Tuple ``(history, test_loss, test_accuracy)`` where ``history`` is the
        object returned by ``model.fit`` and the other two are the first two
        values returned by ``model.evaluate``.
    """
    print(f"\n{'='*60}")
    print(f"Training: {model_name}")
    print(f"{'='*60}")

    # An optimizer instance accumulates state (step counter, moment estimates)
    # bound to the variables of the first model it trains. Reusing it for
    # another model leaks that state or fails outright, so rebuild an
    # identically configured optimizer for every model. Strings and other
    # identifiers have no config and are passed through untouched.
    if hasattr(optimizer, 'get_config') and hasattr(type(optimizer), 'from_config'):
        optimizer = type(optimizer).from_config(optimizer.get_config())

    # Compile model
    model.compile(optimizer=optimizer, loss=loss, metrics=['binary_accuracy'])

    # Train model
    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        verbose=1,
        validation_data=(X_test, y_test)
    )

    # Evaluate model: evaluate() returns [loss, binary_accuracy] in compile order
    results = model.evaluate(X_test, y_test, verbose=0)
    test_loss, test_accuracy = results[0], results[1]

    print(f"\n{model_name} Results:")
    print(f"  Test Loss: {test_loss:.4f}")
    print(f"  Test Accuracy: {test_accuracy:.4f}")

    return history, test_loss, test_accuracy


def plot_training_history(history, model_name, save_path=None):
    """
    Draw the loss and accuracy curves of a training run side by side.

    Reads the 'loss', 'val_loss', 'binary_accuracy' and 'val_binary_accuracy'
    series from ``history.history``. When ``save_path`` is truthy the figure is
    also written to that path before being shown.
    """
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(15, 5))
    curves = history.history

    # (axis, metric key in the history dict, label used on axis and in title)
    panels = (
        (loss_ax, 'loss', 'Loss'),
        (acc_ax, 'binary_accuracy', 'Accuracy'),
    )
    for ax, metric, label in panels:
        ax.plot(curves[metric], label='train')
        ax.plot(curves[f'val_{metric}'], label='val')
        ax.set_xlabel('Epoch', size=12)
        ax.set_ylabel(label, size=12)
        ax.legend(fontsize=12)
        ax.set_title(f'{model_name} - {label}')
        ax.grid(True)

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=150, bbox_inches='tight')
        print(f"Plot saved to {save_path}")

    plt.show()
    plt.close()


## 7. Train All Models


In [None]:
# Store results
# Each training cell below appends one dict per trained model with the keys
# 'model', 'test_loss', 'test_accuracy' and 'history'.
results = []


### 7.1 One-Block Models


In [None]:
# One-block base: build, train, record the metrics, then plot the curves.
n_blocks, variant = 1, 'base'
model_name = f'{n_blocks}-block-{variant}'
plot_path = os.path.join(results_dir, f'{n_blocks}block_{variant}_history.png')

model = get_model(n_blocks, variant, X_train.shape[1:])
history, test_loss, test_acc = train_model(
    model, model_name, X_train, y_train, X_test, y_test,
    EPOCHS, BATCH_SIZE, optimizer, loss,
)
results.append(dict(model=model_name, test_loss=test_loss,
                    test_accuracy=test_acc, history=history))
plot_training_history(history, model_name, plot_path)


In [None]:
# One-block dropout: build, train, record the metrics, then plot the curves.
n_blocks, variant = 1, 'dropout'
model_name = f'{n_blocks}-block-{variant}'
plot_path = os.path.join(results_dir, f'{n_blocks}block_{variant}_history.png')

model = get_model(n_blocks, variant, X_train.shape[1:])
history, test_loss, test_acc = train_model(
    model, model_name, X_train, y_train, X_test, y_test,
    EPOCHS, BATCH_SIZE, optimizer, loss,
)
results.append(dict(model=model_name, test_loss=test_loss,
                    test_accuracy=test_acc, history=history))
plot_training_history(history, model_name, plot_path)


In [None]:
# One-block batchnorm: build, train, record the metrics, then plot the curves.
n_blocks, variant = 1, 'batchnorm'
model_name = f'{n_blocks}-block-{variant}'
plot_path = os.path.join(results_dir, f'{n_blocks}block_{variant}_history.png')

model = get_model(n_blocks, variant, X_train.shape[1:])
history, test_loss, test_acc = train_model(
    model, model_name, X_train, y_train, X_test, y_test,
    EPOCHS, BATCH_SIZE, optimizer, loss,
)
results.append(dict(model=model_name, test_loss=test_loss,
                    test_accuracy=test_acc, history=history))
plot_training_history(history, model_name, plot_path)


### 7.2 Two-Block Models


In [None]:
# Two-block base: build, train, record the metrics, then plot the curves.
n_blocks, variant = 2, 'base'
model_name = f'{n_blocks}-block-{variant}'
plot_path = os.path.join(results_dir, f'{n_blocks}block_{variant}_history.png')

model = get_model(n_blocks, variant, X_train.shape[1:])
history, test_loss, test_acc = train_model(
    model, model_name, X_train, y_train, X_test, y_test,
    EPOCHS, BATCH_SIZE, optimizer, loss,
)
results.append(dict(model=model_name, test_loss=test_loss,
                    test_accuracy=test_acc, history=history))
plot_training_history(history, model_name, plot_path)


In [None]:
# Two-block dropout: build, train, record the metrics, then plot the curves.
n_blocks, variant = 2, 'dropout'
model_name = f'{n_blocks}-block-{variant}'
plot_path = os.path.join(results_dir, f'{n_blocks}block_{variant}_history.png')

model = get_model(n_blocks, variant, X_train.shape[1:])
history, test_loss, test_acc = train_model(
    model, model_name, X_train, y_train, X_test, y_test,
    EPOCHS, BATCH_SIZE, optimizer, loss,
)
results.append(dict(model=model_name, test_loss=test_loss,
                    test_accuracy=test_acc, history=history))
plot_training_history(history, model_name, plot_path)


In [None]:
# Two-block batchnorm: build, train, record the metrics, then plot the curves.
n_blocks, variant = 2, 'batchnorm'
model_name = f'{n_blocks}-block-{variant}'
plot_path = os.path.join(results_dir, f'{n_blocks}block_{variant}_history.png')

model = get_model(n_blocks, variant, X_train.shape[1:])
history, test_loss, test_acc = train_model(
    model, model_name, X_train, y_train, X_test, y_test,
    EPOCHS, BATCH_SIZE, optimizer, loss,
)
results.append(dict(model=model_name, test_loss=test_loss,
                    test_accuracy=test_acc, history=history))
plot_training_history(history, model_name, plot_path)


### 7.3 Three-Block Models


In [None]:
# Three-block base: build, train, record the metrics, then plot the curves.
n_blocks, variant = 3, 'base'
model_name = f'{n_blocks}-block-{variant}'
plot_path = os.path.join(results_dir, f'{n_blocks}block_{variant}_history.png')

model = get_model(n_blocks, variant, X_train.shape[1:])
history, test_loss, test_acc = train_model(
    model, model_name, X_train, y_train, X_test, y_test,
    EPOCHS, BATCH_SIZE, optimizer, loss,
)
results.append(dict(model=model_name, test_loss=test_loss,
                    test_accuracy=test_acc, history=history))
plot_training_history(history, model_name, plot_path)


In [None]:
# Three-block dropout: build, train, record the metrics, then plot the curves.
n_blocks, variant = 3, 'dropout'
model_name = f'{n_blocks}-block-{variant}'
plot_path = os.path.join(results_dir, f'{n_blocks}block_{variant}_history.png')

model = get_model(n_blocks, variant, X_train.shape[1:])
history, test_loss, test_acc = train_model(
    model, model_name, X_train, y_train, X_test, y_test,
    EPOCHS, BATCH_SIZE, optimizer, loss,
)
results.append(dict(model=model_name, test_loss=test_loss,
                    test_accuracy=test_acc, history=history))
plot_training_history(history, model_name, plot_path)


In [None]:
# Three-block batchnorm: build, train, record the metrics, then plot the curves.
n_blocks, variant = 3, 'batchnorm'
model_name = f'{n_blocks}-block-{variant}'
plot_path = os.path.join(results_dir, f'{n_blocks}block_{variant}_history.png')

model = get_model(n_blocks, variant, X_train.shape[1:])
history, test_loss, test_acc = train_model(
    model, model_name, X_train, y_train, X_test, y_test,
    EPOCHS, BATCH_SIZE, optimizer, loss,
)
results.append(dict(model=model_name, test_loss=test_loss,
                    test_accuracy=test_acc, history=history))
plot_training_history(history, model_name, plot_path)


### 7.4 Four-Block Models


In [None]:
# Four-block base: build, train, record the metrics, then plot the curves.
n_blocks, variant = 4, 'base'
model_name = f'{n_blocks}-block-{variant}'
plot_path = os.path.join(results_dir, f'{n_blocks}block_{variant}_history.png')

model = get_model(n_blocks, variant, X_train.shape[1:])
history, test_loss, test_acc = train_model(
    model, model_name, X_train, y_train, X_test, y_test,
    EPOCHS, BATCH_SIZE, optimizer, loss,
)
results.append(dict(model=model_name, test_loss=test_loss,
                    test_accuracy=test_acc, history=history))
plot_training_history(history, model_name, plot_path)


In [None]:
# Four-block dropout: build, train, record the metrics, then plot the curves.
n_blocks, variant = 4, 'dropout'
model_name = f'{n_blocks}-block-{variant}'
plot_path = os.path.join(results_dir, f'{n_blocks}block_{variant}_history.png')

model = get_model(n_blocks, variant, X_train.shape[1:])
history, test_loss, test_acc = train_model(
    model, model_name, X_train, y_train, X_test, y_test,
    EPOCHS, BATCH_SIZE, optimizer, loss,
)
results.append(dict(model=model_name, test_loss=test_loss,
                    test_accuracy=test_acc, history=history))
plot_training_history(history, model_name, plot_path)


In [None]:
# Four-block batchnorm: build, train, record the metrics, then plot the curves.
n_blocks, variant = 4, 'batchnorm'
model_name = f'{n_blocks}-block-{variant}'
plot_path = os.path.join(results_dir, f'{n_blocks}block_{variant}_history.png')

model = get_model(n_blocks, variant, X_train.shape[1:])
history, test_loss, test_acc = train_model(
    model, model_name, X_train, y_train, X_test, y_test,
    EPOCHS, BATCH_SIZE, optimizer, loss,
)
results.append(dict(model=model_name, test_loss=test_loss,
                    test_accuracy=test_acc, history=history))
plot_training_history(history, model_name, plot_path)


### 7.5 Five-Block Models


In [None]:
# Five-block base: build, train, record the metrics, then plot the curves.
n_blocks, variant = 5, 'base'
model_name = f'{n_blocks}-block-{variant}'
plot_path = os.path.join(results_dir, f'{n_blocks}block_{variant}_history.png')

model = get_model(n_blocks, variant, X_train.shape[1:])
history, test_loss, test_acc = train_model(
    model, model_name, X_train, y_train, X_test, y_test,
    EPOCHS, BATCH_SIZE, optimizer, loss,
)
results.append(dict(model=model_name, test_loss=test_loss,
                    test_accuracy=test_acc, history=history))
plot_training_history(history, model_name, plot_path)


In [None]:
# Five-block dropout: build, train, record the metrics, then plot the curves.
n_blocks, variant = 5, 'dropout'
model_name = f'{n_blocks}-block-{variant}'
plot_path = os.path.join(results_dir, f'{n_blocks}block_{variant}_history.png')

model = get_model(n_blocks, variant, X_train.shape[1:])
history, test_loss, test_acc = train_model(
    model, model_name, X_train, y_train, X_test, y_test,
    EPOCHS, BATCH_SIZE, optimizer, loss,
)
results.append(dict(model=model_name, test_loss=test_loss,
                    test_accuracy=test_acc, history=history))
plot_training_history(history, model_name, plot_path)


In [None]:
# Five-block batchnorm: build, train, record the metrics, then plot the curves.
n_blocks, variant = 5, 'batchnorm'
model_name = f'{n_blocks}-block-{variant}'
plot_path = os.path.join(results_dir, f'{n_blocks}block_{variant}_history.png')

model = get_model(n_blocks, variant, X_train.shape[1:])
history, test_loss, test_acc = train_model(
    model, model_name, X_train, y_train, X_test, y_test,
    EPOCHS, BATCH_SIZE, optimizer, loss,
)
results.append(dict(model=model_name, test_loss=test_loss,
                    test_accuracy=test_acc, history=history))
plot_training_history(history, model_name, plot_path)


### 7.6 Six-Block Models


In [None]:
# Six-block base: build, train, record the metrics, then plot the curves.
n_blocks, variant = 6, 'base'
model_name = f'{n_blocks}-block-{variant}'
plot_path = os.path.join(results_dir, f'{n_blocks}block_{variant}_history.png')

model = get_model(n_blocks, variant, X_train.shape[1:])
history, test_loss, test_acc = train_model(
    model, model_name, X_train, y_train, X_test, y_test,
    EPOCHS, BATCH_SIZE, optimizer, loss,
)
results.append(dict(model=model_name, test_loss=test_loss,
                    test_accuracy=test_acc, history=history))
plot_training_history(history, model_name, plot_path)


In [None]:
# Six-block dropout: build, train, record the metrics, then plot the curves.
n_blocks, variant = 6, 'dropout'
model_name = f'{n_blocks}-block-{variant}'
plot_path = os.path.join(results_dir, f'{n_blocks}block_{variant}_history.png')

model = get_model(n_blocks, variant, X_train.shape[1:])
history, test_loss, test_acc = train_model(
    model, model_name, X_train, y_train, X_test, y_test,
    EPOCHS, BATCH_SIZE, optimizer, loss,
)
results.append(dict(model=model_name, test_loss=test_loss,
                    test_accuracy=test_acc, history=history))
plot_training_history(history, model_name, plot_path)


In [None]:
# Six-block batchnorm: build, train, record the metrics, then plot the curves.
n_blocks, variant = 6, 'batchnorm'
model_name = f'{n_blocks}-block-{variant}'
plot_path = os.path.join(results_dir, f'{n_blocks}block_{variant}_history.png')

model = get_model(n_blocks, variant, X_train.shape[1:])
history, test_loss, test_acc = train_model(
    model, model_name, X_train, y_train, X_test, y_test,
    EPOCHS, BATCH_SIZE, optimizer, loss,
)
results.append(dict(model=model_name, test_loss=test_loss,
                    test_accuracy=test_acc, history=history))
plot_training_history(history, model_name, plot_path)


## 8. Results Summary and Comparison


In [None]:
# Build the summary table: one row per trained model, keeping only the scalar
# metrics (the history objects are left out), ranked by accuracy, best first.
summary_columns = {
    'model': 'Model',
    'test_loss': 'Test Loss',
    'test_accuracy': 'Test Accuracy',
}
summary_rows = [
    {label: entry[key] for key, label in summary_columns.items()}
    for entry in results
]
results_df = pd.DataFrame(summary_rows).sort_values('Test Accuracy', ascending=False)

divider = "=" * 60
print("\n" + divider)
print("RESULTS SUMMARY")
print(divider)
print(results_df.to_string(index=False))

# Persist the ranked table next to the plots.
csv_path = os.path.join(results_dir, 'all_models_results.csv')
results_df.to_csv(csv_path, index=False)
print(f"\nResults saved to {csv_path}")


In [None]:
# Side-by-side horizontal bar charts comparing all models: accuracy on the
# left, loss on the right. Rows follow the order of results_df.
fig, axes = plt.subplots(1, 2, figsize=(18, 6))
bar_positions = range(len(results_df))
panels = [('Test Accuracy', 'steelblue'), ('Test Loss', 'coral')]

for ax, (metric, bar_color) in zip(axes, panels):
    ax.barh(bar_positions, results_df[metric], color=bar_color)
    ax.set_yticks(bar_positions)
    ax.set_yticklabels(results_df['Model'], fontsize=9)
    ax.set_xlabel(metric, size=12)
    ax.set_title(f'Model Comparison - {metric}', size=14, fontweight='bold')
    ax.grid(True, alpha=0.3, axis='x')
    # First row of results_df goes at the top of the chart
    ax.invert_yaxis()

plt.tight_layout()
comparison_path = os.path.join(results_dir, 'all_models_comparison.png')
plt.savefig(comparison_path, dpi=150, bbox_inches='tight')
print(f"Comparison plot saved to {comparison_path}")
plt.show()


In [None]:
# Report the best model overall and a per-variant summary.
# The notebook allows training only a subset of models, so both the whole
# table and any single variant may be empty; indexing row 0 of an empty frame
# would raise IndexError, hence the explicit guards below.
divider = "=" * 60

print("\n" + divider)
print("BEST MODEL")
print(divider)
if results_df.empty:
    print("No models have been trained yet.")
else:
    # results_df is sorted by 'Test Accuracy' descending, so row 0 is the best
    best_model = results_df.iloc[0]
    print(f"Model: {best_model['Model']}")
    print(f"Test Accuracy: {best_model['Test Accuracy']:.4f}")
    print(f"Test Loss: {best_model['Test Loss']:.4f}")

    # Group by variant
    print("\n" + divider)
    print("RESULTS BY VARIANT")
    print(divider)
    for variant in ['base', 'dropout', 'batchnorm']:
        # Model names have the form '<n>-block-<variant>'; match the exact
        # suffix rather than a regex substring.
        variant_results = results_df[results_df['Model'].str.endswith(f'-{variant}')]
        print(f"\n{variant.upper()}:")
        if variant_results.empty:
            print("  No trained models for this variant")
            continue
        avg_acc = variant_results['Test Accuracy'].mean()
        avg_loss = variant_results['Test Loss'].mean()
        # Filtering preserves the accuracy ordering, so row 0 is the variant's best
        best_of_variant = variant_results.iloc[0]
        print(f"  Average Accuracy: {avg_acc:.4f}")
        print(f"  Average Loss: {avg_loss:.4f}")
        print(f"  Best: {best_of_variant['Model']} ({best_of_variant['Test Accuracy']:.4f})")


## 9. Optional: Train Specific Models Only

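To train only a subset of models, uncomment the cell below and list the `(blocks, variant)` pairs you want in `models_to_train`. Note that the cell resets `results`, so re-run the summary cells afterwards.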


In [None]:
# Uncomment and modify to train specific models only
# models_to_train = [
#     (3, 'batchnorm'),  # 3-block with batch normalization
#     (2, 'dropout'),    # 2-block with dropout
#     # Add more as needed
# ]
# 
# results = []
# for blocks, variant in models_to_train:
#     model_name = f"{blocks}-block-{variant}"
#     model = get_model(blocks, variant, X_train.shape[1:])
#     history, test_loss, test_acc = train_model(
#         model, model_name, X_train, y_train, X_test, y_test,
#         EPOCHS, BATCH_SIZE, optimizer, loss
#     )
#     results.append({
#         'model': model_name,
#         'test_loss': test_loss,
#         'test_accuracy': test_acc,
#         'history': history
#     })
#     plot_training_history(history, model_name,
#                          os.path.join(results_dir, f'{blocks}block_{variant}_history.png'))
