# Rock-Paper-Scissors CNN Project
## 3. Model Development and Training

This notebook focuses on designing and training different CNN architectures with increasing complexity.


In [None]:
# Import necessary libraries
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yaml
import warnings
warnings.filterwarnings('ignore')

# Add src to path for imports
sys.path.append('../src')

from models.cnn_models import RockPaperScissorsCNN
from utils.training_utils import TrainingManager
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf

# Set style for plots
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Seed the NumPy and TensorFlow global RNGs from one named constant so the
# value only has to be changed in a single place
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

print("✅ All libraries imported successfully!")
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU available: {tf.config.list_physical_devices('GPU')}")
# os.cpu_count() is the number of logical cores seen by Python. The TensorFlow
# inter-op value is a thread-pool setting (0 means TensorFlow picks a value
# itself), so it is reported separately instead of being labelled "CPU cores".
print(f"CPU cores: {os.cpu_count()}")
print(f"TF inter-op threads (0 = auto): {tf.config.threading.get_inter_op_parallelism_threads()}")


### Configuration and Setup

Let's load the configuration and set up the model development environment.


In [None]:
# Load configuration
config_path = '../config/config.yaml'
# Decode explicitly as UTF-8 so parsing does not depend on the platform's
# locale encoding; `or {}` turns an empty file into an empty mapping so the
# section check below reports it clearly.
with open(config_path, 'r', encoding='utf-8') as file:
    config = yaml.safe_load(file) or {}

# Fail early, naming every missing section at once, instead of stopping at the
# first failed lookup
required_sections = ('models', 'training', 'data', 'classes')
missing_sections = [section for section in required_sections if section not in config]
if missing_sections:
    raise KeyError(f"{config_path} is missing required section(s): {missing_sections}")

# Extract configuration parameters
model_configs = config['models']
training_config = config['training']
data_config = config['data']
classes = config['classes']

print("CONFIGURATION LOADED")
print("="*50)
print(f"Number of classes: {len(classes)}")
print(f"Classes: {classes}")
print(f"Image size: {data_config['image_size']}")
print(f"Batch size: {data_config['batch_size']}")
print(f"Training epochs: {training_config['epochs']}")
print(f"Learning rate: {training_config['learning_rate']}")
print(f"Optimizer: {training_config['optimizer']}")
print("="*50)

# Initialize model creator and training manager; both are handed the config
# path rather than the parsed dict
cnn_creator = RockPaperScissorsCNN(config_path)
trainer = TrainingManager(config_path)

print("✅ Model creator and training manager initialized!")


### Data Generators Setup

Let's set up the data generators for training and validation.


In [None]:
# Set up data generators (assuming data preprocessing was completed)
print("SETTING UP DATA GENERATORS...")
print("="*50)

# Directories expected to hold the processed train/val/test splits
train_dir = '../data/processed/train'
val_dir = '../data/processed/val'
test_dir = '../data/processed/test'

# Read the shared settings once so the real and the dummy generators agree
image_size = tuple(data_config['image_size'])
batch_size = data_config['batch_size']

# isdir (not exists): a stray file at one of these paths must not be mistaken
# for a dataset directory
if os.path.isdir(train_dir) and os.path.isdir(val_dir):
    # Augmentation is applied to the training split only
    train_datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        zoom_range=0.1,
        fill_mode='nearest',
        rescale=1./255
    )

    # Validation and test images are only rescaled
    val_test_datagen = ImageDataGenerator(rescale=1./255)

    # Arguments common to all three directory iterators
    flow_kwargs = dict(
        target_size=image_size,
        batch_size=batch_size,
        class_mode='categorical'
    )

    train_generator = train_datagen.flow_from_directory(train_dir, shuffle=True, **flow_kwargs)
    val_generator = val_test_datagen.flow_from_directory(val_dir, shuffle=False, **flow_kwargs)

    # The test split is optional; None marks its absence
    test_generator = None
    if os.path.isdir(test_dir):
        test_generator = val_test_datagen.flow_from_directory(test_dir, shuffle=False, **flow_kwargs)

    print("✅ Data generators created successfully!")
    print(f"Training samples: {train_generator.samples}")
    print(f"Validation samples: {val_generator.samples}")
    # Compare against None explicitly rather than relying on the iterator's truthiness
    if test_generator is not None:
        print(f"Test samples: {test_generator.samples}")
    print(f"Class indices: {train_generator.class_indices}")

else:
    print("❌ Processed data not found!")
    print("Please run the data preprocessing notebook first.")
    print("Expected directories:")
    print(f"  {train_dir}")
    print(f"  {val_dir}")
    print(f"  {test_dir}")

    # Create dummy generators for demonstration
    print("\n⚠️ Creating dummy generators for demonstration...")

    # One batch of random images shaped like the configured input, with one-hot
    # labels (the format class_mode='categorical' produces for real data)
    num_classes = len(classes)
    dummy_x = np.random.random((batch_size, *image_size, 3)).astype('float32')
    dummy_labels = np.random.randint(0, num_classes, size=batch_size)
    dummy_y = np.eye(num_classes, dtype='float32')[dummy_labels]

    class DummyGenerator:
        """Endless iterator that yields the same (x, y) batch on every step.

        Exposes the `samples` and `class_indices` attributes that the rest of
        this notebook reads from the real directory iterators.
        """

        def __init__(self, x, y, class_names=('paper', 'rock', 'scissors')):
            self.x = x
            self.y = y
            self.samples = len(x)
            # Sorted order mirrors how flow_from_directory numbers class folders
            self.class_indices = {
                name: index
                for index, name in enumerate(sorted(str(c) for c in class_names))
            }

        def __iter__(self):
            return self

        def __next__(self):
            # Never raises StopIteration: the consumer decides how many steps to draw
            return self.x, self.y

    train_generator = DummyGenerator(dummy_x, dummy_y, classes)
    val_generator = DummyGenerator(dummy_x, dummy_y, classes)
    test_generator = DummyGenerator(dummy_x, dummy_y, classes)

    print("✅ Dummy generators created for demonstration")


### Model Architecture Design

Now let's design and create the three CNN architectures with increasing complexity as required by the project.


In [None]:
# Create the three CNN architectures
print("CREATING CNN ARCHITECTURES...")
print("="*60)

# Derive the input shape from the configured image size so the models accept
# exactly what the data generators emit (their target_size comes from the same
# config entry). 3 channels, as in the previously hard-coded (224, 224, 3).
input_shape = (*data_config['image_size'], 3)


def build_and_describe(position, title, builder):
    """Build one architecture and print its summary.

    Args:
        position: 1-based number shown in the section banner.
        title: Human-readable model name, e.g. "Simple CNN".
        builder: Callable taking the input shape and returning a model.

    Returns:
        The model returned by `builder(input_shape)`.
    """
    print(f"\n{position}. {title.upper()} ARCHITECTURE")
    print("-" * 40)
    model = builder(input_shape)
    print("Model Summary:")
    print(cnn_creator.get_model_summary(model))
    return model


# The three architectures, in increasing order of complexity
simple_model = build_and_describe(1, "Simple CNN", cnn_creator.create_simple_cnn)
medium_model = build_and_describe(2, "Medium CNN", cnn_creator.create_medium_cnn)
complex_model = build_and_describe(3, "Complex CNN", cnn_creator.create_complex_cnn)

# Compare model complexities
print("\nMODEL COMPLEXITY COMPARISON")
print("="*60)
models_info = [
    ("Simple CNN", simple_model),
    ("Medium CNN", medium_model),
    ("Complex CNN", complex_model)
]

for name, model in models_info:
    params = model.count_params()
    print(f"{name:15}: {params:,} parameters")

print("="*60)


### Model Training

Now let's train each model and compare their performance. We'll start with the Simple CNN.


In [None]:
# Train Simple CNN
# One tag shared by the training call, the history plot and the saved model
simple_run_name = "simple_cnn"

print("TRAINING SIMPLE CNN...")
print("=" * 50)

# train_model receives the model, both generators and the run tag; its return
# value is kept because the comparison cells read `simple_history.history`
simple_history = trainer.train_model(simple_model, train_generator, val_generator, simple_run_name)

# Hand the recorded history to the trainer's plotting helper
trainer.plot_training_history(simple_history, simple_run_name)

# Hand the model to cnn_creator.save_model under the same tag
cnn_creator.save_model(simple_model, simple_run_name)

print("✅ Simple CNN training completed!")


In [None]:
# Train Medium CNN
# One tag shared by the training call, the history plot and the saved model
medium_run_name = "medium_cnn"

print("TRAINING MEDIUM CNN...")
print("=" * 50)

# train_model receives the model, both generators and the run tag; its return
# value is kept because the comparison cells read `medium_history.history`
medium_history = trainer.train_model(medium_model, train_generator, val_generator, medium_run_name)

# Hand the recorded history to the trainer's plotting helper
trainer.plot_training_history(medium_history, medium_run_name)

# Hand the model to cnn_creator.save_model under the same tag
cnn_creator.save_model(medium_model, medium_run_name)

print("✅ Medium CNN training completed!")


In [None]:
# Train Complex CNN
# One tag shared by the training call, the history plot and the saved model
complex_run_name = "complex_cnn"

print("TRAINING COMPLEX CNN...")
print("=" * 50)

# train_model receives the model, both generators and the run tag; its return
# value is kept because the comparison cells read `complex_history.history`
complex_history = trainer.train_model(complex_model, train_generator, val_generator, complex_run_name)

# Hand the recorded history to the trainer's plotting helper
trainer.plot_training_history(complex_history, complex_run_name)

# Hand the model to cnn_creator.save_model under the same tag
cnn_creator.save_model(complex_model, complex_run_name)

print("✅ Complex CNN training completed!")


### Training Results Comparison

Let's compare the training results of all three models.


In [None]:
# Compare training results
print("TRAINING RESULTS COMPARISON")
print("="*60)

# Collect training histories (insertion order fixes the order of bars and rows)
histories = {
    "Simple CNN": simple_history,
    "Medium CNN": medium_history,
    "Complex CNN": complex_history
}
model_names = list(histories.keys())

# Best value each model reached at ANY epoch: highest accuracy, lowest loss.
# These are not last-epoch values, which is why everything derived from them
# is labelled "Best" rather than "Final".
best_metrics = {
    name: {
        'train_acc': max(history.history['accuracy']),
        'val_acc': max(history.history['val_accuracy']),
        'train_loss': min(history.history['loss']),
        'val_loss': min(history.history['val_loss']),
    }
    for name, history in histories.items()
}

# Create comparison plots
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('Model Training Comparison', fontsize=16, fontweight='bold')

# Top row: per-epoch curves. A model's validation curve reuses the colour of
# its training curve so the solid/dashed pair can be matched by eye.
curve_panels = [
    (axes[0, 0], 'accuracy', 'Accuracy'),
    (axes[0, 1], 'loss', 'Loss'),
]
for ax, metric, label in curve_panels:
    for name, history in histories.items():
        train_line, = ax.plot(history.history[metric], label=f'{name} (Train)', linestyle='-')
        ax.plot(history.history[f'val_{metric}'], label=f'{name} (Val)', linestyle='--',
                color=train_line.get_color())
    ax.set_title(f'Training and Validation {label}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(label)
    ax.legend()
    ax.grid(True, alpha=0.3)

# Bottom row: grouped bars of the best-epoch values
x = np.arange(len(model_names))
width = 0.35

bar_panels = [
    (axes[1, 0], 'acc', 'Accuracy'),
    (axes[1, 1], 'loss', 'Loss'),
]
for ax, key, label in bar_panels:
    train_values = [best_metrics[name][f'train_{key}'] for name in model_names]
    val_values = [best_metrics[name][f'val_{key}'] for name in model_names]

    train_bars = ax.bar(x - width/2, train_values, width, label='Training', alpha=0.8)
    val_bars = ax.bar(x + width/2, val_values, width, label='Validation', alpha=0.8)

    ax.set_title(f'Best {label} Comparison')
    ax.set_ylabel(label)
    ax.set_xticks(x)
    ax.set_xticklabels(model_names, rotation=45)
    ax.legend()
    ax.grid(True, alpha=0.3)

    # Add value labels on bars
    for bar in [*train_bars, *val_bars]:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height + 0.01,
                f'{height:.3f}', ha='center', va='bottom', fontsize=9)

plt.tight_layout()
plt.show()

# Print summary statistics. Column width exceeds the longest header so the
# columns line up; the rules are sized from the header itself.
summary_columns = [
    ('Best Train Acc', 'train_acc'),
    ('Best Val Acc', 'val_acc'),
    ('Best Train Loss', 'train_loss'),
    ('Best Val Loss', 'val_loss'),
]
column_width = 17
summary_header = f"{'Model':<15} " + " ".join(f"{title:<{column_width}}" for title, _ in summary_columns)

print("\nTRAINING SUMMARY STATISTICS")
print("=" * len(summary_header))
print(summary_header)
print("-" * len(summary_header))

for name in model_names:
    row = best_metrics[name]
    print(f"{name:<15} " + " ".join(f"{row[key]:<{column_width}.4f}" for _, key in summary_columns))

print("=" * len(summary_header))


### Overfitting and Underfitting Analysis

Let's analyze the training curves to identify overfitting and underfitting patterns.


In [None]:
# Analyze overfitting and underfitting
# Section banner only; the analysis function and its report follow in this cell
print("OVERFITTING AND UNDERFITTING ANALYSIS")
print("="*60)

def analyze_fitting_pattern(history, model_name):
    """Classify a training run as over-, under- or well-fitted.

    The verdict is based on the best value of each metric over all epochs
    (highest accuracy, lowest loss), not on the last epoch.

    Args:
        history: Object whose `.history` mapping holds the per-epoch lists
            'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
        model_name: Label copied into the result under 'model'.

    Returns:
        dict with keys 'model', 'pattern', 'severity', 'acc_gap', 'loss_gap',
        'final_train_acc', 'final_val_acc', 'final_train_loss' and
        'final_val_loss'. The 'final_*' entries hold the best-epoch values.
    """
    curves = history.history
    acc_curve, val_acc_curve = curves['accuracy'], curves['val_accuracy']
    loss_curve, val_loss_curve = curves['loss'], curves['val_loss']

    peak_train_acc = max(acc_curve)
    peak_val_acc = max(val_acc_curve)
    floor_train_loss = min(loss_curve)
    floor_val_loss = min(val_loss_curve)

    # Positive gaps mean training looks better than validation
    acc_gap = peak_train_acc - peak_val_acc
    loss_gap = floor_val_loss - floor_train_loss

    # Underfitting takes precedence over every gap-based verdict, so it is tested first
    if peak_val_acc < 0.7 and peak_train_acc < 0.8:
        pattern, severity = "UNDERFITTING", "High"
    elif acc_gap > 0.1 or loss_gap > 0.1:
        pattern = "OVERFITTING"
        if acc_gap > 0.2 or loss_gap > 0.2:
            severity = "High"
        else:
            severity = "Moderate"
    elif acc_gap < 0.02 and loss_gap < 0.02:
        pattern, severity = "GOOD FIT", "Good"
    else:
        # Gaps between the two thresholds
        pattern, severity = "SLIGHT OVERFITTING", "Low"

    result = {'model': model_name, 'pattern': pattern, 'severity': severity}
    result['acc_gap'] = acc_gap
    result['loss_gap'] = loss_gap
    result['final_train_acc'] = peak_train_acc
    result['final_val_acc'] = peak_val_acc
    result['final_train_loss'] = floor_train_loss
    result['final_val_loss'] = floor_val_loss
    return result

# Analyze each model
analyses = [analyze_fitting_pattern(history, name) for name, history in histories.items()]

# Display analysis results; the rule is sized from the header so they stay aligned
report_header = f"{'Model':<15} {'Pattern':<20} {'Severity':<10} {'Acc Gap':<10} {'Loss Gap':<10}"
print(report_header)
print("-" * len(report_header))

for analysis in analyses:
    print(f"{analysis['model']:<15} {analysis['pattern']:<20} {analysis['severity']:<10} "
          f"{analysis['acc_gap']:<10.4f} {analysis['loss_gap']:<10.4f}")

print("\nDETAILED ANALYSIS:")
print("-" * 40)

# analyze_fitting_pattern stores max accuracy / min loss over all epochs under
# its 'final_*' keys, so the values are reported as "Best", not "Final"
for analysis in analyses:
    print(f"\n{analysis['model']}:")
    print(f"  Pattern: {analysis['pattern']}")
    print(f"  Severity: {analysis['severity']}")
    print(f"  Best Training Accuracy: {analysis['final_train_acc']:.4f}")
    print(f"  Best Validation Accuracy: {analysis['final_val_acc']:.4f}")
    print(f"  Accuracy Gap: {analysis['acc_gap']:.4f}")
    print(f"  Best Training Loss: {analysis['final_train_loss']:.4f}")
    print(f"  Best Validation Loss: {analysis['final_val_loss']:.4f}")
    print(f"  Loss Gap: {analysis['loss_gap']:.4f}")

# Recommendations, keyed by the pattern string returned by analyze_fitting_pattern
recommendations = {
    "OVERFITTING": [
        "Consider adding more dropout",
        "Reduce model complexity",
        "Increase data augmentation",
        "Use early stopping",
    ],
    "UNDERFITTING": [
        "Increase model complexity",
        "Train for more epochs",
        "Reduce regularization",
        "Check learning rate",
    ],
    "GOOD FIT": [
        "Model is well-balanced",
        "Consider fine-tuning hyperparameters",
    ],
}
# Used for any pattern without its own entry (currently "SLIGHT OVERFITTING")
fallback_recommendations = [
    "Monitor training closely",
    "Consider slight adjustments",
]

print("\nRECOMMENDATIONS:")
print("-" * 40)

for analysis in analyses:
    print(f"\n{analysis['model']}:")
    for tip in recommendations.get(analysis['pattern'], fallback_recommendations):
        print(f"  - {tip}")

print("="*60)


### Summary and Next Steps

Let's summarize the model development phase and prepare for hyperparameter tuning.

**Model Development Summary:**
1. **Architecture Design**: Created 3 CNN architectures with increasing complexity
2. **Model Training**: Trained all models with proper callbacks and monitoring
3. **Performance Comparison**: Analyzed training curves and model performance
4. **Overfitting Analysis**: Identified fitting patterns and provided recommendations

**Key Achievements:**
- ✅ **Simple CNN**: Basic architecture with 2 conv layers
- ✅ **Medium CNN**: Intermediate architecture with 3 conv layers + batch normalization
- ✅ **Complex CNN**: Advanced architecture with 4 conv layers + global pooling
- ✅ **Training Curves**: Comprehensive visualization of training progress
- ✅ **Overfitting Analysis**: Detailed analysis of fitting patterns

**Project Requirements Addressed:**
- ✅ **3 CNN Architectures**: Simple, Medium, and Complex models implemented
- ✅ **Incremental Complexity**: Each model is more complex than the previous one
- ✅ **Architecture Justification**: Clear definition of layers, activations, pooling, and dropout
- ✅ **Training with Optimizer**: Proper optimizer and loss function usage
- ✅ **Training Curves**: Visualization of loss and accuracy curves
- ✅ **Overfitting Analysis**: Discussion of overfitting and underfitting patterns

**Next Steps:**
- Hyperparameter tuning for the best performing model
- Comprehensive model evaluation on test set
- Misclassification analysis
- Final model selection and optimization
