# Lesson 4: Deep Learning and Neural Networks

Dive deep into neural networks and modern deep learning techniques.

## What You'll Learn
- Neural network architecture and theory
- Building networks with TensorFlow/Keras
- Convolutional Neural Networks (CNNs)
- Training strategies and optimization
- Transfer learning

## Neural Network Fundamentals

A neural network consists of:
- **Neurons**: Basic units that process information
- **Layers**: Groups of neurons
  - Input layer: Receives data
  - Hidden layers: Process information
  - Output layer: Produces predictions
- **Weights and biases**: Learnable parameters
- **Activation functions**: Introduce non-linearity (ReLU, sigmoid, tanh)

### The Forward Pass
```
output = activation(weights × input + bias)
```

## Building a Simple Neural Network

In [None]:
# Install required packages:
# pip install tensorflow numpy matplotlib scikit-learn

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Synthetic two-class dataset that is not linearly separable
X, y = make_moons(n_samples=1000, noise=0.1, random_state=42)

# Hold out 20% of the samples for the final evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply
# those same statistics to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Quick sanity check of the split sizes and the number of feature columns
print(f"Training samples: {len(X_train)}")
print(f"Testing samples: {len(X_test)}")
print(f"Feature shape: {X_train.shape[1]}")

## Creating the Model Architecture

In [None]:
# Build a fully connected binary classifier for two input features
model = keras.Sequential([
    # Declare the input with an explicit Input object instead of passing
    # `input_shape=` to the first layer, which current Keras discourages
    keras.Input(shape=(2,)),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.2),  # Regularization: randomly zero 20% of activations while training
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid'),  # Binary classification: one probability output
])

# Compile model: binary cross-entropy pairs with the single sigmoid output
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Display architecture (layer output shapes and parameter counts)
model.summary()

## Training the Network

In [None]:
# Fit the classifier, holding out 20% of the training data for
# per-epoch validation metrics
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    verbose=0,  # Suppress output for notebook
    validation_split=0.2,
)

# Score the model on the held-out test split
test_loss, test_accuracy = model.evaluate(x=X_test, y=y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")
print(f"Test Loss: {test_loss:.4f}")

## Visualizing Training Progress

In [None]:
# Side-by-side training curves: accuracy on the left, loss on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# (axis, training key, validation key, title, y-axis label) for each panel
panels = (
    (ax1, 'accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    (ax2, 'loss', 'val_loss', 'Model Loss', 'Loss'),
)

for axis, train_key, val_key, title, y_label in panels:
    axis.plot(history.history[train_key], label='Training')
    axis.plot(history.history[val_key], label='Validation')
    axis.set_title(title)
    axis.set_xlabel('Epoch')
    axis.set_ylabel(y_label)
    axis.legend()
    axis.grid(True)

plt.tight_layout()
plt.show()

## Convolutional Neural Networks (CNNs)

CNNs are specialized for processing grid-like data (images):
- **Convolutional layers**: Detect features (edges, textures, patterns)
- **Pooling layers**: Reduce spatial dimensions
- **Fully connected layers**: Make final predictions

In [None]:
# Fetch the MNIST train/test splits
mnist_train, mnist_test = keras.datasets.mnist.load_data()
X_train_img, y_train_img = mnist_train
X_test_img, y_test_img = mnist_test

# Add a trailing channel axis, then rescale pixel values by 1/255
X_train_img = X_train_img.reshape(-1, 28, 28, 1)
X_train_img = X_train_img.astype('float32') / 255
X_test_img = X_test_img.reshape(-1, 28, 28, 1)
X_test_img = X_test_img.astype('float32') / 255

# One-hot encode the digit labels into 10 classes
y_train_img = keras.utils.to_categorical(y_train_img, num_classes=10)
y_test_img = keras.utils.to_categorical(y_test_img, num_classes=10)

print(f"Training images: {X_train_img.shape}")
print(f"Image shape: {X_train_img.shape[1:]}")

## Building a CNN

In [None]:
# Create CNN architecture: three convolutional blocks followed by a dense
# classifier head for 28x28 single-channel images
cnn_model = keras.Sequential([
    # Declare the input with an explicit Input object instead of passing
    # `input_shape=` to the first layer, which current Keras discourages
    keras.Input(shape=(28, 28, 1)),

    # First convolutional block
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.BatchNormalization(),

    # Second convolutional block
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.BatchNormalization(),

    # Third convolutional block (no pooling)
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),

    # Flatten and dense layers
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax'),  # 10 digits
])

# Categorical cross-entropy pairs with the 10-way softmax output
cnn_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

cnn_model.summary()

## Training the CNN

In [None]:
# Train on the first 10,000 images only, to keep the run short;
# 20% of that subset is held out for validation
cnn_history = cnn_model.fit(
    x=X_train_img[:10000],
    y=y_train_img[:10000],
    batch_size=128,
    epochs=10,
    verbose=1,
    validation_split=0.2,
)

# Score the CNN on the full test set
test_loss, test_acc = cnn_model.evaluate(x=X_test_img, y=y_test_img, verbose=0)
print(f"\nTest Accuracy: {test_acc * 100:.2f}%")

## Advanced Techniques

### Learning Rate Scheduling

In [None]:
# Learning rate schedule used by the LearningRateScheduler callback
def lr_schedule(epoch, lr):
    """Return the learning rate to use for ``epoch``.

    The rate is returned unchanged while ``epoch`` is 5 or lower; for any
    later epoch it is multiplied by 0.9.

    Args:
        epoch: Epoch number the rate is being requested for.
        lr: Learning rate currently in effect.

    Returns:
        ``lr`` itself, or ``lr * 0.9`` once ``epoch`` exceeds 5.
    """
    return lr * 0.9 if epoch > 5 else lr

# Wrap lr_schedule so Keras can query it for a learning rate during training
lr_callback = keras.callbacks.LearningRateScheduler(lr_schedule)

# Early stopping to prevent overfitting: stop after 5 epochs without a
# better validation loss and roll back to the best weights seen
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Model checkpoint: keep only the model with the best validation accuracy.
# Saved in the native `.keras` format rather than legacy HDF5 (`.h5`).
checkpoint = keras.callbacks.ModelCheckpoint(
    'best_model.keras',
    monitor='val_accuracy',
    save_best_only=True,
)

# These callbacks only take effect once they are handed to fit(), and the
# monitored `val_*` metrics require validation data, for example:
#   model.fit(..., validation_split=0.2,
#             callbacks=[lr_callback, early_stop, checkpoint])

## Transfer Learning

Use pre-trained models as a starting point:

In [None]:
# Convolutional base of VGG16 with ImageNet weights; the original
# classification head is left out so a new one can be attached.
# NOTE(review): VGG16 presumably expects inputs passed through
# keras.applications.vgg16.preprocess_input - confirm before training.
base_model = keras.applications.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

# Keep the pre-trained weights fixed so only the new head is trained
base_model.trainable = False

# New classification head stacked on top of the frozen base
transfer_model = keras.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(units=256, activation='relu'),
    layers.Dropout(rate=0.5),
    layers.Dense(units=10, activation='softmax'),  # 10 classes
])

transfer_model.compile(
    loss='categorical_crossentropy',
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

# Report the total parameter count next to the trainable subset
print("Transfer learning model created!")
print(f"Total parameters: {transfer_model.count_params():,}")
print(f"Trainable parameters: {sum([tf.size(w).numpy() for w in transfer_model.trainable_weights]):,}")

## Advanced Architectures

### Residual Connections (ResNet-style)

In [None]:
def residual_block(x, filters):
    """Build a two-convolution residual block on top of ``x``.

    The main path is Conv 3x3 -> BatchNorm -> ReLU -> Conv 3x3 -> BatchNorm.
    The shortcut sends ``x`` through a 1x1 convolution with ``filters``
    output channels, the two branches are added, and a final ReLU is
    applied to the sum.

    Args:
        x: Input to the block.
        filters: Number of filters used by every convolution in the block.

    Returns:
        The output of the final ReLU activation.
    """
    # Main path: two 3x3 convolutions with batch normalization
    main = layers.Conv2D(filters=filters, kernel_size=(3, 3), padding='same')(x)
    main = layers.BatchNormalization()(main)
    main = layers.Activation('relu')(main)
    main = layers.Conv2D(filters=filters, kernel_size=(3, 3), padding='same')(main)
    main = layers.BatchNormalization()(main)

    # Shortcut path: 1x1 projection of the block input to `filters` channels
    shortcut = layers.Conv2D(filters=filters, kernel_size=(1, 1), padding='same')(x)

    # Merge both branches, then apply the output non-linearity
    merged = layers.Add()([shortcut, main])
    return layers.Activation('relu')(merged)

# Assemble a small functional-API model: stem convolution, two residual
# blocks with a pooling step between them, then a dense classifier head
inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(filters=32, kernel_size=(3, 3), padding='same')(inputs)
x = residual_block(x, filters=32)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)
x = residual_block(x, filters=64)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(units=128, activation='relu')(x)
outputs = layers.Dense(units=10, activation='softmax')(x)

resnet_model = keras.Model(inputs=inputs, outputs=outputs)
print("ResNet-style model created!")

## Exercise

Build and train a CNN for the CIFAR-10 dataset:
1. Load CIFAR-10 (10 classes of 32x32 color images)
2. Design a CNN architecture with at least 3 convolutional blocks
3. Implement data augmentation (rotation, flipping, zoom)
4. Use callbacks for learning rate scheduling and early stopping
5. Achieve >70% test accuracy
6. Visualize training progress and some predictions

In [None]:
# Your code here
# Hint: Use keras.datasets.cifar10.load_data()
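# Hint: For augmentation, prefer the Keras preprocessing layers
#       (layers.RandomFlip, layers.RandomRotation, layers.RandomZoom);
#       keras.preprocessing.image.ImageDataGenerator also works but is deprecated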

