# Exp_8: To build, train, test and evaluate a simple neural network on a curated vector dataset. Build the model from scratch.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import seaborn as sns

In [None]:
# Curated 1-D dataset: five integer inputs and their targets (target = input + 2)
input_data = list(range(-1, 4))
target_data = [value + 2 for value in input_data]
type(input_data)

In [None]:
# Turn both Python lists into float32 column tensors (one row per sample)
X, y = (
    torch.tensor(values, dtype=torch.float32).view(-1, 1)
    for values in (input_data, target_data)
)

In [None]:
# Notebook check: display the Python type of X after the tensor conversion
type(X)

In [None]:
# Notebook check: display the dimensions of X (reshaped with .view(-1, 1) to one column)
X.shape

In [None]:
# Summarize the dataset tensors: values first, then their shapes
print(
    f"Input (X): {X.flatten().tolist()}",
    f"Target (y): {y.flatten().tolist()}",
    f"Input shape: {X.shape}",
    f"Target shape: {y.shape}",
    sep="\n",
)

# NEURAL NETWORK ARCHITECTURE

In [None]:
class MyNeuralNetwork(nn.Module):
    """
    A simple fully connected network with two hidden layers.

    Architecture: Input -> Hidden1 (ReLU) -> Hidden2 (ReLU) -> Output (linear)

    Args:
        input_size: Number of features per input sample.
        hidden1_size: Number of neurons in the first hidden layer.
        hidden2_size: Number of neurons in the second hidden layer.
        output_size: Number of values produced per sample.
        verbose: Keyword-only. When True (the default, which keeps the
            original behaviour) every forward pass prints the intermediate
            values of each layer. Pass False to silence the trace, e.g.
            while training for many epochs.

    Attributes:
        layer_outputs: Detached activations of the most recent forward pass,
            stored under the keys 'hidden1', 'hidden2' and 'output'.
    """
    def __init__(self, input_size=1, hidden1_size=8, hidden2_size=4, output_size=1,
                 *, verbose=True):
        super().__init__()

        # Define layers
        self.layer1 = nn.Linear(input_size, hidden1_size)    # Input to first hidden layer
        self.layer2 = nn.Linear(hidden1_size, hidden2_size)  # First to second hidden layer
        self.layer3 = nn.Linear(hidden2_size, output_size)   # Second hidden to output layer

        # Activation functions
        self.relu = nn.ReLU()       # ReLU activation for hidden layers
        self.sigmoid = nn.Sigmoid() # Not used by forward(); kept so the module's attributes stay unchanged

        # Controls the per-layer trace printed by forward()
        self.verbose = verbose

        # Store intermediate outputs for visualization
        self.layer_outputs = {}

    def _trace(self, label, tensor):
        """Print ``tensor`` rounded to 3 decimals when tracing is enabled."""
        if self.verbose:
            # .cpu() keeps the NumPy conversion valid for tensors on other devices
            print(f"{label}: {tensor.detach().cpu().numpy().round(3)}")

    def forward(self, x):
        """
        Forward pass through the network.

        Args:
            x: Input tensor whose last dimension equals ``input_size``.

        Returns:
            The output of the final linear layer (no activation applied).

        Side effects:
            Overwrites ``self.layer_outputs`` with detached copies of both
            hidden activations and the output; prints a trace if ``verbose``.
        """
        if self.verbose:
            print("\n🔍 Forward Pass Details:")
            print(f"Input: {x.flatten().tolist()}")

        # Layer 1: Input -> Hidden1
        z1 = self.layer1(x)
        self._trace("After Linear Layer 1 (before activation)", z1)
        a1 = self.relu(z1)
        self._trace("After ReLU Activation 1", a1)
        self.layer_outputs['hidden1'] = a1.detach()

        # Layer 2: Hidden1 -> Hidden2
        z2 = self.layer2(a1)
        self._trace("After Linear Layer 2 (before activation)", z2)
        a2 = self.relu(z2)
        self._trace("After ReLU Activation 2", a2)
        self.layer_outputs['hidden2'] = a2.detach()

        # Layer 3: Hidden2 -> Output (left linear for regression)
        output = self.layer3(a2)
        self._trace("Final Output", output)
        self.layer_outputs['output'] = output.detach()

        return output

In [None]:
# Training and architecture settings, gathered in one place for easy tuning
HYPERPARAMETERS = {
    'learning_rate': 0.01,
    'epochs': 100,
    'hidden1_size': 8,
    'hidden2_size': 4,
    'weight_init': 'xavier_uniform',
}

# Echo every setting, one indented "name: value" line each
summary_lines = [f"  {name}: {setting}" for name, setting in HYPERPARAMETERS.items()]
print("Hyperparameters:", *summary_lines, sep="\n")

In [None]:
# Instantiate the network; hidden-layer widths come from the hyperparameters
layer_sizes = {
    'input_size': 1,
    'hidden1_size': HYPERPARAMETERS['hidden1_size'],
    'hidden2_size': HYPERPARAMETERS['hidden2_size'],
    'output_size': 1,
}
model = MyNeuralNetwork(**layer_sizes)

In [None]:
# Per-module initializer intended for use with model.apply()
def init_weights(m):
    """Give Linear layers Xavier-uniform weights and zero biases.

    Any module that is not an ``nn.Linear`` is left untouched.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.xavier_uniform_(m.weight)
    nn.init.zeros_(m.bias)

# Run the initializer over the model and all of its submodules
model.apply(init_weights)

# Show the resulting layer structure
print("Model Architecture:", model, sep="\n")

In [None]:
# Show every parameter tensor right after initialization, rounded to 4 decimals
print("\nInitial Weights:")
initial_values = {
    name: param.data.numpy().round(4)
    for name, param in model.named_parameters()
}
for name, rounded in initial_values.items():
    print(f"{name}: {rounded}")


In [None]:
# Training objective: mean squared error, averaged over all elements
criterion = nn.MSELoss(reduction='mean')
print(
    "Loss Function: Mean Squared Error (MSE)",
    "MSE = (1/n) * Σ(predicted - actual)²",
    sep="\n",
)

In [None]:
# Plain SGD over all model parameters, using the configured learning rate
learning_rate = HYPERPARAMETERS['learning_rate']
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
print(f"Optimizer: SGD with learning rate = {learning_rate}")

In [None]:
# Per-epoch records filled in by the training loop and used by the plots
loss_history, predictions_history = [], []
# One independent list of snapshots per named parameter
weight_history = {param_name: [] for param_name in dict(model.named_parameters())}

print(
    "Starting training...",
    "Each epoch includes: Forward Pass -> Loss Calculation -> Backpropagation -> Weight Update",
    sep="\n",
)

In [None]:
# Training loop: forward pass -> loss -> backpropagation -> SGD update, once per epoch
num_epochs = HYPERPARAMETERS['epochs']
# Report about ten times per run. A fixed interval of 100 only ever fires at
# epoch 0 when the run itself is 100 epochs long, so scale it with the run.
log_every = max(1, num_epochs // 10)

for epoch in range(num_epochs):
    # Forward pass on the full dataset (batch gradient descent)
    predictions = model(X)

    # Calculate loss; note it is measured BEFORE this epoch's weight update
    loss = criterion(predictions, y)
    loss_history.append(loss.item())
    predictions_history.append(predictions.detach().numpy().copy())

    # Snapshot the parameters (pre-update) for the weight-evolution plot
    for name, param in model.named_parameters():
        weight_history[name].append(param.detach().clone())

    # Backward pass and optimization
    optimizer.zero_grad()  # Clear gradients left over from the previous epoch
    loss.backward()        # Compute gradients
    optimizer.step()       # Update weights

    # Print progress: the first few epochs, every `log_every` epochs, and the last one
    is_early = epoch < 5
    is_last = epoch == num_epochs - 1
    if is_early or is_last or epoch % log_every == 0:
        print(f"\nEpoch {epoch}:")
        print(f"  Loss: {loss.item():.6f}")
        print(f"  Predictions: {predictions.detach().numpy().flatten().round(3)}")
        print(f"  Targets: {y.numpy().flatten()}")

        if is_early:  # Show gradients for first few epochs
            print("  Gradients:")
            for name, param in model.named_parameters():
                if param.grad is not None:
                    print(f"    {name}: {param.grad.detach().numpy().round(6)}")

print(f"\nTraining completed! Final loss: {loss_history[-1]:.6f}")

In [None]:
# Evaluate the trained model on the training inputs and flatten to 1-D NumPy arrays
final_output = model(X)
final_predictions = final_output.detach().numpy().flatten()
targets_np = y.numpy().flatten()

In [None]:
# Regression metrics comparing the final predictions with the targets
mse = mean_squared_error(targets_np, final_predictions)
mae = mean_absolute_error(targets_np, final_predictions)
r2 = r2_score(targets_np, final_predictions)
rmse = np.sqrt(mse)  # square root of the MSE

print("Performance Metrics:")
print(f"  Mean Squared Error (MSE): {mse:.6f}")
print(f"  Root Mean Squared Error (RMSE): {rmse:.6f}")
print(f"  Mean Absolute Error (MAE): {mae:.6f}")
print(f"  R² Score: {r2:.6f}")

# Per-sample comparison; the sample index is not needed, so iterate with zip alone
print("\nFinal Results:")
print("Input -> Prediction (Target)")
for inp, pred, target in zip(input_data, final_predictions, targets_np):
    print(f"  {inp} -> {pred:.3f} ({target})")


In [None]:
# Create comprehensive visualizations: a 2x3 grid summarizing the training run
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
fig.suptitle('Neural Network Learning Process Visualization', fontsize=16, fontweight='bold')

# 1. Loss Curve (log scale so late, small improvements stay visible)
axes[0, 0].plot(loss_history, 'b-', linewidth=2)
axes[0, 0].set_title('Training Loss Over Time')
axes[0, 0].set_xlabel('Epoch')
axes[0, 0].set_ylabel('Loss (MSE)')
axes[0, 0].grid(True, alpha=0.3)
axes[0, 0].set_yscale('log')

# 2. Predictions vs Targets (points on the dashed diagonal are exact)
axes[0, 1].scatter(targets_np, final_predictions, color='red', s=100, alpha=0.7, label='Predictions')
axes[0, 1].plot([min(targets_np), max(targets_np)], [min(targets_np), max(targets_np)], 'k--', alpha=0.8, label='Perfect Prediction')
axes[0, 1].set_title('Predictions vs Targets')
axes[0, 1].set_xlabel('Target Values')
axes[0, 1].set_ylabel('Predicted Values')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

# 3. Input vs Output Mapping
axes[0, 2].plot(input_data, targets_np, 'go-', label='Target', markersize=8, linewidth=2)
axes[0, 2].plot(input_data, final_predictions, 'ro-', label='Predicted', markersize=8, linewidth=2)
axes[0, 2].set_title('Input-Output Mapping')
axes[0, 2].set_xlabel('Input')
axes[0, 2].set_ylabel('Output')
axes[0, 2].legend()
axes[0, 2].grid(True, alpha=0.3)

# 4. Weight Evolution (Layer 1)
# nn.Linear stores its weight as (out_features, in_features), so layer1.weight
# is (hidden1_size, 1). Flatten each snapshot to get one column per weight;
# indexing w[0] would keep only the first neuron's single weight and the
# "first 4 weights" loop below would plot just one line.
layer1_weights = np.array([w.flatten().numpy() for w in weight_history['layer1.weight']])
for i in range(min(4, layer1_weights.shape[1])):  # Show first 4 weights
    axes[1, 0].plot(layer1_weights[:, i], label=f'Weight {i+1}', linewidth=2)
axes[1, 0].set_title('Layer 1 Weight Evolution')
axes[1, 0].set_xlabel('Epoch')
axes[1, 0].set_ylabel('Weight Value')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)

# 5. Gradient Descent Visualization (simplified 2D representation)
# Loss of the one-parameter model y = slope * x + 2, swept over the slope.
inputs_np = np.array(input_data)    # converted once, outside the sweep
targets_arr = np.array(target_data)
x_range = np.linspace(-2, 2, 100)
loss_surface = [np.mean((slope * inputs_np + 2 - targets_arr) ** 2) for slope in x_range]

# Put the marker at the slope of the least-squares line through the network's
# predictions instead of a hard-coded 0, so it reflects what was learned.
learned_slope = np.polyfit(input_data, final_predictions, 1)[0]

axes[1, 1].plot(x_range, loss_surface, 'b-', linewidth=2, alpha=0.7, label='Loss Surface')
axes[1, 1].scatter([learned_slope], [loss_history[-1]], color='red', s=100, label='Final Position', zorder=5)
axes[1, 1].set_title('Gradient Descent Concept')
axes[1, 1].set_xlabel('Parameter Space (Simplified)')
axes[1, 1].set_ylabel('Loss')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)

# 6. Learning Rate Effect Visualization (at most the first 500 recorded epochs)
epochs_sample = np.arange(0, min(500, len(loss_history)))
axes[1, 2].plot(epochs_sample, loss_history[:len(epochs_sample)], 'g-', linewidth=2)
axes[1, 2].set_title(f'Learning Rate Effect (LR={HYPERPARAMETERS["learning_rate"]})')
axes[1, 2].set_xlabel('Epoch')
axes[1, 2].set_ylabel('Loss')
axes[1, 2].grid(True, alpha=0.3)
axes[1, 2].set_yscale('log')

plt.tight_layout()
plt.show()

In [None]:
# ==========================================
# 9. NETWORK ACTIVATION VISUALIZATION
# ==========================================
print("\n🔍 STEP 9: NETWORK ACTIVATION ANALYSIS")
print("-" * 40)

# Run the model once more so model.layer_outputs holds the trained activations
_ = model(X)
layer_outputs = model.layer_outputs

# One row of three panels: two hidden-layer heatmaps and the output comparison
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
fig.suptitle('Neural Network Layer Activations', fontsize=14, fontweight='bold')

# Hidden layers: transpose so neurons run along the y-axis and samples along x
hidden_panels = (
    (axes[0], 'hidden1', 'Hidden Layer 1 Activations'),
    (axes[1], 'hidden2', 'Hidden Layer 2 Activations'),
)
for panel, key, title in hidden_panels:
    heatmap = panel.imshow(layer_outputs[key].T, cmap='viridis', aspect='auto')
    panel.set_title(title)
    panel.set_xlabel('Input Samples')
    panel.set_ylabel('Neurons')
    plt.colorbar(heatmap, ax=panel)

# Output layer: network output against the target for each sample index
output_panel = axes[2]
sample_positions = range(len(input_data))
network_output = layer_outputs['output'].numpy().flatten()
output_panel.plot(sample_positions, network_output, 'ro-', linewidth=2, markersize=8, label='Network Output')
output_panel.plot(sample_positions, targets_np, 'go-', linewidth=2, markersize=8, label='Target Output')
output_panel.set_title('Output Layer')
output_panel.set_xlabel('Input Samples')
output_panel.set_ylabel('Output Value')
output_panel.legend()
output_panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# ==========================================
# 10. KEY CONCEPTS EXPLANATION
# ==========================================
section_rule = "-" * 40

# Recap text printed verbatim for the reader
key_concepts = """
🎯 KEY NEURAL NETWORK CONCEPTS:

1. FORWARD PASS:
   - Data flows from input through hidden layers to output
   - Each layer applies: output = activation(weights × input + bias)
   - ReLU activation: max(0, x) - introduces non-linearity

2. LOSS FUNCTION (MSE):
   - Measures difference between predictions and targets
   - MSE = (1/n) × Σ(predicted - actual)²
   - Guides the learning process

3. BACKPROPAGATION:
   - Calculates gradients of loss with respect to each weight
   - Uses chain rule to propagate errors backward
   - Tells us how to adjust weights to reduce loss

4. GRADIENT DESCENT:
   - Updates weights in direction that reduces loss
   - new_weight = old_weight - learning_rate × gradient
   - Learning rate controls step size

5. ACTIVATION FUNCTIONS:
   - ReLU: Introduces non-linearity, allows learning complex patterns
   - Without activation, network would be just linear transformations

6. HYPERPARAMETERS:
   - Learning rate: Too high = unstable, too low = slow learning
   - Network size: More neurons = more capacity but risk of overfitting
   - Epochs: Number of complete passes through data
"""

print("\n📚 STEP 10: KEY CONCEPTS EXPLAINED", section_rule, key_concepts, sep="\n")

# Closing summary with the last recorded training loss
print(
    "\n✅ TRAINING COMPLETE!",
    "The neural network has successfully learned to map inputs to targets!",
    f"Final loss: {loss_history[-1]:.6f}",
    "The network can now predict outputs for new inputs based on the learned pattern.",
    sep="\n",
)