# Lab 4
Let's return to the Lab 1 data and see whether a neural network can improve on the results that linear regression was not able to achieve.

In [1]:
# Import the necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
sns.set(rc={"figure.figsize": (10, 6)})
import matplotlib.pyplot as plt
import scipy.stats as stats

plt.style.use('ggplot') # setting the plot style
%matplotlib inline
from __future__ import print_function, unicode_literals, division

# ignore various warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
#import drive

# from google.colab import drive
# drive.mount('/content/drive')


ModuleNotFoundError: No module named 'google.colab'

In [3]:
# Google Colab variant (requires Drive to be mounted first):
# planetData = pd.read_csv("/content/drive/MyDrive/Data-intensive/oec.csv")

# Load the dataset from oec.csv in the current working directory.
planetData = pd.read_csv("oec.csv")

In [None]:
# Show the column labels of the loaded DataFrame.
planetData.columns

In [None]:
# Express the orbital period in years (365.25 days per year).
planetData['PeriodYears'] = planetData['PeriodDays'] / 365.25

# X uses 3 predictor columns; y is the single PeriodYears column.
mult_features = ['SemiMajorAxisAU', 'HostStarMassSlrMass', 'PlanetaryMassJpt']
mult_cols = mult_features + ['PeriodYears']

# Keep only rows where every predictor and the response are present.
planets_selectedFeatures = planetData[mult_cols].dropna()
X_mult = planets_selectedFeatures[mult_features]
y_mult = planets_selectedFeatures['PeriodYears']

print("Number of observations: ", len(X_mult))
print("Number of values for the response variable y: ", len(y_mult))

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_mult, y_mult, test_size=0.3, random_state=42
)

# Report the shape of every piece of the split, one line per piece.
print(
    *(
        f"{label} {part.shape}"
        for label, part in (
            ("training data size:", X_train),
            ("testing data size:", X_test),
            ("total data size:", X_mult),
            ("size of y_mult:", y_mult),
            ("size of y_mult_train: ", y_train),
            ("size of y_mult_test: ", y_test),
        )
    ),
    sep="\n",
)

In [7]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply
# the same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Based on the previous examples, test a neural network to predict y_test and calculate r2.

*  Try different configurations of layers and neurons and get a good result.
*  With your final neural network configuration, experiment with different learning rates (0.1, 0.01, 0.001) and batch sizes (8, 16, 32, 64). How do these hyperparameters affect the training process, the loss, and the final model accuracy? Run experiments by varying the learning rate and batch size, then compare the results in terms of convergence, accuracy, and training speed.

**STEP 1 - TRY DIFFERENT CONFIGURATIONS**

**Layer Configurations**: The variable layer_configs defines different network architectures:

*   [64]: A single hidden layer with 64 neurons.
*   [128, 64]: Two hidden layers with 128 neurons in the first hidden layer and 64 in the second.
*   [128, 64, 32]: Three hidden layers with 128, 64, and 32 neurons, respectively.

**Learning Rates and Batch Sizes:** The network is trained using various learning rates (0.1, 0.01, 0.001) and batch sizes (8, 16, 32, 64) to experiment with the effect of these hyperparameters on training time, accuracy, and loss.

**Dynamic Layer Construction**: The architecture of the network is dynamically created based on the number of layers and neurons specified in the layer_config variable.

**Training Loop**: The training loop runs for a fixed number of epochs (100), and training/validation losses are printed every 10 epochs.

In [None]:
# Import the necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import r2_score

# Define the neural network class with flexible architecture
class PlanetNet(nn.Module):
    """Fully connected regression network with a configurable hidden stack.

    Every hidden layer is an ``nn.Linear`` followed by ``nn.ReLU``; the final
    ``nn.Linear`` maps to ``output_size`` and has no activation.

    Args:
        input_size: Number of input features per sample.
        hidden_sizes: Sequence with the width of each hidden layer, in order.
            An empty sequence is allowed and yields a single linear layer.
        output_size: Number of values predicted per sample.
    """

    def __init__(self, input_size, hidden_sizes, output_size):
        super(PlanetNet, self).__init__()

        layers = []
        in_features = input_size
        # Each hidden layer consumes the width produced by the previous one
        for width in hidden_sizes:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())  # ReLU activation after each hidden layer
            in_features = width

        # in_features is still input_size when there are no hidden layers
        layers.append(nn.Linear(in_features, output_size))  # Output layer
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the raw predictions."""
        return self.model(x)

# Function to train the neural network
def train_model(model, criterion, optimizer, train_loader, val_loader, epochs=100):
    """Train ``model`` and log the per-sample training and validation loss.

    The epoch loss is the sample-weighted mean of the batch losses, so a
    smaller final batch does not get extra weight and the numbers stay
    comparable across different batch sizes. This assumes ``criterion``
    returns the mean loss over the batch (the default for ``nn.MSELoss``).

    Args:
        model: Network to optimise; updated in place.
        criterion: Loss callable taking ``(outputs, targets)``.
        optimizer: Optimizer bound to ``model``'s parameters.
        train_loader: Iterable of ``(batch_x, batch_y)`` training batches.
        val_loader: Iterable of ``(batch_x, batch_y)`` validation batches.
        epochs: Number of passes over ``train_loader``.

    Returns:
        tuple[list[float], list[float]]: Per-epoch training and validation
        losses. A loss is ``nan`` for an epoch whose loader yielded no samples.
    """
    train_losses = []
    val_losses = []
    for epoch in range(epochs):
        model.train()  # Set the model to training mode
        train_total, train_count = 0.0, 0
        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()  # Clear gradients
            loss = criterion(model(batch_x), batch_y)
            loss.backward()  # Backpropagation
            optimizer.step()  # Gradient descent
            n_samples = len(batch_x)
            train_total += loss.item() * n_samples  # undo the per-batch mean
            train_count += n_samples

        # Validation loop
        model.eval()  # Set the model to evaluation mode
        val_total, val_count = 0.0, 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                n_samples = len(batch_x)
                val_total += criterion(model(batch_x), batch_y).item() * n_samples
                val_count += n_samples

        # Guard against empty loaders instead of dividing by zero
        train_loss = train_total / train_count if train_count else float('nan')
        val_loss = val_total / val_count if val_count else float('nan')
        train_losses.append(train_loss)
        val_losses.append(val_loss)

        # Log every 10th epoch and always the last one, which is the state that gets evaluated
        if epoch % 10 == 0 or epoch == epochs - 1:
            print(f'Epoch {epoch}/{epochs}, Train Loss: {train_loss}, Val Loss: {val_loss}')

    return train_losses, val_losses

# Function to evaluate the model and calculate R²
def evaluate_model(model, X_test_tensor, y_test_tensor):
    """Print and return the R² score of ``model`` on the given test tensors.

    Args:
        model: Network to score; it is switched to evaluation mode.
        X_test_tensor: Input features passed to ``model``.
        y_test_tensor: Ground-truth targets compared against the predictions.

    Returns:
        The R² value computed by ``r2_score``.
    """
    model.eval()
    with torch.no_grad():  # scoring needs no gradients
        y_pred = model(X_test_tensor).cpu().numpy()
    y_true = y_test_tensor.cpu().numpy()
    score = r2_score(y_true, y_pred)
    print(f'R² Score: {score:.4f}')
    return score

# Convert the splits to float32 tensors; the targets are reshaped into column vectors
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.tensor(target.values, dtype=torch.float32).view(-1, 1)
    for target in (y_train, y_test)
)

# Hyperparameter settings
learning_rates = [0.1, 0.01, 0.001]
batch_sizes = [8, 16, 32, 64]
layer_configs = [
    [64],        # 1 hidden layer with 64 neurons
    [128, 64],   # 2 hidden layers with 128 and 64 neurons
    [128, 64, 32]  # 3 hidden layers with 128, 64, and 32 neurons
]
# Every run starts from the same seed so that configurations differ only in
# their hyperparameters, not in weight initialisation or shuffle order.
GRID_SEED = 42

# The datasets are identical for every run, so build them once.
# NOTE: the validation loader reuses the test split, so the reported
# validation loss and R² are not an unbiased estimate for model selection.
train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)
n_features = X_train_tensor.shape[1]  # derive the input width from the data

grid_results = []  # one record per configuration, used for the summary below
for layer_config in layer_configs:
    for lr in learning_rates:
        for batch_size in batch_sizes:
            print(f"\nTraining with layers {layer_config}, learning rate {lr}, and batch size {batch_size}...\n")

            torch.manual_seed(GRID_SEED)

            # Create DataLoader for training and validation
            train_loader = torch.utils.data.DataLoader(
                train_dataset, batch_size=batch_size, shuffle=True
            )
            val_loader = torch.utils.data.DataLoader(
                val_dataset, batch_size=batch_size, shuffle=False
            )

            # Initialize the model, loss function, and optimizer
            model = PlanetNet(input_size=n_features, hidden_sizes=layer_config, output_size=1)
            criterion = nn.MSELoss()  # Mean Squared Error for regression
            optimizer = optim.Adam(model.parameters(), lr=lr)

            # Train the model
            train_model(model, criterion, optimizer, train_loader, val_loader, epochs=100)

            # Evaluate the model and keep the score for the comparison table
            r2 = evaluate_model(model, X_test_tensor, y_test_tensor)
            grid_results.append({
                "layers": str(layer_config),
                "learning_rate": lr,
                "batch_size": batch_size,
                "r2": r2,
            })

# Summary of all configurations, best R² first
grid_results_df = pd.DataFrame(grid_results).sort_values("r2", ascending=False)
print(grid_results_df.to_string(index=False))


**STEP 2 - Experiment with the best architecture ([128, 64, 32]) and vary:**

*   **Learning rates**: 0.1, 0.01, and 0.001
*   **Batch sizes**: 8, 16, 32, and 64

Graph and visualize the training loss, validation loss, and R² score for each combination of learning rate and batch size.



In [None]:
# Import the necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import r2_score

# Define the neural network class with the best architecture
class BestPlanetNet(nn.Module):
    """Feed-forward regression network built from a list of hidden-layer widths.

    The layer stack is ``Linear -> ReLU`` for each entry of ``hidden_sizes``,
    followed by one ``Linear`` output layer without activation.

    Args:
        input_size: Number of input features per sample.
        hidden_sizes: Sequence of hidden-layer widths, in order. May be empty,
            in which case the network is a single linear layer.
        output_size: Number of values predicted per sample.
    """

    def __init__(self, input_size, hidden_sizes, output_size):
        super(BestPlanetNet, self).__init__()

        # widths[k] feeds widths[k + 1]; the list always starts with the input width
        widths = [input_size, *hidden_sizes]

        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers += [nn.Linear(fan_in, fan_out), nn.ReLU()]

        # widths[-1] equals input_size when no hidden layers were requested
        layers.append(nn.Linear(widths[-1], output_size))  # Output layer
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the raw predictions."""
        return self.model(x)

# Function to train the neural network
def train_best_model(model, criterion, optimizer, train_loader, val_loader, epochs=100):
    """Train ``model`` and return the per-epoch training and validation losses.

    Each epoch loss is the sample-weighted mean of the batch losses, so a
    smaller final batch is not over-weighted and curves obtained with
    different batch sizes are directly comparable. This assumes ``criterion``
    returns the mean loss over the batch (the default for ``nn.MSELoss``).

    Args:
        model: Network to optimise; updated in place.
        criterion: Loss callable taking ``(outputs, targets)``.
        optimizer: Optimizer bound to ``model``'s parameters.
        train_loader: Iterable of ``(batch_x, batch_y)`` training batches.
        val_loader: Iterable of ``(batch_x, batch_y)`` validation batches.
        epochs: Number of passes over ``train_loader``.

    Returns:
        tuple[list[float], list[float]]: ``(train_losses, val_losses)`` with
        one entry per epoch. An entry is ``nan`` when its loader yielded no
        samples.
    """
    train_losses = []
    val_losses = []
    for epoch in range(epochs):
        model.train()  # Set the model to training mode
        train_total, train_count = 0.0, 0
        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()  # Clear gradients
            loss = criterion(model(batch_x), batch_y)
            loss.backward()  # Backpropagation
            optimizer.step()  # Gradient descent
            n_samples = len(batch_x)
            train_total += loss.item() * n_samples  # undo the per-batch mean
            train_count += n_samples

        # Validation loop
        model.eval()  # Set the model to evaluation mode
        val_total, val_count = 0.0, 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                n_samples = len(batch_x)
                val_total += criterion(model(batch_x), batch_y).item() * n_samples
                val_count += n_samples

        # Store losses for visualization; guard against empty loaders
        train_loss = train_total / train_count if train_count else float('nan')
        val_loss = val_total / val_count if val_count else float('nan')
        train_losses.append(train_loss)
        val_losses.append(val_loss)

        # Log every 10th epoch and always the last one, which is the state that gets evaluated
        if epoch % 10 == 0 or epoch == epochs - 1:
            print(f'Epoch {epoch}/{epochs}, Train Loss: {train_loss}, Val Loss: {val_loss}')

    return train_losses, val_losses

# Function to evaluate the model and calculate R²
def evaluate_best_model(model, X_test_tensor, y_test_tensor):
    """Predict on the test inputs, print the R² score, and return both.

    Args:
        model: Network to score; it is switched to evaluation mode.
        X_test_tensor: Input features passed to ``model``.
        y_test_tensor: Ground-truth targets compared against the predictions.

    Returns:
        tuple: ``(predictions, r2)`` where ``predictions`` is the tensor
        returned by ``model`` and ``r2`` is the value from ``r2_score``.
    """
    model.eval()
    with torch.no_grad():  # scoring needs no gradients
        predictions = model(X_test_tensor)
    score = r2_score(y_test_tensor.cpu().numpy(), predictions.cpu().numpy())
    print(f'R² Score: {score:.4f}')
    return predictions, score

# Function to plot training and validation losses
def plot_losses(train_losses, val_losses, title):
    """Plot the training and validation loss curves on a single figure.

    Args:
        train_losses: Sequence of training losses, one value per epoch.
        val_losses: Sequence of validation losses, one value per epoch.
        title: Title shown above the plot.
    """
    _, ax = plt.subplots(figsize=(10, 6))
    curves = ((train_losses, 'Training Loss'), (val_losses, 'Validation Loss'))
    for series, legend_name in curves:
        ax.plot(series, label=legend_name)
    ax.set(xlabel='Epochs', ylabel='Loss', title=title)
    ax.legend()
    ax.grid(True)
    plt.show()

# Function to plot true vs predicted values
def plot_true_vs_predicted(y_true, y_pred, title):
    """Scatter predictions against true values with a dashed y = x reference line.

    Args:
        y_true: Array of true target values; must provide ``min()`` and ``max()``.
        y_pred: Array of predicted values, plotted on the vertical axis.
        title: Title shown above the plot.
    """
    lowest, highest = y_true.min(), y_true.max()
    _, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(y_true, y_pred, alpha=0.5, color='blue')
    # Points on this diagonal are perfect predictions
    ax.plot([lowest, highest], [lowest, highest], 'r--', lw=2)
    ax.set(xlabel='True Values', ylabel='Predicted Values', title=title)
    ax.grid(True)
    plt.show()

# Function to conduct the full experiment
def run_experiment(hidden_sizes, learning_rates, batch_sizes, epochs, seed=42):
    """Train and evaluate one model per (learning rate, batch size) pair.

    Reads the module-level ``X_train``, ``X_test``, ``y_train`` and ``y_test``
    splits. For every combination it trains a fresh ``BestPlanetNet``, prints
    the R² score, and plots the loss curves and true-vs-predicted scatter.

    NOTE: the validation loader is built from the test split, so the
    validation loss and R² are not an unbiased estimate for model selection.

    Args:
        hidden_sizes: Hidden-layer widths passed to ``BestPlanetNet``.
        learning_rates: Iterable of Adam learning rates to try.
        batch_sizes: Iterable of mini-batch sizes to try.
        epochs: Number of training epochs per run.
        seed: Seed applied with ``torch.manual_seed`` before every run so that
            runs differ only in their hyperparameters. Pass ``None`` to leave
            the random state untouched.

    Returns:
        pandas.DataFrame: One row per run with the columns ``learning_rate``,
        ``batch_size``, ``final_train_loss``, ``final_val_loss`` and ``r2``.
    """
    # Convert the training and testing data to tensors
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
    y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

    # The datasets are the same for every run, so build them once
    train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)
    n_features = X_train_tensor.shape[1]  # derive the input width from the data

    records = []
    # Loop over learning rates and batch sizes
    for lr in learning_rates:
        for batch_size in batch_sizes:
            print(f"\nTraining with learning rate {lr} and batch size {batch_size}...\n")

            if seed is not None:
                torch.manual_seed(seed)

            # Create DataLoader for training and validation
            train_loader = torch.utils.data.DataLoader(
                train_dataset, batch_size=batch_size, shuffle=True
            )
            val_loader = torch.utils.data.DataLoader(
                val_dataset, batch_size=batch_size, shuffle=False
            )

            # Initialize the model, loss function, and optimizer
            model = BestPlanetNet(input_size=n_features, hidden_sizes=hidden_sizes, output_size=1)
            criterion = nn.MSELoss()  # Mean Squared Error for regression
            optimizer = optim.Adam(model.parameters(), lr=lr)

            # Train the model
            train_losses, val_losses = train_best_model(model, criterion, optimizer, train_loader, val_loader, epochs=epochs)

            # Evaluate the model
            predictions, r2 = evaluate_best_model(model, X_test_tensor, y_test_tensor)

            # Plot losses and true vs predicted values
            plot_losses(train_losses, val_losses, f"Loss: LR={lr}, Batch Size={batch_size}")
            plot_true_vs_predicted(y_test_tensor.cpu().numpy(), predictions.cpu().numpy(), f"True vs Predicted: LR={lr}, Batch Size={batch_size}")

            # Keep the headline numbers so the runs can be compared side by side
            records.append({
                'learning_rate': lr,
                'batch_size': batch_size,
                'final_train_loss': train_losses[-1] if train_losses else float('nan'),
                'final_val_loss': val_losses[-1] if val_losses else float('nan'),
                'r2': r2,
            })

    return pd.DataFrame(records)

# Search space and training length for the experiment
epochs = 100
batch_sizes = [8, 16, 32, 64]
learning_rates = [0.1, 0.01, 0.001]
hidden_sizes = [128, 64, 32]  # Best architecture

# Train and plot every learning-rate / batch-size combination
run_experiment(
    hidden_sizes=hidden_sizes,
    learning_rates=learning_rates,
    batch_sizes=batch_sizes,
    epochs=epochs,
)
