In [5]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import math
import plotnine as p9
import statistics
import time
import pandas as pd
import numpy as np

In [6]:
# Seed PyTorch's global RNG first so that everything drawing from it later
# (weight initialisation, DataLoader shuffling) is reproducible after a
# "Restart Kernel -> Run All".
random_seed = 100
torch.manual_seed(random_seed)

# Experiment configuration: the knobs a reader is expected to tune.
batch_size = 200  # samples per mini-batch
num_epochs = 250  # full passes over the training set

In [7]:
class MultivariateData(Dataset):
  """Synthetic regression dataset with two features and one target.

  Each sample is generated as ``y = 2 * x1 + 3 * x2 + noise`` where ``x1``,
  ``x2`` and ``noise`` are independent standard-normal draws.

  Args:
    n: number of samples to generate (default 10000).
    seed: seed of the private random generator (default 100). The default
      reproduces the data that ``np.random.seed(100)`` followed by three
      ``np.random.randn(n)`` calls would produce.

  Attributes are float32 tensors: ``x_data`` has shape ``(n, 2)`` and
  ``y_data`` has shape ``(n, 1)``.
  """

  def __init__(self, n = 10000, seed = 100):
    # Use a dedicated generator instead of np.random.seed(): building a
    # dataset must not reset the process-wide NumPy random state as a hidden
    # side effect. RandomState(seed) yields the same stream as the seeded
    # legacy global generator, so the generated data is unchanged.
    rng = np.random.RandomState(seed)

    # Draw order (x1, x2, noise) matters for reproducing the same numbers
    x1 = rng.randn(n)
    x2 = rng.randn(n)
    noise = rng.randn(n)

    # Calculate y = 2 * x1 + 3 * x2 + noise
    y = 2 * x1 + 3 * x2 + noise

    # Convert to torch tensors: features as (n, 2), target as a column (n, 1)
    self.x_data = torch.tensor(np.column_stack((x1, x2)), dtype=torch.float32)
    self.y_data = torch.tensor(y, dtype=torch.float32).view(-1, 1)

    self.n_samples = self.x_data.shape[0]

  # Implement indexing so that dataset[i] can be used to get ith sample
  def __getitem__(self, index):
    """Return the ``(features, target)`` pair stored at ``index``."""
    return self.x_data[index], self.y_data[index]

  # Implement len(dataset) to return the size
  def __len__(self):
    """Return the number of samples in the dataset."""
    return self.n_samples

In [None]:
# Instantiate the synthetic regression dataset
dataset = MultivariateData()

# Index the dataset to fetch sample 0, then split the pair into
# its feature vector and its target before displaying both
first_data = dataset[0]
features = first_data[0]
labels = first_data[1]
print(features, labels)

In [None]:
# Wrap the dataset in a DataLoader that serves mini-batches of `batch_size`
# samples; shuffle=True reshuffles the sample order, which is good for training
train_loader = DataLoader(dataset, batch_size = batch_size, shuffle = True)

# Draw a single batch from a fresh iterator and print it
data_iterator = iter(train_loader)
data = next(data_iterator)
features, labels = data[0], data[1]
print(features, "\n", labels)

In [10]:
class MultipleRegressionModel(nn.Module):
  """Multiple linear regression implemented as a single ``nn.Linear`` layer.

  Args:
    in_features: number of predictors per sample (default 2).
    out_features: number of regression targets per sample (default 1).

  The defaults reproduce the original fixed 2-input / 1-output model, and
  the layer keeps the attribute name ``linear`` so state-dict keys are
  unchanged.
  """

  def __init__(self, in_features = 2, out_features = 1):
    super().__init__()
    self.linear = nn.Linear(in_features, out_features)

  def forward(self, x):
    """Apply the linear layer to a batch ``x`` whose last dimension is ``in_features``."""
    return self.linear(x)

# Instantiate the regression network
model = MultipleRegressionModel()

# Mean-squared-error objective, minimised with plain stochastic gradient
# descent at a learning rate of 0.001
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(params = model.parameters(), lr = 1e-3)

In [None]:
total_samples = len(dataset)
n_iterations = math.ceil(total_samples / batch_size)  # mini-batches per epoch
print(total_samples, n_iterations)

loss_values = []  # List to store one loss value per epoch

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()  # Record start time

for epoch in range(num_epochs):
  for inputs, labels in train_loader:

    # Forward pass
    outputs = model(inputs)
    loss = criterion(outputs, labels)

    # Backward pass and optimization. Gradients accumulate across backward()
    # calls, so they are cleared once, right before backpropagation.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  # Store the loss of the last mini-batch of this epoch. This is a per-batch
  # value, not an epoch average; it is kept that way so the extra .item()
  # calls an average would need do not distort the timing being measured.
  loss_values.append(loss.item())

end_time = time.perf_counter()  # Record end time

training_time = end_time - start_time
print(f'Batch size: {batch_size} | Training time: {training_time:.2f} seconds')


In [None]:
# Hard-coded (batch size, training time in seconds) measurements
# NOTE(review): presumably collected by re-running the training cell with
# different batch_size values - confirm
data = [
    (5, 51.9), (10, 28.39), (25, 14.42), (50, 9.56),
    (100, 7.47), (200, 5.95), (300, 6.46), (400, 6.32),
    (500, 6.58), (750, 6.73), (1000, 7.66), (1500, 7.44),
    (2000, 7.99), (3000, 9.77),
]
df_times = pd.DataFrame.from_records(data, columns=['Batch Size', 'Time'])

# Scatter plot of training time against batch size, built up layer by layer
p = p9.ggplot(df_times, p9.aes(x = 'Batch Size', y = 'Time'))
p = p + p9.geom_point(size = 4, color = 'firebrick')
p = p + p9.labs(x = 'Batch Size', y = 'Training Time (s)')
p = p + p9.theme_classic()
p = p + p9.theme(figure_size = (8, 5))
p.show()

In [None]:
# Collect the recorded losses in a tidy frame. The epoch axis (1-based) is
# derived from the losses actually recorded rather than from num_epochs, so
# the plot stays consistent even if the configuration was changed after
# training or training was interrupted early.
loss_df = pd.DataFrame({
    'Epoch': range(1, len(loss_values) + 1),
    'Loss': loss_values
})

# Line plot of the recorded loss per epoch
p2 = (p9.ggplot(loss_df, p9.aes(x = 'Epoch', y = 'Loss')) +
      p9.geom_line(size = 1.5, color = 'firebrick') +
      p9.labs(x = 'Epoch', y = 'Loss') +
      p9.theme_classic() +
      p9.theme(figure_size = (8, 5)))
p2.show()

In [None]:
# Epochs are numbered from 1 (as on the loss plot), so epoch k is stored at
# loss_values[k - 1]. Slicing from first_epoch - 1 therefore includes epoch
# 100 itself, matching the "100 and above" wording of the message.
first_epoch = 100
mean_loss = statistics.mean(loss_values[first_epoch - 1:])
print(f'Mean loss for epochs 100 and above: {mean_loss}')

In [None]:
# Hand-picked (x1, x2) inputs; each trailing comment gives the noise-free
# target 2 * x1 + 3 * x2 that the prediction can be compared against
pred_X = [[0, 0],   # 0
          [1, 1],   # 2 + 3 = 5
          [2, 2],   # 4 + 6 = 10
          [10, 20]] # 20 + 60 = 80

# Inference only, so gradient tracking is disabled for the forward pass
with torch.no_grad():
  input_data = torch.tensor(pred_X, dtype=torch.float32)
  predictions = model(input_data).numpy()

# Show the predictions rounded to two decimals
print(np.round(predictions, 2))