In [1]:
import os

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torch.utils.data.dataset import random_split

> ### Rethinking the Training Loop

In [2]:
def make_train_step(model, loss_fn, optimizer):
    """Build a closure that performs one training step.

    Args:
        model: callable module; it is switched to train mode and called on ``x``.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a tensor
            that supports ``backward()`` and ``item()``.
        optimizer: object whose ``step()`` and ``zero_grad()`` are called
            after the backward pass.

    Returns:
        A function ``perform_train_step(x, y)`` that returns the loss of the
        step as a Python number (``loss.item()``).
    """
    def perform_train_step(x, y):
        """Forward pass, loss, backward pass and parameter update for one batch."""
        # Training mode first
        model.train()

        # Forward pass and loss in a single expression
        batch_loss = loss_fn(model(x), y)

        # Backward pass fills in the gradients ...
        batch_loss.backward()

        # ... which the optimizer consumes and then clears for the next call
        optimizer.step()
        optimizer.zero_grad()

        return batch_loss.item()

    # The closure is what gets called inside the training loop
    return perform_train_step

In [3]:
%run -i ../data_preparation/v0.py

In [4]:
%%writefile ../model_configuration/v1.py

# Run on the first CUDA device when one is available, on the CPU otherwise
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Step size handed to the optimizer
lr = 1e-3

# Seed set right before the model is built
torch.manual_seed(42)

# One linear layer (1 input feature -> 1 output) inside a Sequential container,
# then moved to the selected device
model = torch.nn.Sequential(torch.nn.Linear(1, 1))
model = model.to(device)

# SGD optimizer over the model's parameters
optimizer = optim.SGD(params=model.parameters(), lr=lr)

# MSE loss with 'mean' reduction
loss_fn = torch.nn.MSELoss(reduction='mean')

# Closure performing one training step for this model / loss / optimizer trio
train_step = make_train_step(model, loss_fn, optimizer)

Overwriting ../model_configuration/v1.py


In [5]:
%run -i ../model_configuration/v1.py

In [6]:
%%writefile ../model_training/v1.py

# Define number of epochs
n_epochs = 1000

# Loss returned by train_step at every epoch, in order
losses = [] 

# For each epoch ...
for epoch in range(n_epochs):
    # Perform one train step, passing x_train_tensor / y_train_tensor as a
    # single batch, and record the corresponding loss
    loss = train_step(x_train_tensor, y_train_tensor)
    losses.append(loss)

Overwriting ../model_training/v1.py


In [7]:
%run -i ../model_training/v1.py

In [8]:
model.state_dict()

OrderedDict([('0.weight', tensor([[1.1880]], device='cuda:0')),
             ('0.bias', tensor([1.3553], device='cuda:0'))])

In [9]:
len(losses)

1000

> ### Dataset

In [10]:
class CustomDataset(Dataset):
    """Dataset pairing a feature tensor with a label tensor.

    Indexing returns the tuple ``(x[index], y[index])``; the length is the
    length of the feature tensor.

    Args:
        x_tensor: features, indexed along the first dimension.
        y_tensor: labels, indexed along the first dimension.

    Raises:
        ValueError: if the two tensors do not have the same length. Without
            this check a mismatch would only surface later, as an IndexError
            or as silently ignored trailing labels.
    """
    def __init__(self, x_tensor, y_tensor):
        if len(x_tensor) != len(y_tensor):
            raise ValueError(
                "x_tensor and y_tensor must have the same length, got "
                f"{len(x_tensor)} and {len(y_tensor)}"
            )
        self.x = x_tensor
        self.y = y_tensor

    def __getitem__(self, index):
        # `index` is forwarded as-is to both tensors
        return self.x[index], self.y[index]

    def __len__(self):
        return len(self.x)
    
# Rebuild the training tensors as float32 from x_train / y_train
# (assumed to come from the data-preparation script run earlier — TODO confirm)
x_train_tensor = torch.as_tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.as_tensor(y_train, dtype=torch.float32)

# Wrap both tensors in CustomDataset and display the first (feature, label) pair
train_data = CustomDataset(x_train_tensor, y_train_tensor)
train_data[0]

(tensor([0.7713]), tensor([2.4745]))

In [11]:
train_data[8:16]

(tensor([[0.4722],
         [0.9696],
         [0.1220],
         [0.7751],
         [0.8022],
         [0.7296],
         [0.0977],
         [0.1849]]),
 tensor([[1.9857],
         [2.8401],
         [1.2406],
         [2.4936],
         [2.6229],
         [2.5751],
         [1.4417],
         [1.5888]]))

In [12]:
!export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

In [13]:
from torch.utils.tensorboard import SummaryWriter

> ### DataLoader

In [14]:
# Mini-batches of 16 samples drawn from train_data, with shuffling enabled
train_loader = DataLoader(train_data, batch_size=16, shuffle=True)

In [15]:
%%writefile ../data_preparation/v1.py

# Convert x_train / y_train into float32 PyTorch tensors
x_train_tensor, y_train_tensor = (
    torch.as_tensor(values, dtype=torch.float32) for values in (x_train, y_train)
)

# Pair features and labels in a TensorDataset
train_data = TensorDataset(x_train_tensor, y_train_tensor)

# Serve the dataset in shuffled mini-batches of 16 samples
train_loader = DataLoader(train_data, batch_size=16, shuffle=True)

Overwriting ../data_preparation/v1.py


In [16]:
%run -i ../data_preparation/v1.py

In [17]:
%run -i ../model_configuration/v1.py

In [18]:
%%writefile ../model_training/v2.py

# Incorporate the mini-batch gradient descent logic into our model training part of the code
# Define number of epochs
n_epochs = 1000

# One entry per epoch: the mean of that epoch's mini-batch losses
losses = []

# For each epoch ...
for epoch in range(n_epochs):
    # Inner loop: for each mini-batch
    mini_batch_losses = []
    for x_batch, y_batch in train_loader:
        # The dataset 'lives' on the CPU, and so do our mini-batches;
        # therefore, we need to send them to the device where the model lives
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        # Perform one train step and record the corresponding loss
        loss_mini_batch = train_step(x_batch, y_batch)
        mini_batch_losses.append(loss_mini_batch)

    # Compute the average loss for the epoch
    loss = np.mean(mini_batch_losses)
    losses.append(loss)

Overwriting ../model_training/v2.py


In [19]:
%run -i ../model_training/v2.py

In [20]:
print(model.state_dict())

OrderedDict([('0.weight', tensor([[1.5572]], device='cuda:0')), ('0.bias', tensor([1.2340], device='cuda:0'))])


In [21]:
%run -i ../data_preparation/v1.py
%run -i ../model_configuration/v1.py
%run -i ../model_training/v2.py
print(model.state_dict())

OrderedDict([('0.weight', tensor([[1.5572]], device='cuda:0')), ('0.bias', tensor([1.2340], device='cuda:0'))])


In [22]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def mini_batch(device, data_loader, step):
    """Run ``step`` on every mini-batch of ``data_loader`` and average the results.

    Args:
        device: target passed to ``.to(...)`` for both elements of each batch.
        data_loader: iterable yielding ``(x_batch, y_batch)`` pairs.
        step: callable ``step(x_batch, y_batch)`` returning the batch loss.

    Returns:
        ``np.mean`` of the values returned by ``step``, one per batch.
    """
    batch_losses = [
        step(features.to(device), targets.to(device))
        for features, targets in data_loader
    ]
    return np.mean(batch_losses)

In [23]:
%%writefile ../model_training/v3.py



# Defines number of epochs
n_epochs = 200

# Average training loss of each epoch
losses = []

for epoch in range(n_epochs):
    # inner loop: mini_batch() calls train_step on every batch of train_loader
    # and returns the mean of the per-batch losses
    loss = mini_batch(device, train_loader, train_step)
    losses.append(loss)

Overwriting ../model_training/v3.py


In [24]:
%run -i ../model_training/v3.py

In [25]:
print(model.state_dict())

OrderedDict([('0.weight', tensor([[1.6149]], device='cuda:0')), ('0.bias', tensor([1.2045], device='cuda:0'))])


In [26]:
%%writefile ../data_preparation/v2.py

# Seed set before the random split below
torch.manual_seed(13)

# Turn the full (not yet split) x / y data into float32 tensors
x_tensor, y_tensor = (
    torch.as_tensor(values, dtype=torch.float32) for values in (x, y)
)

# A single dataset holding every data point
dataset = TensorDataset(x_tensor, y_tensor)

# Sizes of the two partitions: `ratio` of the points for training, the rest for validation
ratio = 0.8
n_total = len(dataset)
n_train = int(ratio * n_total)
n_val = n_total - n_train

# Random, non-overlapping split into training and validation subsets
train_data, val_data = random_split(dataset, lengths=[n_train, n_val])

# Loaders: the training one shuffles, the validation one keeps its order
train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
val_loader = DataLoader(val_data, batch_size=16, shuffle=False)

Overwriting ../data_preparation/v2.py


In [27]:
%run -i ../data_preparation/v2.py

> ### Evaluation

In [28]:
def make_val_step(model, loss_fn):
    """Build a closure that performs one validation step.

    Args:
        model: callable module; it is switched to eval mode and called on ``x``.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a tensor
            that supports ``item()``.

    Returns:
        A function ``perform_val_step(x, y)`` that returns the loss of the
        batch as a Python number (``loss.item()``). No backward pass and no
        parameter update take place.
    """
    def perform_val_step(x, y):
        """Compute the loss for one validation batch."""
        # Sets model to EVAL mode
        model.eval()

        # Gradient tracking is disabled here, so the step is safe even when the
        # caller forgets to wrap the validation loop in torch.no_grad()
        with torch.no_grad():
            # Step 1 - Computes our model's predicted output (forward pass)
            yhat = model(x)

            # Step 2 - Computes the loss
            loss = loss_fn(yhat, y)

        # There is no need to compute Steps 3 and 4,
        # since we don't update parameters during evaluation
        return loss.item()

    return perform_val_step

In [29]:
%%writefile ../model_configuration/v2.py

# Select the device here, as the v1 and v3 configuration scripts do, so this
# script does not depend on `device` being left over from another cell
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Set the learning rate
learning_rate = 1e-3

# Seed set right before the model is built
torch.manual_seed(42)

# Create model: a single linear layer (1 input -> 1 output) sent to the device
model = torch.nn.Sequential(
    torch.nn.Linear(1, 1)).to(device)

# Define optimizer to update the model's parameters
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Defines an MSE loss function
loss_fn = torch.nn.MSELoss()

# Create the train-step function for our model, loss function and optimizer
train_step = make_train_step(model, loss_fn, optimizer)

# Create the validation-step function for our model and loss function
val_step = make_val_step(model, loss_fn)

Overwriting ../model_configuration/v2.py


In [30]:
%run -i ../model_configuration/v2.py

In [31]:
%%writefile ../model_training/v4.py

# Defines number of epochs
n_epochs = 200

# Per-epoch averages of the training and validation mini-batch losses
losses = []
val_losses = []

for epoch in range(n_epochs):
    # inner loop
    # Training: mini_batch() runs train_step over every batch of train_loader
    loss = mini_batch(device, train_loader, train_step)
    losses.append(loss)

    # Validation - NO GRADIENTS IN VALIDATION
    # Same helper, but driven by val_step over val_loader
    with torch.no_grad():
        val_loss = mini_batch(device, val_loader, val_step)
        val_losses.append(val_loss)

Overwriting ../model_training/v4.py


In [32]:
%run -i ../model_training/v4.py

In [33]:
print(model.state_dict())

OrderedDict([('0.weight', tensor([[1.3462]], device='cuda:0')), ('0.bias', tensor([1.3083], device='cuda:0'))])


>> #### Plotting Losses

In [34]:
%run -i ../data_preparation/v2.py
%run -i ../model_configuration/v2.py
%run -i ../model_training/v4.py
model.state_dict()

OrderedDict([('0.weight', tensor([[1.3462]], device='cuda:0')),
             ('0.bias', tensor([1.3083], device='cuda:0'))])

> ### TensorBoard

In [35]:
%load_ext tensorboard

In [36]:
%tensorboard --logdir runs

Reusing TensorBoard on port 6006 (pid 3334127), started 0:25:30 ago. (Use '!kill 3334127' to kill it.)

In [37]:
# Log under 'runs/', the directory TensorBoard was started on (--logdir runs);
# anything written to 'run/test' would never show up there
writer = SummaryWriter('runs/test')

2023-02-25 11:17:49.539516: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


>> #### add_graph

In [38]:
# Fetching a tuple of feature (dummy_x) and label (dummy_y) from the train_loader
dummy_x, dummy_y = next(iter(train_loader))

# Since our model was sent to device, we need to do the same with our dummy data
# Even here, both model and data need to be on the same device!
writer.add_graph(model, dummy_x.to(device))

>> #### add_scalars

In [39]:
writer.add_scalars(
    main_tag='Loss',
    tag_scalar_dict={'training': loss,
                        'validation': val_loss},
    global_step=epoch)

In [40]:
%run -i ../data_preparation/v2.py

In [41]:
%%writefile ../model_configuration/v3.py

# Run on the first CUDA device when one is available, on the CPU otherwise
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Step size handed to the optimizer
learning_rate = 0.001

# Seed set right before the model is built
torch.manual_seed(42)

# One linear layer (1 input feature -> 1 output) inside a Sequential container,
# then moved to the selected device
model = torch.nn.Sequential(torch.nn.Linear(1, 1))
model = model.to(device)

# Adam optimizer over the model's parameters
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# MSE loss with 'mean' reduction
loss_fn = torch.nn.MSELoss(reduction='mean')

# Closures performing one training step and one validation step
train_step = make_train_step(model, loss_fn, optimizer)
val_step = make_val_step(model, loss_fn)

# TensorBoard writer logging under runs/simple_LR
writer = SummaryWriter(log_dir='runs/simple_LR')

# Take one batch from train_loader and use its features, moved to the model's
# device, as the example input for tracing the model graph
dummy_x, dummy_y = next(iter(train_loader))
writer.add_graph(model, input_to_model=dummy_x.to(device))

Overwriting ../model_configuration/v3.py


In [42]:
%run -i ../model_configuration/v3.py

In [43]:
%%writefile ../model_training/v5.py

# Defines number of epochs
n_epochs = 200

# Per-epoch averages of the training and validation mini-batch losses
losses = []
val_losses = []

for epoch in range(n_epochs):
    # inner loop
    # Training: mini_batch() runs train_step over every batch of train_loader
    loss = mini_batch(device, train_loader, train_step)
    losses.append(loss)

    # Validation - NO GRADIENTS IN VALIDATION
    # Same helper, but driven by val_step over val_loader
    with torch.no_grad():
        val_loss = mini_batch(device, val_loader, val_step)
        val_losses.append(val_loss)

    # Writes to TensorBoard
    # Both curves go under the same main tag 'Loss', with the epoch as the step
    writer.add_scalars(
        main_tag='Loss',
        tag_scalar_dict={'training': loss,
                            'validation': val_loss},
        global_step=epoch)

# close the writer
# NOTE: `writer` is closed here; later cells re-run the configuration script,
# which creates a new one, before running this script again
writer.close()

Overwriting ../model_training/v5.py


In [44]:
%run -i ../model_training/v5.py

> ### Saving and Loading Models

>> #### Model State

>>> ##### Saving

In [45]:
# torch.save does not create missing directories, so make sure the target folder
# exists (otherwise this cell fails on a fresh checkout)
os.makedirs('checkpoint_folder', exist_ok=True)

# Everything needed to resume training later: epoch count, model and optimizer
# state, and both loss histories
checkpoint = {
    'epoch': n_epochs,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    # The histories hold np.mean results (NumPy scalars); store plain Python
    # floats so the file contains no NumPy objects. NOTE(review): recent PyTorch
    # releases restrict what torch.load unpickles by default — confirm against
    # the installed version
    'loss': [float(value) for value in losses],
    'val_loss': [float(value) for value in val_losses],
}

torch.save(checkpoint, 'checkpoint_folder/checkpoint.pth')

>>> ##### Resuming Training

In [46]:
%run -i ../data_preparation/v2.py
%run -i ../model_configuration/v3.py

In [47]:
model.state_dict()

OrderedDict([('0.weight', tensor([[0.7645]], device='cuda:0')),
             ('0.bias', tensor([0.8300], device='cuda:0'))])

In [48]:
# map_location remaps the stored tensors onto the current device, so a checkpoint
# saved from a CUDA run can still be loaded on a CPU-only machine
checkpoint = torch.load('checkpoint_folder/checkpoint.pth', map_location=device)

# Restore model and optimizer state, plus the bookkeeping saved alongside them
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
losses = checkpoint['loss']
val_losses = checkpoint['val_loss']

model.train()       # ALWAYS SET TO TRAIN MODE FOR RESUMING TRAINING

Sequential(
  (0): Linear(in_features=1, out_features=1, bias=True)
)

In [49]:
model.state_dict()

OrderedDict([('0.weight', tensor([[1.3466]], device='cuda:0')),
             ('0.bias', tensor([1.3081], device='cuda:0'))])

In [50]:
%run -i ../model_training/v5.py
model.state_dict()

OrderedDict([('0.weight', tensor([[1.5691]], device='cuda:0')),
             ('0.bias', tensor([1.2239], device='cuda:0'))])

>>> ##### Deploying / Making Predictions

In [51]:
%run -i ../model_configuration/v3.py

In [52]:
# map_location remaps the stored tensors onto the current device, so a checkpoint
# saved from a CUDA run can still be loaded on a CPU-only machine
checkpoint = torch.load('checkpoint_folder/checkpoint.pth', map_location=device)

# Only the model weights are needed for making predictions
model.load_state_dict(checkpoint['model_state_dict'])

model.state_dict()

OrderedDict([('0.weight', tensor([[1.3466]], device='cuda:0')),
             ('0.bias', tensor([1.3081], device='cuda:0'))])

In [53]:
# Three new feature values, one per row, matching the model's single input feature
new_input = torch.tensor([[5.0], [7.], [2.]])

model.eval()       # ALWAYS SET TO EVAL MODE FOR INFERENCE
# NOTE(review): this forward pass runs with autograd enabled, which is why the
# displayed result carries a grad_fn; wrapping it in torch.no_grad() would avoid that
model(new_input.to(device))

tensor([[ 8.0415],
        [10.7348],
        [ 4.0014]], device='cuda:0', grad_fn=<AddmmBackward0>)

> ### Putting It All Together

In [54]:
# %load data_preparation/v2.py
# NOTE(review): this cell is not an exact copy of the ../data_preparation/v2.py
# written earlier in this notebook: it uses batch_size=32 (16 there) and names the
# split train_dataset / val_dataset (train_data / val_data there) — confirm which
# version is intended
torch.manual_seed(13)

# Builds tensors from numpy arrays BEFORE splitting
x_tensor = torch.as_tensor(x, dtype=torch.float32)
y_tensor = torch.as_tensor(y, dtype=torch.float32)

# Builds dataset containing ALL data points
dataset = TensorDataset(x_tensor, y_tensor)

# Splits dataset into train and validation sets
# (80% of the points for training, the remainder for validation)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Builds data loaders for train and validation sets
# Only the training loader shuffles
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [55]:
# %load model_configuration/v3.py
# Use the first CUDA device when available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Set the learning rate
learning_rate = 1e-3

# Seed set right before the model is built
torch.manual_seed(42)

# Create model: a single linear layer (1 input -> 1 output) sent to the device
model = torch.nn.Sequential(
    torch.nn.Linear(1, 1)).to(device)

# Define optimizer to update the model's parameters
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Defines an MSE loss function
loss_fn = torch.nn.MSELoss(reduction='mean')

# Create the train-step function for our model, loss function and optimizer
train_step = make_train_step(model, loss_fn, optimizer)

# Create the validation-step function for our model and loss function
val_step = make_val_step(model, loss_fn)

# Creates a SummaryWriter to interface with TensorBoard
writer = SummaryWriter('runs/simple_LR')

# Fetching a tuple of feature (dummy_x) and label (dummy_y) from the train_loader
# The features are sent to the model's device before tracing the graph
dummy_x, dummy_y = next(iter(train_loader))
writer.add_graph(model, dummy_x.to(device))

In [56]:
# %load model_training/v5.py

# Defines number of epochs
n_epochs = 200

# Per-epoch averages of the training and validation mini-batch losses
losses = []
val_losses = []

for epoch in range(n_epochs):
    # inner loop
    # Training: mini_batch() runs train_step over every batch of train_loader
    loss = mini_batch(device, train_loader, train_step)
    losses.append(loss)

    # Validation - NO GRADIENTS IN VALIDATION
    # Same helper, but driven by val_step over val_loader
    with torch.no_grad():
        val_loss = mini_batch(device, val_loader, val_step)
        val_losses.append(val_loss)

    # Writes to TensorBoard
    # Both curves go under the same main tag 'Loss', with the epoch as the step
    writer.add_scalars(
        main_tag='Loss',
        tag_scalar_dict={'training': loss,
                            'validation': val_loss},
        global_step=epoch)
    
# Close the writer
writer.close()

In [57]:
model.state_dict()

OrderedDict([('0.weight', tensor([[1.2076]], device='cuda:0')),
             ('0.bias', tensor([1.2423], device='cuda:0'))])