In [1]:
import torch
from torch import nn
from tqdm.notebook import tqdm

In [2]:
# Create a Linear Regression model class
class LinearRegressionModel(nn.Module): # <- almost everything in PyTorch is a nn.Module (think of this as neural network lego blocks)
    def __init__(self):
        super().__init__() 
        self.weights = nn.Parameter(torch.randn(1, # <- start with random weights (this will get adjusted as the model learns)
                                                dtype=torch.float), # <- PyTorch loves float32 by default
                                   requires_grad=True) # <- can we update this value with gradient descent?)

        self.bias = nn.Parameter(torch.randn(1, # <- start with random bias (this will get adjusted as the model learns)
                                            dtype=torch.float), # <- PyTorch loves float32 by default
                                requires_grad=True) # <- can we update this value with gradient descent?))

    # Forward defines the computation in the model
    def forward(self, x: torch.Tensor) -> torch.Tensor: # <- "x" is the input data (e.g. training/testing features)
        return self.weights * x + self.bias # <- this is the linear regression formula (y = m*x + b)

In [3]:
# list(model.parameters())
# model.state_dict()

In [93]:
X  = torch.arange(0,1,0.02).reshape(-1,1)
w, b = 0.7, 0.3

y = X*w + b

In [94]:
X_train, y_train = X[:40], y[:40]
X_test, y_test = X[40:], y[40:]

In [20]:
model = LinearRegressionModel()
loss_fn = torch.nn.MSELoss()
optim = torch.optim.SGD(params=model.parameters(),lr = 0.1)

In [21]:
list(model.parameters())

[Parameter containing:
 tensor([0.3367], requires_grad=True),
 Parameter containing:
 tensor([0.1288], requires_grad=True)]

In [22]:
torch.manual_seed(42)
EPOCHS = 500

for i in tqdm(range(EPOCHS)):

    # Training 
    model.train()

    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)

    optim.zero_grad()
    loss.backward()
    optim.step()

    # Testing
    model.eval()

    with torch.inference_mode():
        test_pred = model(X_test)
        test_loss = loss_fn(test_pred, y_test)

    if i%100 == 0:
        print(f"Test Loss : {test_loss :.4f}")

  0%|          | 0/500 [00:00<?, ?it/s]

Test Loss : 0.1659
Test Loss : 0.0023
Test Loss : 0.0004
Test Loss : 0.0001
Test Loss : 0.0000


In [15]:
list(model.parameters())

[Parameter containing:
 tensor([0.6975], requires_grad=True),
 Parameter containing:
 tensor([0.3010], requires_grad=True)]

In [95]:
class LinearRegressionModelv2(nn.Module):
    """Some Information about LinearRegressionModelv2"""
    def __init__(self):
        super().__init__()
        self.linear_layer = nn.Linear(in_features=1,out_features=1)

    def forward(self, x: torch.tensor) -> torch.tensor:
        return self.linear_layer(x)

In [96]:
model = LinearRegressionModelv2()
loss_fn = torch.nn.MSELoss()
optim = torch.optim.SGD(params=model.parameters(),lr = 0.1)

In [97]:
torch.manual_seed(42)
EPOCHS = 500

for i in tqdm(range(EPOCHS)):

    # Training
    model.train()
    
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)
    optim.zero_grad()
    loss.backward()
    optim.step()


    # Testing
    model.eval()

# I think this way it is more efficient
    if i % 100 == 0:
        with torch.inference_mode():
            test_pred = model(X_test)
            test_loss = loss_fn(test_pred, y_test)
            print(f"Loss : {test_loss : .4f}")




  0%|          | 0/500 [00:00<?, ?it/s]

Loss :  0.1912
Loss :  0.0006
Loss :  0.0001
Loss :  0.0000
Loss :  0.0000


In [98]:
model.state_dict()
# optim.state_dict()

OrderedDict([('linear_layer.weight', tensor([[0.6987]])),
             ('linear_layer.bias', tensor([0.3005]))])

In [99]:
from pathlib import Path

# 1. Create models directory 
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# 2. Create model save path 
MODEL_NAME = "01_pytorch_workflow_model_1.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

# 3. Save the model state dict 
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(obj=model.state_dict(), # only saving the state_dict() only saves the models learned parameters
           f=MODEL_SAVE_PATH)

Saving model to: models/01_pytorch_workflow_model_1.pth


In [100]:
# Instantiate a fresh instance of LinearRegressionModelV2
loaded_model = LinearRegressionModelv2()

# Load model state dict 
loaded_model.load_state_dict(torch.load(MODEL_SAVE_PATH))


<All keys matched successfully>

In [101]:
# Evaluate loaded model
loaded_model.eval()
with torch.inference_mode():
    test_pred = model(X_test)
    loaded_model_pred = loaded_model(X_test)
    
torch.norm(test_pred - loaded_model_pred)

tensor(0.)

In [104]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [134]:
X_train = X_train.to(device)
y_train = y_train.to(device)
X_test = X_test.to(device)
y_test = y_test.to(device)
model = LinearRegressionModelv2()
model.to(device)

loss_fn = torch.nn.MSELoss()
optim = torch.optim.SGD(params=model.parameters(),lr = 0.1)

In [142]:
next(model.parameters()).device, X_train.device
# list(model.parameters())

(device(type='cuda', index=0), device(type='cuda', index=0))

In [137]:
torch.manual_seed(42)
EPOCHS = 500

for i in tqdm(range(EPOCHS)):

    # Training
    model.train()
    
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)
    optim.zero_grad()
    loss.backward()
    optim.step()

    # Testing
    model.eval()

# I think this way it is more efficient
    if i % 100 == 0:
        with torch.inference_mode():
            test_pred = model(X_test)
            test_loss = loss_fn(test_pred, y_test)
            print(f"Loss : {test_loss : .4f}| {loss}")

  0%|          | 0/500 [00:00<?, ?it/s]

Loss :  0.1912| 0.3084445595741272
Loss :  0.0006| 0.00014370800636243075
Loss :  0.0001| 2.265658440592233e-05
Loss :  0.0000| 3.572052946765325e-06
Loss :  0.0000| 5.631786166304664e-07


In [138]:
list(model.parameters())

[Parameter containing:
 tensor([[0.6987]], device='cuda:0', requires_grad=True),
 Parameter containing:
 tensor([0.3005], device='cuda:0', requires_grad=True)]

In [139]:
optim.state_dict()

{'state': {0: {'momentum_buffer': None}, 1: {'momentum_buffer': None}},
 'param_groups': [{'lr': 0.1,
   'momentum': 0,
   'dampening': 0,
   'weight_decay': 0,
   'nesterov': False,
   'maximize': False,
   'foreach': None,
   'differentiable': False,
   'params': [0, 1]}]}