In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [5]:
class PracticeModel(nn.Module):
    """Small fully connected classifier: Linear -> ReLU -> Linear -> ReLU -> Linear.

    With the default sizes the network maps 784 input features to 10 output
    values through hidden layers of width 128 and 64. ``forward`` returns the
    raw output of the last linear layer (no softmax is applied), which is the
    form ``nn.CrossEntropyLoss`` expects.

    Args:
        in_features: Number of features in each input sample.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of output values (one per class).
    """

    def __init__(self, in_features=784, hidden1=128, hidden2=64, num_classes=10):
        super().__init__()
        # First layer
        self.fc1 = nn.Linear(in_features, hidden1)
        self.relu1 = nn.ReLU()
        # Second layer
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.relu2 = nn.ReLU()
        # Final (output) layer
        self.fc3 = nn.Linear(hidden2, num_classes)

    def forward(self, x):
        """Apply the three linear layers with ReLU between them.

        Args:
            x: Tensor whose last dimension has size ``in_features``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size ``num_classes``.
        """
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        return self.fc3(x)
        

In [6]:
# Build the network once; `model` is the notebook-wide instance that the
# optimiser, training loop and inference cells below all operate on.
model = PracticeModel()
# Printing an nn.Module lists its registered sub-layers.
print(model)

PracticeModel(
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=64, out_features=10, bias=True)
)


In [10]:
# Loss function: cross-entropy over the model's raw outputs and integer class targets.
criterion = nn.CrossEntropyLoss()
print(criterion)
# Optimiser: Adam over all model parameters with learning rate 0.01.
# NOTE(review): this binds the British spelling `optimiser`; the training cells
# further down create and use a separate `optimizer` (lr=0.001) instead.
optimiser = optim.Adam(model.parameters(),lr=0.01)
print(optimiser)

CrossEntropyLoss()
Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.01
    maximize: False
    weight_decay: 0
)


In [8]:
# Training Step
def training_step(model,data,target,criterion,optimiser):
    """Run one optimisation step on a single batch and return its loss.

    Args:
        model: Module called as ``model(data)`` to produce predictions.
        data: Input batch passed to the model.
        target: Targets passed to ``criterion`` alongside the model output.
        criterion: Callable ``criterion(output, target)`` returning a scalar loss tensor.
        optimiser: Optimiser whose parameters are updated by this step.

    Returns:
        The batch loss as a Python float.
    """
    output = model(data)
    loss = criterion(output, target)
    # Use the optimiser that was passed in. The previous body referenced a
    # global named `optimizer`, which either raised NameError or silently
    # stepped a different optimiser than the caller supplied.
    optimiser.zero_grad()
    loss.backward()
    optimiser.step()
    return loss.item()

In [16]:
# Number of epochs for a training loop.
# NOTE(review): no loop follows in this cell, and `num_epochs` is assigned again
# in a later cell before the training loop that actually reads it.
num_epochs = 10

In [12]:
def inference(model, data):
    """Predict one class index per row of ``data``.

    The model is switched to evaluation mode (and left in that mode), a single
    forward pass is run with gradient tracking disabled, and the position of
    the largest output along dimension 1 is returned.

    Args:
        model: Module called as ``model(data)``.
        data: Input batch passed to the model.

    Returns:
        Tensor of predicted class indices.
    """
    # Evaluation mode changes the behaviour of layers such as dropout / batch norm.
    model.eval()

    # No autograd graph is needed for prediction.
    with torch.no_grad():
        logits = model(data)

    # Index of the highest-scoring class for every sample.
    return logits.argmax(dim=1)

In [11]:
def save_checkpoint(model, optimizer, epoch, loss, filepath):
    """Write a training checkpoint to ``filepath`` with ``torch.save``.

    The checkpoint is a dict with the keys ``'epoch'``, ``'model_state_dict'``,
    ``'optimizer_state_dict'`` and ``'loss'``.

    Args:
        model: Module whose ``state_dict()`` is stored.
        optimizer: Optimizer whose ``state_dict()`` is stored.
        epoch: Epoch value recorded in the checkpoint.
        loss: Loss value recorded in the checkpoint.
        filepath: Destination passed to ``torch.save``. When it is a path
            (``str`` or ``os.PathLike``), missing parent directories are
            created first.
    """
    # Local import so this cell does not depend on a later cell's `import os`.
    import os

    # torch.save does not create directories; make sure the target folder
    # exists. Non-path targets (e.g. open file objects) are passed through.
    if isinstance(filepath, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(filepath))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),      # Model weights
        'optimizer_state_dict': optimizer.state_dict(),  # Optimizer state
        'loss': loss,
    }
    torch.save(checkpoint, filepath)
    print(f"Checkpoint saved to {filepath}")

# Usage:
# save_checkpoint(model, optimizer, epoch=5, loss=0.234, filepath='checkpoints/ckpt.pt')

In [13]:
def load_checkpoint(model, optimizer, filepath, map_location=None):
    """Restore model (and optionally optimizer) state from a checkpoint file.

    The file is expected to hold a dict with the keys ``'epoch'``,
    ``'model_state_dict'``, ``'optimizer_state_dict'`` and ``'loss'``, i.e. the
    layout written by ``save_checkpoint`` in this notebook.

    Args:
        model: Module that receives ``checkpoint['model_state_dict']``.
        optimizer: Optimizer that receives ``checkpoint['optimizer_state_dict']``.
            Pass ``None`` to restore only the model (e.g. for inference).
        filepath: Source passed to ``torch.load``.
        map_location: Forwarded to ``torch.load``; use e.g. ``'cpu'`` to load a
            checkpoint on a machine without the device it was saved from.
            ``None`` keeps ``torch.load``'s default behaviour.

    Returns:
        Tuple ``(model, optimizer, epoch, loss)``; ``model`` and ``optimizer``
        are the same objects that were passed in.
    """
    # SECURITY: torch.load unpickles data. Only load checkpoints from trusted
    # sources, or pass weights_only=True on PyTorch versions that support it.
    checkpoint = torch.load(filepath, map_location=map_location)

    # Load model weights
    model.load_state_dict(checkpoint['model_state_dict'])

    # Load optimizer state (skipped when the caller has no optimizer)
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    # Extract metadata
    epoch = checkpoint['epoch']
    loss = checkpoint['loss']

    print(f"Loaded checkpoint from epoch {epoch} with loss {loss}")
    return model, optimizer, epoch, loss

# Usage:
# model, optimizer, start_epoch, last_loss = load_checkpoint(model, optimizer, 'checkpoints/ckpt.pt')
# for epoch in range(start_epoch + 1, num_epochs):
#     # Resume training...

In [14]:
def train_with_checkpointing(model, train_loader, val_loader, num_epochs, checkpoint_dir='checkpoints'):
    """Train ``model`` with Adam and cross-entropy, checkpointing along the way.

    Each epoch runs one pass over ``train_loader`` (with parameter updates) and
    one pass over ``val_loader`` (no gradients), then prints the mean per-batch
    losses. Two kinds of checkpoint are written through ``save_checkpoint``:

    * ``{checkpoint_dir}/best_model.pt`` whenever the validation loss improves
      on the best seen so far (stored loss: validation loss);
    * ``{checkpoint_dir}/ckpt_epoch_{N}.pt`` after every 5th epoch
      (stored loss: training loss).

    Args:
        model: Module to train; it is updated in place.
        train_loader: Iterable of ``(batch_data, batch_target)`` pairs supporting ``len()``.
        val_loader: Iterable of ``(batch_data, batch_target)`` pairs supporting ``len()``.
        num_epochs: Number of epochs to run.
        checkpoint_dir: Directory for checkpoint files; created if missing.

    Returns:
        The same ``model`` object after training.

    Raises:
        ZeroDivisionError: If ``train_loader`` or ``val_loader`` has length 0.
    """
    # Local import so this cell does not depend on a later cell's `import os`.
    import os

    # Without this the first save fails when the directory does not exist yet.
    os.makedirs(checkpoint_dir, exist_ok=True)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    best_val_loss = float('inf')

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0
        for batch_data, batch_target in train_loader:
            output = model(batch_data)
            loss = criterion(output, batch_target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        # Mean loss per batch (not per sample)
        train_loss /= len(train_loader)

        # Validation phase
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_data, batch_target in val_loader:
                output = model(batch_data)
                loss = criterion(output, batch_target)
                val_loss += loss.item()

        val_loss /= len(val_loader)

        print(f"Epoch {epoch+1}: Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}")

        # Save checkpoint if validation loss improves
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            save_checkpoint(model, optimizer, epoch, val_loss,
                          f'{checkpoint_dir}/best_model.pt')
            print(f"  → Saved best model!")

        # Save regular checkpoint every 5 epochs
        if (epoch + 1) % 5 == 0:
            save_checkpoint(model, optimizer, epoch, train_loss,
                          f'{checkpoint_dir}/ckpt_epoch_{epoch+1}.pt')

    return model

In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Create dummy data for demonstration.
# Features and labels are drawn independently at random, so there is no real
# relationship for the model to learn; the data only exercises the pipeline.
# NOTE(review): no random seed is set, so values differ on every run.
X_train = torch.randn(1000, 784)  # 1000 samples, 784 features
y_train = torch.randint(0, 10, (1000,))  # integer labels in [0, 10) -> 10 classes
X_val = torch.randn(200, 784)  # 200 validation samples, 784 features
y_val = torch.randint(0, 10, (200,))  # integer labels in [0, 10)

# Create DataLoaders: batches of 32; only the training set is shuffled.
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

# Create the directory that the checkpoint files below are written into.
import os
os.makedirs('checkpoints', exist_ok=True)

In [18]:
# Training setup used by the loop in the next cell: cross-entropy loss,
# Adam over the global `model` with learning rate 0.001, and 10 epochs.
# These rebind `criterion` and `num_epochs` from earlier cells.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10

In [19]:
# Train the global `model` for `num_epochs` epochs, keeping the checkpoint with
# the lowest validation loss in 'checkpoints/best_model.pt'.
# Start at +inf so the first epoch's validation loss always counts as an improvement.
best_val_loss = float('inf')

for epoch in range(num_epochs):
    # Training phase
    model.train()
    train_loss = 0
    for batch_data, batch_target in train_loader:
        output = model(batch_data)
        loss = criterion(output, batch_target)
        
        # Clear old gradients, backpropagate, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
    
    # Mean loss per batch (not per sample).
    train_loss /= len(train_loader)
    
    # Validation phase: evaluation mode, no gradient tracking, no updates.
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_data, batch_target in val_loader:
            output = model(batch_data)
            loss = criterion(output, batch_target)
            val_loss += loss.item()
    
    val_loss /= len(val_loader)
    
    print(f"Epoch {epoch+1}/{num_epochs}: Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}")
    
    # Save best checkpoint (overwrites the previous best file).
    # `epoch` is stored 0-based, i.e. one less than the number printed above.
    # NOTE: in the recorded output below, validation loss rises after epoch 1
    # while training loss keeps falling, so only the first epoch was saved.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        checkpoint = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': val_loss,
        }
        torch.save(checkpoint, 'checkpoints/best_model.pt')
        print(f"  ✓ Best model saved!")

Epoch 1/10: Train Loss=2.3094, Val Loss=2.2971
  ✓ Best model saved!
Epoch 2/10: Train Loss=2.0458, Val Loss=2.3220
Epoch 3/10: Train Loss=1.5172, Val Loss=2.4669
Epoch 4/10: Train Loss=0.6526, Val Loss=2.8814
Epoch 5/10: Train Loss=0.1482, Val Loss=3.2911
Epoch 6/10: Train Loss=0.0427, Val Loss=3.5413
Epoch 7/10: Train Loss=0.0212, Val Loss=3.6721
Epoch 8/10: Train Loss=0.0138, Val Loss=3.7669
Epoch 9/10: Train Loss=0.0100, Val Loss=3.8577
Epoch 10/10: Train Loss=0.0076, Val Loss=3.9285


In [20]:
# Restore the weights stored in the best checkpoint
checkpoint = torch.load('checkpoints/best_model.pt')
model.load_state_dict(checkpoint['model_state_dict'])

# Evaluation mode, then a batch of 10 random samples with 784 features each
model.eval()
test_data = torch.randn(10, 784)

# Forward pass without gradient tracking
with torch.no_grad():
    output = model(test_data)
    # Softmax turns the raw outputs into per-class probabilities
    probabilities = output.softmax(dim=1)
    # Predicted class = position of the largest raw output
    predictions = output.argmax(dim=1)

print("Predictions:", predictions)
# Confidence = probability assigned to the predicted class
print("Confidence:", probabilities.max(dim=1).values)

Predictions: tensor([5, 5, 5, 9, 6, 5, 7, 5, 5, 5])
Confidence: tensor([0.1180, 0.1301, 0.1089, 0.1226, 0.1104, 0.1236, 0.1213, 0.1245, 0.1183,
        0.1203])


In [21]:
# Load checkpoint: restore both model weights and optimizer state so that
# training can pick up where the saved epoch left off.
checkpoint = torch.load('checkpoints/best_model.pt')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# The stored epoch is the 0-based index of the epoch that produced the
# checkpoint, so resume from the one after it.
start_epoch = checkpoint['epoch'] + 1

# Continue training up to 5 epochs past the original `num_epochs`.
# Placeholder only: the loop body does nothing yet.
for epoch in range(start_epoch, num_epochs + 5):
    # Training loop continues...
    pass