<a href="https://colab.research.google.com/github/josephxlp/PyTorch100Days/blob/main/W3DAY16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
!pip install pytorch-lightning litmodels --q

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.8/147.8 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.5/62.5 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for wget (setup.py) ... [?25l[?25hdone


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import pytorch_lightning as pl

In [6]:
transform = transforms.ToTensor()
train_ds = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_ds = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

100%|██████████| 9.91M/9.91M [00:00<00:00, 39.6MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.22MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 9.25MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 4.24MB/s]


In [7]:
train_dl = DataLoader(train_ds, batch_size=128, shuffle=True)
valid_dl = DataLoader(test_ds, batch_size=128, shuffle=False)

In [8]:
class MNIST_Classifier(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28*28, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

    def training_step(self, batch, batch_idx):
        images, labels = batch
        logists = self(images)
        loss = F.cross_entropy(logists, labels)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        images, labels = batch
        logists = self(images)
        loss = F.cross_entropy(logists, labels)
        acc = (logists.argmax(dim=1) == labels).float().mean()
        self.log('valid_loss', loss)
        self.log('valid_acc', acc)
       # return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)


In [9]:
model = MNIST_Classifier()
trainer = pl.Trainer(max_epochs=10, log_every_n_steps=50)
trainer.fit(model, train_dl, valid_dl)

INFO:pytorch_lightning.utilities.rank_zero:Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 100 K  | train
1 | fc2  | Linear | 1.3 K  | train
----------------------------------------
101 K     Trainable params
0         Non-trainable params
101 K     Total params
0.407     Total estimated model params size (MB)
2         Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=10` reached.


In [11]:

# Access the validation and training losses
train_losses = trainer.callback_metrics['train_loss'].cpu().numpy()
valid_losses = trainer.callback_metrics['valid_loss'].cpu().numpy()
valid_acc = trainer.callback_metrics['valid_acc'].cpu().numpy()
print(f"Final Train Loss: {train_losses}")
print(f"Final Validation Loss: {valid_losses}")
print(f"Final Validation Accuracy: {valid_acc}")

# Plotting learning curves
import matplotlib.pyplot as plt
import numpy as np

# Retrieve logged metrics history - this is not directly available as a simple list
# We need to access the logger and extract the data
# This requires a logger to be set up in the trainer.
# Assuming you are using the default TensorBoard logger, the logs are saved in `./lightning_logs/version_X/metrics.csv`

# If you have a logger configured, you can load the data.
# Example using TensorBoard logger (default)
# You might need to adjust the path based on your setup
import os
import pandas as pd

# Function to find the latest lightning_logs version directory
def find_latest_version_dir(log_dir="./lightning_logs"):
    version_dirs = [d for d in os.listdir(log_dir) if os.path.isdir(os.path.join(log_dir, d)) and d.startswith('version_')]
    if not version_dirs:
        return None
    latest_version = max([int(d.split('_')[1]) for d in version_dirs])
    return os.path.join(log_dir, f'version_{latest_version}')

latest_log_dir = find_latest_version_dir()

if latest_log_dir and os.path.exists(os.path.join(latest_log_dir, 'metrics.csv')):
    metrics_df = pd.read_csv(os.path.join(latest_log_dir, 'metrics.csv'))

    # Plotting train_loss and valid_loss
    plt.figure(figsize=(10, 5))
    plt.plot(metrics_df['step'], metrics_df['train_loss'], label='Train Loss')
    plt.plot(metrics_df['step'], metrics_df['valid_loss'], label='Validation Loss')
    plt.xlabel('Step')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss over Steps')
    plt.legend()
    plt.grid(True)
    plt.show()

    # Plotting valid_acc
    plt.figure(figsize=(10, 5))
    plt.plot(metrics_df['step'], metrics_df['valid_acc'], label='Validation Accuracy', color='green')
    plt.xlabel('Step')
    plt.ylabel('Accuracy')
    plt.title('Validation Accuracy over Steps')
    plt.legend()
    plt.grid(True)
    plt.show()

else:
    print("Could not find metrics.csv. Make sure a logger is configured and training completed successfully.")


# Getting predictions on the test set
model.eval() # Set the model to evaluation mode
predictions = []
true_labels = []

with torch.no_grad(): # Disable gradient calculation
    for images, labels in valid_dl: # Use the validation dataloader for prediction
        logists = model(images)
        preds = logists.argmax(dim=1)
        predictions.extend(preds.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())

print("\nSample Predictions:")
print(predictions[:10])
print("Sample True Labels:")
print(true_labels[:10])

Final Train Loss: 0.04399880766868591
Final Validation Loss: 0.08092773705720901
Final Validation Accuracy: 0.9742000102996826
Could not find metrics.csv. Make sure a logger is configured and training completed successfully.

Sample Predictions:
[np.int64(7), np.int64(2), np.int64(1), np.int64(0), np.int64(4), np.int64(1), np.int64(4), np.int64(9), np.int64(5), np.int64(9)]
Sample True Labels:
[np.int64(7), np.int64(2), np.int64(1), np.int64(0), np.int64(4), np.int64(1), np.int64(4), np.int64(9), np.int64(5), np.int64(9)]
