In [None]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

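## 1. Environment Setup

Checking the GPU visible to this runtime with `nvidia-smi` and installing the NVML Python bindings used for telemetry.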



In [None]:
!nvidia-smi
!pip install nvidia-ml-py3


## 2. Library Imports & Device Configuration

Importing PyTorch, TorchVision, and setting up CUDA device.


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import pandas as pd


# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)


## 3. GPU Telemetry Initialization

Initializing NVIDIA Management Library (NVML) for real-time GPU monitoring.


In [None]:
from pynvml import *
import time
import csv

import torch

# NVML is only initialised when CUDA is available. The pynvml names used here
# come from the star-import at the top of this cell, so it is not repeated.
if torch.cuda.is_available():
    nvmlInit()
    # Telemetry is read from the first GPU (index 0) only.
    handle = nvmlDeviceGetHandleByIndex(0)
else:
    # Define the name on CPU-only machines too, so later code can reference
    # `handle` without raising NameError.
    handle = None
    print("NVML not available — running on CPU")


## 4. Dataset Preparation

Loading CIFAR-10 dataset and applying preprocessing transformations.


In [None]:
# The only preprocessing step is ToTensor; no normalization or augmentation
# is applied to the images.
transform = transforms.Compose([transforms.ToTensor()])

# CIFAR-10 training split, downloaded into ./data when not already present.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)


## 5. CNN Model Architecture

Defining a simple convolutional neural network for image classification.


In [None]:
class SimpleCNN(nn.Module):
    """Small two-convolution CNN classifier for 3-channel 32x32 images.

    Layout: conv(3->16) -> ReLU -> 2x2 max-pool -> conv(16->32) -> ReLU ->
    2x2 max-pool -> linear(32*8*8 -> 128) -> ReLU -> linear(128 -> num_classes).
    The 3x3 convolutions use padding=1 and so keep the spatial size; each
    pooling step halves it, so a 32x32 input yields the 32x8x8 feature map
    that ``fc1`` is sized for.

    Args:
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.fc1 = nn.Linear(32 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw class logits for a batch of images.

        Args:
            x: Image tensor whose trailing dimensions are (3, 32, 32).

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalized logits.

        Raises:
            ValueError: If the pooled feature map is not 32x8x8, i.e. the
                input images were not 32x32.
        """
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))

        # view(-1, ...) would otherwise silently fold surplus spatial elements
        # into the batch dimension for wrongly sized inputs, so fail loudly.
        if tuple(x.shape[-3:]) != (32, 8, 8):
            raise ValueError(
                "SimpleCNN expects 32x32 inputs (32x8x8 feature map after pooling), "
                f"got feature map of shape {tuple(x.shape[-3:])}"
            )

        x = x.view(-1, 32 * 8 * 8)
        x = self.relu(self.fc1(x))
        return self.fc2(x)

# Build the network, then move its parameters to the selected device.
model = SimpleCNN()
model = model.to(device)


## 6. Training Configuration

Defining loss function and optimization strategy.


In [None]:
# Classification loss applied to the model's output and the integer labels.
criterion = nn.CrossEntropyLoss()
# Adam over every parameter of the model, with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


## 7. Experimental Framework

Defining `run_experiment`, which trains the model at a given batch size while logging GPU utilization and memory usage to a CSV file after every batch.


In [None]:
def run_experiment(batch_size, epochs=1):
    """Train the notebook's model at one batch size and log GPU telemetry.

    Relies on notebook-level names: ``trainset``, ``model``, ``criterion``,
    ``optimizer``, ``device`` and, when CUDA is available, the NVML ``handle``.

    One row ``[Time, GPU_Util, Memory_MB]`` is written to
    ``gpu_log_{batch_size}.csv`` after every optimizer step; an existing file
    with that name is overwritten. Without CUDA both telemetry columns are 0.

    Args:
        batch_size: Mini-batch size passed to the DataLoader.
        epochs: Number of passes over ``trainset``. Defaults to 1.

    Note:
        ``model`` and ``optimizer`` are shared between calls, so a second
        experiment continues training the weights left by the first one
        rather than starting from a fresh initialization.
    """
    print(f"\nStarting experiment with batch size {batch_size}")

    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True)

    log_filename = f"gpu_log_{batch_size}.csv"

    # CUDA availability does not change during a run, so query it once
    # instead of once per batch.
    gpu_available = torch.cuda.is_available()

    with open(log_filename, "w", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(["Time", "GPU_Util", "Memory_MB"])

        for _ in range(epochs):
            for i, (inputs, labels) in enumerate(trainloader):
                inputs, labels = inputs.to(device), labels.to(device)

                # Standard training step: reset grads, forward, backward, update.
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # GPU monitoring: sample NVML once per batch.
                if gpu_available:
                    util = nvmlDeviceGetUtilizationRates(handle).gpu
                    # Convert the NVML reading to the MB column by dividing by 1024**2.
                    mem = nvmlDeviceGetMemoryInfo(handle).used / 1024**2
                else:
                    util = 0
                    mem = 0

                writer.writerow([time.time(), util, mem])

                # Progress line every 100 batches.
                if i % 100 == 99:
                    print(f"[BatchSize {batch_size}] Batch {i+1}, GPU: {util}%, Mem: {mem:.0f}MB")

    print(f"Finished experiment for batch size {batch_size}")


## 8. Comparative Analysis

Running the experiment at batch sizes 128 and 256, then loading the telemetry logs and comparing GPU utilization and memory usage across the two configurations.


In [None]:
# Run the same training/telemetry loop once per batch size, smaller one first.
for batch_size in (128, 256):
    run_experiment(batch_size)


In [None]:


# Load the per-batch telemetry logs written for each batch size.
log_paths = ("gpu_log_128.csv", "gpu_log_256.csv")
df_128, df_256 = map(pd.read_csv, log_paths)

# One CSV row was written per training batch, so these are batch counts.
print("128 samples:", df_128.shape[0])
print("256 samples:", df_256.shape[0])

# Report the mean of each telemetry column for both configurations.
for heading, column in (
    ("Average GPU Utilization:", "GPU_Util"),
    ("\nAverage GPU Memory Usage (MB):", "Memory_MB"),
):
    print(heading)
    print("Batch 128:", df_128[column].mean())
    print("Batch 256:", df_256[column].mean())



In [None]:
# GPU utilization per logged batch, one line per batch size.
fig, ax = plt.subplots(figsize=(12, 5))

ax.plot(df_128["GPU_Util"], label="Batch Size 128")
ax.plot(df_256["GPU_Util"], label="Batch Size 256")

ax.set_title("GPU Utilization Comparison")
ax.set_xlabel("Time Step")
ax.set_ylabel("GPU Utilization (%)")
ax.legend()
ax.grid(True)

plt.show()


In [None]:
# GPU memory in use per logged batch, one line per batch size.
fig, ax = plt.subplots(figsize=(12, 5))

ax.plot(df_128["Memory_MB"], label="Batch Size 128")
ax.plot(df_256["Memory_MB"], label="Batch Size 256")

ax.set_title("GPU Memory Usage Comparison")
ax.set_xlabel("Time Step")
ax.set_ylabel("Memory Used (MB)")
ax.legend()
ax.grid(True)

plt.show()


## 9. Conclusion

Batch-size scaling increases GPU memory usage significantly, while GPU utilization shows modest improvement. These findings highlight baseline behavioral patterns and motivate adaptive optimization strategies.
