<a href="https://colab.research.google.com/github/rakshit9695/NLP/blob/main/TEST_GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# IPython shell escape: run `nvidia-smi` to report the GPU, driver and CUDA version visible to this runtime.
!nvidia-smi

Fri Jul 25 17:28:37 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   66C    P8             12W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
import tensorflow as tf

# Ask TensorFlow which physical GPU devices it can see
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("❌ GPU is not available.")
else:
    print("✅ GPU is available!")
    print("GPU details:", gpus)

# Smoke test: multiply a (10000 x 1000) by a (1000 x 2000) random matrix,
# placed on the first GPU when one was found and on the CPU otherwise
target_device = '/GPU:0' if gpus else '/CPU:0'
with tf.device(target_device):
    a = tf.random.normal([10000, 1000])
    b = tf.random.normal([1000, 2000])
    c = tf.matmul(a, b)
print("Matrix multiplication successful. Result shape:", c.shape)


✅ GPU is available!
GPU details: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Matrix multiplication successful. Result shape: (10000, 2000)


In [3]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time

# Select the compute device: CUDA when PyTorch reports one, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# 1. CIFAR-10 input pipeline
# Convert each image to a tensor, then normalize every one of the three
# channels with mean 0.5 and standard deviation 0.5
channel_mean = (0.5, 0.5, 0.5)
channel_std = (0.5, 0.5, 0.5)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

# Training split: fetched into ./data if missing, batches of 128, shuffle enabled
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)

# Test split: same transform, batches of 100, fixed order
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=100,
    shuffle=False,
    num_workers=2,
)

# 2. Define a CNN
class Net(nn.Module):
    """Small convolutional classifier for 3-channel 32x32 images.

    Architecture: two (3x3 conv, padding 1) -> ReLU -> 2x2 max-pool stages
    (3 -> 32 -> 64 channels), followed by two fully connected layers
    (64 * 8 * 8 -> 256 -> 10). The 8x8 spatial size baked into ``fc1`` is what
    a 32x32 input becomes after the two 2x2 poolings.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Return one row of 10 unnormalized class scores per input sample.

        Args:
            x: batch of images shaped (N, 3, 32, 32).

        Raises:
            RuntimeError: if the spatial size is not 32x32, because the
                flattened feature count no longer matches ``fc1``.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, 64 * 8 * 8)``, this never folds surplus features into the
        # batch dimension when the input size is wrong; the mismatch surfaces
        # as an error in fc1 instead of a silently larger "batch".
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# Seed PyTorch's global RNG before the model is built so that weight
# initialization and the DataLoader's shuffle order are repeatable between
# runs. (GPU kernels may still introduce small floating-point differences.)
SEED = 42
torch.manual_seed(SEED)

net = Net().to(device)

# 3. Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

# 4. Train the network
NUM_EPOCHS = 3   # a few epochs keeps this smoke test quick
LOG_EVERY = 100  # print the mean loss once per this many batches

net.train()  # make training mode explicit in case the cell is re-run after evaluation
start_time = time.perf_counter()  # monotonic clock, intended for measuring intervals
for epoch in range(NUM_EPOCHS):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if (i + 1) % LOG_EVERY == 0:
            print(f"[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / LOG_EVERY:.3f}")
            running_loss = 0.0

# CUDA work is queued asynchronously; wait for it to finish so the reported
# duration includes the final backward pass and optimizer step.
if device.type == "cuda":
    torch.cuda.synchronize()
print("Finished Training in", round(time.perf_counter() - start_time, 2), "seconds")

# 5. Evaluate accuracy
correct = 0
total = 0
net.eval()  # put the module in evaluation mode
with torch.no_grad():  # no gradients are needed for scoring
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        # Index of the highest score in each row is the predicted class.
        # `outputs` is used directly: under no_grad the legacy `.data`
        # accessor adds nothing.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report the number of images actually scored rather than a hard-coded count.
print(f"Accuracy on {total} test images: {100 * correct / total:.2f}%")


Using device: cuda


100%|██████████| 170M/170M [00:13<00:00, 12.9MB/s]


[Epoch 1, Batch 100] loss: 1.699
[Epoch 1, Batch 200] loss: 1.335
[Epoch 1, Batch 300] loss: 1.246
[Epoch 2, Batch 100] loss: 1.040
[Epoch 2, Batch 200] loss: 1.030
[Epoch 2, Batch 300] loss: 0.974
[Epoch 3, Batch 100] loss: 0.867
[Epoch 3, Batch 200] loss: 0.834
[Epoch 3, Batch 300] loss: 0.834
Finished Training in 40.49 seconds
Accuracy on 10000 test images: 68.90%
