In [4]:
!pip uninstall torch torchvision torchaudio
!pip install torch torchvision torchaudio

Found existing installation: torch 2.4.1+cu121
Uninstalling torch-2.4.1+cu121:
  Would remove:
    /usr/local/bin/convert-caffe2-to-onnx
    /usr/local/bin/convert-onnx-to-caffe2
    /usr/local/bin/torchrun
    /usr/local/lib/python3.10/dist-packages/functorch/*
    /usr/local/lib/python3.10/dist-packages/torch-2.4.1+cu121.dist-info/*
    /usr/local/lib/python3.10/dist-packages/torch/*
    /usr/local/lib/python3.10/dist-packages/torchgen/*
Proceed (Y/n)? y
y
[31mERROR: Operation cancelled by user[0m[31m
[0mCollecting torch
  Downloading torch-2.5.0-cp310-cp310-manylinux1_x86_64.whl.metadata (28 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidi

In [3]:
import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as dset
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torchvision.utils import save_image

In [7]:
# Parameters
dataroot = "data/"  # Directory the MNIST files are downloaded to / read from
batch_size = 64
image_size = 28
nc = 1  # Number of channels (1 for grayscale images)
nz = 100  # Size of the latent vector (input to the generator)
ngf = 64  # Size of feature maps in generator
ndf = 28  # Size of feature maps in discriminator
num_epochs = 800
lr = 0.00005  # Learning rate
beta1 = 0.5  # Beta1 hyperparameter for Adam optimizers
seed = 42  # Seed for PyTorch's random number generators
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed PyTorch's RNGs before anything random is created, so that weight
# initialisation, data shuffling and the noise vectors start from the same state on
# every "Restart & Run All". (Some GPU kernels may still be non-deterministic.)
torch.manual_seed(seed)

# Create directories for saving results
os.makedirs("checkpoints", exist_ok=True)
os.makedirs("generated_images", exist_ok=True)
os.makedirs("loss_plots", exist_ok=True)

# Load MNIST dataset
# ToTensor yields values in [0, 1]; Normalize with mean 0.5 / std 0.5 rescales them
# to [-1, 1].
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
dataset = dset.MNIST(root=dataroot, train=True, download=True, transform=transform)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)


In [8]:
# Generator Network
class Generator(nn.Module):
    """Transposed-convolution generator mapping a latent tensor to an image.

    The layer sizes are taken from the notebook-level settings ``nz`` (latent
    channels), ``ngf`` (base feature-map width) and ``nc`` (output channels).
    For a latent input with 1x1 spatial extent the feature maps grow
    4 -> 7 -> 14 -> 28, and the final ``Tanh`` bounds the output to [-1, 1].
    """

    def __init__(self):
        super().__init__()

        def upsample_stage(in_ch, out_ch, kernel, stride, padding):
            # One hidden stage: transposed conv (no bias) -> batch norm -> in-place ReLU.
            return [
                nn.ConvTranspose2d(in_ch, out_ch, kernel, stride, padding, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(True),
            ]

        # Layers are appended in the same order as before, so the indices inside
        # ``self.main`` (and therefore the state_dict keys) stay the same.
        layers = []
        layers += upsample_stage(nz, ngf * 4, 4, 1, 0)
        layers += upsample_stage(ngf * 4, ngf * 2, 3, 2, 1)
        layers += upsample_stage(ngf * 2, ngf, 4, 2, 1)
        layers.append(nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False))
        layers.append(nn.Tanh())

        self.main = nn.Sequential(*layers)

    def forward(self, input):
        """Run ``input`` through the layer stack and return the generated batch."""
        generated = self.main(input)
        return generated

# Discriminator Network
class Discriminator(nn.Module):
    """Convolutional discriminator producing one probability per input image.

    Channel widths come from the notebook-level settings ``nc`` (input channels)
    and ``ndf`` (base feature-map width). For a 28x28 input the strided
    convolutions shrink the feature maps 14 -> 7 -> 4 -> 1, and the closing
    ``Sigmoid`` maps the single remaining value into (0, 1).
    """

    def __init__(self):
        super().__init__()
        negative_slope = 0.2

        # Built as a plain list first; the order matches the previous definition so
        # the indices inside ``self.main`` (and the state_dict keys) are unchanged.
        stack = [
            # Input stage: no batch norm directly on the image.
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(negative_slope, inplace=True),
            # Two downsampling stages with batch norm.
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(negative_slope, inplace=True),
            nn.Conv2d(ndf * 2, ndf * 4, 3, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(negative_slope, inplace=True),
            # Collapse to a single score per image.
            nn.Conv2d(ndf * 4, 1, 4, 1, 0, bias=False),
            nn.Sigmoid(),
        ]
        self.main = nn.Sequential(*stack)

    def forward(self, input):
        """Score ``input`` and flatten the result to a 1-D tensor."""
        scores = self.main(input)
        return scores.view(-1)

# Build both networks and move them to the selected device
# (generator first, then discriminator).
netG = Generator().to(device)
netD = Discriminator().to(device)

# Binary cross-entropy objective, with one Adam optimizer per network that share
# the same learning rate and beta coefficients.
criterion = nn.BCELoss()
adam_betas = (beta1, 0.999)
optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=adam_betas)
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=adam_betas)

# Target values used with the BCE loss for real and generated samples
real_label, fake_label = 1.0, 0.0

# A fixed batch of 100 latent vectors for generating images
fixed_noise = torch.randn(100, nz, 1, 1, device=device)

# Per-iteration loss histories (generator, discriminator)
g_losses, d_losses = [], []

# Output directories (idempotent, so re-running this cell is safe)
os.makedirs("checkpoints", exist_ok=True)
os.makedirs("generated_images", exist_ok=True)
os.makedirs("loss_plots", exist_ok=True)
start_time = time.time()

# Training Loop
# The log file is managed by a `with` block so that it is flushed and closed even if
# training stops early (for example on a KeyboardInterrupt or a runtime error).
with open("training_log.txt", "w") as log_file:
    for epoch in range(num_epochs):
        epoch_start_time = time.time()

        for i, data in enumerate(dataloader, 0):
            ############################
            # (1) Update Discriminator
            ############################
            netD.zero_grad()
            real_images = data[0].to(device)
            # The last batch of an epoch can be smaller than batch_size, so the
            # label and noise tensors are sized from the actual batch.
            b_size = real_images.size(0)
            label = torch.full((b_size,), real_label, device=device, dtype=torch.float)  # Ensure label is float

            # Real batch, scored against the "real" target
            output = netD(real_images)
            lossD_real = criterion(output, label)
            lossD_real.backward()

            # Generated batch, scored against the "fake" target. detach() keeps this
            # backward pass from propagating into the generator.
            noise = torch.randn(b_size, nz, 1, 1, device=device)
            fake_images = netG(noise)
            label.fill_(fake_label)
            output = netD(fake_images.detach())
            lossD_fake = criterion(output, label)
            lossD_fake.backward()
            optimizerD.step()

            lossD = lossD_real + lossD_fake
            d_losses.append(lossD.item())

            ###########################
            # (2) Update Generator
            ###########################
            netG.zero_grad()
            # The generator is trained against the "real" target: its loss is low
            # when the discriminator scores the generated images as real.
            label.fill_(real_label)
            output = netD(fake_images)
            lossG = criterion(output, label)
            lossG.backward()
            optimizerG.step()

            g_losses.append(lossG.item())

            # Logging each step (optional)
            if i % 100 == 0:
                print(f"[{epoch}/{num_epochs}][{i}/{len(dataloader)}] Loss_D: {lossD.item():.4f} Loss_G: {lossG.item():.4f}")

        # Save generated images for the epoch.
        # Render the same 100 latent vectors (fixed_noise) every epoch so the 10x10
        # grids are directly comparable across epochs; the last training batch is
        # different each time and can hold fewer images than a full grid.
        # no_grad() avoids building an autograd graph for this preview. netG is left
        # in training mode, so BatchNorm normalizes with the statistics of this batch.
        with torch.no_grad():
            preview_images = netG(fixed_noise).detach().cpu()
        save_image(preview_images, f"generated_images/epoch_{epoch+1}.png", nrow=10, normalize=True)

        # Save model checkpoints every 25 epochs
        if (epoch + 1) % 25 == 0:
            torch.save(netG.state_dict(), f"checkpoints/netG_epoch_{epoch+1}.pth")
            torch.save(netD.state_dict(), f"checkpoints/netD_epoch_{epoch+1}.pth")

        # Plot losses every 10 epochs
        if (epoch + 1) % 10 == 0:
            plt.figure(figsize=(10, 5))
            plt.title(f"Generator and Discriminator Loss at Epoch {epoch+1}")
            plt.plot(g_losses, label="G")
            plt.plot(d_losses, label="D")
            plt.xlabel("Iterations")
            plt.ylabel("Loss")
            plt.legend()
            plt.savefig(f"loss_plots/loss_epoch_{epoch+1}.png")
            # Close the figure so figures do not accumulate in memory over the run.
            plt.close()

        # Log progress (losses of the last batch of the epoch and the epoch duration).
        epoch_end_time = time.time()
        log_file.write(f"Epoch [{epoch+1}/{num_epochs}], Loss_D: {lossD.item():.4f}, Loss_G: {lossG.item():.4f}, Time: {epoch_end_time - epoch_start_time:.2f}s\n")
        # Flush once per epoch so the log on disk is current if the session dies.
        log_file.flush()

    # Training end log and summary
    end_time = time.time()
    log_file.write(f"\nTraining completed in: {end_time - start_time:.2f} seconds\n")

print("Training completed.")

[0/800][0/938] Loss_D: 1.4957 Loss_G: 0.7109
[0/800][100/938] Loss_D: 0.4886 Loss_G: 1.4221
[0/800][200/938] Loss_D: 0.2447 Loss_G: 2.3209
[0/800][300/938] Loss_D: 0.1972 Loss_G: 2.6530
[0/800][400/938] Loss_D: 0.0587 Loss_G: 3.6258
[0/800][500/938] Loss_D: 0.0419 Loss_G: 3.9892
[0/800][600/938] Loss_D: 0.0220 Loss_G: 4.5188
[0/800][700/938] Loss_D: 0.0181 Loss_G: 4.4892
[0/800][800/938] Loss_D: 0.3723 Loss_G: 3.7429
[0/800][900/938] Loss_D: 0.4660 Loss_G: 2.2332
[1/800][0/938] Loss_D: 0.5095 Loss_G: 2.6027
[1/800][100/938] Loss_D: 0.6308 Loss_G: 2.1922
[1/800][200/938] Loss_D: 0.4418 Loss_G: 2.2005
[1/800][300/938] Loss_D: 0.4200 Loss_G: 2.4827
[1/800][400/938] Loss_D: 0.5118 Loss_G: 2.0255
[1/800][500/938] Loss_D: 0.4203 Loss_G: 2.6706
[1/800][600/938] Loss_D: 0.4759 Loss_G: 1.7544
[1/800][700/938] Loss_D: 0.4113 Loss_G: 2.2203
[1/800][800/938] Loss_D: 0.6420 Loss_G: 1.8643
[1/800][900/938] Loss_D: 0.5659 Loss_G: 1.6935
[2/800][0/938] Loss_D: 0.5268 Loss_G: 1.6122
[2/800][100/938] Lo

KeyboardInterrupt: 

In [10]:
# Mount Google Drive into the Colab filesystem.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [14]:
import os
import shutil
import zipfile

# Path to the file (or folder) you want to zip
folder_to_zip = '/content/training_log.txt'

# Path where you want to save the zip file in Google Drive
output_zip_path = '/content/drive/My Drive/training_log.zip'

# Fail loudly instead of producing an empty archive when the source is missing.
if not os.path.exists(folder_to_zip):
    raise FileNotFoundError(f"Nothing to archive: {folder_to_zip} does not exist")

if os.path.isdir(folder_to_zip):
    # Folder: archive its contents. make_archive appends ".zip" itself, so strip only
    # a trailing ".zip" (not every occurrence of the substring in the path).
    if output_zip_path.endswith('.zip'):
        archive_base = output_zip_path[:-len('.zip')]
    else:
        archive_base = output_zip_path
    archive_path = shutil.make_archive(archive_base, 'zip', folder_to_zip)
else:
    # Single file: make_archive's root_dir must be a directory; passing a file there
    # yields an empty archive (or an error on older Python versions). Write the file
    # into the zip directly, stored under its base name.
    with zipfile.ZipFile(output_zip_path, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
        archive.write(folder_to_zip, arcname=os.path.basename(folder_to_zip))
    archive_path = output_zip_path

# Last expression of the cell: show where the archive was written.
archive_path


'/content/drive/My Drive/training_log.zip'