In [25]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchvision.utils import make_grid
from torchvision.utils import save_image
import matplotlib.pyplot as plt
import subprocess
import os
import torchvision.transforms as transforms
import numpy as np
from IPython.display import clear_output


transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.RandomResizedCrop(size=28, scale=(0.9, 1.1)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])  # Normalize to [-1, 1] for grayscale
])

from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

dataloader = torch.utils.data.DataLoader(
    torchvision.datasets.FashionMNIST(
        root='./data', train=True, download=True, transform=transform
    ),
    batch_size=128, shuffle=True
)

In [26]:
class Generator(nn.Module):
    def __init__(self):
        super().__init__()
        self.main = nn.Sequential(
            nn.ConvTranspose2d(100, 128, 7, 1, 0, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(True),

            nn.ConvTranspose2d(128, 64, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(True),

            nn.ConvTranspose2d(64, 1, 4, 2, 1, bias=False),
            nn.Tanh()
        )

    def forward(self, x):
        return self.main(x)

class Discriminator(nn.Module):
    def __init__(self):
        super().__init__()
        self.main = nn.Sequential(
            nn.Conv2d(1, 64, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(64, 128, 4, 2, 1, bias=False),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(128, 1, 7, 1, 0, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.main(x).view(-1)


In [27]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

netG = Generator().to(device)
netD = Discriminator().to(device)

# Loss and optimizers
criterion = nn.BCELoss()
optimizerD = torch.optim.Adam(netD.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizerG = torch.optim.Adam(netG.parameters(), lr=0.0002, betas=(0.5, 0.999))

# Hyperparameters
epochs = 20
batch_size = 128
z = 100
lr = 0.0002

RuntimeError: CUDA error: CUDA-capable device(s) is/are busy or unavailable
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [20]:
import os
import torch
import matplotlib.pyplot as plt
from torchvision.utils import save_image, make_grid

g_losses = []
d_losses = []

# Training loop
for epoch in range(20):
    for i, (real_images, _) in enumerate(dataloader):
        batch_size = real_images.size(0)
        real_images = real_images.to(device)

        real_labels = torch.ones(batch_size).to(device)
        fake_labels = torch.zeros(batch_size).to(device)

        # Train Discriminator
        netD.zero_grad()
        outputs = netD(real_images)
        d_loss_real = criterion(outputs, real_labels)

        z = torch.randn(batch_size, 100, 1, 1).to(device)
        fake_images = netG(z)
        outputs = netD(fake_images.detach())
        d_loss_fake = criterion(outputs, fake_labels)

        d_loss = d_loss_real + d_loss_fake
        d_loss.backward()
        optimizerD.step()

        # Train Generator
        netG.zero_grad()
        z = torch.randn(batch_size, 100, 1, 1).to(device)
        fake_images = netG(z)
        outputs = netD(fake_images)
        g_loss = criterion(outputs, real_labels)

        g_loss.backward()
        optimizerG.step()

    print(f"Epoch [{epoch+1}/20], D Loss: {d_loss.item():.4f}, G Loss: {g_loss.item():.4f}")
    g_losses.append((epoch + 1, g_loss.item()))
    d_losses.append((epoch + 1, d_loss.item()))

    # Show sample grid every 5 epochs
    """if (epoch + 1) % 5 == 0:
        netG.eval()
        with torch.no_grad():
            z = torch.randn(64, 100, 1, 1).to(device)
            fake_images = netG(z)
            grid = make_grid(fake_images[:64], nrow=8, normalize=True)
            plt.imshow(grid.permute(1, 2, 0).cpu())
            plt.axis("off")
            plt.title(f"Generated Samples at Epoch {epoch+1}")
            plt.show()"""
    netG.train()


RuntimeError: CUDA error: CUDA-capable device(s) is/are busy or unavailable
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [18]:
netG.eval()
z = torch.randn(64, 100, 1, 1).to(device)
fake_images = netG(z)
grid = make_grid(fake_images[:64], nrow=8, normalize=True)
plt.imshow(grid.permute(1, 2, 0).cpu())
plt.axis("off")
plt.show()

NameError: name 'netG' is not defined

In [22]:
os.environ['CUDA_VISIBLE_DEVICES']

'0'

In [23]:
import torch
print(torch.cuda.is_available())


True


In [21]:
import numpy as np
import os

# Set output folder
output_dir = "/projectnb/ds340/projects/leilani_hannah_final_project/fid_images_augmented"
os.makedirs(output_dir, exist_ok=True)

# Save real images
real_images_list = []
for idx, (real_img, _) in enumerate(dataloader):
    real_images_list.append(real_img)  # still normalized [-1,1]
    if idx >= 100:  # limit number of batches
        break

real_images_all = torch.cat(real_images_list, dim=0)  # shape (N, 1, 28, 28)
real_images_all = (real_images_all + 1) / 2  # normalize to [0,1] for FID
real_images_all = real_images_all.expand(-1, 3, -1, -1)  # repeat channels to make (N,3,28,28)
real_images_all = real_images_all.numpy()
np.save(os.path.join(output_dir, "real_images_fid.npy"), real_images_all)

# Save fake images
netG.eval()
fake_images_list = []
with torch.no_grad():
    for idx in range(100):
        z = torch.randn(batch_size, 100, 1, 1).to(device)
        fake_imgs = netG(z)
        fake_imgs = (fake_imgs + 1) / 2  # normalize to [0,1]
        fake_imgs = fake_imgs.expand(-1, 3, -1, -1)  # make 3 channels
        fake_images_list.append(fake_imgs.cpu())
        
fake_images_all = torch.cat(fake_images_list, dim=0)
fake_images_all = fake_images_all.numpy()
np.save(os.path.join(output_dir, "fake_images_fid.npy"), fake_images_all)


In [45]:
python fid_score.py --true /projectnb/ds340/projects/leilani_hannah_final_project/fid_images_augmented/real_images_fid.npy --fake /projectnb/ds340/projects/leilani_hannah_final_project/fid_images_augmented/fake_images_fid.npy --gpu 1



SyntaxError: invalid syntax (2234033974.py, line 1)

In [23]:
fid_scores = []  # <--- store (epoch, fid_score)
g_losses = []
d_losses = []
for epoch in range(epochs):
    for i, (real_images, _) in enumerate(dataloader):
        batch_size = real_images.size(0)
        real_images = real_images.to(device)

        real_labels = torch.ones(batch_size).to(device)
        fake_labels = torch.zeros(batch_size).to(device)

        # Train Discriminator
        netD.zero_grad()
        outputs = netD(real_images)
        d_loss_real = criterion(outputs, real_labels)

        z = torch.randn(batch_size, 100, 1, 1).to(device)
        fake_images = netG(z)
        outputs = netD(fake_images.detach())
        d_loss_fake = criterion(outputs, fake_labels)

        d_loss = d_loss_real + d_loss_fake
        d_loss.backward()
        optimizerD.step()

        # Train Generator
        netG.zero_grad()
        z = torch.randn(batch_size, 100, 1, 1).to(device)
        fake_images = netG(z)
        outputs = netD(fake_images)
        g_loss = criterion(outputs, real_labels)

        g_loss.backward()
        optimizerG.step()

    print(f"Epoch [{epoch+1}/20], D Loss: {d_loss.item():.4f}, G Loss: {g_loss.item():.4f}")
    g_losses.append((epoch + 1, g_loss.item()))
    d_losses.append((epoch + 1, d_loss.item()))

    # Every N epochs, calculate FID
    if (epoch + 1) % 1 == 0: 
        netG.eval()
        with torch.no_grad():
            # Generate fake images
            z = torch.randn(1000, 100, 1, 1).to(device)
            fake_images = netG(z)

            # Save fake images
            save_path = '/projectnb/ds340/projects/leilani_hannah_final_project/fid_images_augmented/fake_images_epoch_{:03d}.npy'.format(epoch+1)
            np.save(save_path, fake_images.cpu().numpy())

            result = subprocess.run([
                'python', 'fid_score.py',
                '--true', '/projectnb/ds340/projects/leilani_hannah_final_project/fid_images_augmented/real_images_fid.npy',
                '--fake', save_path,
                '--gpu', '0'
            ], capture_output=True, text=True)

            # Parse FID score from the output
            output = result.stdout
            print(output)

            for line in output.splitlines():
                if "FID" in line:
                    fid_value = float(line.split()[-1])
                    fid_scores.append((epoch+1, fid_value))
            if fid_scores:   
                clear_output(wait=True)  # Clear previous output
                epochs_plot, fids_plot = zip(*fid_scores)  # unzip
    
                plt.figure(figsize=(8,6))
                plt.plot(epochs_plot, fids_plot, marker='o')
                plt.title('FID Score vs Epoch')
                plt.xlabel('Epoch')
                plt.ylabel('FID Score')
                plt.grid(True)
                plt.show()

        netG.train()


Epoch [1/20], D Loss: 0.5116, G Loss: 1.2386
Namespace(true='/projectnb/ds340/projects/leilani_hannah_final_project/fid_images_augmented/real_images_fid.npy', fake=['/projectnb/ds340/projects/leilani_hannah_final_project/fid_images_augmented/fake_images_epoch_001.npy'], batch_size=50, dims=2048, gpu='0', model='inception')

Epoch [2/20], D Loss: 1.0105, G Loss: 1.0895
Namespace(true='/projectnb/ds340/projects/leilani_hannah_final_project/fid_images_augmented/real_images_fid.npy', fake=['/projectnb/ds340/projects/leilani_hannah_final_project/fid_images_augmented/fake_images_epoch_002.npy'], batch_size=50, dims=2048, gpu='0', model='inception')

Epoch [3/20], D Loss: 1.1562, G Loss: 0.7437
Namespace(true='/projectnb/ds340/projects/leilani_hannah_final_project/fid_images_augmented/real_images_fid.npy', fake=['/projectnb/ds340/projects/leilani_hannah_final_project/fid_images_augmented/fake_images_epoch_003.npy'], batch_size=50, dims=2048, gpu='0', model='inception')

Epoch [4/20], D Loss: 

In [6]:
import itertools

import torch.nn as nn
import torch

# Function to initialize weights
def weights_init(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
        # Applying a normal distribution with mean=0 and std=0.02 to weights
        nn.init.normal_(m.weight, 0.0, 0.02)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.BatchNorm2d):
        # Initialize batch norm weights
        nn.init.normal_(m.weight, 1.0, 0.02)
        nn.init.constant_(m.bias, 0)

# Then, you can use this function to initialize the weights of your networks.


# Parameter ranges to test
learning_rates = [0.0002, 0.0001]  # Example values
betas = [(0.5, 0.999), (0.3, 0.999)]  # Example values for beta1, beta2
epochs = 20  # Adjust according to your needs

# Define the training loop
g_losses = []
d_losses = []
fid_scores = [] 
results = []

os.makedirs('/projectnb/ds340/projects/leilani_hannah_final_project/parameter_tuning', exist_ok=True)

# Iterate over all combinations of learning rate and beta values
for lr, beta1_beta2 in itertools.product(learning_rates, betas):
    beta1, beta2 = beta1_beta2  # Unpack beta values
    
    # Initialize the networks and optimizers for each parameter combination
    netG = Generator().to(device)
    netD = Discriminator().to(device)
    optimizerG = torch.optim.Adam(netG.parameters(), lr=lr, betas=(beta1, beta2))
    optimizerD = torch.optim.Adam(netD.parameters(), lr=lr, betas=(beta1, beta2))

    # Re-initialize weights
    netG.apply(weights_init)
    netD.apply(weights_init)
    
    for epoch in range(epochs):
        for i, (real_images, _) in enumerate(dataloader):
            batch_size = real_images.size(0)
            real_images = real_images.to(device)

            real_labels = torch.ones(batch_size).to(device)
            fake_labels = torch.zeros(batch_size).to(device)

            # Train Discriminator
            netD.zero_grad()
            outputs = netD(real_images)
            d_loss_real = criterion(outputs, real_labels)

            z = torch.randn(batch_size, 100, 1, 1).to(device)
            fake_images = netG(z)
            outputs = netD(fake_images.detach())
            d_loss_fake = criterion(outputs, fake_labels)

            d_loss = d_loss_real + d_loss_fake
            d_loss.backward()
            optimizerD.step()

            # Train Generator
            netG.zero_grad()
            z = torch.randn(batch_size, 100, 1, 1).to(device)
            fake_images = netG(z)
            outputs = netD(fake_images)
            g_loss = criterion(outputs, real_labels)

            g_loss.backward()
            optimizerG.step()

        print(f"Epoch [{epoch+1}/{epochs}], D Loss: {d_loss.item():.4f}, G Loss: {g_loss.item():.4f}")
        g_losses.append((epoch + 1, g_loss.item()))
        d_losses.append((epoch + 1, d_loss.item()))

        # Save fake images for future FID calculation (if you want)
        if (epoch + 1) % 5 == 0:  # Every 5 epochs, save fake images for FID calculation
            netG.eval()
            with torch.no_grad():
                z = torch.randn(1000, 100, 1, 1).to(device)
                fake_images = netG(z)

                # Save fake images to the 'parameter_tuning' folder
                save_path = '/projectnb/ds340/projects/leilani_hannah_final_project/parameter_tuning/fake_images_epoch_{:03d}_lr_{:g}_beta1_{:g}.npy'.format(epoch+1, lr, beta1)
                np.save(save_path, fake_images.cpu().numpy())

            netG.train()

    # After training for the current set of parameters, you can manually calculate FID for specific epochs if you want
    # Append results for parameter combinations
    results.append((lr, beta1, g_losses, d_losses))
    
# You can now manually calculate FID for specific epochs after training and plot them.


Epoch [1/20], D Loss: 0.4208, G Loss: 1.1118
Epoch [2/20], D Loss: 0.5596, G Loss: 1.8297
Epoch [3/20], D Loss: 1.2694, G Loss: 2.8514
Epoch [4/20], D Loss: 1.1319, G Loss: 1.0446
Epoch [5/20], D Loss: 1.0717, G Loss: 1.0177
Epoch [6/20], D Loss: 1.0577, G Loss: 1.2817
Epoch [7/20], D Loss: 0.9714, G Loss: 0.8473
Epoch [8/20], D Loss: 0.9070, G Loss: 1.3033
Epoch [9/20], D Loss: 0.9271, G Loss: 1.0514
Epoch [10/20], D Loss: 1.1172, G Loss: 0.9994
Epoch [11/20], D Loss: 1.0465, G Loss: 0.7948
Epoch [12/20], D Loss: 1.1245, G Loss: 1.8056
Epoch [13/20], D Loss: 0.9888, G Loss: 1.0640
Epoch [14/20], D Loss: 0.8746, G Loss: 1.1976
Epoch [15/20], D Loss: 1.3910, G Loss: 1.9657
Epoch [16/20], D Loss: 0.9957, G Loss: 1.3943
Epoch [17/20], D Loss: 0.9537, G Loss: 1.1455
Epoch [18/20], D Loss: 0.8622, G Loss: 1.4306
Epoch [19/20], D Loss: 0.8739, G Loss: 1.6641
Epoch [20/20], D Loss: 0.7860, G Loss: 1.3203
Epoch [1/20], D Loss: 0.7698, G Loss: 0.7929
Epoch [2/20], D Loss: 0.7729, G Loss: 0.8856