In [1]:
import torch.nn as nn
import torch 

In [2]:
print(torch.__version__)

2.4.1+cu124


In [3]:
print(torch.cuda.is_available())

True


In [4]:
print(torch.version.cuda)

12.4


In [5]:
tensor_cpu = torch.randn(3, 3)

In [6]:
torch.cuda.empty_cache()
tensor_gpu = tensor_cpu.to('cuda')

In [7]:
print(tensor_gpu.device)

cuda:0


In [8]:
import torchvision
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.utils.data as dataloader
from torch.utils.tensorboard import SummaryWriter


In [9]:
class Discriminator(nn.Module):
    def __init__(self, channels_img, features_d):
        super(Discriminator, self).__init__()
        self.net = nn.Sequential(
            # is the convention for Conv in pytorch N, channels, height, width?
            nn.Conv2d(channels_img, features_d, kernel_size=4, stride=2, padding=1), # what does padding 1 correspond to?
            nn.LeakyReLU(0.2),
            
            nn.Conv2d(features_d, features_d * 2, kernel_size=4, stride=2, padding=1), # why features_d for filters? Why features_d * 2?
            nn.BatchNorm2d(features_d * 2), # because GANS are known for being notoriously unstable during training -- why are GANS known for this?
            nn.LeakyReLU(0.2),
            
            nn.Conv2d(features_d *2, features_d * 4, kernel_size=4, stride=2, padding=1), 
            nn.BatchNorm2d(features_d * 4), 
            nn.LeakyReLU(0.2),
            nn.Conv2d(features_d * 4, features_d * 8, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(features_d * 8),
            nn.LeakyReLU(0.2),
            
            # N x features_d * 8 x 4 x 4
            nn.Conv2d(features_d * 8, 1, kernel_size=4, stride=2, padding=0),
            
            # N x 1 x 1 x 1
            nn.Sigmoid()
        )
        
    
    def forward(self, x):
        return self.net(x)
    


In [10]:
class Generator(nn.Module):
    def __init__(self, channels_noise, channels_img, features_g):
        super(Generator, self).__init__()
        
        self.net = nn.Sequential(
            
            # N x channels_noise x 1 x 1
            nn.ConvTranspose2d(channels_noise, features_g * 16, kernel_size=4, stride=1, padding=0), 
            nn.BatchNorm2d(features_g * 16),
            nn.ReLU(),
            
            # N x features_g * 16 x 4 x 4
            nn.ConvTranspose2d(features_g * 16, features_g * 8, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(features_g * 8),
            nn.ReLU(),
            
            nn.ConvTranspose2d(features_g * 8, features_g* 4, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(features_g * 4),
            nn.ReLU(),
            
            nn.ConvTranspose2d(features_g * 4, features_g * 2, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(features_g * 2),
            nn.ReLU(),
            
            nn.ConvTranspose2d(features_g * 2, channels_img, kernel_size=4, stride=2, padding=1),
            
            # N x channels_img # 64 x 64
            nn.Tanh()
        )
        
    def forward(self, x):
        return self.net(x)

In [11]:
lr = 0.0002
batch_size = 64
image_size = 64 # 28 x 28 >>> 64x64
channels_img = 1
channels_noise = 256

features_d = 16 # was set at 64 in the paper but not needed for mnist might for celebrity faces though
features_g = 16 # was set at 64 in the paper but not needed for mnist might for celebrity faces though

In [12]:
my_transforms = transforms.Compose([ # what does the transforms do in pytorch??
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

In [13]:
dataset = datasets.MNIST(root='dataset/', train=True, transform=my_transforms, download=True) # seems to download the specified dataset to my directory
my_dataloader  = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True) # not sure what's going on thought I already imported this?


In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [15]:
# create discriminator and generator

netD = Discriminator(channels_img, features_d).to(device) #  Iguess every model has to be specified to a device? So I could run some on different gpus or my cpu? 
netG = Generator(channels_noise, channels_img, features_g).to(device)

In [16]:
# Setup Optimizer for G and D
optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(0.5, 0.999)) # why are we specifying? What is the default betas value? 0.9 and 0.999?
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(0.5, 0.999))

In [17]:

netD.train() # what is the difference being in training mode from otherwise in pytorch?

Discriminator(
  (net): Sequential(
    (0): Conv2d(1, 16, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.2)
    (2): Conv2d(16, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): LeakyReLU(negative_slope=0.2)
    (5): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): LeakyReLU(negative_slope=0.2)
    (8): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): LeakyReLU(negative_slope=0.2)
    (11): Conv2d(128, 1, kernel_size=(4, 4), stride=(2, 2))
    (12): Sigmoid()
  )
)

In [18]:
netG.train() # apparently the models should be in training mode by default but we're doing it explicitly

Generator(
  (net): Sequential(
    (0): ConvTranspose2d(256, 256, kernel_size=(4, 4), stride=(1, 1))
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (7): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): ConvTranspose2d(64, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU()
    (12): ConvTranspose2d(32, 1, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (13): Tanh()
  )
)

In [19]:
criterion = nn.BCELoss() # binary cross entropy -- should probably ask why this specific loss function?

In [20]:
real_label = 1
fake_label = 0

In [21]:
fixed_noise = torch.randn(64, channels_noise, 1, 1).to(device)

In [22]:
from torchvision.transforms import ToPILImage
import torchvision.utils as vutils

In [23]:
print("starting training...")
num_epochs = 50
img_idx = 0

# writer_real = SummaryWriter(log_dir='runs/GAN_MNIST/log_real')
# writer_fake = SummaryWriter(log_dir='runs/GAN_MNIST/log_fake')
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(my_dataloader):
        data = data.to(device)
        batch_size = data.shape[0]
        # apparently it's important that we train the Discriminator first
        # Train Discriminator : max log (D(x)) + log(1-D(G(z)))
        # We send in all real images first
        netD.zero_grad()
        label = (torch.ones(batch_size)*0.9).to(device) # apparently 0.9 helps? Would like to look at that again
        output = netD(data).reshape(-1)
        
        lossD_real = criterion(output, label)
        D_x = output.mean().item() # for evaluation purposes, could do something similar with the other models to get 
        
        # now we send in all fake images to the discriminator
        noise = torch.randn(batch_size, channels_noise, 1, 1).to(device)
        fake = netG(noise)
        label = (torch.ones(batch_size)*0.1).to(device) # would what to look at this again
        
        output = netD(fake.detach()).reshape(-1) # telling pytorch not to trace the gradients? Not exactly sure, does that mean we're not training the generator? 
        lossD_fake = criterion(output, label)
        
        lossD = lossD_real + lossD_fake
        lossD.backward() # not sure what these are doing -- look this up
        optimizerD.step() # need to ask about this
        
        # Train Generator: min log(1 - D(G(z)))
        netG.zero_grad()
        label = torch.ones(batch_size).to(device) # not multiplying by 0.9 here
        output = netD(fake).reshape(-1) # we actually want to train the generator now? So we don't do detach?
        lossG = criterion(output, label)
        lossG.backward()
        optimizerG.step()
        
        if batch_idx % 100 == 0:
            print(f"Epoch [{epoch}/{num_epochs}] Batch {batch_idx}/{len(my_dataloader)} \
             Loss D: {lossD:.4f}, Loss G: {lossG:.4f}, D(x): {D_x:.4f}")
            
            with torch.no_grad(): # what is happening here?
                fake = netG(fixed_noise)
                
                img_grid_real = torchvision.utils.make_grid(data[:32], normalize=True)
                img_grid_fake = torchvision.utils.make_grid(fake[:32], normalize=True)
                # writer_real.add_image('MNIST Real Images', img_grid_real)
                # writer_fake.add_image('MNIST Fake Images', img_grid_fake)
                
                #since Tensorboard site is still being allowed, I'll provide a locall method of viewing the pictures
                to_pil = ToPILImage()
                
                # img_real = to_pil(img_grid_real)
                # img_real.save(f'../images/experiment2/b/real/real_images_grid_{img_idx}.png')
                
                img_fake = to_pil(img_grid_fake)
                img_fake.save(f'../images/experiment2/fake/c/fake_images_grid_{img_idx}.png')
                img_idx += 1
        

starting training...
Epoch [0/50] Batch 0/938              Loss D: 1.2699, Loss G: 0.9837, D(x): 0.5941
Epoch [0/50] Batch 100/938              Loss D: 0.6907, Loss G: 3.7084, D(x): 0.8761
Epoch [0/50] Batch 200/938              Loss D: 0.6689, Loss G: 3.0151, D(x): 0.8928
Epoch [0/50] Batch 300/938              Loss D: 0.7162, Loss G: 2.2155, D(x): 0.8244
Epoch [0/50] Batch 400/938              Loss D: 0.7185, Loss G: 2.7692, D(x): 0.9333
Epoch [0/50] Batch 500/938              Loss D: 0.7202, Loss G: 2.6382, D(x): 0.8880
Epoch [0/50] Batch 600/938              Loss D: 0.7331, Loss G: 2.2013, D(x): 0.8094
Epoch [0/50] Batch 700/938              Loss D: 1.1902, Loss G: 4.9518, D(x): 0.9389
Epoch [0/50] Batch 800/938              Loss D: 0.7607, Loss G: 1.6347, D(x): 0.7367
Epoch [0/50] Batch 900/938              Loss D: 0.8507, Loss G: 0.9849, D(x): 0.6666
Epoch [1/50] Batch 0/938              Loss D: 0.9161, Loss G: 2.8618, D(x): 0.8890
Epoch [1/50] Batch 100/938              Loss D: 

In [24]:
from torchvision.models import inception_v3
import torch.nn.functional as F

# Load pre-trained Inception v3 model
inception_model = inception_v3(pretrained=True, transform_input=False).eval().to(device)



In [25]:
def calculate_inception_score(rgb_images, inception_model, splits=10):
    # rgb_images = images.repeat(1, 3, 1, 1) # not necessary since I'm doing that before
    
    rgb_images_resized = F.interpolate(rgb_images, size=(299, 299), mode='bilinear', align_corners=False)
    
    
    with torch.no_grad():
        preds = inception_model(rgb_images_resized).softmax(dim=1)
        
    split_scores = []
    for i in range(splits):
        part = preds[i * (len(preds) // splits): ((i + 1) * len(preds) // splits), :]
        p_y = part.mean(dim=0)
        split_scores.append(torch.exp((part * (part.log() - p_y.log())).sum(dim=1).mean()))
        
    return torch.mean(torch.tensor(split_scores)), torch.std(torch.tensor(split_scores))

In [26]:
import numpy as np

In [27]:
from scipy import linalg

In [28]:
num_images = 32

first_batch = next(iter(my_dataloader))

real_images = first_batch[0]
real_images = real_images[:num_images]
real_images = (real_images + 1) / 2
rgb_real_images = real_images.repeat(1, 3, 1, 1)
rgb_real_images = rgb_real_images.to(device)
print("rgb_real_images shape: ", rgb_real_images.shape)

with torch.no_grad():
    noise = torch.randn(num_images, channels_noise, 1, 1).to(device)
    fake_images = netG(noise).to(device)

    fake_images = (fake_images + 1) / 2
    rgb_fake_images = fake_images.repeat(1, 3, 1, 1)
    
print("rgb_fake_images shape, ", rgb_fake_images.shape)
    

rgb_real_images shape:  torch.Size([32, 3, 64, 64])
rgb_fake_images shape,  torch.Size([32, 3, 64, 64])


In [29]:
def calculate_activation_statistics(images, model, dims=2048, batch_size=128):
    model.eval()
    act = np.empty((len(images), dims))
    
    batch = images.cuda()
    
    pred = model(batch)[0]
    
    # if pred.size(2) != 1 or pred.size(3) != 1:
    #     # pred = adaptive_avg_pool2d(pred, output_size=(1, 1))
    #     print("triggered weird check I don't have a function for")
        
    act = pred.cpu().data.numpy().reshape(pred.size(0), -1)
    
    mu = np.mean(act, axis=0)
    sigma = np.cov(act, rowvar=False)
    return mu, sigma
    
    

In [30]:
def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    mu1 = np.atleast_1d(mu1)
    mu2 = np.atleast_1d(mu2)
    
    sigma1 = np.atleast_2d(sigma1)
    sigma2 = np.atleast_2d(sigma2)
    
    assert mu1.shape == mu2.shape, \
        'Training and test mean vectors have different legnths'
    assert sigma1.shape == sigma2.shape, \
        'Training and test covariances have different dimensions'
    
    diff = mu1 - mu2
    
    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    
    # does something similar where it checks to see if the covmean is valid and same as the positive semi definite
    # it adds an epsilon, though at a different point
    if not np.isinf(covmean).all():
        print("frechet distance failed; adding to diagonal of cov estimates")
        offset = np.eye(sigma1.shape[0]) * eps
        
        covmean, _ = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset), disp=False)
        
    if np.iscomplexobj(covmean):
        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
            m = np.max(np.abs(covmean.imag))
            raise ValueError('Imaginary component {}'.format(m))
        covmean = covmean.real
        
    tr_covmean = np.trace(covmean)
    fid = (diff.dot(diff)) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean
    return fid

In [31]:
def calculate_fid_score(real_images, fake_images, model):
    mu1, sigma1 = calculate_activation_statistics(real_images, model)
    mu2, sigma2 = calculate_activation_statistics(fake_images, model)
    
    fid_value = calculate_frechet_distance(mu1, sigma1, mu2, sigma2)
    return fid_value

In [32]:
rgb_real_images.shape

torch.Size([32, 3, 64, 64])

In [33]:
def resize_images(images):
    return F.interpolate(images, size=(299,  299), mode='bilinear', align_corners=False)

In [34]:
resized_rgb_real_images = resize_images(rgb_real_images)
resized_rgb_fake_images = resize_images(rgb_fake_images)

print("resized rgb_real_images shape: ", resized_rgb_real_images.shape)
print("resized rgb_fake_images shape: ", resized_rgb_fake_images.shape)

resized rgb_real_images shape:  torch.Size([32, 3, 299, 299])
resized rgb_fake_images shape:  torch.Size([32, 3, 299, 299])


In [35]:
fid_score = calculate_fid_score(resized_rgb_real_images, resized_rgb_fake_images, inception_model)

frechet distance failed; adding to diagonal of cov estimates


In [36]:
fid_score

0.01880484571625196

In [37]:
inception_score, is_std = calculate_inception_score(rgb_fake_images, inception_model)

In [38]:
inception_score

tensor(1.5159)

In [39]:
np_is_score = inception_score.numpy()


with open('../observations/dcgan_experiment_2_observations.txt', "a") as file:
    file.write("Base_dcgan measurements\n")
    file.write(f"inception score {np.array2string(np_is_score)}\n")
    file.write(f"fid score {np.array2string(fid_score)}\n")
