In [2]:
import torch 
import torchvision 
import torch.nn as nn 
import torch.optim as optim 
import torchvision.transforms as transforms 
import torchvision.datasets as datasets 
from torch.utils.data import DataLoader 
from torch.utils.tensorboard import SummaryWriter 
from model_utils import Discriminator , Generator 

In [4]:
# hyperparameters 
lr = 0.0005 
batch_size = 64 
image_size = 64 
channels_img = 1 
channels_noise = 256
num_epochs = 10  

# for hoy many times the discriminator and generator is used 
features_d = 16 
features_g = 16 

my_transforms = transforms.Compose(
    [
        transforms.Resize(image_size), 
        transforms.ToTensor(), 
        transforms.Normalize((0.5,) , (0.5,))
    ]
)
my_transforms

Compose(
    Resize(size=64, interpolation=bilinear, max_size=None, antialias=None)
    ToTensor()
    Normalize(mean=(0.5,), std=(0.5,))
)

In [24]:
dataset = datasets.MNIST( 
    root="mnist/" , train=True , transform=my_transforms , download=False
) 
dataloader = DataLoader(dataset , batch_size=batch_size , shuffle=True) 

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [7]:
# Create discriminator and generator 
netD = Discriminator(channels_img , features_d).to(device) 
netG = Generator(channels_noise , channels_img , features_g).to(device)

In [13]:
print(netD)
print(netG)
print("====================")
print(netD.parameters)
print(netD.state_dict)
print(netD.to_empty)

Discriminator(
  (net): Sequential(
    (0): Conv2d(1, 16, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.2)
    (2): Conv2d(16, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): LeakyReLU(negative_slope=0.2)
    (5): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): LeakyReLU(negative_slope=0.2)
    (8): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): LeakyReLU(negative_slope=0.2)
    (11): Conv2d(128, 1, kernel_size=(4, 4), stride=(1, 1))
    (12): Sigmoid()
  )
)
Generator(
  (net): Sequential(
    (0): ConvTranspose2d(256, 256, kernel_size=(4, 4), stride=(1, 1))
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, 

In [14]:
# Setup Optimizer for G and D
optimizerD = optim.Adam(netD.parameters() , lr=lr , betas=(0.5 , 0.999)) 
optimizerG = optim.Adam(netG.parameters() , lr=lr , betas=(0.5 , 0.999)) 

## save the model

In [15]:
# Access the model and optimizer state_dict
# Print model's state_dict
print("Model's state_dict:")
for param_tensor in netD.state_dict():
    print(param_tensor, "\t", netD.state_dict()[param_tensor].size())

print()

# Print optimizer's state_dict
print("Optimizer's state_dict:")
for var_name in optimizerD.state_dict():
    print(var_name, "\t", optimizerD.state_dict()[var_name])

Model's state_dict:
net.0.weight 	 torch.Size([16, 1, 4, 4])
net.0.bias 	 torch.Size([16])
net.2.weight 	 torch.Size([32, 16, 4, 4])
net.2.bias 	 torch.Size([32])
net.3.weight 	 torch.Size([32])
net.3.bias 	 torch.Size([32])
net.3.running_mean 	 torch.Size([32])
net.3.running_var 	 torch.Size([32])
net.3.num_batches_tracked 	 torch.Size([])
net.5.weight 	 torch.Size([64, 32, 4, 4])
net.5.bias 	 torch.Size([64])
net.6.weight 	 torch.Size([64])
net.6.bias 	 torch.Size([64])
net.6.running_mean 	 torch.Size([64])
net.6.running_var 	 torch.Size([64])
net.6.num_batches_tracked 	 torch.Size([])
net.8.weight 	 torch.Size([128, 64, 4, 4])
net.8.bias 	 torch.Size([128])
net.9.weight 	 torch.Size([128])
net.9.bias 	 torch.Size([128])
net.9.running_mean 	 torch.Size([128])
net.9.running_var 	 torch.Size([128])
net.9.num_batches_tracked 	 torch.Size([])
net.11.weight 	 torch.Size([1, 128, 4, 4])
net.11.bias 	 torch.Size([1])

Optimizer's state_dict:
state 	 {}
param_groups 	 [{'lr': 0.0005, 'betas'

In [16]:
# Access the model and optimizer state_dict
# Print model's state_dict
print("Model's state_dict:")
for param_tensor in netG.state_dict():
    print(param_tensor, "\t", netG.state_dict()[param_tensor].size())

print()

# Print optimizer's state_dict
print("Optimizer's state_dict:")
for var_name in optimizerG.state_dict():
    print(var_name, "\t", optimizerG.state_dict()[var_name])

Model's state_dict:
net.0.weight 	 torch.Size([256, 256, 4, 4])
net.0.bias 	 torch.Size([256])
net.1.weight 	 torch.Size([256])
net.1.bias 	 torch.Size([256])
net.1.running_mean 	 torch.Size([256])
net.1.running_var 	 torch.Size([256])
net.1.num_batches_tracked 	 torch.Size([])
net.3.weight 	 torch.Size([256, 128, 4, 4])
net.3.bias 	 torch.Size([128])
net.4.weight 	 torch.Size([128])
net.4.bias 	 torch.Size([128])
net.4.running_mean 	 torch.Size([128])
net.4.running_var 	 torch.Size([128])
net.4.num_batches_tracked 	 torch.Size([])
net.6.weight 	 torch.Size([128, 64, 4, 4])
net.6.bias 	 torch.Size([64])
net.7.weight 	 torch.Size([64])
net.7.bias 	 torch.Size([64])
net.7.running_mean 	 torch.Size([64])
net.7.running_var 	 torch.Size([64])
net.7.num_batches_tracked 	 torch.Size([])
net.9.weight 	 torch.Size([64, 32, 4, 4])
net.9.bias 	 torch.Size([32])
net.10.weight 	 torch.Size([32])
net.10.bias 	 torch.Size([32])
net.10.running_mean 	 torch.Size([32])
net.10.running_var 	 torch.Size([3

In [17]:
PATH = "DCGANS.pth" 
torch.save({
    "model_state_dictD" : netD.state_dict(), 
    "optimizer_state_dictD" : optimizerD.state_dict(),
    "model_state_dictG" : netG.state_dict(), 
    "optimizer_state_dictG" : optimizerG.state_dict() 
} , PATH)


## load model

In [16]:
loaded_netD = Discriminator(channels_img , features_d).to(device) 
loaded_netG = Generator(channels_noise , channels_img , features_g).to(device)
loaded_optimizerD = optim.Adam(loaded_netD.parameters() , lr=lr , betas=(0.5 , 0.999)) 
loaded_optimizerG = optim.Adam(loaded_netG.parameters() , lr=lr , betas=(0.5 , 0.999)) 

In [17]:
PATH = "DCGANS.pth" 
checkpoint = torch.load(PATH) 
device = torch.device("cuda")
loaded_netD.load_state_dict(checkpoint["model_state_dictD"])
loaded_netG.load_state_dict(checkpoint["model_state_dictG"])
loaded_optimizerD.load_state_dict(checkpoint["optimizer_state_dictD"]) 
loaded_optimizerG.load_state_dict(checkpoint["optimizer_state_dictG"])

In [18]:
print(loaded_netD.eval())
print()
print(loaded_netG.eval())

Discriminator(
  (net): Sequential(
    (0): Conv2d(1, 16, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.2)
    (2): Conv2d(16, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): LeakyReLU(negative_slope=0.2)
    (5): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): LeakyReLU(negative_slope=0.2)
    (8): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): LeakyReLU(negative_slope=0.2)
    (11): Conv2d(128, 1, kernel_size=(4, 4), stride=(1, 1))
    (12): Sigmoid()
  )
)

Generator(
  (net): Sequential(
    (0): ConvTranspose2d(256, 256, kernel_size=(4, 4), stride=(1, 1))
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1,

In [19]:
print(loaded_optimizerD)
print()
print(loaded_optimizerG)

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.5, 0.999)
    eps: 1e-08
    lr: 0.0005
    maximize: False
    weight_decay: 0
)

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.5, 0.999)
    eps: 1e-08
    lr: 0.0005
    maximize: False
    weight_decay: 0
)


In [21]:
loaded_netD.train()
loaded_netG.train()

Generator(
  (net): Sequential(
    (0): ConvTranspose2d(256, 256, kernel_size=(4, 4), stride=(1, 1))
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (7): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): ConvTranspose2d(64, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU()
    (12): ConvTranspose2d(32, 1, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (13): Tanh()
  )
)

In [22]:
criterion = nn.BCELoss() 
real_label =1 
fake_label = 0

fixed_noise = torch.randn(64 , channels_noise , 1 , 1).to(device=device) 
writer_real = SummaryWriter(f"runs/GAN_MNIST/test_real") 
writer_fake = SummaryWriter(f"runs/GAN_MNIST/test_fake")  
step = 0

# training

In [30]:
for epoch in range(num_epochs): 
    for batch_idx , (data , targets) in enumerate(dataloader):
        data = data.to(device) 
        batch_size = data.shape[0]

        # train discriminator : max log(D(x)) + log(1 - D(G(z))) 
        loaded_netD.zero_grad() 
        label = (torch.ones(batch_size) * 0.9).to(device) 
        output = loaded_netD(data).reshape(-1) 
        lossD_real = criterion(output , label) 
        D_x = output.mean().item() 

        noise = torch.randn(batch_size , channels_noise , 1 ,1).to(device) 
        fake = loaded_netG(noise) 
        label = (torch.ones(batch_size) * 0.1).to(device)

        output = loaded_netD(fake.detach()).reshape(-1) 
        lossD_fake = criterion(output , label) 

        lossD = lossD_real + lossD_fake
        lossD.backward() 
        loaded_optimizerD.step() 

        # train generator : max log(D(G(z))) 
        loaded_netG.zero_grad() 
        label = torch.ones(batch_size).to(device) 
        output = loaded_netD(fake).reshape(-1) 
        lossG = criterion(output , label) 
        lossG.backward() 
        loaded_optimizerG.step()  

        # print losses ocassionally and print to tensorboard 
        if batch_idx % 100 == 0: 
            step += 1 
            print( 
                f"Epoch [{epoch}/{num_epochs}]\n Batch [{batch_idx}/{len(dataloader)}]\n Loss D : {lossD:.4f} , loss G : {lossG:.4f} D(x) : {D_x:.4f}" 
            )

            with torch.no_grad():
                fake = loaded_netG(fixed_noise) 
                img_grid_real = torchvision.utils.make_grid(data[:32] , normalize=True)
                img_grid_fake = torchvision.utils.make_grid(fake[:32] , normalize=True) 
                writer_real.add_image(
                    "MNIST Real Image" , img_grid_real , global_step=step
                )
                writer_fake.add_image(
                    "MNIST Fake Image" , img_grid_fake , global_step=step
                )


Epoch [0/10]
 Batch [0/938]
 Loss D : 1.6801 , loss G : 0.9684 D(x) : 0.8062
Epoch [0/10]
 Batch [100/938]
 Loss D : 0.8226 , loss G : 2.8311 D(x) : 0.6771
Epoch [0/10]
 Batch [200/938]
 Loss D : 1.0125 , loss G : 1.8210 D(x) : 0.7029
Epoch [0/10]
 Batch [300/938]
 Loss D : 1.1409 , loss G : 1.8153 D(x) : 0.8363
Epoch [0/10]
 Batch [400/938]
 Loss D : 0.9591 , loss G : 1.1022 D(x) : 0.6452
Epoch [0/10]
 Batch [500/938]
 Loss D : 1.0623 , loss G : 0.9418 D(x) : 0.5143
Epoch [0/10]
 Batch [600/938]
 Loss D : 1.0656 , loss G : 1.2515 D(x) : 0.7072
Epoch [0/10]
 Batch [700/938]
 Loss D : 1.0725 , loss G : 1.1241 D(x) : 0.5796
Epoch [0/10]
 Batch [800/938]
 Loss D : 1.4796 , loss G : 3.5061 D(x) : 0.9161
Epoch [0/10]
 Batch [900/938]
 Loss D : 0.9189 , loss G : 1.4611 D(x) : 0.7424
Epoch [1/10]
 Batch [0/938]
 Loss D : 0.9991 , loss G : 1.1838 D(x) : 0.6449
Epoch [1/10]
 Batch [100/938]
 Loss D : 0.8582 , loss G : 1.3547 D(x) : 0.6976
Epoch [1/10]
 Batch [200/938]
 Loss D : 0.8539 , loss G 

In [32]:
loaded_netD.state_dict

<bound method Module.state_dict of Discriminator(
  (net): Sequential(
    (0): Conv2d(1, 16, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.2)
    (2): Conv2d(16, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): LeakyReLU(negative_slope=0.2)
    (5): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): LeakyReLU(negative_slope=0.2)
    (8): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): LeakyReLU(negative_slope=0.2)
    (11): Conv2d(128, 1, kernel_size=(4, 4), stride=(1, 1))
    (12): Sigmoid()
  )
)>

### save the trained model

In [33]:
PATH = "DCGANS.pth"
torch.save({
    "model_state_dictD" : loaded_netD.state_dict(), 
    "optimizer_state_dictD" : loaded_optimizerD.state_dict(),
    "model_state_dictG" : loaded_netG.state_dict(), 
    "optimizer_state_dictG" : loaded_optimizerG.state_dict() 
} , PATH)

# load the saved model and resume the training again

In [34]:
checkpoint = torch.load(PATH) 
device = torch.device("cuda")
loaded_netD.load_state_dict(checkpoint["model_state_dictD"])
loaded_netG.load_state_dict(checkpoint["model_state_dictG"])
loaded_optimizerD.load_state_dict(checkpoint["optimizer_state_dictD"]) 
loaded_optimizerG.load_state_dict(checkpoint["optimizer_state_dictG"])

In [35]:
criterion = nn.BCELoss() 
real_label =1 
fake_label = 0

fixed_noise = torch.randn(64 , channels_noise , 1 , 1).to(device=device) 
writer_real = SummaryWriter(f"runs/GAN_MNIST/test_real") 
writer_fake = SummaryWriter(f"runs/GAN_MNIST/test_fake")  
step = 0

In [36]:
# hyperparameters 
lr = 0.0005  
image_size = 64 
channels_img = 1 
channels_noise = 256
new_epoch = 5 
batch_size = 64 
num_epochs = new_epoch 

for epoch in range(num_epochs): 
    for batch_idx , (data , targets) in enumerate(dataloader):
        data = data.to(device) 
        batch_size = data.shape[0]

        # train discriminator : max log(D(x)) + log(1 - D(G(z))) 
        loaded_netD.zero_grad() 
        label = (torch.ones(batch_size) * 0.9).to(device) 
        output = loaded_netD(data).reshape(-1) 
        lossD_real = criterion(output , label) 
        D_x = output.mean().item() 

        noise = torch.randn(batch_size , channels_noise , 1 ,1).to(device) 
        fake = loaded_netG(noise) 
        label = (torch.ones(batch_size) * 0.1).to(device)

        output = loaded_netD(fake.detach()).reshape(-1) 
        lossD_fake = criterion(output , label) 

        lossD = lossD_real + lossD_fake
        lossD.backward() 
        loaded_optimizerD.step() 

        # train generator : max log(D(G(z))) 
        loaded_netG.zero_grad() 
        label = torch.ones(batch_size).to(device) 
        output = loaded_netD(fake).reshape(-1) 
        lossG = criterion(output , label) 
        lossG.backward() 
        loaded_optimizerG.step()  

        # print losses ocassionally and print to tensorboard 
        if batch_idx % 100 == 0: 
            step += 1 
            print( 
                f"Epoch [{epoch}/{num_epochs}]\n Batch [{batch_idx}/{len(dataloader)}]\n Loss D : {lossD:.4f} , loss G : {lossG:.4f} D(x) : {D_x:.4f}" 
            )

            with torch.no_grad():
                fake = loaded_netG(fixed_noise) 
                img_grid_real = torchvision.utils.make_grid(data[:32] , normalize=True)
                img_grid_fake = torchvision.utils.make_grid(fake[:32] , normalize=True) 
                writer_real.add_image(
                    "MNIST Real Image" , img_grid_real , global_step=step
                )
                writer_fake.add_image(
                    "MNIST Fake Image" , img_grid_fake , global_step=step
                )


Epoch [0/5]
 Batch [0/938]
 Loss D : 0.6636 , loss G : 2.3946 D(x) : 0.9013
Epoch [0/5]
 Batch [100/938]
 Loss D : 0.6770 , loss G : 2.4668 D(x) : 0.9242
Epoch [0/5]
 Batch [200/938]
 Loss D : 0.6897 , loss G : 2.3000 D(x) : 0.8605
Epoch [0/5]
 Batch [300/938]
 Loss D : 0.6656 , loss G : 2.6390 D(x) : 0.8963
Epoch [0/5]
 Batch [400/938]
 Loss D : 0.6693 , loss G : 2.1864 D(x) : 0.8631
Epoch [0/5]
 Batch [500/938]
 Loss D : 0.6971 , loss G : 2.8882 D(x) : 0.9380
Epoch [0/5]
 Batch [600/938]
 Loss D : 0.6704 , loss G : 1.8668 D(x) : 0.8578
Epoch [0/5]
 Batch [700/938]
 Loss D : 0.9962 , loss G : 3.1742 D(x) : 0.8996
Epoch [0/5]
 Batch [800/938]
 Loss D : 0.8058 , loss G : 2.8210 D(x) : 0.9100
Epoch [0/5]
 Batch [900/938]
 Loss D : 0.7034 , loss G : 2.4620 D(x) : 0.8937
Epoch [1/5]
 Batch [0/938]
 Loss D : 0.6880 , loss G : 1.6849 D(x) : 0.8296
Epoch [1/5]
 Batch [100/938]
 Loss D : 0.6873 , loss G : 2.1649 D(x) : 0.8431
Epoch [1/5]
 Batch [200/938]
 Loss D : 2.0462 , loss G : 0.2729 D(x)

# Analysis 

*1st training*

- [ Epoch 0 ] : Loss D : 0.9189 , loss G : 1.4611 D(x) : 0.7424 
- [ Epoch 1 ] : Loss D : 0.8110 , loss G : 1.7509 D(x) : 0.6906
- [ Epoch 2 ] : Loss D : 0.7487 , loss G : 1.6760 D(x) : 0.8071
- [ Epoch 3 ] : Loss D : 0.7806 , loss G : 2.0930 D(x) : 0.7306
- [ Epoch 4 ] : Loss D : 0.6992 , loss G : 2.2726 D(x) : 0.8391
- [ Epoch 5 ] : Loss D : 0.7156 , loss G : 2.5960 D(x) : 0.9119
- [ Epoch 6 ] : Loss D : 0.7528 , loss G : 2.9606 D(x) : 0.9242
- [ Epoch 7 ] : Loss D : 0.6711 , loss G : 2.3881 D(x) : 0.9012
- [ Epoch 8 ] : Loss D : 0.7655 , loss G : 1.4169 D(x) : 0.7406
- [ Epoch 9 ] : Loss D : 0.6765 , loss G : 2.8193 D(x) : 0.8996  

*2nd training*

- [Epoch 1] : Loss D : 0.7034 , loss G : 2.4620 D(x) : 0.8937
- [Epoch 2] : Loss D : 0.6707 , loss G : 2.2625 D(x) : 0.8762
- [Epoch 3] : Loss D : 0.6790 , loss G : 2.3214 D(x) : 0.8544
- [Epoch 4] : Loss D : 0.7046 , loss G : 1.9998 D(x) : 0.8115
- [Epoch 5] : Loss D : 0.7857 , loss G : 1.6160 D(x) : 0.7198