In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import models
from torch.utils.data import DataLoader, Subset
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
import random
import os

In [2]:
import math
import argparse

In [3]:
torch.manual_seed(42)
torch.cuda.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [4]:
transform_test = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4865, 0.4409), (0.2673, 0.2564, 0.2762)),
])

In [5]:
test_dataset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform_test)

Files already downloaded and verified


In [12]:
teacher_weights_path = "best_resnet34_cifar100.pth"

In [13]:
test_indices = list(range(len(test_dataset)))
test_split_10 = test_indices[:len(test_indices)//10]
test_split_20 = test_indices[:len(test_indices)//5]

test_dataset_10 = Subset(test_dataset, test_split_10)
test_dataset_20 = Subset(test_dataset, test_split_20)

In [14]:
test_loader_10 = DataLoader(test_dataset_10, batch_size=100, shuffle=False, num_workers=2)
test_loader_20 = DataLoader(test_dataset_20, batch_size=100, shuffle=False, num_workers=2)
test_loader_full = DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=2)

In [15]:
def load_teacher_model(weights_path):
    teacher = models.resnet34(pretrained=False)
    teacher.fc = nn.Linear(teacher.fc.in_features, 100)
    teacher.load_state_dict(torch.load(weights_path, map_location=device))
    teacher = teacher.to(device)
    teacher.eval()  # Set to evaluation mode
    return teacher

In [16]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters())

In [17]:
teacher_model = load_teacher_model(teacher_weights_path)
teacher_params = count_parameters(teacher_model)
print(f"Teacher model total parameters: {teacher_params}")

Teacher model total parameters: 21335972


In [18]:
print(teacher_model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [19]:
class Flatten(nn.Module):
    def __init__(self):
        super(Flatten, self).__init__()

    def forward(self, x):
        return x.view(x.shape[0], -1)

class GeneratorA(nn.Module):
    def __init__(self, nz=100, ngf=64, nc=1, img_size=32):
        super(GeneratorA, self).__init__()

        self.init_size = img_size//4
        self.l1 = nn.Sequential(nn.Linear(nz, ngf*2*self.init_size**2))

        self.conv_blocks0 = nn.Sequential(
            nn.BatchNorm2d(ngf*2),
        )
        self.conv_blocks1 = nn.Sequential(
            nn.Conv2d(ngf*2, ngf*2, 3, stride=1, padding=1),
            nn.BatchNorm2d(ngf*2),
            nn.LeakyReLU(0.2, inplace=True),
        )
        self.conv_blocks2 = nn.Sequential(
            nn.Conv2d(ngf*2, ngf, 3, stride=1, padding=1),
            nn.BatchNorm2d(ngf),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ngf, nc, 3, stride=1, padding=1),
            nn.Tanh(),
            nn.BatchNorm2d(nc, affine=False)
        )

    def forward(self, z):
        out = self.l1(z.view(z.shape[0],-1))
        out = out.view(out.shape[0], -1, self.init_size, self.init_size)
        img = self.conv_blocks0(out)
        img = nn.functional.interpolate(img,scale_factor=2)
        img = self.conv_blocks1(img)
        img = nn.functional.interpolate(img,scale_factor=2)
        img = self.conv_blocks2(img)
        return img

In [20]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion * planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion * planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=100):  # ✅ Changed to 100 for CIFAR-100
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)

        self.linear = nn.Linear(512 * block.expansion, num_classes)

        # Initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x, out_feature=False):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.adaptive_avg_pool2d(out, 1)  # ✅ Replaced fixed pooling with adaptive
        feature = out.view(out.size(0), -1)
        out = self.linear(feature)
        if not out_feature:
            return out
        else:
            return out, feature


def ResNet18_8x(num_classes=100):
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes)


In [21]:
def pack_images(images, col=None, channel_last=False):
    # N, C, H, W
    if isinstance(images, (list, tuple) ):
        images = np.stack(images, 0)
    if channel_last:
        images = images.transpose(0,3,1,2) # make it channel first
    assert len(images.shape)==4
    assert isinstance(images, np.ndarray)

    N,C,H,W = images.shape
    if col is None:
        col = int(math.ceil(math.sqrt(N)))
    row = int(math.ceil(N / col))
    pack = np.zeros( (C, H*row, W*col), dtype=images.dtype )
    for idx, img in enumerate(images):
        h = (idx//col) * H
        w = (idx% col) * W
        pack[:, h:h+H, w:w+W] = img
    return pack


def denormalize(tensor, mean, std):
    _mean = [ -m / s for m, s in zip(mean, std) ]
    _std = [ 1/s for s in std ]

    _mean = torch.as_tensor(_mean, dtype=tensor.dtype, device=tensor.device)
    _std = torch.as_tensor(_std, dtype=tensor.dtype, device=tensor.device)
    tensor.sub_(_mean[None, :, None, None]).div_(_std[None, :, None, None])
    return tensor

    #torchvision.transforms.functional.normalize
    #return normalize( tensor, _mean, _std ) #torchvision.transforms.functional.normalize(tensor, _mean, _std)

In [22]:
student = ResNet18_8x(num_classes=100)

In [23]:
student = student.to(device)

In [38]:
class Args:
    def __init__(self):
        # Training Hyperparameters
        self.lr_G = 1e-3  # Reference default LR for Generator
        self.lr_S = 0.1   # Reference default LR for Student
        self.epochs = 100 # Reference default epochs
        self.epoch_itrs = 50 # Reference iterations per epoch
        self.batch_size = 256 # Reference default batch size
        self.nz = 256     # Reference default latent vector size
        self.weight_decay = 5e-4 # Reference default weight decay for SGD
        self.momentum = 0.9    # Reference default momentum for SGD

        # Model and Data
        self.num_classes = 100 # For CIFAR-100
        self.img_size = 32   # Assuming generator outputs 32x32 for CIFAR

        # Scheduler
        self.scheduler = True # Use scheduler like in reference
        self.milestones = [100, 200] # Example milestones for MultiStepLR (adjust as needed)
        self.lr_gamma = 0.1 # Multiplicative factor for scheduler

        # Saving and Logging
        self.save_dir = './checkpoints_dfad_cifar100' # Directory to save results
        self.log_interval = 10 # How often to print logs within an epoch (ref default)
        self.vis_interval = 100 # How often to save generated images

In [39]:
args = Args()
os.makedirs(args.save_dir, exist_ok=True)

In [40]:
generator = GeneratorA(nz=args.nz, nc=3, img_size=32).to(device)
print(f"Generator parameters: {count_parameters(generator)}")

Generator parameters: 2329091


In [41]:
optimizer_S = optim.SGD( student.parameters(), lr=args.lr_S, weight_decay=args.weight_decay, momentum=0.9 )
optimizer_G = optim.Adam( generator.parameters(), lr=args.lr_G )

In [42]:
scheduler_G = optim.lr_scheduler.StepLR(optimizer_G, step_size=30, gamma=0.5)
scheduler_S = optim.lr_scheduler.StepLR(optimizer_S, step_size=30, gamma=0.5)

In [43]:
criterion_cls = nn.CrossEntropyLoss().to(device)
criterion_mse = nn.MSELoss().to(device)

In [44]:
import torch.nn.functional as F

# Testing function
def test(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    accuracy = 100. * correct / total
    return accuracy

In [45]:
import torch.nn.functional as F

In [46]:
cifar100_mean = (0.5071, 0.4865, 0.4409)
cifar100_std = (0.2673, 0.2564, 0.2762)

In [47]:
criterion_l1 = nn.L1Loss().to(device)

In [48]:
def train(epoch):
    teacher_model.eval() # Teacher is fixed
    student.train()
    generator.train()

    total_loss_s = 0.0
    total_loss_g = 0.0

    # Use tqdm for the iterations within an epoch
    pbar = tqdm(range(args.epoch_itrs), desc=f"Epoch {epoch}/{args.epochs}")
    for i in pbar:
        # --- Train Student (k=5 times) ---
        current_loss_s = 0.0
        for _ in range(5): # Reference trains student 5 times per generator step
            # Generate latent vectors
            z = torch.randn(args.batch_size, args.nz).to(device) # Assuming generator takes (Batch, nz)
            # z = torch.randn(args.batch_size, args.nz, 1, 1).to(device) # Use this if generator takes (Batch, nz, 1, 1)

            optimizer_S.zero_grad()

            # Generate fake images and detach from generator's graph
            with torch.no_grad():
                fake_images = generator(z).detach()
                # Optional: Resize fake_images if teacher expects different size
                # fake_images_resized = F.interpolate(fake_images, size=(224, 224), mode='bilinear', align_corners=False)
                # t_logits = teacher(fake_images_resized).detach()
                t_logits = teacher_model(fake_images).detach() # Get teacher logits (detached)

            # Get student logits
            s_logits = student(fake_images)

            # Calculate L1 loss between student and teacher logits
            loss_S = criterion_l1(s_logits, t_logits)
            loss_S.backward()
            optimizer_S.step()
            current_loss_s += loss_S.item()

        avg_loss_s_batch = current_loss_s / 5 # Average student loss over the 5 steps
        total_loss_s += avg_loss_s_batch

        # --- Train Generator ---
        # Generate new latent vectors
        z = torch.randn(args.batch_size, args.nz).to(device)
        # z = torch.randn(args.batch_size, args.nz, 1, 1).to(device) # If generator takes (Batch, nz, 1, 1)

        optimizer_G.zero_grad()

        # Generate fake images (DO NOT detach for generator training)
        fake_images = generator(z)
        # Optional: Resize if teacher expects different size
        # fake_images_resized = F.interpolate(fake_images, size=(224, 224), mode='bilinear', align_corners=False)

        # Get teacher and student logits
        with torch.no_grad(): # Teacher is fixed, no need for gradients
             # t_logits = teacher(fake_images_resized)
             t_logits = teacher_model(fake_images)
        s_logits = student(fake_images) # Student grads needed for generator update

        # Calculate Generator loss: Negative L1 distance (maximize difference from teacher)
        # The reference uses -L1 loss. Maximizing -L1 is equivalent to minimizing L1 in this adversarial setup.
        # Let's stick to the reference: -F.l1_loss(s_logit, t_logit)
        loss_G = -criterion_l1(s_logits, t_logits)
        loss_G.backward()
        optimizer_G.step()
        total_loss_g += loss_G.item()

        # --- Logging ---
        if i % args.log_interval == 0:
            pbar.set_postfix({
                "Loss_G": loss_G.item(),
                "Loss_S (avg)": avg_loss_s_batch,
                "LR_S": optimizer_S.param_groups[0]['lr'],
                "LR_G": optimizer_G.param_groups[0]['lr']
            })

        # --- Visualize Images Periodically ---
                # --- Visualize Images Periodically ---
        if i % args.vis_interval == 0:
            with torch.no_grad():
                generator.eval() # Set generator to eval for visualization
                vis_z = torch.randn(16, args.nz).to(device)
                # vis_z = torch.randn(16, args.nz, 1, 1).to(device) # If needed
                fake_images_sample = generator(vis_z)
                # !!! PROBLEM: denormalize function provided earlier might be incorrect or modify in place !!!
                # Let's redefine a safer denormalize just for visualization
                def denormalize_for_vis(tensor): # Assumes input is raw [-1, 1] from Tanh
                    return ((tensor + 1) / 2).clamp(0, 1) # Convert to [0, 1]

                fake_images_display = denormalize_for_vis(fake_images_sample)

                # Convert to numpy BEFORE pack_images
                images_np = fake_images_display.detach().cpu().numpy() # Detach, move to CPU, convert to NumPy

                grid = pack_images(images_np) # Pass the numpy array

                plt.figure(figsize=(8, 8)) # Smaller figure size
                plt.imshow(np.transpose(grid, (1, 2, 0))) # Transpose C, H, W -> H, W, C
                plt.title(f"Epoch {epoch}, Iter {i}")
                plt.axis('off')
                plt.savefig(f"{args.save_dir}/images_epoch_{epoch}_iter_{i}.png")
                plt.close()
                generator.train() # Set generator back to train mode

    avg_loss_g_epoch = total_loss_g / args.epoch_itrs
    avg_loss_s_epoch = total_loss_s / args.epoch_itrs
    print(f"Epoch {epoch} Avg Losses -> G: {avg_loss_g_epoch:.4f}, S: {avg_loss_s_epoch:.4f}")
    return avg_loss_g_epoch, avg_loss_s_epoch

In [49]:
def evaluate():
    test_acc_10 = test(student, test_loader_10)
    test_acc_20 = test(student, test_loader_20)
    test_acc_full = test(student, test_loader_full)

    print(f"Test accuracy (10%): {test_acc_10:.2f}%")
    print(f"Test accuracy (20%): {test_acc_20:.2f}%")
    print(f"Test accuracy (full): {test_acc_full:.2f}%")

    return test_acc_10, test_acc_20, test_acc_full

In [50]:
best_acc = 0
losses_g = []
losses_s = []
accuracies = []

In [51]:
print("Starting DFAD training...")
for epoch in range(1, args.epochs + 1):
    loss_g, loss_s = train(epoch)
    losses_g.append(loss_g)
    losses_s.append(loss_s)

    # Step the schedulers after each epoch
    if args.scheduler:
        scheduler_S.step()
        scheduler_G.step()

    # Evaluate the student model periodically (e.g., every epoch or few epochs)
    current_acc = test(student, test_loader_full)
    accuracies.append(current_acc)

    # Save checkpoint if better than the best accuracy found so far
    if current_acc > best_acc:
        print(f"*** New best accuracy: {current_acc:.2f}% (Epoch {epoch}) ***")
        best_acc = current_acc
        torch.save({
            'epoch': epoch,
            'generator_state_dict': generator.state_dict(),
            'student_state_dict': student.state_dict(),
            'optimizer_G_state_dict': optimizer_G.state_dict(),
            'optimizer_S_state_dict': optimizer_S.state_dict(),
            'acc': best_acc,
            'args': args.__dict__ # Save args for reproducibility
        }, f"{args.save_dir}/best_model_epoch_{epoch}_acc_{best_acc:.2f}.pth")
        # Also save a generic 'best_model.pth' for easy loading
        torch.save({
            'generator_state_dict': generator.state_dict(),
            'student_state_dict': student.state_dict(),
        }, f"{args.save_dir}/best_model.pth")


    # --- Plotting Progress ---
    # Plot losses and accuracy (optional: plot every few epochs to save time)
    if epoch % 5 == 0 or epoch == args.epochs: # Plot every 5 epochs and at the end
        plt.figure(figsize=(18, 5))

        plt.subplot(1, 3, 1)
        plt.plot(losses_g, label='Generator Loss (-L1)')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.title('Generator Loss over Epochs')
        plt.legend()
        plt.grid(True)

        plt.subplot(1, 3, 2)
        plt.plot(losses_s, label='Student Loss (L1)')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.title('Student Loss over Epochs')
        plt.legend()
        plt.grid(True)

        plt.subplot(1, 3, 3)
        plt.plot(accuracies, label='Student Test Accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy (%)')
        plt.title('Student Accuracy over Epochs')
        plt.legend()
        plt.grid(True)

        plt.tight_layout()
        plt.savefig(f"{args.save_dir}/training_progress.png")
        plt.close()

print(f"Training complete! Best Test Accuracy: {best_acc:.2f}%")


Starting DFAD training...


Epoch 1/100: 100%|██████████| 50/50 [00:16<00:00,  3.07it/s, Loss_G=-0.223, Loss_S (avg)=0.223, LR_S=0.1, LR_G=0.001]

Epoch 1 Avg Losses -> G: -0.2448, S: 0.2527





*** New best accuracy: 1.44% (Epoch 1) ***


Epoch 2/100: 100%|██████████| 50/50 [00:16<00:00,  2.97it/s, Loss_G=-0.213, Loss_S (avg)=0.209, LR_S=0.1, LR_G=0.001]

Epoch 2 Avg Losses -> G: -0.2154, S: 0.2156





*** New best accuracy: 1.57% (Epoch 2) ***


Epoch 3/100: 100%|██████████| 50/50 [00:16<00:00,  2.96it/s, Loss_G=-0.201, Loss_S (avg)=0.196, LR_S=0.1, LR_G=0.001]

Epoch 3 Avg Losses -> G: -0.1997, S: 0.1988



Epoch 4/100: 100%|██████████| 50/50 [00:16<00:00,  2.96it/s, Loss_G=-0.187, Loss_S (avg)=0.184, LR_S=0.1, LR_G=0.001]

Epoch 4 Avg Losses -> G: -0.1909, S: 0.1906





*** New best accuracy: 1.65% (Epoch 4) ***


Epoch 5/100: 100%|██████████| 50/50 [00:16<00:00,  2.96it/s, Loss_G=-0.176, Loss_S (avg)=0.174, LR_S=0.1, LR_G=0.001]

Epoch 5 Avg Losses -> G: -0.1768, S: 0.1762



Epoch 6/100: 100%|██████████| 50/50 [00:16<00:00,  2.97it/s, Loss_G=-0.169, Loss_S (avg)=0.171, LR_S=0.1, LR_G=0.001]

Epoch 6 Avg Losses -> G: -0.1710, S: 0.1713



Epoch 7/100: 100%|██████████| 50/50 [00:16<00:00,  2.96it/s, Loss_G=-0.163, Loss_S (avg)=0.167, LR_S=0.1, LR_G=0.001]

Epoch 7 Avg Losses -> G: -0.1675, S: 0.1680



Epoch 8/100: 100%|██████████| 50/50 [00:16<00:00,  2.97it/s, Loss_G=-0.166, Loss_S (avg)=0.164, LR_S=0.1, LR_G=0.001]

Epoch 8 Avg Losses -> G: -0.1644, S: 0.1645



Epoch 9/100: 100%|██████████| 50/50 [00:16<00:00,  2.97it/s, Loss_G=-0.161, Loss_S (avg)=0.162, LR_S=0.1, LR_G=0.001]

Epoch 9 Avg Losses -> G: -0.1594, S: 0.1594



Epoch 10/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.158, Loss_S (avg)=0.155, LR_S=0.1, LR_G=0.001]

Epoch 10 Avg Losses -> G: -0.1566, S: 0.1566



Epoch 11/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.159, Loss_S (avg)=0.155, LR_S=0.1, LR_G=0.001]

Epoch 11 Avg Losses -> G: -0.1583, S: 0.1587



Epoch 12/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.151, Loss_S (avg)=0.151, LR_S=0.1, LR_G=0.001]

Epoch 12 Avg Losses -> G: -0.1541, S: 0.1539





*** New best accuracy: 1.79% (Epoch 12) ***


Epoch 13/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.158, Loss_S (avg)=0.159, LR_S=0.1, LR_G=0.001]

Epoch 13 Avg Losses -> G: -0.1561, S: 0.1566



Epoch 14/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.151, Loss_S (avg)=0.148, LR_S=0.1, LR_G=0.001]

Epoch 14 Avg Losses -> G: -0.1515, S: 0.1510



Epoch 15/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.153, Loss_S (avg)=0.151, LR_S=0.1, LR_G=0.001]

Epoch 15 Avg Losses -> G: -0.1524, S: 0.1528





*** New best accuracy: 1.98% (Epoch 15) ***


Epoch 16/100: 100%|██████████| 50/50 [00:16<00:00,  3.03it/s, Loss_G=-0.138, Loss_S (avg)=0.142, LR_S=0.1, LR_G=0.001]

Epoch 16 Avg Losses -> G: -0.1445, S: 0.1452



Epoch 17/100: 100%|██████████| 50/50 [00:16<00:00,  3.01it/s, Loss_G=-0.141, Loss_S (avg)=0.138, LR_S=0.1, LR_G=0.001]

Epoch 17 Avg Losses -> G: -0.1396, S: 0.1397



Epoch 18/100: 100%|██████████| 50/50 [00:16<00:00,  2.98it/s, Loss_G=-0.139, Loss_S (avg)=0.137, LR_S=0.1, LR_G=0.001]

Epoch 18 Avg Losses -> G: -0.1389, S: 0.1385



Epoch 19/100: 100%|██████████| 50/50 [00:16<00:00,  3.01it/s, Loss_G=-0.136, Loss_S (avg)=0.138, LR_S=0.1, LR_G=0.001]

Epoch 19 Avg Losses -> G: -0.1369, S: 0.1374



Epoch 20/100: 100%|██████████| 50/50 [00:16<00:00,  3.01it/s, Loss_G=-0.138, Loss_S (avg)=0.134, LR_S=0.1, LR_G=0.001]

Epoch 20 Avg Losses -> G: -0.1363, S: 0.1365



Epoch 21/100: 100%|██████████| 50/50 [00:16<00:00,  3.01it/s, Loss_G=-0.133, Loss_S (avg)=0.135, LR_S=0.1, LR_G=0.001]

Epoch 21 Avg Losses -> G: -0.1349, S: 0.1348



Epoch 22/100: 100%|██████████| 50/50 [00:16<00:00,  3.01it/s, Loss_G=-0.134, Loss_S (avg)=0.134, LR_S=0.1, LR_G=0.001]

Epoch 22 Avg Losses -> G: -0.1353, S: 0.1348



Epoch 23/100: 100%|██████████| 50/50 [00:16<00:00,  3.01it/s, Loss_G=-0.135, Loss_S (avg)=0.137, LR_S=0.1, LR_G=0.001]

Epoch 23 Avg Losses -> G: -0.1370, S: 0.1366



Epoch 24/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.138, Loss_S (avg)=0.132, LR_S=0.1, LR_G=0.001]

Epoch 24 Avg Losses -> G: -0.1350, S: 0.1340



Epoch 25/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.13, Loss_S (avg)=0.131, LR_S=0.1, LR_G=0.001] 

Epoch 25 Avg Losses -> G: -0.1331, S: 0.1330



Epoch 26/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.129, Loss_S (avg)=0.133, LR_S=0.1, LR_G=0.001]

Epoch 26 Avg Losses -> G: -0.1326, S: 0.1322



Epoch 27/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.134, Loss_S (avg)=0.132, LR_S=0.1, LR_G=0.001]

Epoch 27 Avg Losses -> G: -0.1311, S: 0.1313



Epoch 28/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.132, Loss_S (avg)=0.13, LR_S=0.1, LR_G=0.001] 

Epoch 28 Avg Losses -> G: -0.1309, S: 0.1314



Epoch 29/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.129, Loss_S (avg)=0.128, LR_S=0.1, LR_G=0.001]

Epoch 29 Avg Losses -> G: -0.1304, S: 0.1309



Epoch 30/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.131, Loss_S (avg)=0.129, LR_S=0.1, LR_G=0.001]

Epoch 30 Avg Losses -> G: -0.1311, S: 0.1311



Epoch 31/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.119, Loss_S (avg)=0.121, LR_S=0.05, LR_G=0.0005]

Epoch 31 Avg Losses -> G: -0.1217, S: 0.1216



Epoch 32/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.121, Loss_S (avg)=0.119, LR_S=0.05, LR_G=0.0005]

Epoch 32 Avg Losses -> G: -0.1207, S: 0.1203



Epoch 33/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.12, Loss_S (avg)=0.12, LR_S=0.05, LR_G=0.0005]  

Epoch 33 Avg Losses -> G: -0.1206, S: 0.1204



Epoch 34/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.122, Loss_S (avg)=0.121, LR_S=0.05, LR_G=0.0005]

Epoch 34 Avg Losses -> G: -0.1198, S: 0.1202



Epoch 35/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.123, Loss_S (avg)=0.119, LR_S=0.05, LR_G=0.0005]

Epoch 35 Avg Losses -> G: -0.1205, S: 0.1206



Epoch 36/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.124, Loss_S (avg)=0.123, LR_S=0.05, LR_G=0.0005]

Epoch 36 Avg Losses -> G: -0.1202, S: 0.1207



Epoch 37/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.122, Loss_S (avg)=0.121, LR_S=0.05, LR_G=0.0005]

Epoch 37 Avg Losses -> G: -0.1207, S: 0.1211



Epoch 38/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.124, Loss_S (avg)=0.121, LR_S=0.05, LR_G=0.0005]

Epoch 38 Avg Losses -> G: -0.1218, S: 0.1210



Epoch 39/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.119, Loss_S (avg)=0.12, LR_S=0.05, LR_G=0.0005] 

Epoch 39 Avg Losses -> G: -0.1206, S: 0.1203



Epoch 40/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.12, Loss_S (avg)=0.122, LR_S=0.05, LR_G=0.0005] 

Epoch 40 Avg Losses -> G: -0.1211, S: 0.1206



Epoch 41/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.125, Loss_S (avg)=0.12, LR_S=0.05, LR_G=0.0005]

Epoch 41 Avg Losses -> G: -0.1205, S: 0.1206



Epoch 42/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.125, Loss_S (avg)=0.122, LR_S=0.05, LR_G=0.0005]

Epoch 42 Avg Losses -> G: -0.1207, S: 0.1214



Epoch 43/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.118, Loss_S (avg)=0.12, LR_S=0.05, LR_G=0.0005] 

Epoch 43 Avg Losses -> G: -0.1210, S: 0.1210



Epoch 44/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.122, Loss_S (avg)=0.123, LR_S=0.05, LR_G=0.0005]

Epoch 44 Avg Losses -> G: -0.1204, S: 0.1208



Epoch 45/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.117, Loss_S (avg)=0.119, LR_S=0.05, LR_G=0.0005]

Epoch 45 Avg Losses -> G: -0.1205, S: 0.1202



Epoch 46/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.117, Loss_S (avg)=0.117, LR_S=0.05, LR_G=0.0005]

Epoch 46 Avg Losses -> G: -0.1194, S: 0.1196



Epoch 47/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.117, Loss_S (avg)=0.121, LR_S=0.05, LR_G=0.0005]

Epoch 47 Avg Losses -> G: -0.1204, S: 0.1205



Epoch 48/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.119, Loss_S (avg)=0.12, LR_S=0.05, LR_G=0.0005] 

Epoch 48 Avg Losses -> G: -0.1207, S: 0.1207



Epoch 49/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.121, Loss_S (avg)=0.12, LR_S=0.05, LR_G=0.0005] 

Epoch 49 Avg Losses -> G: -0.1209, S: 0.1207



Epoch 50/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.119, Loss_S (avg)=0.12, LR_S=0.05, LR_G=0.0005] 

Epoch 50 Avg Losses -> G: -0.1205, S: 0.1205



Epoch 51/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.119, Loss_S (avg)=0.12, LR_S=0.05, LR_G=0.0005] 

Epoch 51 Avg Losses -> G: -0.1205, S: 0.1204



Epoch 52/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.121, Loss_S (avg)=0.119, LR_S=0.05, LR_G=0.0005]

Epoch 52 Avg Losses -> G: -0.1208, S: 0.1211



Epoch 53/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.118, Loss_S (avg)=0.12, LR_S=0.05, LR_G=0.0005] 

Epoch 53 Avg Losses -> G: -0.1214, S: 0.1212



Epoch 54/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.117, Loss_S (avg)=0.121, LR_S=0.05, LR_G=0.0005]

Epoch 54 Avg Losses -> G: -0.1204, S: 0.1205



Epoch 55/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.12, Loss_S (avg)=0.123, LR_S=0.05, LR_G=0.0005] 

Epoch 55 Avg Losses -> G: -0.1209, S: 0.1208



Epoch 56/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.121, Loss_S (avg)=0.121, LR_S=0.05, LR_G=0.0005]

Epoch 56 Avg Losses -> G: -0.1213, S: 0.1209



Epoch 57/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.119, Loss_S (avg)=0.121, LR_S=0.05, LR_G=0.0005]

Epoch 57 Avg Losses -> G: -0.1205, S: 0.1210



Epoch 58/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.119, Loss_S (avg)=0.121, LR_S=0.05, LR_G=0.0005]

Epoch 58 Avg Losses -> G: -0.1210, S: 0.1210



Epoch 59/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.118, Loss_S (avg)=0.12, LR_S=0.05, LR_G=0.0005] 

Epoch 59 Avg Losses -> G: -0.1209, S: 0.1212



Epoch 60/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.117, Loss_S (avg)=0.122, LR_S=0.05, LR_G=0.0005]

Epoch 60 Avg Losses -> G: -0.1222, S: 0.1217



Epoch 61/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.122, Loss_S (avg)=0.114, LR_S=0.025, LR_G=0.00025]

Epoch 61 Avg Losses -> G: -0.1146, S: 0.1147



Epoch 62/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.111, Loss_S (avg)=0.111, LR_S=0.025, LR_G=0.00025]

Epoch 62 Avg Losses -> G: -0.1136, S: 0.1138



Epoch 63/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.117, Loss_S (avg)=0.113, LR_S=0.025, LR_G=0.00025]

Epoch 63 Avg Losses -> G: -0.1137, S: 0.1136



Epoch 64/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.114, Loss_S (avg)=0.114, LR_S=0.025, LR_G=0.00025]

Epoch 64 Avg Losses -> G: -0.1131, S: 0.1134



Epoch 65/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.114, Loss_S (avg)=0.113, LR_S=0.025, LR_G=0.00025]

Epoch 65 Avg Losses -> G: -0.1139, S: 0.1137



Epoch 66/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.122, Loss_S (avg)=0.114, LR_S=0.025, LR_G=0.00025]

Epoch 66 Avg Losses -> G: -0.1132, S: 0.1137



Epoch 67/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.114, Loss_S (avg)=0.117, LR_S=0.025, LR_G=0.00025]

Epoch 67 Avg Losses -> G: -0.1140, S: 0.1134



Epoch 68/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.112, Loss_S (avg)=0.113, LR_S=0.025, LR_G=0.00025]

Epoch 68 Avg Losses -> G: -0.1141, S: 0.1136



Epoch 69/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.114, Loss_S (avg)=0.114, LR_S=0.025, LR_G=0.00025]

Epoch 69 Avg Losses -> G: -0.1137, S: 0.1136



Epoch 70/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.113, Loss_S (avg)=0.113, LR_S=0.025, LR_G=0.00025]

Epoch 70 Avg Losses -> G: -0.1127, S: 0.1130



Epoch 71/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.113, Loss_S (avg)=0.112, LR_S=0.025, LR_G=0.00025]

Epoch 71 Avg Losses -> G: -0.1130, S: 0.1127



Epoch 72/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.115, Loss_S (avg)=0.113, LR_S=0.025, LR_G=0.00025]

Epoch 72 Avg Losses -> G: -0.1132, S: 0.1131



Epoch 73/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.113, Loss_S (avg)=0.114, LR_S=0.025, LR_G=0.00025]

Epoch 73 Avg Losses -> G: -0.1136, S: 0.1129



Epoch 74/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.115, Loss_S (avg)=0.112, LR_S=0.025, LR_G=0.00025]

Epoch 74 Avg Losses -> G: -0.1133, S: 0.1131



Epoch 75/100: 100%|██████████| 50/50 [00:16<00:00,  2.98it/s, Loss_G=-0.115, Loss_S (avg)=0.116, LR_S=0.025, LR_G=0.00025]

Epoch 75 Avg Losses -> G: -0.1128, S: 0.1134



Epoch 76/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.111, Loss_S (avg)=0.114, LR_S=0.025, LR_G=0.00025]

Epoch 76 Avg Losses -> G: -0.1136, S: 0.1136



Epoch 77/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.116, Loss_S (avg)=0.113, LR_S=0.025, LR_G=0.00025]

Epoch 77 Avg Losses -> G: -0.1138, S: 0.1137



Epoch 78/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.118, Loss_S (avg)=0.116, LR_S=0.025, LR_G=0.00025]

Epoch 78 Avg Losses -> G: -0.1135, S: 0.1133



Epoch 79/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.114, Loss_S (avg)=0.114, LR_S=0.025, LR_G=0.00025]

Epoch 79 Avg Losses -> G: -0.1130, S: 0.1128



Epoch 80/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.117, Loss_S (avg)=0.116, LR_S=0.025, LR_G=0.00025]

Epoch 80 Avg Losses -> G: -0.1130, S: 0.1131



Epoch 81/100: 100%|██████████| 50/50 [00:16<00:00,  3.01it/s, Loss_G=-0.112, Loss_S (avg)=0.115, LR_S=0.025, LR_G=0.00025]

Epoch 81 Avg Losses -> G: -0.1132, S: 0.1130



Epoch 82/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.114, Loss_S (avg)=0.114, LR_S=0.025, LR_G=0.00025]

Epoch 82 Avg Losses -> G: -0.1130, S: 0.1129



Epoch 83/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.118, Loss_S (avg)=0.115, LR_S=0.025, LR_G=0.00025]

Epoch 83 Avg Losses -> G: -0.1130, S: 0.1131



Epoch 84/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.114, Loss_S (avg)=0.112, LR_S=0.025, LR_G=0.00025]

Epoch 84 Avg Losses -> G: -0.1135, S: 0.1129



Epoch 85/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.115, Loss_S (avg)=0.114, LR_S=0.025, LR_G=0.00025]

Epoch 85 Avg Losses -> G: -0.1133, S: 0.1134



Epoch 86/100: 100%|██████████| 50/50 [00:16<00:00,  3.01it/s, Loss_G=-0.111, Loss_S (avg)=0.11, LR_S=0.025, LR_G=0.00025] 

Epoch 86 Avg Losses -> G: -0.1130, S: 0.1130



Epoch 87/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.115, Loss_S (avg)=0.115, LR_S=0.025, LR_G=0.00025]

Epoch 87 Avg Losses -> G: -0.1137, S: 0.1131



Epoch 88/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.109, Loss_S (avg)=0.114, LR_S=0.025, LR_G=0.00025]

Epoch 88 Avg Losses -> G: -0.1135, S: 0.1138



Epoch 89/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.112, Loss_S (avg)=0.115, LR_S=0.025, LR_G=0.00025]

Epoch 89 Avg Losses -> G: -0.1137, S: 0.1139



Epoch 90/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.115, Loss_S (avg)=0.112, LR_S=0.025, LR_G=0.00025]

Epoch 90 Avg Losses -> G: -0.1132, S: 0.1136



Epoch 91/100: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, Loss_G=-0.107, Loss_S (avg)=0.106, LR_S=0.0125, LR_G=0.000125]

Epoch 91 Avg Losses -> G: -0.1077, S: 0.1080



Epoch 92/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.108, Loss_S (avg)=0.108, LR_S=0.0125, LR_G=0.000125]

Epoch 92 Avg Losses -> G: -0.1065, S: 0.1068



Epoch 93/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.108, Loss_S (avg)=0.107, LR_S=0.0125, LR_G=0.000125]

Epoch 93 Avg Losses -> G: -0.1068, S: 0.1063



Epoch 94/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.101, Loss_S (avg)=0.105, LR_S=0.0125, LR_G=0.000125]

Epoch 94 Avg Losses -> G: -0.1067, S: 0.1064



Epoch 95/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.105, Loss_S (avg)=0.105, LR_S=0.0125, LR_G=0.000125]

Epoch 95 Avg Losses -> G: -0.1062, S: 0.1062



Epoch 96/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.106, Loss_S (avg)=0.105, LR_S=0.0125, LR_G=0.000125]

Epoch 96 Avg Losses -> G: -0.1057, S: 0.1061



Epoch 97/100: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, Loss_G=-0.104, Loss_S (avg)=0.106, LR_S=0.0125, LR_G=0.000125]

Epoch 97 Avg Losses -> G: -0.1062, S: 0.1066



Epoch 98/100: 100%|██████████| 50/50 [00:16<00:00,  2.98it/s, Loss_G=-0.107, Loss_S (avg)=0.106, LR_S=0.0125, LR_G=0.000125] 

Epoch 98 Avg Losses -> G: -0.1056, S: 0.1067



Epoch 99/100: 100%|██████████| 50/50 [00:16<00:00,  2.98it/s, Loss_G=-0.106, Loss_S (avg)=0.107, LR_S=0.0125, LR_G=0.000125]

Epoch 99 Avg Losses -> G: -0.1065, S: 0.1068



Epoch 100/100: 100%|██████████| 50/50 [00:16<00:00,  2.98it/s, Loss_G=-0.113, Loss_S (avg)=0.105, LR_S=0.0125, LR_G=0.000125]

Epoch 100 Avg Losses -> G: -0.1071, S: 0.1072





Training complete! Best Test Accuracy: 1.98%
