<a href="https://colab.research.google.com/github/martinpius/PYTORCH/blob/main/WGAN_With_GRADIENT_PENALTY_(GP)_Pytorch_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [59]:
from google.colab import drive
drive.mount("/content/drive", force_remount = True)
try:
  COLAB = True
  import torch
  print(f">>>>You are on Google CoLaB with torch version {torch.__version__}")
except Exception as e:
  print(f">>>>{type(e)}: {e}\n>>>>please correct {type(e)} and reload")
  COLAB = False
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")
def time_fmt(t: float = 213.981)->float:
  h = int(t / (60 * 60))
  m = int(t % (60 * 60) / 60)
  s = int(t % 60)
  return f"{h} hrs: {m:>02} min: {s:>05.2f} sec"
print(f">>>>time formating\tplease wait.....\n>>>>time elapsed\t{time_fmt()}")

Mounted at /content/drive
>>>>You are on Google CoLaB with torch version 1.8.1+cu101
>>>>time formating	please wait.....
>>>>time elapsed	0 hrs: 03 min: 33.00 sec


In [60]:
#In this notebook we are going to implement WGAN network with gradient penalty (GP) to 
#improve the Wasserstein Loss. The architecture is purely WGAN with additional features
#and Adam optimizer with zero momentum.

In [61]:
import torch
import torch.optim as optim
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
import numpy as np
import math, sys,time,random, torchvision

In [62]:
#Setup the seed for reproducability and GPU to deterministic to avoid errors during training:
seed = 1234
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True

In [63]:
#The architecture of this model is purelly WGAN where the discriminator is a CNN with Convolutional layers
#and the generator is using de-convolution layers to reconstruct fake images/data.

In [64]:
class Discriminator(nn.Module):
  def __init__(self, img_channels, d_features):
    super(Discriminator, self).__init__()
    self.discriminator = nn.Sequential(
        nn.Conv2d(img_channels, d_features, kernel_size = 4, stride = 2, padding = 1),
        nn.LeakyReLU(0.2),
        self.__dblock__(d_features, 2*d_features, 4, 2, 1),
        self.__dblock__(2*d_features, 4*d_features, 4, 2, 1),
        self.__dblock__(4*d_features, 8*d_features, 4, 2, 1),
        nn.Conv2d(8*d_features, 1,kernel_size = 4, stride = 2, padding = 0))
  def __dblock__(self, in_channels, out_channels, kernel_size, stride, padding):
    return nn.Sequential(
        nn.Conv2d(in_channels,
                  out_channels,
                  kernel_size,
                  stride,
                  padding,
                  bias = False),
        nn.InstanceNorm2d(out_channels, affine = True),
        nn.LeakyReLU(0.2))
  
  def forward(self, input_tensor):
    return self.discriminator(input_tensor)

In [65]:
class Generator(nn.Module):
  def __init__(self, z_dim, img_channels, g_features):
    super(Generator, self).__init__()
    self.generator = nn.Sequential(
        self.__gblock__(z_dim, g_features * 16, 4, 2, 0),
        self.__gblock__(g_features*16, g_features*8, 4, 2, 1),
        self.__gblock__(g_features*8, g_features*4, 4, 2, 1),
        self.__gblock__(g_features*4, g_features*2, 4, 2, 1),
        nn.ConvTranspose2d(2*g_features, img_channels, kernel_size = 4, stride = 2, padding = 1),
        nn.Tanh())
    
  def __gblock__(self, in_channels, out_channels, kernel_size, stride, padding):
    return nn.Sequential(
        nn.ConvTranspose2d(in_channels,
                           out_channels,
                           kernel_size,
                           stride,
                           padding,
                           bias = False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU())
  
  def forward(self, input_tensor):
    return self.generator(input_tensor)

In [66]:
#Testing the discriminator and the generator networks to see if they produce the intended results.

In [67]:
def __test__():
  z_dim = 100
  batch_size = 128
  img_channels = 3
  W, H = 64, 64
  lamda_gp = 10
  noise_img = torch.randn(batch_size, z_dim, 1,1)
  rand_img = torch.randn(batch_size, img_channels, W,H)
  disc = Discriminator(img_channels, 8)
  gen = Generator(z_dim, img_channels, 8)
  disc_out = disc(rand_img)
  gen_out = gen(noise_img)
  return gen_out.shape, disc_out.shape


In [68]:
__test__()

(torch.Size([128, 3, 64, 64]), torch.Size([128, 1, 1, 1]))

In [69]:
#Initialize the parameters of the model to random normal:
def __par__(model):
  for m in model.modules():
    if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.BatchNorm2d)):
      nn.init.normal_(m.weight.data, mean = 0.00, std = 0.02)

In [70]:
#Instantiate the model classes and initialize the parameters
z_dim = 100
img_size = 64
lambda_gp = 10
batch_size = 64
g_features = 64
d_features = 64
img_channels = 1
learning_rate = 1e-4
discriminator = Discriminator(img_channels, d_features).to(device = device)
generator = Generator(z_dim, img_channels, g_features).to(device = device)
fixed_noise = torch.randn(batch_size, z_dim, 1,1).to(device = device) #to generate fake images after training the generator
EPOCHS = 10
disc_iter = 5
__par__(discriminator)
__par__(generator)


In [71]:
#Now we define the gradient penalty like as follow
def __gp__(discriminator, real_img, fake_img, device = device):
  '''this method/function will performs interpolation of the two images (real + fake)
  and then linear combination of the Wasserstein Loss will be computed to replace 
  the art of gradient cliping that affect training by lower the slopes.'''
  batch_size, img_channels, W, H = real_img.shape
  e = torch.rand((batch_size, 1, 1, 1)).repeat(1, img_channels, W, H).to(device = device) #generate the epsilon(rand # ~U[0,1])
  ipltn_images = e*real_img + (1 - e)* fake_img #This interpolate images to create a mixed kind of data (both real and fake)
  #Computing the discriminant scores using the new mixed data
  disc_scores = discriminator(ipltn_images)
  #computing the gradient using autograd:
  grads = torch.autograd.grad(
      inputs = ipltn_images,
      outputs = disc_scores,
      grad_outputs = torch.ones_like(disc_scores),
      create_graph = True, retain_graph = True)[0]
  #Reshaping the gradient tensor and apply L2-norm as suggested in the WGAN with Gradient penalty paper
  grads = grads.view(grads.shape[0], -1) #grabing the slopes and flattening to 1d
  grad_norm = grads.norm(2, dim = 1) #compute norm-2
  gp = torch.mean((grad_norm - 1)**2) #computing the gradient penalty value (g-p)
  return gp


In [72]:
#Get the optimizers (ADAM) for both network with no momentum and the tensoboard writers

In [73]:
disc_optimizer = optim.Adam(params = discriminator.parameters(), lr = learning_rate, betas = (0.00, 0.9))
gen_optimizer = optim.Adam(params = generator.parameters(), lr = learning_rate, betas = (0.00, 0.9))
real_writer = SummaryWriter(f"logs/real_images")
fake_writer = SummaryWriter(f"loga=s/fake_images")

In [74]:
#Loading and preprocessing the mnist data from torchvision using the transform method
transforms = transforms.Compose([
                                 transforms.ToTensor(),
                                 transforms.Resize(img_size),
                                 transforms.Normalize(
                                [0.5 for _ in range(img_channels)], [0.5 for _ in range(img_channels)])])
dfm = datasets.MNIST(root = "mnist_data/", transform = transforms, download = True)
loader = DataLoader(dataset = dfm, shuffle = True, batch_size = batch_size)
x_loader, y_loader = next(iter(loader))
print(f">>>>x_loader_shape: {x_loader.shape}\ty_loader_shape: {y_loader.shape}")

>>>>x_loader_shape: torch.Size([64, 1, 64, 64])	y_loader_shape: torch.Size([64])


In [None]:
#The training loop:
global_tic = time.time()
step = 0
for epoch in range(EPOCHS):
  tic = time.time()
  print(f"\n>>>>training starts for epoch: {epoch + 1}\tplease wait while the model is training.......\n>>>>training on progress: KEEP YOUR SCREEN ACTIVE.......")
  for idx, (data,_) in enumerate(tqdm(loader)):
    data = data.to(device = device)
    batch_size = data.shape[0]
    noise = torch.randn(batch_size, z_dim, 1, 1).to(device = device)
    fake = generator(noise)
    
    #training the discriminator we maximize the loss [log(D(x)) + log(1-D(G(z)))]:
    for _ in range(disc_iter):
      noise = torch.randn(batch_size, z_dim, 1, 1).to(device = device)
      fake = generator(noise)
      disc_out_real = discriminator(data).reshape(-1)
      disc_out_fake = discriminator(fake).reshape(-1)
      gp = __gp__(discriminator, data, fake, device)
      disc_loss = (-(torch.mean(disc_out_real) - torch.mean(disc_out_fake)) + lambda_gp*gp) #Wasserstein Loss with gradient penalty
      discriminator.zero_grad()#initialize the gradients for the discriminator model to 0
      disc_loss.backward(retain_graph = True) #backward pass for the discriminator
      disc_optimizer.step()#gradient descent using adam optimizer with no mementum

    #training the generator: we maximize [log(D(G(z)))] instead of minimizing log(1 - D(G(z)))
    out_gen = discriminator(fake)
    gen_loss = -(torch.mean(out_gen)) #Normal Wasserstein Loss with no gp
    generator.zero_grad() #initialize the slopes to zeros
    gen_loss.backward() #backward pass for the generator
    gen_optimizer.step()# generator's gradient descent with adam optimizer with zero momrntum
    toc = time.time()

    if idx % 100 == 0:
      print(f"\n>>>> time at the end of epoch: {epoch + 1} for batch {idx} is: {time_fmt(toc - tic)}")
      print(f"\n>>>> generator loss: {gen_loss:.4f} | generator PPL: {math.exp(gen_loss):7.4f}")
      print(f">>>> discriminator loss: {disc_loss:.4f} | discriminator PPL: {math.exp(disc_loss):7.4f}")

    with torch.no_grad():
      fake_img = generator(fixed_noise)
      real_img_grid = torchvision.utils.make_grid(data[:128], normalize = True)
      fake_img_grid = torchvision.utils.make_grid(fake_img[:128], normalize = True)
      real_writer.add_image("real_image",real_img_grid, global_step = step)
      fake_writer.add_image("fake_image", fake_img_grid, global_step = step)
    step+=1
global_toc = time.time()
print(f"\n>>>>total training time for 5 epochs: {time_fmt(global_toc - global_tic)}")












  0%|          | 0/938 [00:00<?, ?it/s][A[A[A[A[A[A


>>>>training starts for epoch: 1	please wait while the model is training.......
>>>>training on progress: KEEP YOUR SCREEN ACTIVE.......

>>>> time at the end of epoch: 1 for batch 0 is: 0 hrs: 00 min: 01.00 sec

>>>> generator loss: 0.5765 | generator PPL:  1.7799
>>>> discriminator loss: 64.6102 | discriminator PPL: 11477735245519738681165873152.0000








  0%|          | 1/938 [00:01<26:02,  1.67s/it][A[A[A[A[A[A





  0%|          | 2/938 [00:03<26:13,  1.68s/it][A[A[A[A[A[A





  0%|          | 3/938 [00:05<26:19,  1.69s/it][A[A[A[A[A[A





  0%|          | 4/938 [00:06<26:22,  1.69s/it][A[A[A[A[A[A





  1%|          | 5/938 [00:08<26:22,  1.70s/it][A[A[A[A[A[A





  1%|          | 6/938 [00:10<26:31,  1.71s/it][A[A[A[A[A[A





  1%|          | 7/938 [00:11<26:33,  1.71s/it][A[A[A[A[A[A





  1%|          | 8/938 [00:13<26:40,  1.72s/it][A[A[A[A[A[A





  1%|          | 9/938 [00:15<26:43,  1.73s/it][A[A[A[A[A[A





  1%|          | 10/938 [00:17<26:47,  1.73s/it][A[A[A[A[A[A





  1%|          | 11/938 [00:18<26:52,  1.74s/it][A[A[A[A[A[A





  1%|▏         | 12/938 [00:20<26:56,  1.75s/it][A[A[A[A[A[A





  1%|▏         | 13/938 [00:22<26:55,  1.75s/it][A[A[A[A[A[A





  1%|▏         | 14/938 [00:24<26:53,  1.75s/it][A[A[A[A[A[A



>>>> time at the end of epoch: 1 for batch 100 is: 0 hrs: 02 min: 51.00 sec

>>>> generator loss: 107.9240 | generator PPL: 74264739470520096126691301342876539379240665088.0000
>>>> discriminator loss: -122.5349 | discriminator PPL:  0.0000








 11%|█         | 101/938 [02:51<23:13,  1.67s/it][A[A[A[A[A[A





 11%|█         | 102/938 [02:53<23:09,  1.66s/it][A[A[A[A[A[A





 11%|█         | 103/938 [02:55<23:08,  1.66s/it][A[A[A[A[A[A





 11%|█         | 104/938 [02:56<23:03,  1.66s/it][A[A[A[A[A[A





 11%|█         | 105/938 [02:58<23:03,  1.66s/it][A[A[A[A[A[A





 11%|█▏        | 106/938 [02:59<22:59,  1.66s/it][A[A[A[A[A[A





 11%|█▏        | 107/938 [03:01<22:57,  1.66s/it][A[A[A[A[A[A





 12%|█▏        | 108/938 [03:03<22:56,  1.66s/it][A[A[A[A[A[A





 12%|█▏        | 109/938 [03:04<22:52,  1.66s/it][A[A[A[A[A[A





 12%|█▏        | 110/938 [03:06<22:53,  1.66s/it][A[A[A[A[A[A





 12%|█▏        | 111/938 [03:08<22:51,  1.66s/it][A[A[A[A[A[A





 12%|█▏        | 112/938 [03:09<22:51,  1.66s/it][A[A[A[A[A[A





 12%|█▏        | 113/938 [03:11<22:50,  1.66s/it][A[A[A[A[A[A





 12%|█▏        | 114/938 [03:13<22:47,  1.66s


>>>> time at the end of epoch: 1 for batch 200 is: 0 hrs: 05 min: 36.00 sec

>>>> generator loss: 87.8391 | generator PPL: 140615993059151623870221023074383298560.0000
>>>> discriminator loss: -85.7965 | discriminator PPL:  0.0000








 21%|██▏       | 201/938 [05:37<20:19,  1.65s/it][A[A[A[A[A[A





 22%|██▏       | 202/938 [05:38<20:17,  1.65s/it][A[A[A[A[A[A





 22%|██▏       | 203/938 [05:40<20:17,  1.66s/it][A[A[A[A[A[A





 22%|██▏       | 204/938 [05:42<20:17,  1.66s/it][A[A[A[A[A[A





 22%|██▏       | 205/938 [05:43<20:15,  1.66s/it][A[A[A[A[A[A





 22%|██▏       | 206/938 [05:45<20:11,  1.65s/it][A[A[A[A[A[A





 22%|██▏       | 207/938 [05:46<20:10,  1.66s/it][A[A[A[A[A[A





 22%|██▏       | 208/938 [05:48<20:08,  1.66s/it][A[A[A[A[A[A





 22%|██▏       | 209/938 [05:50<20:08,  1.66s/it][A[A[A[A[A[A





 22%|██▏       | 210/938 [05:51<20:04,  1.65s/it][A[A[A[A[A[A





 22%|██▏       | 211/938 [05:53<20:03,  1.66s/it][A[A[A[A[A[A





 23%|██▎       | 212/938 [05:55<19:59,  1.65s/it][A[A[A[A[A[A





 23%|██▎       | 213/938 [05:56<19:58,  1.65s/it][A[A[A[A[A[A





 23%|██▎       | 214/938 [05:58<19:58,  1.65s


>>>> time at the end of epoch: 1 for batch 300 is: 0 hrs: 08 min: 21.00 sec

>>>> generator loss: 80.7059 | generator PPL: 112238329049515770183194761305784320.0000
>>>> discriminator loss: -54.9656 | discriminator PPL:  0.0000








 32%|███▏      | 301/938 [08:22<17:29,  1.65s/it][A[A[A[A[A[A





 32%|███▏      | 302/938 [08:23<17:28,  1.65s/it][A[A[A[A[A[A





 32%|███▏      | 303/938 [08:25<17:27,  1.65s/it][A[A[A[A[A[A





 32%|███▏      | 304/938 [08:27<17:26,  1.65s/it][A[A[A[A[A[A





 33%|███▎      | 305/938 [08:28<17:24,  1.65s/it][A[A[A[A[A[A





 33%|███▎      | 306/938 [08:30<17:23,  1.65s/it][A[A[A[A[A[A





 33%|███▎      | 307/938 [08:31<17:21,  1.65s/it][A[A[A[A[A[A





 33%|███▎      | 308/938 [08:33<17:21,  1.65s/it][A[A[A[A[A[A





 33%|███▎      | 309/938 [08:35<17:18,  1.65s/it][A[A[A[A[A[A





 33%|███▎      | 310/938 [08:36<17:14,  1.65s/it][A[A[A[A[A[A





 33%|███▎      | 311/938 [08:38<17:16,  1.65s/it][A[A[A[A[A[A





 33%|███▎      | 312/938 [08:40<17:12,  1.65s/it][A[A[A[A[A[A





 33%|███▎      | 313/938 [08:41<17:12,  1.65s/it][A[A[A[A[A[A





 33%|███▎      | 314/938 [08:43<17:07,  1.65s


>>>> time at the end of epoch: 1 for batch 400 is: 0 hrs: 11 min: 06.00 sec

>>>> generator loss: 74.4790 | generator PPL: 221719477828811644511531953029120.0000
>>>> discriminator loss: -39.3195 | discriminator PPL:  0.0000








 43%|████▎     | 401/938 [11:06<14:42,  1.64s/it][A[A[A[A[A[A





 43%|████▎     | 402/938 [11:08<14:38,  1.64s/it][A[A[A[A[A[A





 43%|████▎     | 403/938 [11:09<14:37,  1.64s/it][A[A[A[A[A[A





 43%|████▎     | 404/938 [11:11<14:36,  1.64s/it][A[A[A[A[A[A





 43%|████▎     | 405/938 [11:13<14:35,  1.64s/it][A[A[A[A[A[A





 43%|████▎     | 406/938 [11:14<14:33,  1.64s/it][A[A[A[A[A[A





 43%|████▎     | 407/938 [11:16<14:33,  1.64s/it][A[A[A[A[A[A





 43%|████▎     | 408/938 [11:18<14:31,  1.64s/it][A[A[A[A[A[A





 44%|████▎     | 409/938 [11:19<14:30,  1.65s/it][A[A[A[A[A[A





 44%|████▎     | 410/938 [11:21<14:27,  1.64s/it][A[A[A[A[A[A





 44%|████▍     | 411/938 [11:22<14:24,  1.64s/it][A[A[A[A[A[A





 44%|████▍     | 412/938 [11:24<14:21,  1.64s/it][A[A[A[A[A[A





 44%|████▍     | 413/938 [11:26<14:19,  1.64s/it][A[A[A[A[A[A





 44%|████▍     | 414/938 [11:27<14:17,  1.64s


>>>> time at the end of epoch: 1 for batch 500 is: 0 hrs: 13 min: 49.00 sec

>>>> generator loss: 75.4561 | generator PPL: 589053585426057619222018259943424.0000
>>>> discriminator loss: -34.0384 | discriminator PPL:  0.0000








 53%|█████▎    | 501/938 [13:50<11:51,  1.63s/it][A[A[A[A[A[A





 54%|█████▎    | 502/938 [13:51<11:49,  1.63s/it][A[A[A[A[A[A





 54%|█████▎    | 503/938 [13:53<11:47,  1.63s/it][A[A[A[A[A[A





 54%|█████▎    | 504/938 [13:54<11:46,  1.63s/it][A[A[A[A[A[A





 54%|█████▍    | 505/938 [13:56<11:44,  1.63s/it][A[A[A[A[A[A





 54%|█████▍    | 506/938 [13:58<11:42,  1.63s/it][A[A[A[A[A[A





 54%|█████▍    | 507/938 [13:59<11:41,  1.63s/it][A[A[A[A[A[A





 54%|█████▍    | 508/938 [14:01<11:40,  1.63s/it][A[A[A[A[A[A





 54%|█████▍    | 509/938 [14:03<11:37,  1.63s/it][A[A[A[A[A[A





 54%|█████▍    | 510/938 [14:04<11:35,  1.62s/it][A[A[A[A[A[A





 54%|█████▍    | 511/938 [14:06<11:33,  1.63s/it][A[A[A[A[A[A





 55%|█████▍    | 512/938 [14:07<11:31,  1.62s/it][A[A[A[A[A[A





 55%|█████▍    | 513/938 [14:09<11:30,  1.62s/it][A[A[A[A[A[A





 55%|█████▍    | 514/938 [14:11<11:27,  1.62s


>>>> time at the end of epoch: 1 for batch 600 is: 0 hrs: 16 min: 32.00 sec

>>>> generator loss: 70.5172 | generator PPL: 4219128569743243930182453559296.0000
>>>> discriminator loss: -28.7694 | discriminator PPL:  0.0000








 64%|██████▍   | 601/938 [16:32<09:05,  1.62s/it][A[A[A[A[A[A





 64%|██████▍   | 602/938 [16:34<09:04,  1.62s/it][A[A[A[A[A[A





 64%|██████▍   | 603/938 [16:35<09:02,  1.62s/it][A[A[A[A[A[A





 64%|██████▍   | 604/938 [16:37<09:01,  1.62s/it][A[A[A[A[A[A





 64%|██████▍   | 605/938 [16:38<09:00,  1.62s/it][A[A[A[A[A[A





 65%|██████▍   | 606/938 [16:40<08:58,  1.62s/it][A[A[A[A[A[A





 65%|██████▍   | 607/938 [16:42<08:56,  1.62s/it][A[A[A[A[A[A





 65%|██████▍   | 608/938 [16:43<08:55,  1.62s/it][A[A[A[A[A[A





 65%|██████▍   | 609/938 [16:45<08:55,  1.63s/it][A[A[A[A[A[A





 65%|██████▌   | 610/938 [16:47<08:52,  1.62s/it][A[A[A[A[A[A





 65%|██████▌   | 611/938 [16:48<08:50,  1.62s/it][A[A[A[A[A[A





 65%|██████▌   | 612/938 [16:50<08:49,  1.62s/it][A[A[A[A[A[A





 65%|██████▌   | 613/938 [16:51<08:47,  1.62s/it][A[A[A[A[A[A





 65%|██████▌   | 614/938 [16:53<08:45,  1.62s


>>>> time at the end of epoch: 1 for batch 700 is: 0 hrs: 19 min: 14.00 sec

>>>> generator loss: 71.5280 | generator PPL: 11593705794155149402699248697344.0000
>>>> discriminator loss: -22.8851 | discriminator PPL:  0.0000








 75%|███████▍  | 701/938 [19:14<06:24,  1.62s/it][A[A[A[A[A[A





 75%|███████▍  | 702/938 [19:16<06:21,  1.62s/it][A[A[A[A[A[A





 75%|███████▍  | 703/938 [19:17<06:20,  1.62s/it][A[A[A[A[A[A





 75%|███████▌  | 704/938 [19:19<06:18,  1.62s/it][A[A[A[A[A[A





 75%|███████▌  | 705/938 [19:21<06:17,  1.62s/it][A[A[A[A[A[A





 75%|███████▌  | 706/938 [19:22<06:15,  1.62s/it][A[A[A[A[A[A





 75%|███████▌  | 707/938 [19:24<06:13,  1.62s/it][A[A[A[A[A[A





 75%|███████▌  | 708/938 [19:25<06:11,  1.62s/it][A[A[A[A[A[A





 76%|███████▌  | 709/938 [19:27<06:11,  1.62s/it][A[A[A[A[A[A





 76%|███████▌  | 710/938 [19:29<06:08,  1.62s/it][A[A[A[A[A[A





 76%|███████▌  | 711/938 [19:30<06:07,  1.62s/it][A[A[A[A[A[A





 76%|███████▌  | 712/938 [19:32<06:05,  1.62s/it][A[A[A[A[A[A





 76%|███████▌  | 713/938 [19:34<06:04,  1.62s/it][A[A[A[A[A[A





 76%|███████▌  | 714/938 [19:35<06:02,  1.62s


>>>> time at the end of epoch: 1 for batch 800 is: 0 hrs: 21 min: 55.00 sec

>>>> generator loss: 62.8545 | generator PPL: 1983257020376264287776145408.0000
>>>> discriminator loss: -23.2279 | discriminator PPL:  0.0000








 85%|████████▌ | 801/938 [21:56<03:41,  1.61s/it][A[A[A[A[A[A





 86%|████████▌ | 802/938 [21:57<03:39,  1.61s/it][A[A[A[A[A[A





 86%|████████▌ | 803/938 [21:59<03:37,  1.61s/it][A[A[A[A[A[A





 86%|████████▌ | 804/938 [22:00<03:36,  1.61s/it][A[A[A[A[A[A





 86%|████████▌ | 805/938 [22:02<03:34,  1.61s/it][A[A[A[A[A[A





 86%|████████▌ | 806/938 [22:04<03:32,  1.61s/it][A[A[A[A[A[A





 86%|████████▌ | 807/938 [22:05<03:30,  1.61s/it][A[A[A[A[A[A





 86%|████████▌ | 808/938 [22:07<03:28,  1.61s/it][A[A[A[A[A[A





 86%|████████▌ | 809/938 [22:08<03:27,  1.61s/it][A[A[A[A[A[A





 86%|████████▋ | 810/938 [22:10<03:25,  1.61s/it][A[A[A[A[A[A





 86%|████████▋ | 811/938 [22:12<03:24,  1.61s/it][A[A[A[A[A[A





 87%|████████▋ | 812/938 [22:13<03:22,  1.61s/it][A[A[A[A[A[A





 87%|████████▋ | 813/938 [22:15<03:20,  1.61s/it][A[A[A[A[A[A





 87%|████████▋ | 814/938 [22:16<03:19,  1.61s


>>>> time at the end of epoch: 1 for batch 900 is: 0 hrs: 24 min: 36.00 sec

>>>> generator loss: 68.9171 | generator PPL: 851796172924246239230027628544.0000
>>>> discriminator loss: -19.8729 | discriminator PPL:  0.0000








 96%|█████████▌| 901/938 [24:37<00:59,  1.61s/it][A[A[A[A[A[A





 96%|█████████▌| 902/938 [24:38<00:57,  1.61s/it][A[A[A[A[A[A





 96%|█████████▋| 903/938 [24:40<00:56,  1.61s/it][A[A[A[A[A[A





 96%|█████████▋| 904/938 [24:41<00:54,  1.61s/it][A[A[A[A[A[A





 96%|█████████▋| 905/938 [24:43<00:53,  1.61s/it][A[A[A[A[A[A





 97%|█████████▋| 906/938 [24:45<00:51,  1.61s/it][A[A[A[A[A[A





 97%|█████████▋| 907/938 [24:46<00:49,  1.61s/it][A[A[A[A[A[A





 97%|█████████▋| 908/938 [24:48<00:48,  1.61s/it][A[A[A[A[A[A





 97%|█████████▋| 909/938 [24:50<00:46,  1.61s/it][A[A[A[A[A[A





 97%|█████████▋| 910/938 [24:51<00:45,  1.61s/it][A[A[A[A[A[A





 97%|█████████▋| 911/938 [24:53<00:43,  1.61s/it][A[A[A[A[A[A





 97%|█████████▋| 912/938 [24:54<00:41,  1.61s/it][A[A[A[A[A[A





 97%|█████████▋| 913/938 [24:56<00:40,  1.61s/it][A[A[A[A[A[A





 97%|█████████▋| 914/938 [24:58<00:38,  1.61s


>>>>training starts for epoch: 2	please wait while the model is training.......
>>>>training on progress: KEEP YOUR SCREEN ACTIVE.......

>>>> time at the end of epoch: 2 for batch 0 is: 0 hrs: 00 min: 01.00 sec

>>>> generator loss: 67.5648 | generator PPL: 220310316907979591359720849408.0000
>>>> discriminator loss: -17.8588 | discriminator PPL:  0.0000








  0%|          | 1/938 [00:01<24:32,  1.57s/it][A[A[A[A[A[A





  0%|          | 2/938 [00:03<24:43,  1.59s/it][A[A[A[A[A[A





  0%|          | 3/938 [00:04<24:47,  1.59s/it][A[A[A[A[A[A





  0%|          | 4/938 [00:06<24:49,  1.60s/it][A[A[A[A[A[A





  1%|          | 5/938 [00:08<24:54,  1.60s/it][A[A[A[A[A[A





  1%|          | 6/938 [00:09<24:53,  1.60s/it][A[A[A[A[A[A





  1%|          | 7/938 [00:11<24:51,  1.60s/it][A[A[A[A[A[A





  1%|          | 8/938 [00:12<24:55,  1.61s/it][A[A[A[A[A[A





  1%|          | 9/938 [00:14<24:53,  1.61s/it][A[A[A[A[A[A





  1%|          | 10/938 [00:16<24:49,  1.60s/it][A[A[A[A[A[A





  1%|          | 11/938 [00:17<24:46,  1.60s/it][A[A[A[A[A[A





  1%|▏         | 12/938 [00:19<24:44,  1.60s/it][A[A[A[A[A[A





  1%|▏         | 13/938 [00:20<24:45,  1.61s/it][A[A[A[A[A[A





  1%|▏         | 14/938 [00:22<24:45,  1.61s/it][A[A[A[A[A[A



>>>> time at the end of epoch: 2 for batch 100 is: 0 hrs: 02 min: 41.00 sec

>>>> generator loss: 67.7111 | generator PPL: 255015185567269799824719872000.0000
>>>> discriminator loss: -14.9616 | discriminator PPL:  0.0000








 11%|█         | 101/938 [02:42<22:22,  1.60s/it][A[A[A[A[A[A





 11%|█         | 102/938 [02:43<22:20,  1.60s/it][A[A[A[A[A[A





 11%|█         | 103/938 [02:45<22:17,  1.60s/it][A[A[A[A[A[A





 11%|█         | 104/938 [02:47<22:15,  1.60s/it][A[A[A[A[A[A





 11%|█         | 105/938 [02:48<22:13,  1.60s/it][A[A[A[A[A[A





 11%|█▏        | 106/938 [02:50<22:11,  1.60s/it][A[A[A[A[A[A





 11%|█▏        | 107/938 [02:51<22:13,  1.60s/it][A[A[A[A[A[A





 12%|█▏        | 108/938 [02:53<22:11,  1.60s/it][A[A[A[A[A[A





 12%|█▏        | 109/938 [02:55<22:08,  1.60s/it][A[A[A[A[A[A





 12%|█▏        | 110/938 [02:56<22:06,  1.60s/it][A[A[A[A[A[A





 12%|█▏        | 111/938 [02:58<22:04,  1.60s/it][A[A[A[A[A[A





 12%|█▏        | 112/938 [02:59<22:03,  1.60s/it][A[A[A[A[A[A





 12%|█▏        | 113/938 [03:01<22:01,  1.60s/it][A[A[A[A[A[A





 12%|█▏        | 114/938 [03:03<21:58,  1.60s


>>>> time at the end of epoch: 2 for batch 200 is: 0 hrs: 05 min: 22.00 sec

>>>> generator loss: 60.2162 | generator PPL: 141761788940394223248605184.0000
>>>> discriminator loss: -14.4920 | discriminator PPL:  0.0000








 21%|██▏       | 201/938 [05:22<19:43,  1.61s/it][A[A[A[A[A[A





 22%|██▏       | 202/938 [05:24<19:38,  1.60s/it][A[A[A[A[A[A





 22%|██▏       | 203/938 [05:25<19:34,  1.60s/it][A[A[A[A[A[A





 22%|██▏       | 204/938 [05:27<19:35,  1.60s/it][A[A[A[A[A[A





 22%|██▏       | 205/938 [05:28<19:37,  1.61s/it][A[A[A[A[A[A





 22%|██▏       | 206/938 [05:30<19:35,  1.61s/it][A[A[A[A[A[A





 22%|██▏       | 207/938 [05:32<19:29,  1.60s/it][A[A[A[A[A[A





 22%|██▏       | 208/938 [05:33<19:26,  1.60s/it][A[A[A[A[A[A





 22%|██▏       | 209/938 [05:35<19:25,  1.60s/it][A[A[A[A[A[A





 22%|██▏       | 210/938 [05:36<19:24,  1.60s/it][A[A[A[A[A[A





 22%|██▏       | 211/938 [05:38<19:20,  1.60s/it][A[A[A[A[A[A





 23%|██▎       | 212/938 [05:40<19:20,  1.60s/it][A[A[A[A[A[A





 23%|██▎       | 213/938 [05:41<19:17,  1.60s/it][A[A[A[A[A[A





 23%|██▎       | 214/938 [05:43<19:15,  1.60s


>>>> time at the end of epoch: 2 for batch 300 is: 0 hrs: 08 min: 02.00 sec

>>>> generator loss: 64.3257 | generator PPL: 8635472709396809663394086912.0000
>>>> discriminator loss: -15.8381 | discriminator PPL:  0.0000








 32%|███▏      | 301/938 [08:02<16:56,  1.60s/it][A[A[A[A[A[A





 32%|███▏      | 302/938 [08:03<16:55,  1.60s/it][A[A[A[A[A[A





 32%|███▏      | 303/938 [08:05<16:54,  1.60s/it][A[A[A[A[A[A





 32%|███▏      | 304/938 [08:07<16:53,  1.60s/it][A[A[A[A[A[A





 33%|███▎      | 305/938 [08:08<16:51,  1.60s/it][A[A[A[A[A[A





 33%|███▎      | 306/938 [08:10<16:49,  1.60s/it][A[A[A[A[A[A





 33%|███▎      | 307/938 [08:11<16:47,  1.60s/it][A[A[A[A[A[A





 33%|███▎      | 308/938 [08:13<16:45,  1.60s/it][A[A[A[A[A[A





 33%|███▎      | 309/938 [08:15<16:45,  1.60s/it][A[A[A[A[A[A





 33%|███▎      | 310/938 [08:16<16:45,  1.60s/it][A[A[A[A[A[A





 33%|███▎      | 311/938 [08:18<16:41,  1.60s/it][A[A[A[A[A[A





 33%|███▎      | 312/938 [08:19<16:39,  1.60s/it][A[A[A[A[A[A





 33%|███▎      | 313/938 [08:21<16:40,  1.60s/it][A[A[A[A[A[A





 33%|███▎      | 314/938 [08:23<16:37,  1.60s


>>>> time at the end of epoch: 2 for batch 400 is: 0 hrs: 10 min: 41.00 sec

>>>> generator loss: 62.1324 | generator PPL: 963264236703078241330528256.0000
>>>> discriminator loss: -15.2191 | discriminator PPL:  0.0000








 43%|████▎     | 401/938 [10:41<14:13,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 402/938 [10:43<14:11,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 403/938 [10:44<14:11,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 404/938 [10:46<14:10,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 405/938 [10:48<14:09,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 406/938 [10:49<14:06,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 407/938 [10:51<14:05,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 408/938 [10:52<14:04,  1.59s/it][A[A[A[A[A[A





 44%|████▎     | 409/938 [10:54<14:01,  1.59s/it][A[A[A[A[A[A





 44%|████▎     | 410/938 [10:56<13:58,  1.59s/it][A[A[A[A[A[A





 44%|████▍     | 411/938 [10:57<13:56,  1.59s/it][A[A[A[A[A[A





 44%|████▍     | 412/938 [10:59<13:56,  1.59s/it][A[A[A[A[A[A





 44%|████▍     | 413/938 [11:00<13:56,  1.59s/it][A[A[A[A[A[A





 44%|████▍     | 414/938 [11:02<13:55,  1.59s


>>>> time at the end of epoch: 2 for batch 500 is: 0 hrs: 13 min: 20.00 sec

>>>> generator loss: 66.8261 | generator PPL: 105250565617140678401800011776.0000
>>>> discriminator loss: -13.9352 | discriminator PPL:  0.0000








 53%|█████▎    | 501/938 [13:20<11:35,  1.59s/it][A[A[A[A[A[A





 54%|█████▎    | 502/938 [13:22<11:33,  1.59s/it][A[A[A[A[A[A





 54%|█████▎    | 503/938 [13:24<11:31,  1.59s/it][A[A[A[A[A[A





 54%|█████▎    | 504/938 [13:25<11:30,  1.59s/it][A[A[A[A[A[A





 54%|█████▍    | 505/938 [13:27<11:29,  1.59s/it][A[A[A[A[A[A





 54%|█████▍    | 506/938 [13:28<11:26,  1.59s/it][A[A[A[A[A[A





 54%|█████▍    | 507/938 [13:30<11:24,  1.59s/it][A[A[A[A[A[A





 54%|█████▍    | 508/938 [13:32<11:23,  1.59s/it][A[A[A[A[A[A





 54%|█████▍    | 509/938 [13:33<11:21,  1.59s/it][A[A[A[A[A[A





 54%|█████▍    | 510/938 [13:35<11:20,  1.59s/it][A[A[A[A[A[A





 54%|█████▍    | 511/938 [13:36<11:19,  1.59s/it][A[A[A[A[A[A





 55%|█████▍    | 512/938 [13:38<11:18,  1.59s/it][A[A[A[A[A[A





 55%|█████▍    | 513/938 [13:40<11:17,  1.59s/it][A[A[A[A[A[A





 55%|█████▍    | 514/938 [13:41<11:13,  1.59s


>>>> time at the end of epoch: 2 for batch 600 is: 0 hrs: 15 min: 59.00 sec

>>>> generator loss: 62.7269 | generator PPL: 1745564142671819352399937536.0000
>>>> discriminator loss: -11.8576 | discriminator PPL:  0.0000








 64%|██████▍   | 601/938 [16:00<08:56,  1.59s/it][A[A[A[A[A[A





 64%|██████▍   | 602/938 [16:01<08:55,  1.59s/it][A[A[A[A[A[A





 64%|██████▍   | 603/938 [16:03<08:53,  1.59s/it][A[A[A[A[A[A





 64%|██████▍   | 604/938 [16:04<08:52,  1.59s/it][A[A[A[A[A[A





 64%|██████▍   | 605/938 [16:06<08:50,  1.59s/it][A[A[A[A[A[A





 65%|██████▍   | 606/938 [16:08<08:49,  1.59s/it][A[A[A[A[A[A





 65%|██████▍   | 607/938 [16:09<08:46,  1.59s/it][A[A[A[A[A[A





 65%|██████▍   | 608/938 [16:11<08:45,  1.59s/it][A[A[A[A[A[A





 65%|██████▍   | 609/938 [16:12<08:44,  1.60s/it][A[A[A[A[A[A





 65%|██████▌   | 610/938 [16:14<08:44,  1.60s/it][A[A[A[A[A[A





 65%|██████▌   | 611/938 [16:16<08:42,  1.60s/it][A[A[A[A[A[A





 65%|██████▌   | 612/938 [16:17<08:40,  1.60s/it][A[A[A[A[A[A





 65%|██████▌   | 613/938 [16:19<08:37,  1.59s/it][A[A[A[A[A[A





 65%|██████▌   | 614/938 [16:20<08:35,  1.59s


>>>> time at the end of epoch: 2 for batch 700 is: 0 hrs: 18 min: 38.00 sec

>>>> generator loss: 61.0017 | generator PPL: 310963139271624583886667776.0000
>>>> discriminator loss: -14.3357 | discriminator PPL:  0.0000








 75%|███████▍  | 701/938 [18:38<06:18,  1.59s/it][A[A[A[A[A[A





 75%|███████▍  | 702/938 [18:40<06:15,  1.59s/it][A[A[A[A[A[A





 75%|███████▍  | 703/938 [18:42<06:13,  1.59s/it][A[A[A[A[A[A





 75%|███████▌  | 704/938 [18:43<06:12,  1.59s/it][A[A[A[A[A[A





 75%|███████▌  | 705/938 [18:45<06:10,  1.59s/it][A[A[A[A[A[A





 75%|███████▌  | 706/938 [18:46<06:08,  1.59s/it][A[A[A[A[A[A





 75%|███████▌  | 707/938 [18:48<06:07,  1.59s/it][A[A[A[A[A[A





 75%|███████▌  | 708/938 [18:50<06:05,  1.59s/it][A[A[A[A[A[A





 76%|███████▌  | 709/938 [18:51<06:04,  1.59s/it][A[A[A[A[A[A





 76%|███████▌  | 710/938 [18:53<06:02,  1.59s/it][A[A[A[A[A[A





 76%|███████▌  | 711/938 [18:54<06:00,  1.59s/it][A[A[A[A[A[A





 76%|███████▌  | 712/938 [18:56<05:58,  1.59s/it][A[A[A[A[A[A





 76%|███████▌  | 713/938 [18:58<05:56,  1.59s/it][A[A[A[A[A[A





 76%|███████▌  | 714/938 [18:59<05:56,  1.59s


>>>> time at the end of epoch: 2 for batch 800 is: 0 hrs: 21 min: 17.00 sec

>>>> generator loss: 65.6904 | generator PPL: 33806308782772021144651300864.0000
>>>> discriminator loss: -12.6528 | discriminator PPL:  0.0000








 85%|████████▌ | 801/938 [21:17<03:37,  1.59s/it][A[A[A[A[A[A





 86%|████████▌ | 802/938 [21:19<03:35,  1.59s/it][A[A[A[A[A[A





 86%|████████▌ | 803/938 [21:21<03:34,  1.59s/it][A[A[A[A[A[A





 86%|████████▌ | 804/938 [21:22<03:31,  1.58s/it][A[A[A[A[A[A





 86%|████████▌ | 805/938 [21:24<03:30,  1.58s/it][A[A[A[A[A[A





 86%|████████▌ | 806/938 [21:25<03:29,  1.58s/it][A[A[A[A[A[A





 86%|████████▌ | 807/938 [21:27<03:27,  1.58s/it][A[A[A[A[A[A





 86%|████████▌ | 808/938 [21:29<03:25,  1.58s/it][A[A[A[A[A[A





 86%|████████▌ | 809/938 [21:30<03:24,  1.59s/it][A[A[A[A[A[A





 86%|████████▋ | 810/938 [21:32<03:23,  1.59s/it][A[A[A[A[A[A





 86%|████████▋ | 811/938 [21:33<03:21,  1.58s/it][A[A[A[A[A[A





 87%|████████▋ | 812/938 [21:35<03:19,  1.58s/it][A[A[A[A[A[A





 87%|████████▋ | 813/938 [21:36<03:17,  1.58s/it][A[A[A[A[A[A





 87%|████████▋ | 814/938 [21:38<03:16,  1.58s


>>>> time at the end of epoch: 2 for batch 900 is: 0 hrs: 23 min: 56.00 sec

>>>> generator loss: 65.3561 | generator PPL: 24198834695067534362186612736.0000
>>>> discriminator loss: -12.6943 | discriminator PPL:  0.0000








 96%|█████████▌| 901/938 [23:56<00:58,  1.59s/it][A[A[A[A[A[A





 96%|█████████▌| 902/938 [23:58<00:57,  1.59s/it][A[A[A[A[A[A





 96%|█████████▋| 903/938 [23:59<00:55,  1.59s/it][A[A[A[A[A[A





 96%|█████████▋| 904/938 [24:01<00:54,  1.59s/it][A[A[A[A[A[A





 96%|█████████▋| 905/938 [24:03<00:52,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 906/938 [24:04<00:50,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 907/938 [24:06<00:49,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 908/938 [24:07<00:47,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 909/938 [24:09<00:46,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 910/938 [24:10<00:44,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 911/938 [24:12<00:42,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 912/938 [24:14<00:41,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 913/938 [24:15<00:39,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 914/938 [24:17<00:38,  1.59s


>>>>training starts for epoch: 3	please wait while the model is training.......
>>>>training on progress: KEEP YOUR SCREEN ACTIVE.......

>>>> time at the end of epoch: 3 for batch 0 is: 0 hrs: 00 min: 01.00 sec

>>>> generator loss: 66.8901 | generator PPL: 112203739311157307651810918400.0000
>>>> discriminator loss: -13.5860 | discriminator PPL:  0.0000








  0%|          | 1/938 [00:01<24:19,  1.56s/it][A[A[A[A[A[A





  0%|          | 2/938 [00:03<24:29,  1.57s/it][A[A[A[A[A[A





  0%|          | 3/938 [00:04<24:30,  1.57s/it][A[A[A[A[A[A





  0%|          | 4/938 [00:06<24:34,  1.58s/it][A[A[A[A[A[A





  1%|          | 5/938 [00:07<24:34,  1.58s/it][A[A[A[A[A[A





  1%|          | 6/938 [00:09<24:34,  1.58s/it][A[A[A[A[A[A





  1%|          | 7/938 [00:11<24:32,  1.58s/it][A[A[A[A[A[A





  1%|          | 8/938 [00:12<24:33,  1.58s/it][A[A[A[A[A[A





  1%|          | 9/938 [00:14<24:34,  1.59s/it][A[A[A[A[A[A





  1%|          | 10/938 [00:15<24:31,  1.59s/it][A[A[A[A[A[A





  1%|          | 11/938 [00:17<24:31,  1.59s/it][A[A[A[A[A[A





  1%|▏         | 12/938 [00:19<24:30,  1.59s/it][A[A[A[A[A[A





  1%|▏         | 13/938 [00:20<24:28,  1.59s/it][A[A[A[A[A[A





  1%|▏         | 14/938 [00:22<24:26,  1.59s/it][A[A[A[A[A[A



>>>> time at the end of epoch: 3 for batch 100 is: 0 hrs: 02 min: 40.00 sec

>>>> generator loss: 70.0468 | generator PPL: 2635955978094889072508740304896.0000
>>>> discriminator loss: -12.9464 | discriminator PPL:  0.0000








 11%|█         | 101/938 [02:40<22:09,  1.59s/it][A[A[A[A[A[A





 11%|█         | 102/938 [02:42<22:05,  1.58s/it][A[A[A[A[A[A





 11%|█         | 103/938 [02:43<22:02,  1.58s/it][A[A[A[A[A[A





 11%|█         | 104/938 [02:45<22:03,  1.59s/it][A[A[A[A[A[A





 11%|█         | 105/938 [02:46<22:02,  1.59s/it][A[A[A[A[A[A





 11%|█▏        | 106/938 [02:48<22:00,  1.59s/it][A[A[A[A[A[A





 11%|█▏        | 107/938 [02:49<21:57,  1.59s/it][A[A[A[A[A[A





 12%|█▏        | 108/938 [02:51<21:56,  1.59s/it][A[A[A[A[A[A





 12%|█▏        | 109/938 [02:53<21:55,  1.59s/it][A[A[A[A[A[A





 12%|█▏        | 110/938 [02:54<21:54,  1.59s/it][A[A[A[A[A[A





 12%|█▏        | 111/938 [02:56<21:51,  1.59s/it][A[A[A[A[A[A





 12%|█▏        | 112/938 [02:57<21:49,  1.59s/it][A[A[A[A[A[A





 12%|█▏        | 113/938 [02:59<21:47,  1.58s/it][A[A[A[A[A[A





 12%|█▏        | 114/938 [03:01<21:47,  1.59s


>>>> time at the end of epoch: 3 for batch 200 is: 0 hrs: 05 min: 18.00 sec

>>>> generator loss: 57.8845 | generator PPL: 13769347420765646005207040.0000
>>>> discriminator loss: -11.9203 | discriminator PPL:  0.0000








 21%|██▏       | 201/938 [05:19<19:32,  1.59s/it][A[A[A[A[A[A





 22%|██▏       | 202/938 [05:20<19:30,  1.59s/it][A[A[A[A[A[A





 22%|██▏       | 203/938 [05:22<19:30,  1.59s/it][A[A[A[A[A[A





 22%|██▏       | 204/938 [05:24<19:27,  1.59s/it][A[A[A[A[A[A





 22%|██▏       | 205/938 [05:25<19:26,  1.59s/it][A[A[A[A[A[A





 22%|██▏       | 206/938 [05:27<19:25,  1.59s/it][A[A[A[A[A[A





 22%|██▏       | 207/938 [05:28<19:21,  1.59s/it][A[A[A[A[A[A





 22%|██▏       | 208/938 [05:30<19:16,  1.58s/it][A[A[A[A[A[A





 22%|██▏       | 209/938 [05:31<19:14,  1.58s/it][A[A[A[A[A[A





 22%|██▏       | 210/938 [05:33<19:13,  1.58s/it][A[A[A[A[A[A





 22%|██▏       | 211/938 [05:35<19:11,  1.58s/it][A[A[A[A[A[A





 23%|██▎       | 212/938 [05:36<19:08,  1.58s/it][A[A[A[A[A[A





 23%|██▎       | 213/938 [05:38<19:06,  1.58s/it][A[A[A[A[A[A





 23%|██▎       | 214/938 [05:39<19:05,  1.58s


>>>> time at the end of epoch: 3 for batch 300 is: 0 hrs: 07 min: 57.00 sec

>>>> generator loss: 63.6664 | generator PPL: 4466339039840852724225671168.0000
>>>> discriminator loss: -13.0823 | discriminator PPL:  0.0000








 32%|███▏      | 301/938 [07:57<16:53,  1.59s/it][A[A[A[A[A[A





 32%|███▏      | 302/938 [07:59<16:49,  1.59s/it][A[A[A[A[A[A





 32%|███▏      | 303/938 [08:01<16:48,  1.59s/it][A[A[A[A[A[A





 32%|███▏      | 304/938 [08:02<16:46,  1.59s/it][A[A[A[A[A[A





 33%|███▎      | 305/938 [08:04<16:44,  1.59s/it][A[A[A[A[A[A





 33%|███▎      | 306/938 [08:05<16:43,  1.59s/it][A[A[A[A[A[A





 33%|███▎      | 307/938 [08:07<16:41,  1.59s/it][A[A[A[A[A[A





 33%|███▎      | 308/938 [08:09<16:41,  1.59s/it][A[A[A[A[A[A





 33%|███▎      | 309/938 [08:10<16:39,  1.59s/it][A[A[A[A[A[A





 33%|███▎      | 310/938 [08:12<16:35,  1.58s/it][A[A[A[A[A[A





 33%|███▎      | 311/938 [08:13<16:33,  1.58s/it][A[A[A[A[A[A





 33%|███▎      | 312/938 [08:15<16:31,  1.58s/it][A[A[A[A[A[A





 33%|███▎      | 313/938 [08:16<16:30,  1.58s/it][A[A[A[A[A[A





 33%|███▎      | 314/938 [08:18<16:27,  1.58s


>>>> time at the end of epoch: 3 for batch 400 is: 0 hrs: 10 min: 36.00 sec

>>>> generator loss: 72.1801 | generator PPL: 22254554793523125058711567990784.0000
>>>> discriminator loss: -9.8347 | discriminator PPL:  0.0001








 43%|████▎     | 401/938 [10:36<14:12,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 402/938 [10:38<14:10,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 403/938 [10:39<14:08,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 404/938 [10:41<14:07,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 405/938 [10:42<14:06,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 406/938 [10:44<14:06,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 407/938 [10:46<14:04,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 408/938 [10:47<14:03,  1.59s/it][A[A[A[A[A[A





 44%|████▎     | 409/938 [10:49<14:02,  1.59s/it][A[A[A[A[A[A





 44%|████▎     | 410/938 [10:50<14:02,  1.60s/it][A[A[A[A[A[A





 44%|████▍     | 411/938 [10:52<13:59,  1.59s/it][A[A[A[A[A[A





 44%|████▍     | 412/938 [10:54<13:56,  1.59s/it][A[A[A[A[A[A





 44%|████▍     | 413/938 [10:55<13:54,  1.59s/it][A[A[A[A[A[A





 44%|████▍     | 414/938 [10:57<13:51,  1.59s


>>>> time at the end of epoch: 3 for batch 500 is: 0 hrs: 13 min: 14.00 sec

>>>> generator loss: 65.9528 | generator PPL: 43948667862653270797018202112.0000
>>>> discriminator loss: -10.8296 | discriminator PPL:  0.0000








 53%|█████▎    | 501/938 [13:15<11:36,  1.59s/it][A[A[A[A[A[A





 54%|█████▎    | 502/938 [13:16<11:33,  1.59s/it][A[A[A[A[A[A





 54%|█████▎    | 503/938 [13:18<11:32,  1.59s/it][A[A[A[A[A[A





 54%|█████▎    | 504/938 [13:19<11:30,  1.59s/it][A[A[A[A[A[A





 54%|█████▍    | 505/938 [13:21<11:31,  1.60s/it][A[A[A[A[A[A





 54%|█████▍    | 506/938 [13:23<11:30,  1.60s/it][A[A[A[A[A[A





 54%|█████▍    | 507/938 [13:24<11:28,  1.60s/it][A[A[A[A[A[A





 54%|█████▍    | 508/938 [13:26<11:26,  1.60s/it][A[A[A[A[A[A





 54%|█████▍    | 509/938 [13:27<11:25,  1.60s/it][A[A[A[A[A[A





 54%|█████▍    | 510/938 [13:29<11:22,  1.60s/it][A[A[A[A[A[A





 54%|█████▍    | 511/938 [13:31<11:21,  1.60s/it][A[A[A[A[A[A





 55%|█████▍    | 512/938 [13:32<11:20,  1.60s/it][A[A[A[A[A[A





 55%|█████▍    | 513/938 [13:34<11:17,  1.59s/it][A[A[A[A[A[A





 55%|█████▍    | 514/938 [13:35<11:15,  1.59s


>>>> time at the end of epoch: 3 for batch 600 is: 0 hrs: 15 min: 53.00 sec

>>>> generator loss: 69.1379 | generator PPL: 1062182366714165736465565220864.0000
>>>> discriminator loss: -12.3792 | discriminator PPL:  0.0000








 64%|██████▍   | 601/938 [15:54<08:54,  1.59s/it][A[A[A[A[A[A





 64%|██████▍   | 602/938 [15:55<08:52,  1.58s/it][A[A[A[A[A[A





 64%|██████▍   | 603/938 [15:57<08:50,  1.58s/it][A[A[A[A[A[A





 64%|██████▍   | 604/938 [15:58<08:49,  1.59s/it][A[A[A[A[A[A





 64%|██████▍   | 605/938 [16:00<08:48,  1.59s/it][A[A[A[A[A[A





 65%|██████▍   | 606/938 [16:01<08:47,  1.59s/it][A[A[A[A[A[A





 65%|██████▍   | 607/938 [16:03<08:44,  1.58s/it][A[A[A[A[A[A





 65%|██████▍   | 608/938 [16:05<08:43,  1.59s/it][A[A[A[A[A[A





 65%|██████▍   | 609/938 [16:06<08:42,  1.59s/it][A[A[A[A[A[A





 65%|██████▌   | 610/938 [16:08<08:40,  1.59s/it][A[A[A[A[A[A





 65%|██████▌   | 611/938 [16:09<08:38,  1.59s/it][A[A[A[A[A[A





 65%|██████▌   | 612/938 [16:11<08:37,  1.59s/it][A[A[A[A[A[A





 65%|██████▌   | 613/938 [16:13<08:35,  1.59s/it][A[A[A[A[A[A





 65%|██████▌   | 614/938 [16:14<08:34,  1.59s


>>>> time at the end of epoch: 3 for batch 700 is: 0 hrs: 18 min: 32.00 sec

>>>> generator loss: 62.1724 | generator PPL: 1002576713840746332392783872.0000
>>>> discriminator loss: -10.1908 | discriminator PPL:  0.0000








 75%|███████▍  | 701/938 [18:32<06:16,  1.59s/it][A[A[A[A[A[A





 75%|███████▍  | 702/938 [18:34<06:14,  1.59s/it][A[A[A[A[A[A





 75%|███████▍  | 703/938 [18:35<06:12,  1.59s/it][A[A[A[A[A[A





 75%|███████▌  | 704/938 [18:37<06:10,  1.58s/it][A[A[A[A[A[A





 75%|███████▌  | 705/938 [18:38<06:08,  1.58s/it][A[A[A[A[A[A





 75%|███████▌  | 706/938 [18:40<06:07,  1.58s/it][A[A[A[A[A[A





 75%|███████▌  | 707/938 [18:42<06:05,  1.58s/it][A[A[A[A[A[A





 75%|███████▌  | 708/938 [18:43<06:05,  1.59s/it][A[A[A[A[A[A





 76%|███████▌  | 709/938 [18:45<06:03,  1.59s/it][A[A[A[A[A[A





 76%|███████▌  | 710/938 [18:46<06:02,  1.59s/it][A[A[A[A[A[A





 76%|███████▌  | 711/938 [18:48<06:01,  1.59s/it][A[A[A[A[A[A





 76%|███████▌  | 712/938 [18:50<05:59,  1.59s/it][A[A[A[A[A[A





 76%|███████▌  | 713/938 [18:51<05:57,  1.59s/it][A[A[A[A[A[A





 76%|███████▌  | 714/938 [18:53<05:55,  1.59s


>>>> time at the end of epoch: 3 for batch 800 is: 0 hrs: 21 min: 10.00 sec

>>>> generator loss: 65.1516 | generator PPL: 19724161076743398128495886336.0000
>>>> discriminator loss: -13.2604 | discriminator PPL:  0.0000








 85%|████████▌ | 801/938 [21:11<03:37,  1.59s/it][A[A[A[A[A[A





 86%|████████▌ | 802/938 [21:12<03:35,  1.59s/it][A[A[A[A[A[A





 86%|████████▌ | 803/938 [21:14<03:34,  1.59s/it][A[A[A[A[A[A





 86%|████████▌ | 804/938 [21:15<03:32,  1.59s/it][A[A[A[A[A[A





 86%|████████▌ | 805/938 [21:17<03:30,  1.58s/it][A[A[A[A[A[A





 86%|████████▌ | 806/938 [21:19<03:29,  1.59s/it][A[A[A[A[A[A





 86%|████████▌ | 807/938 [21:20<03:28,  1.59s/it][A[A[A[A[A[A





 86%|████████▌ | 808/938 [21:22<03:26,  1.59s/it][A[A[A[A[A[A





 86%|████████▌ | 809/938 [21:23<03:24,  1.59s/it][A[A[A[A[A[A





 86%|████████▋ | 810/938 [21:25<03:23,  1.59s/it][A[A[A[A[A[A





 86%|████████▋ | 811/938 [21:27<03:21,  1.59s/it][A[A[A[A[A[A





 87%|████████▋ | 812/938 [21:28<03:19,  1.59s/it][A[A[A[A[A[A





 87%|████████▋ | 813/938 [21:30<03:18,  1.58s/it][A[A[A[A[A[A





 87%|████████▋ | 814/938 [21:31<03:16,  1.59s


>>>> time at the end of epoch: 3 for batch 900 is: 0 hrs: 23 min: 49.00 sec

>>>> generator loss: 61.0279 | generator PPL: 319222701017205621630435328.0000
>>>> discriminator loss: -9.6321 | discriminator PPL:  0.0001








 96%|█████████▌| 901/938 [23:50<00:58,  1.59s/it][A[A[A[A[A[A





 96%|█████████▌| 902/938 [23:51<00:57,  1.59s/it][A[A[A[A[A[A





 96%|█████████▋| 903/938 [23:53<00:55,  1.59s/it][A[A[A[A[A[A





 96%|█████████▋| 904/938 [23:54<00:53,  1.59s/it][A[A[A[A[A[A





 96%|█████████▋| 905/938 [23:56<00:52,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 906/938 [23:57<00:50,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 907/938 [23:59<00:49,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 908/938 [24:01<00:47,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 909/938 [24:02<00:46,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 910/938 [24:04<00:44,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 911/938 [24:05<00:42,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 912/938 [24:07<00:41,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 913/938 [24:09<00:39,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 914/938 [24:10<00:38,  1.59s


>>>>training starts for epoch: 4	please wait while the model is training.......
>>>>training on progress: KEEP YOUR SCREEN ACTIVE.......

>>>> time at the end of epoch: 4 for batch 0 is: 0 hrs: 00 min: 01.00 sec

>>>> generator loss: 63.9948 | generator PPL: 6202884965740254547608076288.0000
>>>> discriminator loss: -9.5599 | discriminator PPL:  0.0001








  0%|          | 1/938 [00:01<24:22,  1.56s/it][A[A[A[A[A[A





  0%|          | 2/938 [00:03<24:27,  1.57s/it][A[A[A[A[A[A





  0%|          | 3/938 [00:04<24:30,  1.57s/it][A[A[A[A[A[A





  0%|          | 4/938 [00:06<24:33,  1.58s/it][A[A[A[A[A[A





  1%|          | 5/938 [00:07<24:33,  1.58s/it][A[A[A[A[A[A





  1%|          | 6/938 [00:09<24:34,  1.58s/it][A[A[A[A[A[A





  1%|          | 7/938 [00:11<24:35,  1.59s/it][A[A[A[A[A[A





  1%|          | 8/938 [00:12<24:34,  1.59s/it][A[A[A[A[A[A





  1%|          | 9/938 [00:14<24:31,  1.58s/it][A[A[A[A[A[A





  1%|          | 10/938 [00:15<24:28,  1.58s/it][A[A[A[A[A[A





  1%|          | 11/938 [00:17<24:26,  1.58s/it][A[A[A[A[A[A





  1%|▏         | 12/938 [00:19<24:26,  1.58s/it][A[A[A[A[A[A





  1%|▏         | 13/938 [00:20<24:26,  1.59s/it][A[A[A[A[A[A





  1%|▏         | 14/938 [00:22<24:25,  1.59s/it][A[A[A[A[A[A



>>>> time at the end of epoch: 4 for batch 100 is: 0 hrs: 02 min: 39.00 sec

>>>> generator loss: 61.3618 | generator PPL: 445743492129391285553332224.0000
>>>> discriminator loss: -9.3326 | discriminator PPL:  0.0001








 11%|█         | 101/938 [02:40<22:03,  1.58s/it][A[A[A[A[A[A





 11%|█         | 102/938 [02:41<22:02,  1.58s/it][A[A[A[A[A[A





 11%|█         | 103/938 [02:43<22:00,  1.58s/it][A[A[A[A[A[A





 11%|█         | 104/938 [02:44<21:58,  1.58s/it][A[A[A[A[A[A





 11%|█         | 105/938 [02:46<22:00,  1.58s/it][A[A[A[A[A[A





 11%|█▏        | 106/938 [02:48<21:58,  1.58s/it][A[A[A[A[A[A





 11%|█▏        | 107/938 [02:49<21:55,  1.58s/it][A[A[A[A[A[A





 12%|█▏        | 108/938 [02:51<21:56,  1.59s/it][A[A[A[A[A[A





 12%|█▏        | 109/938 [02:52<21:53,  1.58s/it][A[A[A[A[A[A





 12%|█▏        | 110/938 [02:54<21:53,  1.59s/it][A[A[A[A[A[A





 12%|█▏        | 111/938 [02:56<21:53,  1.59s/it][A[A[A[A[A[A





 12%|█▏        | 112/938 [02:57<21:51,  1.59s/it][A[A[A[A[A[A





 12%|█▏        | 113/938 [02:59<21:51,  1.59s/it][A[A[A[A[A[A





 12%|█▏        | 114/938 [03:00<21:47,  1.59s


>>>> time at the end of epoch: 4 for batch 200 is: 0 hrs: 05 min: 18.00 sec

>>>> generator loss: 58.9994 | generator PPL: 41987751053255231529811968.0000
>>>> discriminator loss: -11.4791 | discriminator PPL:  0.0000








 21%|██▏       | 201/938 [05:19<19:29,  1.59s/it][A[A[A[A[A[A





 22%|██▏       | 202/938 [05:20<19:27,  1.59s/it][A[A[A[A[A[A





 22%|██▏       | 203/938 [05:22<19:25,  1.59s/it][A[A[A[A[A[A





 22%|██▏       | 204/938 [05:23<19:25,  1.59s/it][A[A[A[A[A[A





 22%|██▏       | 205/938 [05:25<19:22,  1.59s/it][A[A[A[A[A[A





 22%|██▏       | 206/938 [05:27<19:22,  1.59s/it][A[A[A[A[A[A





 22%|██▏       | 207/938 [05:28<19:22,  1.59s/it][A[A[A[A[A[A





 22%|██▏       | 208/938 [05:30<19:23,  1.59s/it][A[A[A[A[A[A





 22%|██▏       | 209/938 [05:31<19:21,  1.59s/it][A[A[A[A[A[A





 22%|██▏       | 210/938 [05:33<19:25,  1.60s/it][A[A[A[A[A[A





 22%|██▏       | 211/938 [05:35<19:22,  1.60s/it][A[A[A[A[A[A





 23%|██▎       | 212/938 [05:36<19:21,  1.60s/it][A[A[A[A[A[A





 23%|██▎       | 213/938 [05:38<19:18,  1.60s/it][A[A[A[A[A[A





 23%|██▎       | 214/938 [05:39<19:16,  1.60s


>>>> time at the end of epoch: 4 for batch 300 is: 0 hrs: 07 min: 57.00 sec

>>>> generator loss: 61.0232 | generator PPL: 317715059635459873115734016.0000
>>>> discriminator loss: -9.5769 | discriminator PPL:  0.0001








 32%|███▏      | 301/938 [07:58<16:52,  1.59s/it][A[A[A[A[A[A





 32%|███▏      | 302/938 [07:59<16:49,  1.59s/it][A[A[A[A[A[A





 32%|███▏      | 303/938 [08:01<16:48,  1.59s/it][A[A[A[A[A[A





 32%|███▏      | 304/938 [08:02<16:48,  1.59s/it][A[A[A[A[A[A





 33%|███▎      | 305/938 [08:04<16:44,  1.59s/it][A[A[A[A[A[A





 33%|███▎      | 306/938 [08:05<16:40,  1.58s/it][A[A[A[A[A[A





 33%|███▎      | 307/938 [08:07<16:40,  1.59s/it][A[A[A[A[A[A





 33%|███▎      | 308/938 [08:09<16:39,  1.59s/it][A[A[A[A[A[A





 33%|███▎      | 309/938 [08:10<16:41,  1.59s/it][A[A[A[A[A[A





 33%|███▎      | 310/938 [08:12<16:38,  1.59s/it][A[A[A[A[A[A





 33%|███▎      | 311/938 [08:13<16:34,  1.59s/it][A[A[A[A[A[A





 33%|███▎      | 312/938 [08:15<16:34,  1.59s/it][A[A[A[A[A[A





 33%|███▎      | 313/938 [08:17<16:32,  1.59s/it][A[A[A[A[A[A





 33%|███▎      | 314/938 [08:18<16:30,  1.59s


>>>> time at the end of epoch: 4 for batch 400 is: 0 hrs: 10 min: 36.00 sec

>>>> generator loss: 57.4615 | generator PPL: 9020002259281367315513344.0000
>>>> discriminator loss: -10.1635 | discriminator PPL:  0.0000








 43%|████▎     | 401/938 [10:36<14:12,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 402/938 [10:38<14:09,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 403/938 [10:40<14:07,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 404/938 [10:41<14:08,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 405/938 [10:43<14:07,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 406/938 [10:44<14:08,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 407/938 [10:46<14:06,  1.59s/it][A[A[A[A[A[A





 43%|████▎     | 408/938 [10:48<14:05,  1.60s/it][A[A[A[A[A[A





 44%|████▎     | 409/938 [10:49<14:04,  1.60s/it][A[A[A[A[A[A





 44%|████▎     | 410/938 [10:51<14:01,  1.59s/it][A[A[A[A[A[A





 44%|████▍     | 411/938 [10:52<13:58,  1.59s/it][A[A[A[A[A[A





 44%|████▍     | 412/938 [10:54<13:55,  1.59s/it][A[A[A[A[A[A





 44%|████▍     | 413/938 [10:56<13:54,  1.59s/it][A[A[A[A[A[A





 44%|████▍     | 414/938 [10:57<13:52,  1.59s


>>>> time at the end of epoch: 4 for batch 500 is: 0 hrs: 13 min: 15.00 sec

>>>> generator loss: 56.6702 | generator PPL: 4088620827131947327160320.0000
>>>> discriminator loss: -8.1785 | discriminator PPL:  0.0003








 53%|█████▎    | 501/938 [13:15<11:38,  1.60s/it][A[A[A[A[A[A





 54%|█████▎    | 502/938 [13:17<11:35,  1.60s/it][A[A[A[A[A[A





 54%|█████▎    | 503/938 [13:18<11:32,  1.59s/it][A[A[A[A[A[A





 54%|█████▎    | 504/938 [13:20<11:30,  1.59s/it][A[A[A[A[A[A





 54%|█████▍    | 505/938 [13:22<11:29,  1.59s/it][A[A[A[A[A[A





 54%|█████▍    | 506/938 [13:23<11:28,  1.59s/it][A[A[A[A[A[A





 54%|█████▍    | 507/938 [13:25<11:26,  1.59s/it][A[A[A[A[A[A





 54%|█████▍    | 508/938 [13:26<11:25,  1.59s/it][A[A[A[A[A[A





 54%|█████▍    | 509/938 [13:28<11:22,  1.59s/it][A[A[A[A[A[A





 54%|█████▍    | 510/938 [13:30<11:19,  1.59s/it][A[A[A[A[A[A





 54%|█████▍    | 511/938 [13:31<11:18,  1.59s/it][A[A[A[A[A[A





 55%|█████▍    | 512/938 [13:33<11:17,  1.59s/it][A[A[A[A[A[A





 55%|█████▍    | 513/938 [13:34<11:16,  1.59s/it][A[A[A[A[A[A





 55%|█████▍    | 514/938 [13:36<11:13,  1.59s


>>>> time at the end of epoch: 4 for batch 600 is: 0 hrs: 15 min: 54.00 sec

>>>> generator loss: 58.0492 | generator PPL: 16234471392728435728056320.0000
>>>> discriminator loss: -11.8862 | discriminator PPL:  0.0000








 64%|██████▍   | 601/938 [15:54<08:56,  1.59s/it][A[A[A[A[A[A





 64%|██████▍   | 602/938 [15:56<08:54,  1.59s/it][A[A[A[A[A[A





 64%|██████▍   | 603/938 [15:57<08:52,  1.59s/it][A[A[A[A[A[A





 64%|██████▍   | 604/938 [15:59<08:50,  1.59s/it][A[A[A[A[A[A





 64%|██████▍   | 605/938 [16:01<08:48,  1.59s/it][A[A[A[A[A[A





 65%|██████▍   | 606/938 [16:02<08:46,  1.59s/it][A[A[A[A[A[A





 65%|██████▍   | 607/938 [16:04<08:45,  1.59s/it][A[A[A[A[A[A





 65%|██████▍   | 608/938 [16:05<08:44,  1.59s/it][A[A[A[A[A[A





 65%|██████▍   | 609/938 [16:07<08:43,  1.59s/it][A[A[A[A[A[A





 65%|██████▌   | 610/938 [16:08<08:40,  1.59s/it][A[A[A[A[A[A





 65%|██████▌   | 611/938 [16:10<08:38,  1.58s/it][A[A[A[A[A[A





 65%|██████▌   | 612/938 [16:12<08:36,  1.58s/it][A[A[A[A[A[A





 65%|██████▌   | 613/938 [16:13<08:34,  1.58s/it][A[A[A[A[A[A





 65%|██████▌   | 614/938 [16:15<08:33,  1.58s


>>>> time at the end of epoch: 4 for batch 700 is: 0 hrs: 18 min: 33.00 sec

>>>> generator loss: 64.4484 | generator PPL: 9763273874404171957869215744.0000
>>>> discriminator loss: -9.4803 | discriminator PPL:  0.0001








 75%|███████▍  | 701/938 [18:33<06:15,  1.58s/it][A[A[A[A[A[A





 75%|███████▍  | 702/938 [18:35<06:14,  1.59s/it][A[A[A[A[A[A





 75%|███████▍  | 703/938 [18:36<06:12,  1.59s/it][A[A[A[A[A[A





 75%|███████▌  | 704/938 [18:38<06:10,  1.58s/it][A[A[A[A[A[A





 75%|███████▌  | 705/938 [18:39<06:09,  1.59s/it][A[A[A[A[A[A





 75%|███████▌  | 706/938 [18:41<06:08,  1.59s/it][A[A[A[A[A[A





 75%|███████▌  | 707/938 [18:43<06:07,  1.59s/it][A[A[A[A[A[A





 75%|███████▌  | 708/938 [18:44<06:06,  1.59s/it][A[A[A[A[A[A





 76%|███████▌  | 709/938 [18:46<06:04,  1.59s/it][A[A[A[A[A[A





 76%|███████▌  | 710/938 [18:47<06:01,  1.59s/it][A[A[A[A[A[A





 76%|███████▌  | 711/938 [18:49<05:59,  1.59s/it][A[A[A[A[A[A





 76%|███████▌  | 712/938 [18:50<05:58,  1.58s/it][A[A[A[A[A[A





 76%|███████▌  | 713/938 [18:52<05:56,  1.59s/it][A[A[A[A[A[A





 76%|███████▌  | 714/938 [18:54<05:55,  1.59s


>>>> time at the end of epoch: 4 for batch 800 is: 0 hrs: 21 min: 12.00 sec

>>>> generator loss: 65.2486 | generator PPL: 21732279298801794399569182720.0000
>>>> discriminator loss: -9.6145 | discriminator PPL:  0.0001








 85%|████████▌ | 801/938 [21:12<03:37,  1.59s/it][A[A[A[A[A[A





 86%|████████▌ | 802/938 [21:13<03:35,  1.59s/it][A[A[A[A[A[A





 86%|████████▌ | 803/938 [21:15<03:34,  1.59s/it][A[A[A[A[A[A





 86%|████████▌ | 804/938 [21:17<03:32,  1.59s/it][A[A[A[A[A[A





 86%|████████▌ | 805/938 [21:18<03:30,  1.58s/it][A[A[A[A[A[A





 86%|████████▌ | 806/938 [21:20<03:29,  1.59s/it][A[A[A[A[A[A





 86%|████████▌ | 807/938 [21:21<03:27,  1.59s/it][A[A[A[A[A[A





 86%|████████▌ | 808/938 [21:23<03:26,  1.59s/it][A[A[A[A[A[A





 86%|████████▌ | 809/938 [21:24<03:24,  1.58s/it][A[A[A[A[A[A





 86%|████████▋ | 810/938 [21:26<03:22,  1.58s/it][A[A[A[A[A[A





 86%|████████▋ | 811/938 [21:28<03:21,  1.59s/it][A[A[A[A[A[A





 87%|████████▋ | 812/938 [21:29<03:19,  1.59s/it][A[A[A[A[A[A





 87%|████████▋ | 813/938 [21:31<03:18,  1.59s/it][A[A[A[A[A[A





 87%|████████▋ | 814/938 [21:32<03:17,  1.59s


>>>> time at the end of epoch: 4 for batch 900 is: 0 hrs: 23 min: 50.00 sec

>>>> generator loss: 57.5690 | generator PPL: 10043441810323134444732416.0000
>>>> discriminator loss: -8.1838 | discriminator PPL:  0.0003








 96%|█████████▌| 901/938 [23:51<00:58,  1.59s/it][A[A[A[A[A[A





 96%|█████████▌| 902/938 [23:52<00:57,  1.59s/it][A[A[A[A[A[A





 96%|█████████▋| 903/938 [23:54<00:55,  1.59s/it][A[A[A[A[A[A





 96%|█████████▋| 904/938 [23:55<00:53,  1.59s/it][A[A[A[A[A[A





 96%|█████████▋| 905/938 [23:57<00:52,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 906/938 [23:58<00:50,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 907/938 [24:00<00:49,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 908/938 [24:02<00:47,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 909/938 [24:03<00:46,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 910/938 [24:05<00:44,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 911/938 [24:06<00:42,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 912/938 [24:08<00:41,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 913/938 [24:10<00:39,  1.59s/it][A[A[A[A[A[A





 97%|█████████▋| 914/938 [24:11<00:38,  1.59s


>>>>training starts for epoch: 5	please wait while the model is training.......
>>>>training on progress: KEEP YOUR SCREEN ACTIVE.......

>>>> time at the end of epoch: 5 for batch 0 is: 0 hrs: 00 min: 01.00 sec

>>>> generator loss: 58.3313 | generator PPL: 21525730066325513130999808.0000
>>>> discriminator loss: -5.7285 | discriminator PPL:  0.0033








  0%|          | 1/938 [00:01<24:05,  1.54s/it][A[A[A[A[A[A





  0%|          | 2/938 [00:03<24:18,  1.56s/it][A[A[A[A[A[A





  0%|          | 3/938 [00:04<24:25,  1.57s/it][A[A[A[A[A[A





  0%|          | 4/938 [00:06<24:27,  1.57s/it][A[A[A[A[A[A





  1%|          | 5/938 [00:07<24:28,  1.57s/it][A[A[A[A[A[A





  1%|          | 6/938 [00:09<24:28,  1.58s/it][A[A[A[A[A[A





  1%|          | 7/938 [00:11<24:29,  1.58s/it][A[A[A[A[A[A





  1%|          | 8/938 [00:12<24:27,  1.58s/it][A[A[A[A[A[A





  1%|          | 9/938 [00:14<24:28,  1.58s/it][A[A[A[A[A[A





  1%|          | 10/938 [00:15<24:32,  1.59s/it][A[A[A[A[A[A





  1%|          | 11/938 [00:17<24:28,  1.58s/it][A[A[A[A[A[A





  1%|▏         | 12/938 [00:19<24:25,  1.58s/it][A[A[A[A[A[A





  1%|▏         | 13/938 [00:20<24:21,  1.58s/it][A[A[A[A[A[A







In [None]:
!tensorboard -logdir = logs