In [1]:
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
from datasets import load_dataset
from transformers import BertTokenizer, BertModel, BertForSequenceClassification
from tqdm import tqdm
import nltk

# Ensure nltk is installed and download necessary resources
import os
import subprocess
import sys

def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

try:
    import nltk
    nltk.download('punkt')
except ImportError:
    install('nltk')
    import nltk
    nltk.download('punkt')

print("NLTK Done, onto Generator")

# Generator and Discriminator Models
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.model = BertModel.from_pretrained('bert-base-uncased')
        self.linear = nn.Linear(self.model.config.hidden_size, self.model.config.vocab_size)
    
    def forward(self, input_ids, attention_mask):
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        logits = self.linear(outputs.last_hidden_state)
        return logits
    
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=1)
    
    def forward(self, input_ids, attention_mask):
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        return outputs.logits

print("LOADING DATASETS")
# Load dataset and split into training and validation sets
dataset = load_dataset('wmt14', 'fr-en', split='train')
val_dataset = load_dataset('wmt14', 'fr-en', split='validation')

small_dataset = dataset.shuffle(seed=42).select(range(25000))
small_val_dataset = val_dataset.shuffle(seed=42).select(range(1000))

# Tokenization
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

print("STARTED PREPROCESSING")
def preprocess(examples):
    inputs = tokenizer(examples['translation']['en'], return_tensors='pt', padding='max_length', truncation=True, max_length=128)
    targets = tokenizer(examples['translation']['fr'], return_tensors='pt', padding='max_length', truncation=True, max_length=128)
    return {
        'input_ids': inputs['input_ids'].squeeze(),
        'attention_mask': inputs['attention_mask'].squeeze(),
        'target_ids': targets['input_ids'].squeeze()
    }

# Use multiprocessing to speed up the preprocessing
print("train preprocess")
train_dataset = small_dataset.map(preprocess, remove_columns=['translation'], num_proc=6)
print("val preprocess")
val_dataset = small_val_dataset.map(preprocess, remove_columns=['translation'], num_proc=6)

# Define your dataset class
class CustomDataset(Dataset):
    def __init__(self, dataset):
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        return {key: torch.tensor(val) for key, val in item.items()}

# Data collation function for efficient batching
def collate_fn(batch):
    input_ids = torch.stack([item['input_ids'] for item in batch])
    attention_mask = torch.stack([item['attention_mask'] for item in batch])
    target_ids = torch.stack([item['target_ids'] for item in batch])
    return {'input_ids': input_ids, 'attention_mask': attention_mask, 'target_ids': target_ids}

# Create DataLoaders
train_custom_dataset = CustomDataset(train_dataset)
val_custom_dataset = CustomDataset(val_dataset)
train_dataloader = DataLoader(train_custom_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
validation_dataloader = DataLoader(val_custom_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

# Initialize models
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
generator = Generator().to(device)
discriminator = Discriminator().to(device)

# Optimizers
optimizer_G = Adam(generator.parameters(), lr=0.0002)
optimizer_D = Adam(discriminator.parameters(), lr=0.0004)

# Loss functions
adversarial_loss = nn.BCEWithLogitsLoss().to(device)
translation_loss = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id).to(device)

# Gradient clipping
max_grad_norm = 1.0

# Mixed precision training
scaler = torch.cuda.amp.GradScaler()

# Training loop
num_epochs = 25
for epoch in tqdm(range(1, num_epochs + 1)):
    generator.train()
    discriminator.train()
    
    for batch_idx, batch in enumerate(train_dataloader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        target_ids = batch['target_ids'].to(device)

        # Train Discriminator
        optimizer_D.zero_grad()
        with torch.cuda.amp.autocast():
            real_labels = torch.ones(input_ids.size(0), 1).to(device) * 0.9  # label smoothing
            real_output = discriminator(input_ids, attention_mask)
            d_loss_real = adversarial_loss(real_output, real_labels)
            
            fake_logits = generator(input_ids, attention_mask)
            fake_translations = torch.argmax(fake_logits, dim=-1)
            fake_labels = torch.zeros(input_ids.size(0), 1).to(device)
            fake_output = discriminator(fake_translations.detach(), attention_mask)
            d_loss_fake = adversarial_loss(fake_output, fake_labels)
            
            d_loss = (d_loss_real + d_loss_fake) / 2

        scaler.scale(d_loss).backward()
        torch.nn.utils.clip_grad_norm_(discriminator.parameters(), max_grad_norm)
        scaler.step(optimizer_D)
        scaler.update()

        # Train Generator
        optimizer_G.zero_grad()
        with torch.cuda.amp.autocast():
            translated_logits = generator(input_ids, attention_mask)
            g_loss_adv = adversarial_loss(discriminator(torch.argmax(translated_logits, dim=-1), attention_mask), real_labels)
            g_loss_trans = translation_loss(translated_logits.view(-1, translated_logits.size(-1)), target_ids.view(-1))
            g_loss = g_loss_adv + g_loss_trans

        scaler.scale(g_loss).backward()
        torch.nn.utils.clip_grad_norm_(generator.parameters(), max_grad_norm)
        scaler.step(optimizer_G)
        scaler.update()

        # Print batch losses
        if batch_idx % 10 == 0:  # Print every 10 batches
            print(f"Epoch [{epoch}/{num_epochs}], Batch [{batch_idx}/{len(train_dataloader)}], d_loss: {d_loss.item():.4f}, g_loss: {g_loss.item():.4f}")

    # Print epoch summary after each epoch
    print(f"Epoch [{epoch}/{num_epochs}], d_loss: {d_loss.item():.4f}, g_loss: {g_loss.item():.4f}")


# Save the trained models
torch.save(generator.state_dict(), 'generator_model.pth')
torch.save(discriminator.state_dict(), 'discriminator_model.pth')

# Translation and BLEU computation functions
def translate_texts(generator, dataloader):
    generator.eval()
    all_translations = []
    all_references = []

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            target_ids = batch['target_ids'].to(device)

            translations = generator(input_ids, attention_mask)
            translations = translations.argmax(dim=-1).cpu().numpy()

            for i in range(len(translations)):
                decoded_translation = tokenizer.decode(translations[i], skip_special_tokens=True)
                decoded_reference = tokenizer.decode(target_ids[i].cpu().numpy(), skip_special_tokens=True)

                all_translations.append(decoded_translation)
                all_references.append([decoded_reference])

    return all_translations, all_references

# Load generator model
generator.load_state_dict(torch.load('generator_model.pth'))

# After training
validation_translations, validation_references = translate_texts(generator, validation_dataloader)

# Compute BLEU score using nltk
def compute_bleu_nltk(references, translations):
    references = [[ref.split()] for ref in references]
    translations = [trans.split() for trans in translations]
    bleu_score = nltk.translate.bleu_score.corpus_bleu(references, translations)
    return bleu_score

validation_bleu = compute_bleu_nltk([ref[0] for ref in validation_references], validation_translations)
print(f"Validation BLEU score: {validation_bleu * 100:.2f}")

[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
NLTK Done, onto Generator
LOADING DATASETS


Downloading readme:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

Resolving data files:   0%|          | 0/30 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/30 [00:00<?, ?files/s]

Downloading data:   0%|          | 0.00/475k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/536k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/40836715 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/3000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3003 [00:00<?, ? examples/s]

Loading dataset shards:   0%|          | 0/30 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/30 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

STARTED PREPROCESSING
train preprocess


Map (num_proc=6):   0%|          | 0/25000 [00:00<?, ? examples/s]

val preprocess


Map (num_proc=6):   0%|          | 0/1000 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [1/25], Batch [0/782], d_loss: 0.6378, g_loss: 11.4587
Epoch [1/25], Batch [10/782], d_loss: 0.1681, g_loss: 14.8389
Epoch [1/25], Batch [20/782], d_loss: 0.1636, g_loss: 16.0466
Epoch [1/25], Batch [30/782], d_loss: 0.1636, g_loss: 16.5142
Epoch [1/25], Batch [40/782], d_loss: 0.1635, g_loss: 16.5945
Epoch [1/25], Batch [50/782], d_loss: 0.1643, g_loss: 16.2232
Epoch [1/25], Batch [60/782], d_loss: 0.1633, g_loss: 15.7516
Epoch [1/25], Batch [70/782], d_loss: 0.1631, g_loss: 15.4637
Epoch [1/25], Batch [80/782], d_loss: 0.1853, g_loss: 15.0204
Epoch [1/25], Batch [90/782], d_loss: 0.1636, g_loss: 15.0946
Epoch [1/25], Batch [100/782], d_loss: 0.1636, g_loss: 15.0316
Epoch [1/25], Batch [110/782], d_loss: 0.1634, g_loss: 15.1075
Epoch [1/25], Batch [120/782], d_loss: 0.1633, g_loss: 15.1045
Epoch [1/25], Batch [130/782], d_loss: 0.1632, g_loss: 15.1249
Epoch [1/25], Batch [140/782], d_loss: 0.1631, g_loss: 15.0377
Epoch [1/25], Batch [150/782], d_loss: 0.1647, g_loss: 14.9379
Epo

  4%|▍         | 1/25 [20:44<8:17:44, 1244.36s/it]

Epoch [1/25], d_loss: 0.1627, g_loss: 14.8610
Epoch [2/25], Batch [0/782], d_loss: 0.1627, g_loss: 15.0112
Epoch [2/25], Batch [10/782], d_loss: 0.1627, g_loss: 14.9009
Epoch [2/25], Batch [20/782], d_loss: 0.1629, g_loss: 15.3122
Epoch [2/25], Batch [30/782], d_loss: 0.1626, g_loss: 15.5847
Epoch [2/25], Batch [40/782], d_loss: 0.1631, g_loss: 15.4855
Epoch [2/25], Batch [50/782], d_loss: 0.1628, g_loss: 15.6025
Epoch [2/25], Batch [60/782], d_loss: 0.1626, g_loss: 15.6235
Epoch [2/25], Batch [70/782], d_loss: 0.1626, g_loss: 15.8036
Epoch [2/25], Batch [80/782], d_loss: 0.1627, g_loss: 15.1490
Epoch [2/25], Batch [90/782], d_loss: 0.1631, g_loss: 15.4706
Epoch [2/25], Batch [100/782], d_loss: 0.1631, g_loss: 15.5771
Epoch [2/25], Batch [110/782], d_loss: 0.1627, g_loss: 15.6598
Epoch [2/25], Batch [120/782], d_loss: 0.1626, g_loss: 15.7361
Epoch [2/25], Batch [130/782], d_loss: 0.1626, g_loss: 15.6473
Epoch [2/25], Batch [140/782], d_loss: 0.1626, g_loss: 15.6478
Epoch [2/25], Batch 

  8%|▊         | 2/25 [41:29<7:57:14, 1244.96s/it]

Epoch [2/25], d_loss: 0.1627, g_loss: 13.9037
Epoch [3/25], Batch [0/782], d_loss: 0.1627, g_loss: 14.0146
Epoch [3/25], Batch [10/782], d_loss: 0.1628, g_loss: 15.1750
Epoch [3/25], Batch [20/782], d_loss: 0.1648, g_loss: 15.5036
Epoch [3/25], Batch [30/782], d_loss: 0.1635, g_loss: 15.5260
Epoch [3/25], Batch [40/782], d_loss: 0.1627, g_loss: 14.5016
Epoch [3/25], Batch [50/782], d_loss: 0.1629, g_loss: 13.9199
Epoch [3/25], Batch [60/782], d_loss: 0.1627, g_loss: 14.5861
Epoch [3/25], Batch [70/782], d_loss: 0.1635, g_loss: 14.9555
Epoch [3/25], Batch [80/782], d_loss: 0.1627, g_loss: 15.0909
Epoch [3/25], Batch [90/782], d_loss: 0.2006, g_loss: 15.2407
Epoch [3/25], Batch [100/782], d_loss: 0.2957, g_loss: 15.3258
Epoch [3/25], Batch [110/782], d_loss: 0.1626, g_loss: 14.4806
Epoch [3/25], Batch [120/782], d_loss: 0.1626, g_loss: 14.8532
Epoch [3/25], Batch [130/782], d_loss: 0.1628, g_loss: 14.8555
Epoch [3/25], Batch [140/782], d_loss: 0.2178, g_loss: 15.1804
Epoch [3/25], Batch 

 12%|█▏        | 3/25 [1:02:16<7:36:45, 1245.70s/it]

Epoch [3/25], d_loss: 0.1629, g_loss: 15.7413
Epoch [4/25], Batch [0/782], d_loss: 0.1982, g_loss: 15.2566
Epoch [4/25], Batch [10/782], d_loss: 0.1628, g_loss: 15.2261
Epoch [4/25], Batch [20/782], d_loss: 0.1627, g_loss: 15.4058
Epoch [4/25], Batch [30/782], d_loss: 0.1627, g_loss: 15.6458
Epoch [4/25], Batch [40/782], d_loss: 0.1627, g_loss: 15.5390
Epoch [4/25], Batch [50/782], d_loss: 0.1647, g_loss: 15.5135
Epoch [4/25], Batch [60/782], d_loss: 0.1627, g_loss: 15.5497
Epoch [4/25], Batch [70/782], d_loss: 0.1627, g_loss: 15.4149
Epoch [4/25], Batch [80/782], d_loss: 0.1626, g_loss: 15.3302
Epoch [4/25], Batch [90/782], d_loss: 0.1626, g_loss: 15.6943
Epoch [4/25], Batch [100/782], d_loss: 0.1627, g_loss: 15.5837
Epoch [4/25], Batch [110/782], d_loss: 0.1626, g_loss: 15.8377
Epoch [4/25], Batch [120/782], d_loss: 0.1626, g_loss: 15.9289
Epoch [4/25], Batch [130/782], d_loss: 0.1627, g_loss: 15.8637
Epoch [4/25], Batch [140/782], d_loss: 0.1968, g_loss: 15.7200
Epoch [4/25], Batch 

 16%|█▌        | 4/25 [1:23:03<7:16:08, 1246.11s/it]

Epoch [4/25], d_loss: 0.1626, g_loss: 15.7885
Epoch [5/25], Batch [0/782], d_loss: 0.1626, g_loss: 15.9830
Epoch [5/25], Batch [10/782], d_loss: 0.1626, g_loss: 15.9922
Epoch [5/25], Batch [20/782], d_loss: 0.1984, g_loss: 16.3270
Epoch [5/25], Batch [30/782], d_loss: 0.1627, g_loss: 16.1526
Epoch [5/25], Batch [40/782], d_loss: 0.1630, g_loss: 15.9334
Epoch [5/25], Batch [50/782], d_loss: 0.1629, g_loss: 16.0572
Epoch [5/25], Batch [60/782], d_loss: 0.1627, g_loss: 16.1908
Epoch [5/25], Batch [70/782], d_loss: 0.1626, g_loss: 15.9110
Epoch [5/25], Batch [80/782], d_loss: 0.1794, g_loss: 15.8076
Epoch [5/25], Batch [90/782], d_loss: 0.1626, g_loss: 15.7141
Epoch [5/25], Batch [100/782], d_loss: 0.1626, g_loss: 16.2241
Epoch [5/25], Batch [110/782], d_loss: 0.1626, g_loss: 15.8799
Epoch [5/25], Batch [120/782], d_loss: 0.1626, g_loss: 15.7789
Epoch [5/25], Batch [130/782], d_loss: 0.1626, g_loss: 16.1139
Epoch [5/25], Batch [140/782], d_loss: 0.1626, g_loss: 16.2545
Epoch [5/25], Batch 

 20%|██        | 5/25 [1:43:49<6:55:26, 1246.33s/it]

Epoch [5/25], d_loss: 0.1710, g_loss: 9.3631
Epoch [6/25], Batch [0/782], d_loss: 0.1696, g_loss: 9.7354
Epoch [6/25], Batch [10/782], d_loss: 0.1970, g_loss: 10.4672
Epoch [6/25], Batch [20/782], d_loss: 0.2514, g_loss: 10.9974
Epoch [6/25], Batch [30/782], d_loss: 0.2413, g_loss: 10.4074
Epoch [6/25], Batch [40/782], d_loss: 0.2318, g_loss: 9.6020
Epoch [6/25], Batch [50/782], d_loss: 0.1774, g_loss: 9.0151
Epoch [6/25], Batch [60/782], d_loss: 0.2896, g_loss: 8.3007
Epoch [6/25], Batch [70/782], d_loss: 0.9064, g_loss: 8.3554
Epoch [6/25], Batch [80/782], d_loss: 0.5800, g_loss: 6.1905
Epoch [6/25], Batch [90/782], d_loss: 0.3209, g_loss: 7.5751
Epoch [6/25], Batch [100/782], d_loss: 0.2399, g_loss: 8.6393
Epoch [6/25], Batch [110/782], d_loss: 0.2581, g_loss: 10.3398
Epoch [6/25], Batch [120/782], d_loss: 0.3545, g_loss: 10.1163
Epoch [6/25], Batch [130/782], d_loss: 0.2636, g_loss: 9.7880
Epoch [6/25], Batch [140/782], d_loss: 0.3219, g_loss: 9.5327
Epoch [6/25], Batch [150/782], 

 24%|██▍       | 6/25 [2:04:36<6:34:42, 1246.45s/it]

Epoch [6/25], d_loss: 0.6687, g_loss: 6.6658
Epoch [7/25], Batch [0/782], d_loss: 0.6803, g_loss: 6.0243
Epoch [7/25], Batch [10/782], d_loss: 0.6957, g_loss: 6.2638
Epoch [7/25], Batch [20/782], d_loss: 0.6795, g_loss: 5.8216
Epoch [7/25], Batch [30/782], d_loss: 0.6987, g_loss: 5.7882
Epoch [7/25], Batch [40/782], d_loss: 0.6946, g_loss: 5.9667
Epoch [7/25], Batch [50/782], d_loss: 0.6860, g_loss: 5.9365
Epoch [7/25], Batch [60/782], d_loss: 0.7036, g_loss: 5.7223
Epoch [7/25], Batch [70/782], d_loss: 0.6858, g_loss: 5.9789
Epoch [7/25], Batch [80/782], d_loss: 0.6958, g_loss: 5.7837
Epoch [7/25], Batch [90/782], d_loss: 0.6926, g_loss: 5.8951
Epoch [7/25], Batch [100/782], d_loss: 0.6920, g_loss: 5.7265
Epoch [7/25], Batch [110/782], d_loss: 0.7051, g_loss: 5.8066
Epoch [7/25], Batch [120/782], d_loss: 0.7053, g_loss: 5.9165
Epoch [7/25], Batch [130/782], d_loss: 0.6795, g_loss: 5.8308
Epoch [7/25], Batch [140/782], d_loss: 0.6882, g_loss: 5.9216
Epoch [7/25], Batch [150/782], d_los

 28%|██▊       | 7/25 [2:25:23<6:13:59, 1246.64s/it]

Epoch [7/25], d_loss: 0.7009, g_loss: 5.8279
Epoch [8/25], Batch [0/782], d_loss: 0.6847, g_loss: 5.5342
Epoch [8/25], Batch [10/782], d_loss: 0.6775, g_loss: 5.6205
Epoch [8/25], Batch [20/782], d_loss: 0.6890, g_loss: 5.6806
Epoch [8/25], Batch [30/782], d_loss: 0.6921, g_loss: 5.5184
Epoch [8/25], Batch [40/782], d_loss: 0.6970, g_loss: 5.6647
Epoch [8/25], Batch [50/782], d_loss: 0.6912, g_loss: 5.5684
Epoch [8/25], Batch [60/782], d_loss: 0.6859, g_loss: 5.8948
Epoch [8/25], Batch [70/782], d_loss: 0.6932, g_loss: 5.9474
Epoch [8/25], Batch [80/782], d_loss: 0.6971, g_loss: 5.7428
Epoch [8/25], Batch [90/782], d_loss: 0.6823, g_loss: 5.6412
Epoch [8/25], Batch [100/782], d_loss: 0.6857, g_loss: 5.4543
Epoch [8/25], Batch [110/782], d_loss: 0.6929, g_loss: 5.6092
Epoch [8/25], Batch [120/782], d_loss: 0.6915, g_loss: 5.7355
Epoch [8/25], Batch [130/782], d_loss: 0.6867, g_loss: 5.4387
Epoch [8/25], Batch [140/782], d_loss: 0.6902, g_loss: 5.6789
Epoch [8/25], Batch [150/782], d_los

 32%|███▏      | 8/25 [2:46:10<5:53:14, 1246.75s/it]

Epoch [8/25], d_loss: 0.6952, g_loss: 5.2934
Epoch [9/25], Batch [0/782], d_loss: 0.7035, g_loss: 5.3695
Epoch [9/25], Batch [10/782], d_loss: 0.6895, g_loss: 5.3651
Epoch [9/25], Batch [20/782], d_loss: 0.6924, g_loss: 5.4011
Epoch [9/25], Batch [30/782], d_loss: 0.6879, g_loss: 5.3442
Epoch [9/25], Batch [40/782], d_loss: 0.6924, g_loss: 5.3504
Epoch [9/25], Batch [50/782], d_loss: 0.6915, g_loss: 5.5213
Epoch [9/25], Batch [60/782], d_loss: 0.6933, g_loss: 5.5363
Epoch [9/25], Batch [70/782], d_loss: 0.6942, g_loss: 5.8250
Epoch [9/25], Batch [80/782], d_loss: 0.6918, g_loss: 5.7079
Epoch [9/25], Batch [90/782], d_loss: 0.6882, g_loss: 5.4824
Epoch [9/25], Batch [100/782], d_loss: 0.6942, g_loss: 5.7410
Epoch [9/25], Batch [110/782], d_loss: 0.6862, g_loss: 5.2892
Epoch [9/25], Batch [120/782], d_loss: 0.6873, g_loss: 5.1260
Epoch [9/25], Batch [130/782], d_loss: 0.6861, g_loss: 5.3891
Epoch [9/25], Batch [140/782], d_loss: 0.6958, g_loss: 5.3741
Epoch [9/25], Batch [150/782], d_los

 36%|███▌      | 9/25 [3:06:57<5:32:29, 1246.85s/it]

Epoch [9/25], d_loss: 0.6882, g_loss: 5.0225
Epoch [10/25], Batch [0/782], d_loss: 0.6820, g_loss: 5.5020
Epoch [10/25], Batch [10/782], d_loss: 0.6887, g_loss: 5.5646
Epoch [10/25], Batch [20/782], d_loss: 0.6925, g_loss: 5.2000
Epoch [10/25], Batch [30/782], d_loss: 0.6832, g_loss: 5.2611
Epoch [10/25], Batch [40/782], d_loss: 0.6896, g_loss: 5.4253
Epoch [10/25], Batch [50/782], d_loss: 0.6942, g_loss: 5.4314
Epoch [10/25], Batch [60/782], d_loss: 0.6841, g_loss: 5.4033
Epoch [10/25], Batch [70/782], d_loss: 0.6842, g_loss: 5.2951
Epoch [10/25], Batch [80/782], d_loss: 0.6915, g_loss: 5.5411
Epoch [10/25], Batch [90/782], d_loss: 0.6835, g_loss: 5.3591
Epoch [10/25], Batch [100/782], d_loss: 0.6883, g_loss: 5.5947
Epoch [10/25], Batch [110/782], d_loss: 0.6828, g_loss: 5.6787
Epoch [10/25], Batch [120/782], d_loss: 0.6884, g_loss: 5.5268
Epoch [10/25], Batch [130/782], d_loss: 0.6882, g_loss: 5.3084
Epoch [10/25], Batch [140/782], d_loss: 0.6832, g_loss: 5.3430
Epoch [10/25], Batch 

 40%|████      | 10/25 [3:27:44<5:11:43, 1246.89s/it]

Epoch [10/25], d_loss: 0.6929, g_loss: 5.7630
Epoch [11/25], Batch [0/782], d_loss: 0.6921, g_loss: 5.2676
Epoch [11/25], Batch [10/782], d_loss: 0.6807, g_loss: 5.3772
Epoch [11/25], Batch [20/782], d_loss: 0.6840, g_loss: 5.3236
Epoch [11/25], Batch [30/782], d_loss: 0.6904, g_loss: 5.2385
Epoch [11/25], Batch [40/782], d_loss: 0.6820, g_loss: 5.3529
Epoch [11/25], Batch [50/782], d_loss: 0.6891, g_loss: 5.1890
Epoch [11/25], Batch [60/782], d_loss: 0.6908, g_loss: 5.3920
Epoch [11/25], Batch [70/782], d_loss: 0.6884, g_loss: 5.1865
Epoch [11/25], Batch [80/782], d_loss: 0.6892, g_loss: 5.2960
Epoch [11/25], Batch [90/782], d_loss: 0.6896, g_loss: 5.4612
Epoch [11/25], Batch [100/782], d_loss: 0.6862, g_loss: 5.2794
Epoch [11/25], Batch [110/782], d_loss: 0.6917, g_loss: 5.5332
Epoch [11/25], Batch [120/782], d_loss: 0.6869, g_loss: 5.4655
Epoch [11/25], Batch [130/782], d_loss: 0.6904, g_loss: 5.3611
Epoch [11/25], Batch [140/782], d_loss: 0.6901, g_loss: 5.1785
Epoch [11/25], Batch

 44%|████▍     | 11/25 [3:48:31<4:50:55, 1246.84s/it]

Epoch [11/25], d_loss: 0.6857, g_loss: 5.5073
Epoch [12/25], Batch [0/782], d_loss: 0.6889, g_loss: 5.6663
Epoch [12/25], Batch [10/782], d_loss: 0.6800, g_loss: 5.3357
Epoch [12/25], Batch [20/782], d_loss: 0.6906, g_loss: 5.5239
Epoch [12/25], Batch [30/782], d_loss: 0.6942, g_loss: 5.2949
Epoch [12/25], Batch [40/782], d_loss: 0.6899, g_loss: 5.5614
Epoch [12/25], Batch [50/782], d_loss: 0.6852, g_loss: 5.3119
Epoch [12/25], Batch [60/782], d_loss: 0.6907, g_loss: 5.5466
Epoch [12/25], Batch [70/782], d_loss: 0.6874, g_loss: 5.5098
Epoch [12/25], Batch [80/782], d_loss: 0.6855, g_loss: 5.4140
Epoch [12/25], Batch [90/782], d_loss: 0.6857, g_loss: 5.5225
Epoch [12/25], Batch [100/782], d_loss: 0.6831, g_loss: 5.4199
Epoch [12/25], Batch [110/782], d_loss: 0.6916, g_loss: 5.3685
Epoch [12/25], Batch [120/782], d_loss: 0.6891, g_loss: 5.2332
Epoch [12/25], Batch [130/782], d_loss: 0.6906, g_loss: 5.2994
Epoch [12/25], Batch [140/782], d_loss: 0.6852, g_loss: 5.1975
Epoch [12/25], Batch

 48%|████▊     | 12/25 [4:09:17<4:30:08, 1246.80s/it]

Epoch [12/25], d_loss: 0.6880, g_loss: 5.0741
Epoch [13/25], Batch [0/782], d_loss: 0.6862, g_loss: 5.0934
Epoch [13/25], Batch [10/782], d_loss: 0.6869, g_loss: 5.2109
Epoch [13/25], Batch [20/782], d_loss: 0.6902, g_loss: 5.6302
Epoch [13/25], Batch [30/782], d_loss: 0.6896, g_loss: 5.1296
Epoch [13/25], Batch [40/782], d_loss: 0.6855, g_loss: 5.5170
Epoch [13/25], Batch [50/782], d_loss: 0.6888, g_loss: 5.1924
Epoch [13/25], Batch [60/782], d_loss: 0.6861, g_loss: 4.9926
Epoch [13/25], Batch [70/782], d_loss: 0.6862, g_loss: 5.2848
Epoch [13/25], Batch [80/782], d_loss: 0.6908, g_loss: 5.6227
Epoch [13/25], Batch [90/782], d_loss: 0.6908, g_loss: 5.3714
Epoch [13/25], Batch [100/782], d_loss: 0.6913, g_loss: 5.2659
Epoch [13/25], Batch [110/782], d_loss: 0.6946, g_loss: 4.7219
Epoch [13/25], Batch [120/782], d_loss: 0.6885, g_loss: 5.2760
Epoch [13/25], Batch [130/782], d_loss: 0.6835, g_loss: 5.2059
Epoch [13/25], Batch [140/782], d_loss: 0.6871, g_loss: 5.4958
Epoch [13/25], Batch

 52%|█████▏    | 13/25 [4:30:04<4:09:20, 1246.67s/it]

Epoch [13/25], d_loss: 0.6811, g_loss: 5.0869
Epoch [14/25], Batch [0/782], d_loss: 0.6922, g_loss: 5.5863
Epoch [14/25], Batch [10/782], d_loss: 0.6885, g_loss: 5.3477
Epoch [14/25], Batch [20/782], d_loss: 0.6842, g_loss: 5.2985
Epoch [14/25], Batch [30/782], d_loss: 0.6927, g_loss: 5.4015
Epoch [14/25], Batch [40/782], d_loss: 0.6935, g_loss: 5.0331
Epoch [14/25], Batch [50/782], d_loss: 0.6791, g_loss: 5.3705
Epoch [14/25], Batch [60/782], d_loss: 0.6917, g_loss: 5.1606
Epoch [14/25], Batch [70/782], d_loss: 0.6867, g_loss: 5.2770
Epoch [14/25], Batch [80/782], d_loss: 0.6896, g_loss: 5.3614
Epoch [14/25], Batch [90/782], d_loss: 0.6925, g_loss: 5.3811
Epoch [14/25], Batch [100/782], d_loss: 0.6927, g_loss: 5.0713
Epoch [14/25], Batch [110/782], d_loss: 0.6935, g_loss: 4.7475
Epoch [14/25], Batch [120/782], d_loss: 0.6806, g_loss: 5.3343
Epoch [14/25], Batch [130/782], d_loss: 0.6837, g_loss: 4.9515
Epoch [14/25], Batch [140/782], d_loss: 0.6924, g_loss: 4.8719
Epoch [14/25], Batch

 56%|█████▌    | 14/25 [4:50:50<3:48:30, 1246.43s/it]

Epoch [14/25], d_loss: 0.6849, g_loss: 5.5024
Epoch [15/25], Batch [0/782], d_loss: 0.6904, g_loss: 4.9541
Epoch [15/25], Batch [10/782], d_loss: 0.6905, g_loss: 5.3822
Epoch [15/25], Batch [20/782], d_loss: 0.6846, g_loss: 5.1732
Epoch [15/25], Batch [30/782], d_loss: 0.6981, g_loss: 4.8482
Epoch [15/25], Batch [40/782], d_loss: 0.6849, g_loss: 5.1306
Epoch [15/25], Batch [50/782], d_loss: 0.6921, g_loss: 4.9534
Epoch [15/25], Batch [60/782], d_loss: 0.6843, g_loss: 5.2985
Epoch [15/25], Batch [70/782], d_loss: 0.6954, g_loss: 5.1020
Epoch [15/25], Batch [80/782], d_loss: 0.6979, g_loss: 5.1123
Epoch [15/25], Batch [90/782], d_loss: 0.6938, g_loss: 5.4877
Epoch [15/25], Batch [100/782], d_loss: 0.6926, g_loss: 5.3332
Epoch [15/25], Batch [110/782], d_loss: 0.6928, g_loss: 4.9927
Epoch [15/25], Batch [120/782], d_loss: 0.6890, g_loss: 5.0376
Epoch [15/25], Batch [130/782], d_loss: 0.6896, g_loss: 5.0162
Epoch [15/25], Batch [140/782], d_loss: 0.6887, g_loss: 5.6543
Epoch [15/25], Batch

 60%|██████    | 15/25 [5:11:36<3:27:42, 1246.24s/it]

Epoch [15/25], d_loss: 0.6728, g_loss: 5.3043
Epoch [16/25], Batch [0/782], d_loss: 0.6961, g_loss: 5.2047
Epoch [16/25], Batch [10/782], d_loss: 0.6791, g_loss: 5.5298
Epoch [16/25], Batch [20/782], d_loss: 0.6925, g_loss: 5.2692
Epoch [16/25], Batch [30/782], d_loss: 0.6907, g_loss: 5.0757
Epoch [16/25], Batch [40/782], d_loss: 0.6937, g_loss: 4.9830
Epoch [16/25], Batch [50/782], d_loss: 0.6890, g_loss: 5.2974
Epoch [16/25], Batch [60/782], d_loss: 0.6952, g_loss: 4.9837
Epoch [16/25], Batch [70/782], d_loss: 0.6895, g_loss: 5.3043
Epoch [16/25], Batch [80/782], d_loss: 0.6883, g_loss: 5.0972
Epoch [16/25], Batch [90/782], d_loss: 0.6867, g_loss: 5.2753
Epoch [16/25], Batch [100/782], d_loss: 0.6878, g_loss: 5.4173
Epoch [16/25], Batch [110/782], d_loss: 0.6866, g_loss: 4.9976
Epoch [16/25], Batch [120/782], d_loss: 0.6763, g_loss: 5.1967
Epoch [16/25], Batch [130/782], d_loss: 0.4569, g_loss: 5.4018
Epoch [16/25], Batch [140/782], d_loss: 0.3353, g_loss: 5.8280
Epoch [16/25], Batch

 64%|██████▍   | 16/25 [5:32:21<3:06:55, 1246.13s/it]

Epoch [16/25], d_loss: 0.6936, g_loss: 5.2013
Epoch [17/25], Batch [0/782], d_loss: 0.6893, g_loss: 5.5011
Epoch [17/25], Batch [10/782], d_loss: 0.6802, g_loss: 5.4411
Epoch [17/25], Batch [20/782], d_loss: 0.6805, g_loss: 5.3942
Epoch [17/25], Batch [30/782], d_loss: 0.7048, g_loss: 5.0422
Epoch [17/25], Batch [40/782], d_loss: 0.6800, g_loss: 5.4346
Epoch [17/25], Batch [50/782], d_loss: 0.6844, g_loss: 5.7000
Epoch [17/25], Batch [60/782], d_loss: 0.6968, g_loss: 5.4313
Epoch [17/25], Batch [70/782], d_loss: 0.6904, g_loss: 5.5357
Epoch [17/25], Batch [80/782], d_loss: 0.6940, g_loss: 5.2878
Epoch [17/25], Batch [90/782], d_loss: 0.6848, g_loss: 5.3791
Epoch [17/25], Batch [100/782], d_loss: 0.6935, g_loss: 5.2686
Epoch [17/25], Batch [110/782], d_loss: 0.6781, g_loss: 5.5002
Epoch [17/25], Batch [120/782], d_loss: 0.6740, g_loss: 5.3063
Epoch [17/25], Batch [130/782], d_loss: 0.6822, g_loss: 5.5424
Epoch [17/25], Batch [140/782], d_loss: 0.6831, g_loss: 5.7169
Epoch [17/25], Batch

 68%|██████▊   | 17/25 [5:53:07<2:46:08, 1246.08s/it]

Epoch [17/25], d_loss: 0.6649, g_loss: 5.7101
Epoch [18/25], Batch [0/782], d_loss: 0.7004, g_loss: 4.9608
Epoch [18/25], Batch [10/782], d_loss: 0.6919, g_loss: 5.3093
Epoch [18/25], Batch [20/782], d_loss: 0.7021, g_loss: 5.2107
Epoch [18/25], Batch [30/782], d_loss: 0.6817, g_loss: 4.9459
Epoch [18/25], Batch [40/782], d_loss: 0.7020, g_loss: 4.8379
Epoch [18/25], Batch [50/782], d_loss: 0.6947, g_loss: 5.3484
Epoch [18/25], Batch [60/782], d_loss: 0.6863, g_loss: 4.9865
Epoch [18/25], Batch [70/782], d_loss: 0.6874, g_loss: 5.1096
Epoch [18/25], Batch [80/782], d_loss: 0.6864, g_loss: 5.1473
Epoch [18/25], Batch [90/782], d_loss: 0.6880, g_loss: 4.8956
Epoch [18/25], Batch [100/782], d_loss: 0.6832, g_loss: 5.1290
Epoch [18/25], Batch [110/782], d_loss: 0.7051, g_loss: 5.1983
Epoch [18/25], Batch [120/782], d_loss: 0.6970, g_loss: 5.0897
Epoch [18/25], Batch [130/782], d_loss: 0.6766, g_loss: 5.1341
Epoch [18/25], Batch [140/782], d_loss: 0.6963, g_loss: 5.1970
Epoch [18/25], Batch

 72%|███████▏  | 18/25 [6:13:54<2:25:22, 1246.11s/it]

Epoch [18/25], d_loss: 0.6990, g_loss: 5.0041
Epoch [19/25], Batch [0/782], d_loss: 0.6976, g_loss: 4.9211
Epoch [19/25], Batch [10/782], d_loss: 0.6903, g_loss: 4.9935
Epoch [19/25], Batch [20/782], d_loss: 0.6933, g_loss: 4.9362
Epoch [19/25], Batch [30/782], d_loss: 0.6903, g_loss: 5.2091
Epoch [19/25], Batch [40/782], d_loss: 0.6841, g_loss: 4.7159
Epoch [19/25], Batch [50/782], d_loss: 0.6847, g_loss: 4.8666
Epoch [19/25], Batch [60/782], d_loss: 0.6961, g_loss: 4.8881
Epoch [19/25], Batch [70/782], d_loss: 0.6880, g_loss: 4.9389
Epoch [19/25], Batch [80/782], d_loss: 0.6853, g_loss: 4.8272
Epoch [19/25], Batch [90/782], d_loss: 0.6804, g_loss: 4.9784
Epoch [19/25], Batch [100/782], d_loss: 0.6817, g_loss: 4.9379
Epoch [19/25], Batch [110/782], d_loss: 0.6830, g_loss: 5.0263
Epoch [19/25], Batch [120/782], d_loss: 0.6937, g_loss: 4.8862
Epoch [19/25], Batch [130/782], d_loss: 0.6947, g_loss: 4.9115
Epoch [19/25], Batch [140/782], d_loss: 0.6924, g_loss: 4.7433
Epoch [19/25], Batch

 76%|███████▌  | 19/25 [6:34:40<2:04:37, 1246.19s/it]

Epoch [19/25], d_loss: 0.6815, g_loss: 4.9283
Epoch [20/25], Batch [0/782], d_loss: 0.6820, g_loss: 4.7298
Epoch [20/25], Batch [10/782], d_loss: 0.6864, g_loss: 4.9270
Epoch [20/25], Batch [20/782], d_loss: 0.6943, g_loss: 4.8901
Epoch [20/25], Batch [30/782], d_loss: 0.6918, g_loss: 4.7121
Epoch [20/25], Batch [40/782], d_loss: 0.6854, g_loss: 4.5959
Epoch [20/25], Batch [50/782], d_loss: 0.6915, g_loss: 4.7025
Epoch [20/25], Batch [60/782], d_loss: 0.6825, g_loss: 4.8874
Epoch [20/25], Batch [70/782], d_loss: 0.6895, g_loss: 4.9229
Epoch [20/25], Batch [80/782], d_loss: 0.6891, g_loss: 4.5100
Epoch [20/25], Batch [90/782], d_loss: 0.6925, g_loss: 4.8926
Epoch [20/25], Batch [100/782], d_loss: 0.6884, g_loss: 4.7986
Epoch [20/25], Batch [110/782], d_loss: 0.6864, g_loss: 4.6455
Epoch [20/25], Batch [120/782], d_loss: 0.6831, g_loss: 5.0458
Epoch [20/25], Batch [130/782], d_loss: 0.6992, g_loss: 4.8529
Epoch [20/25], Batch [140/782], d_loss: 0.6914, g_loss: 4.8332
Epoch [20/25], Batch

 80%|████████  | 20/25 [6:55:27<1:43:52, 1246.41s/it]

Epoch [20/25], d_loss: 0.6853, g_loss: 4.5292
Epoch [21/25], Batch [0/782], d_loss: 0.6912, g_loss: 4.9757
Epoch [21/25], Batch [10/782], d_loss: 0.6876, g_loss: 4.3684
Epoch [21/25], Batch [20/782], d_loss: 0.6885, g_loss: 4.8178
Epoch [21/25], Batch [30/782], d_loss: 0.6817, g_loss: 4.4905
Epoch [21/25], Batch [40/782], d_loss: 0.6871, g_loss: 4.5598
Epoch [21/25], Batch [50/782], d_loss: 0.6887, g_loss: 4.6743
Epoch [21/25], Batch [60/782], d_loss: 0.6859, g_loss: 4.6635
Epoch [21/25], Batch [70/782], d_loss: 0.6848, g_loss: 4.9874
Epoch [21/25], Batch [80/782], d_loss: 0.6949, g_loss: 4.7613
Epoch [21/25], Batch [90/782], d_loss: 0.6909, g_loss: 4.6253
Epoch [21/25], Batch [100/782], d_loss: 0.6916, g_loss: 4.5496
Epoch [21/25], Batch [110/782], d_loss: 0.6858, g_loss: 4.5825
Epoch [21/25], Batch [120/782], d_loss: 0.6937, g_loss: 4.6044
Epoch [21/25], Batch [130/782], d_loss: 0.6838, g_loss: 4.7275
Epoch [21/25], Batch [140/782], d_loss: 0.6899, g_loss: 4.7740
Epoch [21/25], Batch

 84%|████████▍ | 21/25 [7:16:14<1:23:06, 1246.57s/it]

Epoch [21/25], d_loss: 0.6857, g_loss: 4.3038
Epoch [22/25], Batch [0/782], d_loss: 0.6913, g_loss: 4.3907
Epoch [22/25], Batch [10/782], d_loss: 0.6858, g_loss: 5.0731
Epoch [22/25], Batch [20/782], d_loss: 0.6875, g_loss: 5.0841
Epoch [22/25], Batch [30/782], d_loss: 0.6889, g_loss: 4.3838
Epoch [22/25], Batch [40/782], d_loss: 0.6876, g_loss: 4.9071
Epoch [22/25], Batch [50/782], d_loss: 0.6887, g_loss: 4.7051
Epoch [22/25], Batch [60/782], d_loss: 0.7001, g_loss: 4.5908
Epoch [22/25], Batch [70/782], d_loss: 0.6834, g_loss: 4.5985
Epoch [22/25], Batch [80/782], d_loss: 0.6897, g_loss: 4.6066
Epoch [22/25], Batch [90/782], d_loss: 0.6925, g_loss: 4.5364
Epoch [22/25], Batch [100/782], d_loss: 0.6836, g_loss: 4.8202
Epoch [22/25], Batch [110/782], d_loss: 0.6795, g_loss: 4.7990
Epoch [22/25], Batch [120/782], d_loss: 0.6857, g_loss: 4.6124
Epoch [22/25], Batch [130/782], d_loss: 0.6869, g_loss: 4.5394
Epoch [22/25], Batch [140/782], d_loss: 0.6883, g_loss: 4.2766
Epoch [22/25], Batch

 88%|████████▊ | 22/25 [7:37:01<1:02:20, 1246.69s/it]

Epoch [22/25], d_loss: 0.6913, g_loss: 4.4545
Epoch [23/25], Batch [0/782], d_loss: 0.6880, g_loss: 4.7885
Epoch [23/25], Batch [10/782], d_loss: 0.6853, g_loss: 4.6183
Epoch [23/25], Batch [20/782], d_loss: 0.6893, g_loss: 4.6966
Epoch [23/25], Batch [30/782], d_loss: 0.6840, g_loss: 4.5124
Epoch [23/25], Batch [40/782], d_loss: 0.6936, g_loss: 4.4252
Epoch [23/25], Batch [50/782], d_loss: 0.6947, g_loss: 4.6193
Epoch [23/25], Batch [60/782], d_loss: 0.6877, g_loss: 4.5732
Epoch [23/25], Batch [70/782], d_loss: 0.6960, g_loss: 4.4276
Epoch [23/25], Batch [80/782], d_loss: 0.6879, g_loss: 4.6814
Epoch [23/25], Batch [90/782], d_loss: 0.6866, g_loss: 4.7852
Epoch [23/25], Batch [100/782], d_loss: 0.6918, g_loss: 4.5666
Epoch [23/25], Batch [110/782], d_loss: 0.6902, g_loss: 4.6700
Epoch [23/25], Batch [120/782], d_loss: 0.6844, g_loss: 4.6315
Epoch [23/25], Batch [130/782], d_loss: 0.6803, g_loss: 4.7105
Epoch [23/25], Batch [140/782], d_loss: 0.6872, g_loss: 4.5837
Epoch [23/25], Batch

 92%|█████████▏| 23/25 [7:57:48<41:33, 1246.81s/it]  

Epoch [23/25], d_loss: 0.6959, g_loss: 5.1624
Epoch [24/25], Batch [0/782], d_loss: 0.6897, g_loss: 4.3940
Epoch [24/25], Batch [10/782], d_loss: 0.6832, g_loss: 4.5055
Epoch [24/25], Batch [20/782], d_loss: 0.6866, g_loss: 4.6276
Epoch [24/25], Batch [30/782], d_loss: 0.6855, g_loss: 4.6353
Epoch [24/25], Batch [40/782], d_loss: 0.6883, g_loss: 4.6035
Epoch [24/25], Batch [50/782], d_loss: 0.6888, g_loss: 4.8289
Epoch [24/25], Batch [60/782], d_loss: 0.6897, g_loss: 4.4736
Epoch [24/25], Batch [70/782], d_loss: 0.6867, g_loss: 4.5488
Epoch [24/25], Batch [80/782], d_loss: 0.6896, g_loss: 4.5619
Epoch [24/25], Batch [90/782], d_loss: 0.6844, g_loss: 4.5944
Epoch [24/25], Batch [100/782], d_loss: 0.6841, g_loss: 4.3223
Epoch [24/25], Batch [110/782], d_loss: 0.6884, g_loss: 4.5028
Epoch [24/25], Batch [120/782], d_loss: 0.6940, g_loss: 4.7504
Epoch [24/25], Batch [130/782], d_loss: 0.6915, g_loss: 4.6766
Epoch [24/25], Batch [140/782], d_loss: 0.6878, g_loss: 4.5911
Epoch [24/25], Batch

 96%|█████████▌| 24/25 [8:18:35<20:46, 1246.86s/it]

Epoch [24/25], d_loss: 0.6838, g_loss: 4.4485
Epoch [25/25], Batch [0/782], d_loss: 0.6938, g_loss: 4.5828
Epoch [25/25], Batch [10/782], d_loss: 0.6918, g_loss: 4.4385
Epoch [25/25], Batch [20/782], d_loss: 0.6924, g_loss: 4.5706
Epoch [25/25], Batch [30/782], d_loss: 0.6942, g_loss: 4.2816
Epoch [25/25], Batch [40/782], d_loss: 0.6924, g_loss: 4.4249
Epoch [25/25], Batch [50/782], d_loss: 0.6827, g_loss: 4.8922
Epoch [25/25], Batch [60/782], d_loss: 0.6873, g_loss: 4.4385
Epoch [25/25], Batch [70/782], d_loss: 0.6866, g_loss: 4.5854
Epoch [25/25], Batch [80/782], d_loss: 0.6915, g_loss: 4.5478
Epoch [25/25], Batch [90/782], d_loss: 0.6800, g_loss: 4.6453
Epoch [25/25], Batch [100/782], d_loss: 0.6905, g_loss: 4.6734
Epoch [25/25], Batch [110/782], d_loss: 0.6864, g_loss: 4.2387
Epoch [25/25], Batch [120/782], d_loss: 0.6910, g_loss: 4.4242
Epoch [25/25], Batch [130/782], d_loss: 0.6865, g_loss: 4.6170
Epoch [25/25], Batch [140/782], d_loss: 0.6888, g_loss: 4.5703
Epoch [25/25], Batch

100%|██████████| 25/25 [8:39:22<00:00, 1246.48s/it]

Epoch [25/25], d_loss: 0.6920, g_loss: 4.5105





Validation BLEU score: 0.41
