In [None]:
!pip install transformers
import pandas as pd
import torch
import io
import torch.nn.functional as F
import random
import numpy as np
import time
import math
import datetime
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer, AutoConfig
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
# Seed every random number generator the notebook touches (PyTorch CPU, NumPy,
# Python's `random`, and all CUDA devices when present) with the same value.
seed_val = 40
torch.manual_seed(seed_val)
np.random.seed(seed_val)
random.seed(seed_val)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed_val)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

# Import the AutoModelWithLMHead class
from transformers import AutoModelWithLMHead


In [None]:
# Load the pretrained GPT-2 checkpoint with a language-modelling head.
# NOTE(review): `AutoModelWithLMHead` is deprecated in transformers (for GPT-2 the
# replacement is `AutoModelForCausalLM`), and `model` is not referenced by any other
# cell visible in this notebook -- confirm whether this load is still needed.
model = AutoModelWithLMHead.from_pretrained('gpt2')
# Shell command: install the `sacremoses` package into the environment.
# NOTE(review): the install is unpinned and runs after the model load; nothing in the
# visible cells imports sacremoses -- confirm it is required.
!pip install sacremoses


In [None]:
import pandas as pd

# Location of the input CSV. This is a placeholder: it must be set before running,
# because pd.read_csv cannot read an empty path.
path=""    #specify dataset path
# Load the full corpus. Later cells read its 'text' and 'generated' columns.
data=pd.read_csv(path)
# Write a copy of the loaded frame (without the index) to the working directory.
data.to_csv('data.csv', index=False)
# Preview the first rows.
data.head()



In [None]:


# Split the corpus by the "generated" flag: rows with 1 go to `generated_data_new`,
# rows with 0 go to `human_data_new`.
generated_data_new = data[data['generated'] == 1]
human_data_new = data[data['generated'] == 0]

# Draw the 10,000 human essays used for training. The sampled rows keep their
# original index labels until the test split below has been taken, so that the
# same rows can be excluded from it.
human_train_rows = human_data_new.sample(n=10000)
human_data_new_train = human_train_rows.reset_index(drop=True)

# Take the 10 test essays only from rows that were NOT sampled for training.
# (A fixed positional slice of `human_data_new` could overlap the random training
# sample and leak training essays into the test set.)
human_held_out = human_data_new.drop(index=human_train_rows.index)
if len(human_held_out) < 10:
    print('WARNING: only %d human essays are left outside the training sample.' % len(human_held_out))
human_data_new_test = human_held_out.iloc[:10, :].reset_index(drop=True)

# Save the two halves of the corpus to separate files.
generated_data_new.to_csv('generated_data.csv', index=False)
human_data_new.to_csv('human_data.csv', index=False)

# Quick sanity check of the training sample.
print(human_data_new_train.shape)
print(human_data_new_train.head())


In [None]:
# Random 18,000-row sample of the full corpus (both classes). The sampled rows keep
# the index labels they had in `data`. This frame is tokenised and used by the
# supervised discriminator training cell further down.
data_new=data.sample(n=18000)
# Persist the sample (without the index) so the exact rows can be inspected later.
data_new.to_csv('data_new.csv',index=False)

In [None]:
# Select the compute device: CUDA when a GPU is available, otherwise the CPU.
if torch.cuda.is_available():
    # Tell PyTorch to use the GPU.
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    # Use a torch.device here as well (not the bare string 'cpu') so that `device`
    # has the same type on both branches and in the later cells that rebuild it.
    device = torch.device("cpu")

In [None]:
#--------------------------------
#  Transformer parameters
#--------------------------------
# Token length every example is padded/truncated to by generate_data_loader.
# Capped at 512, the number of position embeddings of BERT-base checkpoints. The
# previous value (9160) padded every row far beyond what the encoder can consume,
# and the GAN cell slices its batches down to a shorter length anyway.
max_seq_length = 512
batch_size = 32     # number of samples passed through the network at one time

#--------------------------------
#  GAN-BERT specific parameters
#--------------------------------
# number of hidden layers in the generator,
# each of the size of the output space
# (the MLP generator itself is commented out in the visible cells)
num_hidden_layers_g = 1
# number of hidden layers in the discriminator,
# each of the size of the input space
num_hidden_layers_d = 1
# size of the generator's input noisy vectors
noise_size = 100
# dropout probability passed to the Discriminator (randomly zeroes activations
# during training)
out_dropout_rate = 0.2

# Replicate labeled data to balance poorly represented datasets,
# e.g., less than 1% of labeled material
apply_balance = True

#--------------------------------
#  Optimization parameters
#--------------------------------
# NOTE(review): the training cells visible in this notebook define their own `lr`
# and do not read these two learning rates -- confirm which value is intended.
learning_rate_discriminator = 5e-5
learning_rate_generator = 5e-5
# Small constant for numerical stability; the GAN cell adds an `epsilon` inside
# torch.log so the argument is never exactly zero.
epsilon = 1e-8
num_train_epochs = 10
# When True and CUDA is available, the transformer is wrapped in torch.nn.DataParallel.
multi_gpu = True
# Whether to attach a learning-rate scheduler (constant schedule with warm-up).
apply_scheduler = False
# Fraction of the training steps used as warm-up when the scheduler is enabled.
warmup_proportion = 0.1
# Print
print_each_n_step = 10

#--------------------------------
#  Adopted Transformer model
#--------------------------------

model_name = "bert-base-cased"
#model_name = "bert-base-uncased"
#model_name = "roberta-base"
#model_name = "albert-base-v2"
#model_name = "xlm-roberta-base"
#model_name = "amazon/bort"

#--------------------------------
#  Retrieve the TREC QC Dataset by cloning the GAN-BERT repository
#  NOTE(review): no cell visible in this notebook reads from the cloned
#  directory -- confirm the clone is still needed.
#--------------------------------
! git clone https://github.com/crux82/ganbert

In [None]:
# Labeled subset: the first 1,000 rows of the 10,000-row human training sample,
# i.e. 10% of it (the earlier "1%" wording did not match this slice).
data_for_ganbert = human_data_new_train.iloc[0:1000,:]

# Everything else in the training sample is treated as unlabeled: drop the rows
# whose index labels belong to the labeled subset.
data_unlabeled= human_data_new_train.drop(data_for_ganbert.index)

# NOTE(review): this prints the whole 1,000-row frame; `.head()` would be enough
# for a sanity check.
print(data_for_ganbert)
# The cell's displayed value: (rows, columns) of the labeled subset.
data_for_ganbert.shape


In [None]:
# Mark every unlabeled row with the placeholder label "UNK". A single column
# assignment is enough; looping over the index would only repeat this same
# whole-column assignment once per row.
data_unlabeled['generated'] = "UNK"
# Label vocabulary: the placeholder for unlabeled rows, then the human class (0).
# The position of each entry becomes its integer label id in `label_map`.
label_list = ['UNK',0]

In [None]:
# Checkpoint used for both the encoder and its tokenizer.
model_name = "bert-base-cased"
# Pretrained encoder whose hidden states feed the discriminator.
transformer = AutoModel.from_pretrained(model_name)
# Matching tokenizer; generate_data_loader reads it as a module-level global.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:

def generate_data_loader(input_examples, label_masks, label_map, do_shuffle=False, balance_label_examples=False, batch_size=32):
    """Tokenise a frame of examples and wrap the tensors in a DataLoader.

    Reads the module-level `tokenizer` and `max_seq_length`.

    Args:
        input_examples: DataFrame whose first column holds the text and whose
            second column holds the label (both are read by position).
        label_masks: Sequence of booleans, one per row of `input_examples`, in
            row order; True marks a labeled example.
        label_map: Mapping from label value to integer label id.
        do_shuffle: Use a RandomSampler when True, a SequentialSampler otherwise.
        balance_label_examples: When True and only part of the data is labeled,
            each labeled example is repeated int(log2(int(1 / labeled_rate)))
            times (at least once).
        batch_size: Batch size of the returned DataLoader (default 32).

    Returns:
        DataLoader over a TensorDataset of
        (input_ids, attention_mask, label_id, label_mask).

    Raises:
        ValueError: If the two inputs differ in length or are empty.
    """
    if len(input_examples) != len(label_masks):
        raise ValueError("Lengths of input_examples and label_masks do not match.")
    if len(input_examples) == 0:
        raise ValueError("input_examples is empty; cannot build a DataLoader.")

    num_labeled_examples = sum(label_masks)
    label_mask_rate = num_labeled_examples / len(input_examples)

    # How many copies of each labeled example to emit. The factor does not depend
    # on the row, so it is computed once up front.
    replication = 1
    if balance_label_examples and 0 < label_mask_rate < 1:
        replication = max(1, int(math.log(int(1 / label_mask_rate), 2)))

    input_ids = []
    input_mask_array = []
    label_mask_array = []
    label_id_array = []

    # Rows are paired with their mask by position, so the frame does not need a
    # 0..n-1 index for the pairing to be correct.
    for position, (_, row) in enumerate(input_examples.iterrows()):
        is_labeled = bool(label_masks[position])

        # `.iloc` makes the positional column access explicit; integer keys on a
        # label-indexed Series are deprecated in pandas.
        encoded_sent = tokenizer.encode(row.iloc[0], add_special_tokens=True, max_length=max_seq_length, padding="max_length", truncation=True)
        # Attention mask: 1 for every token id greater than 0.
        # NOTE(review): this assumes the tokenizer's padding id is 0 -- confirm
        # before switching to a different tokenizer.
        att_mask = [int(token_id > 0) for token_id in encoded_sent]
        label_id = label_map[row.iloc[1]]

        # Tokenise once, then emit as many copies as the balancing factor asks for.
        for _ in range(replication if is_labeled else 1):
            input_ids.append(encoded_sent)
            input_mask_array.append(att_mask)
            label_id_array.append(label_id)
            label_mask_array.append(is_labeled)

    input_ids = torch.tensor(input_ids)
    input_mask_array = torch.tensor(input_mask_array)
    label_id_array = torch.tensor(label_id_array, dtype=torch.long)
    label_mask_array = torch.tensor(label_mask_array)

    dataset = TensorDataset(input_ids, input_mask_array, label_id_array, label_mask_array)

    if do_shuffle:
        sampler = RandomSampler(dataset)
    else:
        sampler = SequentialSampler(dataset)

    return DataLoader(dataset, sampler=sampler, batch_size=batch_size)







In [None]:
# Map every label in `label_list` to its position, which is used as the label id.
label_map = {}
for (i, label) in enumerate(label_list):
  label_map[label] = i
#------------------------------
#   Load the train dataset
#------------------------------
train_examples = data_for_ganbert.copy()
# Labeled training rows get a mask value of True.
train_label_masks = np.ones(len( data_for_ganbert), dtype=bool)

# Append the unlabeled rows after the labeled ones and renumber the index 0..n-1
# so that row i lines up with mask entry i.
train_examples =pd.concat([train_examples,data_unlabeled],axis=0).reset_index(drop=True)
# Unlabeled training rows get a mask value of False.
tmp_masks = np.zeros(len(data_unlabeled), dtype=bool)
train_label_masks = np.concatenate([train_label_masks,tmp_masks])

# Sequential (unshuffled) loader; labeled rows may be replicated when
# `apply_balance` is True.
train_dataloader = generate_data_loader(train_examples, train_label_masks, label_map, do_shuffle = False, balance_label_examples = apply_balance)
#------------------------------
#   Load the test dataset
#------------------------------
# Every test row is labeled, so its mask is all True.
test_label_masks = np.ones(len(human_data_new_test), dtype=bool)

test_dataloader = generate_data_loader(human_data_new_test, test_label_masks, label_map, do_shuffle = False, balance_label_examples = False)
#------------------------------


In [None]:
# Sanity check: unpack the first training batch and print the shape of each of its
# four tensors (token ids, attention mask, label ids, label mask).
for batch in train_dataloader:
    input_ids, input_mask, labels, label_mask = batch
    print("Input IDs shape:", input_ids.shape)
    print("Input Mask shape:", input_mask.shape)
    print("Labels shape:", labels.shape)
    print("Label Mask shape:", label_mask.shape)
    break  # Print only the shape of the first batch


In [None]:

#------------------------------
class Discriminator(nn.Module):
    """Feed-forward classifier applied to sentence representations.

    The input passes through dropout, then through one
    Linear -> LeakyReLU(0.2) -> Dropout group per entry of `hidden_sizes`,
    and finally through a linear layer producing `num_labels` logits.
    """

    def __init__(self, input_size=768, hidden_sizes=[768], num_labels=2, dropout_rate=0.1):
        super().__init__()
        self.input_dropout = nn.Dropout(p=dropout_rate)

        # Layer widths from the input through every hidden layer.
        widths = [input_size] + hidden_sizes
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.LeakyReLU(0.2, inplace=True))
            stack.append(nn.Dropout(dropout_rate))

        self.layers = nn.Sequential(*stack)
        # Single linear layer mapping the last hidden representation to class logits.
        self.logit = nn.Linear(widths[-1], num_labels)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, input_rep):
        """Return (last hidden representation, logits, softmax probabilities)."""
        last_rep = self.layers(self.input_dropout(input_rep))
        # Logits are the unnormalised class scores; softmax turns them into
        # probabilities that sum to 1 along the last dimension.
        logits = self.logit(last_rep)
        return last_rep, logits, self.softmax(logits)

In [None]:
# Checkpoint name read by AutoConfig in the next cell (same value as set earlier).
model_name = "bert-base-cased"

In [None]:
# Read the encoder's output width from the pretrained model's configuration; the
# discriminator's input size must match it.
config = AutoConfig.from_pretrained(model_name)
hidden_size = int(config.hidden_size)

# One entry per hidden layer, each as wide as the encoder output.
hidden_levels_g = [hidden_size] * num_hidden_layers_g
hidden_levels_d = [hidden_size] * num_hidden_layers_d

#-------------------------------------------------
#   Instantiate the Discriminator
#   (no MLP generator is built in this notebook)
#-------------------------------------------------
discriminator = Discriminator(
    input_size=hidden_size,
    hidden_sizes=hidden_levels_d,
    num_labels=len(label_list),
    dropout_rate=out_dropout_rate,
)

# Move the models to the GPU when one is available; optionally wrap the encoder
# for multi-GPU execution.
if torch.cuda.is_available():
    discriminator.cuda()
    transformer.cuda()
    if multi_gpu:
        transformer = torch.nn.DataParallel(transformer)

# print(config)

In [None]:
!pip install transformers
import pandas as pd
import torch
import io
import torch.nn.functional as F
import random
import numpy as np
import time
import math
import datetime
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer, AutoConfig
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
# Re-seed every random number generator used by the notebook.
seed_val = 40
torch.manual_seed(seed_val)
np.random.seed(seed_val)
random.seed(seed_val)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed_val)

# Reload a fresh encoder and its tokenizer.
model_name = "bert-base-cased"
transformer = AutoModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Encode every essay of the supervised sample to a fixed length of 100 token ids
# (special tokens included, padded or truncated as needed).
max_seq_length=100
input_ids = [
    tokenizer.encode(text, add_special_tokens=True, max_length=max_seq_length, padding="max_length", truncation=True)
    for text in data_new['text']
]



In [None]:
# Convert the list of encoded essays into a single tensor (one row per essay).
# NOTE(review): this rebinds `input_ids`, so the cell is meant to run once per
# tokenisation pass.
input_ids = torch.tensor(input_ids)

In [None]:
# Batches of 32 encoded essays in their original order. The order must stay fixed
# because the training loop looks labels up in `data_new` by running row offset.
dataloader1 = DataLoader(input_ids, batch_size=32, shuffle=False)

In [None]:
import torch.nn.functional as F
import torch.optim as optim

# Supervised pre-training of the discriminator on frozen encoder features.

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Learning rate
lr = 1e-4
batch_size=32

# Move models to the selected device
transformer.to(device)
discriminator.to(device)

# Optimizer for the discriminator. Only the discriminator's parameters are
# optimised, so the encoder stays frozen throughout this cell.
optimizer_d = optim.Adam(discriminator.parameters(), lr=lr)

# Number of epochs
num_epochs = 8

# Labels are read once from the last column of `data_new`; batches are then sliced
# by running offset, which matches the unshuffled order of `dataloader1`.
all_labels = torch.tensor(data_new.iloc[:, -1].values, dtype=torch.long)

# The encoder is never updated here: keep it in eval mode so its features are
# deterministic.
transformer.eval()

for epoch_i in range(num_epochs):
    loss1 = 0
    i = 0  # Reset the row offset for each epoch

    # Set the discriminator to training mode
    discriminator.train()

    for batch in dataloader1:
        # If batch is a list of tensors, stack them into a single tensor
        if isinstance(batch, list):
            batch = torch.stack(batch).to(device)
        else:
            batch = batch.to(device)

        # A local name is used so the last (possibly smaller) batch does not
        # overwrite the global `batch_size` that other cells read.
        current_batch_size = batch.size(0)

        # Encoder features ([CLS] position). No autograd graph is needed because no
        # encoder parameter is optimised.
        # NOTE(review): no attention mask is passed, so padding tokens are attended
        # to; the evaluation cell does the same, so the two stay consistent.
        with torch.no_grad():
            model_outputs = transformer(batch)
            hidden_states = model_outputs.last_hidden_state[:, 0, :]
        features, logits, probs = discriminator(hidden_states)

        # Calculate log probabilities
        log_probs = F.log_softmax(logits, dim=-1)

        # Labels for this batch, one-hot encoded over the two classes
        labels = all_labels[i:i + current_batch_size].to(device)
        label2one_hot = F.one_hot(labels, num_classes=2).float()

        # Cross-entropy per example, then averaged over the batch
        per_example_loss = -torch.sum(label2one_hot * log_probs, dim=-1)
        d_loss = torch.mean(per_example_loss)
        loss1 += d_loss.item()

        # Zero the parameter gradients
        optimizer_d.zero_grad()

        # Backward pass
        d_loss.backward()

        # Optimization step
        optimizer_d.step()

        # Advance the row offset
        i += current_batch_size

    # Calculate the average loss
    avg_loss = loss1 / len(dataloader1)
    print(f"Epoch {epoch_i + 1}: avg_loss: {avg_loss}")
    # Save checkpoint after each epoch
    torch.save({
        'epoch': epoch_i,
        'model_state_dict': discriminator.state_dict(),
        'optimizer_state_dict': optimizer_d.state_dict(),
    }, '.pth') # specify path where you want to store your supervised trained model



In [None]:
# Evaluation sample: up to 1,000 essays drawn only from rows of `data` that were
# not sampled into `data_new`, so essays seen during supervised training are not
# reused for evaluation. `data_new` keeps the index labels of `data`, which is what
# makes the exclusion possible.
held_out = data.drop(index=data_new.index)
if len(held_out) == 0:
    # Nothing is left outside the training sample; fall back to the full corpus
    # and say so, instead of failing later on an empty evaluation set.
    print("WARNING: no held-out rows available; evaluation rows overlap the training sample.")
    held_out = data
data_new_test = held_out.sample(n=min(1000, len(held_out)))

# Encode the evaluation essays with the same length used for the training essays
# (`max_seq_length`), so the encoder sees inputs of the same shape at test time.
input_ids_test = [
    tokenizer.encode(text, add_special_tokens=True, max_length=max_seq_length, padding="max_length", truncation=True)
    for text in data_new_test['text']
]
input_ids_test = torch.tensor(input_ids_test)
# Unshuffled loader: the evaluation loop looks labels up by running row offset.
test_dataloader= DataLoader(input_ids_test, batch_size=4, shuffle=False)

In [None]:
import torch
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Evaluate the discriminator on `data_new_test` and report weighted metrics.

# Set the discriminator and the encoder to evaluation mode
discriminator.eval()
transformer.eval()

# Lists to store the true labels and predicted labels
true_labels = []
predicted_labels = []

# Labels come from the last column of `data_new_test`, in the same (unshuffled)
# order as `test_dataloader`; they are only needed on the CPU for the metrics.
all_test_labels = data_new_test.iloc[:, -1].values.astype('int64')

i=0
with torch.no_grad():
    for batch in test_dataloader:
        # If batch is a list of tensors, stack them into a single tensor
        if isinstance(batch, list):
            batch = torch.stack(batch).to(device)
        else:
            batch = batch.to(device)

        # A local name is used so the global `batch_size` (read by the scheduler
        # setup in a later cell) is not overwritten by the evaluation batch size.
        current_batch_size = batch.size(0)

        # Encoder features ([CLS] position) -> discriminator
        model_outputs = transformer(batch)
        hidden_states = model_outputs.last_hidden_state[:,0,:]
        features, logits, probs = discriminator(hidden_states)

        # Predicted class = index of the largest logit
        _, preds = torch.max(logits, dim=1)

        # Append the true labels and predictions to the lists
        true_labels.extend(all_test_labels[i:i + current_batch_size].tolist())
        predicted_labels.extend(preds.cpu().numpy().tolist())
        i+=current_batch_size

# Calculate metrics
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, average='weighted')
recall = recall_score(true_labels, predicted_labels, average='weighted')
f1 = f1_score(true_labels, predicted_labels, average='weighted')

print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1 Score: {f1:.4f}")

In [None]:

import torch
import torch.nn as nn
import torch.optim as optim
from transformers import GPT2LMHeadModel, GPT2Tokenizer, BertModel, BertTokenizer
from torch.utils.data import Dataset, DataLoader

# Load the supervised discriminator checkpoint. `map_location` places the stored
# tensors on the current device, so a checkpoint saved on a GPU also loads on a
# CPU-only machine.
checkpoint = torch.load('.pth', map_location=device) #specify the path where you have stored your supervised trained model.
start_epoch = checkpoint['epoch'] + 1  # Continue from the next epoch

# Load the GPT-2 tokenizer. GPT-2 has no padding token, so the end-of-text token is
# reused for padding; padding goes on the left so that, for padded prompts,
# generation continues from real tokens and not from padding.
gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
gpt2_tokenizer.pad_token=gpt2_tokenizer.eos_token
gpt2_tokenizer.padding_side = 'left'

# Load GPT-2. The pad id must come from the GPT-2 tokenizer: `tokenizer` is the
# BERT tokenizer loaded in an earlier cell and belongs to a different vocabulary.
gpt2_model = GPT2LMHeadModel.from_pretrained('gpt2',pad_token_id=gpt2_tokenizer.eos_token_id).to(device)

# Load BERT tokenizer and model
# NOTE(review): `bert_model` is not referenced by any other cell visible here (the
# loop encodes with `transformer`) -- confirm whether this load is needed.
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
bert_model = BertModel.from_pretrained('bert-base-cased').to(device)

# Define a custom dataset class for the essays
class EssayDataset(Dataset):
    """Dataset yielding the tokenised opening words of each essay.

    Args:
        essays: Sequence of essay strings (a list or a pandas Series).
        tokenizer: Callable tokenizer; it is invoked with `return_tensors='pt'`,
            `max_length`, `truncation=True` and `padding='max_length'`.
        max_length: Number of tokens each prompt is padded/truncated to.
        num_words: Number of leading whitespace-separated words kept as the prompt.
    """

    def __init__(self, essays, tokenizer, max_length=15, num_words=15):
        self.essays = essays
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.num_words = num_words

    def __len__(self):
        return len(self.essays)

    def __getitem__(self, idx):
        """Return the tokenizer output for essay `idx` with the batch axis removed."""
        # DataLoader hands out positions 0..len-1. A pandas Series is therefore read
        # by position, so a Series whose index is not 0..n-1 still works.
        if hasattr(self.essays, 'iloc'):
            essay = self.essays.iloc[idx]
        else:
            essay = self.essays[idx]
        prompt = ' '.join(essay.split()[:self.num_words])
        inputs = self.tokenizer(prompt, return_tensors='pt', max_length=self.max_length, truncation=True,padding='max_length')
        # The tokenizer returns tensors with a leading batch axis of size 1; drop it
        # so the DataLoader can stack items into batches.
        return {key: val.squeeze(0) for key, val in inputs.items()}

# Prompts for the generator: the text of the human training essays.
essays = human_data_new_train['text']

# Create dataset and dataloader (unshuffled batches of 32 prompts)
dataset = EssayDataset(essays, gpt2_tokenizer)
dataloader = DataLoader(dataset, batch_size=32, shuffle=False)

# Setting up the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
training_stats = []

# Measure the total training time for the whole run.
total_t0 = time.time()

# Optimizers: AdamW over the GPT-2 parameters, Adam over the discriminator.
lr=1e-4
optimizer_g = optim.AdamW(gpt2_model.parameters(), lr=lr)
optimizer_d = optim.Adam(discriminator.parameters(), lr=lr)
# Small constant added inside torch.log in the loss terms to avoid log(0).
epsilon = 1e-8

# Restore the discriminator from the checkpoint and place it on the device before
# restoring the optimizer state, so that state is loaded for parameters that are
# already on their final device.
discriminator.load_state_dict(checkpoint['model_state_dict'])
discriminator.to(device)
optimizer_d.load_state_dict(checkpoint['optimizer_state_dict'])

# Scheduler
if apply_scheduler:
    # Imported here because this name is not part of the notebook's import cells.
    from transformers import get_constant_schedule_with_warmup

    num_train_examples = len(train_examples)
    num_train_steps = int(num_train_examples / batch_size * num_train_epochs)
    num_warmup_steps = int(num_train_steps * warmup_proportion)

    scheduler_d = get_constant_schedule_with_warmup(optimizer_d, num_warmup_steps=num_warmup_steps)
    scheduler_g = get_constant_schedule_with_warmup(optimizer_g, num_warmup_steps=num_warmup_steps)

# Define the maximum sequence length for the transformer model
max_seq_length = 100

# Function to truncate the input sequences
def truncate_sequences(input_ids, input_mask, labels, label_mask, max_length):
    """Cut the token ids and their mask to at most `max_length` columns.

    `input_ids` and `input_mask` are sliced along their second axis; `labels` and
    `label_mask` are passed through unchanged. Returns the four values as a tuple
    in the same order as the arguments.
    """
    keep = slice(None, max_length)
    truncated_ids = input_ids[:, keep]
    truncated_mask = input_mask[:, keep]
    return truncated_ids, truncated_mask, labels, label_mask
# Compute and cache real data embeddings: one [CLS] feature tensor per batch of
# `train_dataloader`, in loader order.
real_data_embeddings = []

# Pin the encoder to eval mode so the cached features are computed without dropout,
# even when this cell is re-run after the training loop has changed the mode.
transformer.eval()

with torch.no_grad():
    for batch in train_dataloader:
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)

        # Truncate sequences (labels and label masks are passed through untouched)
        b_input_ids, b_input_mask, b_labels, b_label_mask = truncate_sequences(b_input_ids, b_input_mask, batch[2], batch[3], max_seq_length)

        # Encode real data in the Transformer and keep the [CLS] representation
        model_outputs = transformer(b_input_ids, attention_mask=b_input_mask)
        hidden_states = model_outputs.last_hidden_state[:, 0, :]
        real_data_embeddings.append(hidden_states)

# Adversarial training: the discriminator is trained to separate cached encoder
# features of real essays from encoder features of GPT-2 continuations.
#
# NOTE(review): GPT-2 output reaches the discriminator only through
# generate() -> decode() -> BERT re-tokenisation, i.e. through discrete token ids.
# No gradient can flow along that path, so g_loss.backward() only produces
# gradients for the discriminator and optimizer_g.step() leaves GPT-2 unchanged.
# Actually training the generator needs a different objective (e.g. a policy
# gradient) -- confirm the intended design.

# Epoch index at which training stops (exclusive); also shown in the progress banner.
num_gan_epochs = 20

# For each epoch...
for epoch_i in range(start_epoch, num_gan_epochs):
    # ========================================
    #               Training
    # ========================================
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, num_gan_epochs))
    print('Training...')

    # Measure how long the training epoch takes.
    t0 = time.time()

    # Reset the running losses and the step counter for this epoch.
    tr_g_loss = 0
    tr_d_loss = 0
    steps_done = 0

    # The encoder is in neither optimizer, so it stays in eval mode; this also
    # matches how the cached real embeddings were produced (under no_grad).
    transformer.eval()
    discriminator.train()
    gpt2_model.train()

    # Each step pairs one batch of prompts with one cached batch of real features.
    # The cache holds one entry per batch of `train_dataloader`, so iterating it
    # directly visits the same real batches without re-reading that loader.
    for gen_batch, real_embedding in zip(dataloader, real_data_embeddings):
        real_batch_size = real_embedding.shape[0]

        # Generate fake data using GPT-2
        input_ids = gen_batch['input_ids'].to(device)
        attention_mask = gen_batch['attention_mask'].to(device)

        # Generate text
        seq_len = 100
        generated_ids = gpt2_model.generate(input_ids,attention_mask=attention_mask, max_length=seq_len,do_sample=True, num_beams=5, no_repeat_ngram_size=2, early_stopping=True)
        generated_text = [gpt2_tokenizer.decode(g, skip_special_tokens=True) for g in generated_ids]

        # Re-tokenise the generated text for the BERT encoder
        generated_inputs = bert_tokenizer(generated_text, return_tensors='pt', padding=True, truncation=True, max_length=seq_len).to(device)
        gen_input_ids = generated_inputs['input_ids']
        gen_attention_mask = generated_inputs['attention_mask']

        # Encoder features ([CLS] position) of the generated text. No autograd graph
        # is built: the encoder is not optimised and nothing upstream of it is
        # differentiable.
        with torch.no_grad():
            model_outputs_gen = transformer(gen_input_ids, attention_mask= gen_attention_mask)
        gen_rep = model_outputs_gen.last_hidden_state[:,0,:]
        fake_batch_size = gen_rep.shape[0]

        # Run real and fake features through the discriminator in one pass.
        discriminator_input = torch.cat([real_embedding, gen_rep], dim=0)
        features, logits, probs = discriminator(discriminator_input)

        # Separate the outputs again. Explicit sizes keep the split correct when
        # the real and fake batches differ in length (e.g. in the final step).
        split_sizes = [real_batch_size, fake_batch_size]
        D_real_features, D_fake_features = torch.split(features, split_sizes)
        D_real_probs, D_fake_probs = torch.split(probs, split_sizes)

        #---------------------------------
        #  LOSS evaluation
        #---------------------------------
        # [:, -1] is the probability of the last class, which is the "generated"
        # class in the supervised stage (label 1 of 2).
        # Generator's loss: fakes should not be classified as generated, plus
        # feature matching between the mean real and mean fake representations.
        g_loss_d = -1 * torch.mean(torch.log(1 - D_fake_probs[:,-1] + epsilon))
        g_feat_reg = torch.mean(torch.pow(torch.mean(D_real_features, dim=0) - torch.mean(D_fake_features, dim=0), 2))
        g_loss = g_loss_d + g_feat_reg

        # Discriminator's loss: real -> not generated, fake -> generated.
        D_L_unsupervised1U = -1 * torch.mean(torch.log(1 - D_real_probs[:, -1] + epsilon))
        D_L_unsupervised2U = -1 * torch.mean(torch.log(D_fake_probs[:, -1] + epsilon))
        d_loss = D_L_unsupervised1U + D_L_unsupervised2U

        #---------------------------------
        #  OPTIMIZATION
        #---------------------------------
        # Avoid gradient accumulation
        optimizer_g.zero_grad()
        optimizer_d.zero_grad()

        # Both losses share the discriminator's forward graph, so the first backward
        # pass must retain it for the second.
        g_loss.backward(retain_graph=True)
        d_loss.backward()

        # Apply modifications
        optimizer_g.step()
        optimizer_d.step()

        # Save the losses to print them later
        tr_g_loss += g_loss.item()
        tr_d_loss += d_loss.item()
        steps_done += 1

        # Update the learning rate with the scheduler
        if apply_scheduler:
            scheduler_d.step()
            scheduler_g.step()

    # Average over the steps that actually ran: zip() stops at the shortest of its
    # inputs, which can be fewer steps than len(train_dataloader).
    avg_train_loss_g = tr_g_loss / max(steps_done, 1)
    avg_train_loss_d = tr_d_loss / max(steps_done, 1)

    print("")
    print("  Average training loss generator: {0:.3f}".format(avg_train_loss_g))
    print("  Average training loss discriminator: {0:.3f}".format(avg_train_loss_d))
    # Save checkpoint after each epoch
    torch.save({
        'epoch': epoch_i,
        'model_state_dict': discriminator.state_dict(),
        'optimizer_state_dict': optimizer_d.state_dict(),
    }, '.pth') # provide path where you want to store your final trained model.
