This file containing the training code for Joke Generation Model

In [None]:
!pip install transformers

In [2]:
# Connect with google drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Preliminaries
import os
import pandas as pd
import numpy as np

#Pytorch
import torch
import torch.nn as nn
from torch.utils.data import DataLoader,Dataset

# Transformers
from transformers import GPT2LMHeadModel
from transformers import AdamW, WarmUp, get_linear_schedule_with_warmup

#Warnings
import warnings
warnings.filterwarnings('ignore')

# MyModule
import config

Downloading (…)olve/main/vocab.json: 0.00B [00:00, ?B/s]

Downloading (…)olve/main/merges.txt: 0.00B [00:00, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

In [4]:
# INITIALIZING MODEL AND ADDING THE PAD TOKEN
model = GPT2LMHeadModel.from_pretrained('gpt2-medium')
special_tokens_dict = {'pad_token': '<PAD>'}
num_added_toks = config.Tokenizer.add_special_tokens(special_tokens_dict)
print('We have added', num_added_toks, 'tokens')
model.resize_token_embeddings(len(config.Tokenizer))

Downloading model.safetensors:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

We have added 1 tokens


Embedding(50258, 1024)

In [5]:
# Dataset
class Jokesdataset(Dataset):
    '''
    This class builds the custom dataset for Dataloader
    '''
    def __init__(self,data,tokenizer):
      self.data = data
      self.tokenizer = tokenizer
      self.eos_tok = "<|endoftext|>"
      #Adding JOKE: at the start and EOS TOKEN at end
      self.data['Joke'] = self.data['Joke'].apply(lambda x: "JOKE:" + str(x) + self.eos_tok)

    def __len__(self):
      return len(self.data)

    def __getitem__(self,idx):
      joke = self.data.iloc[idx,1]

      inputs = self.tokenizer.encode_plus(
      joke,
      None,
      add_special_tokens = True,
      max_length = config.MAX_LEN,
      pad_to_max_length = True
      )

      ids = inputs["input_ids"]
      mask = inputs["attention_mask"]

      return {'ids':torch.tensor(ids,dtype=torch.long),
      'mask': torch.tensor(mask,dtype=torch.long),
      'target':torch.tensor(ids,dtype=torch.long)}

In [6]:
# Training Function
def train_fn(data_loader, model, optimizer, device, scheduler,epoch):
    model.train()
    for bi, d in enumerate(data_loader):
        ids = d["ids"]
        mask = d["mask"]
        labels = d['target']

        ids = ids.to(device, dtype=torch.long)
        mask = mask.to(device, dtype=torch.long)
        labels = labels.to(device,dtype=torch.long)

        optimizer.zero_grad()
        outputs = model(
            input_ids =ids,
            attention_mask=mask,
            labels = labels
        )

        loss, logits = outputs[:2]
        loss.backward()

        optimizer.step()
        if scheduler is not None:
                scheduler.step()

        if (bi+1) % 500 == 0:
            print('Epoch [{}/{}], bi[{}/{}], Loss: {:.4f}'
                   .format(epoch+1, config.EPOCHS, bi+1,len(data_loader), loss.item()))

In [None]:
device = 'cuda' # Selecting Device

#ENGINE
def run():
  jokes = pd.read_csv(config.TRAIN_PATH) # add the path to your Dataset in config File

  jokes_dataset = Jokesdataset(jokes,config.Tokenizer)
  jokes_dataloader = DataLoader(jokes_dataset,
                                batch_size=config.BATCH_SIZE,
                                shuffle=True,
                                num_workers=4)

  model.to(device)

  num_train_steps = int(len(jokes_dataloader) / config.BATCH_SIZE * config.EPOCHS)

  optimizer = AdamW(model.parameters(), lr=config.LEARNING_RATE)
  scheduler = get_linear_schedule_with_warmup(optimizer,num_warmup_steps=0,num_training_steps=num_train_steps)

  for epoch in range(config.EPOCHS):
        print(f"EPOCH {epoch+1} started" + '=' * 30)
        train_fn(jokes_dataloader, model, optimizer, device, scheduler,epoch=epoch)

        models_folder = config.MODEL_FOLDER # add the path to your folder where you want to save models in config File
        if not os.path.exists(models_folder):
          os.mkdir(models_folder)
        # Saving Model after each Epoch
        torch.save(model.state_dict(), os.path.join(models_folder, f"gpt2_joke_generator{epoch}.pt"))

# BEGINNING TRAINING
run()



Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-s

Epoch [1/4], bi[500/11028], Loss: 1.3115
Epoch [1/4], bi[1000/11028], Loss: 1.3133
Epoch [1/4], bi[1500/11028], Loss: 1.1803
Epoch [1/4], bi[2000/11028], Loss: 1.2985
Epoch [1/4], bi[2500/11028], Loss: 1.3370
Epoch [1/4], bi[3000/11028], Loss: 1.3901
Epoch [1/4], bi[3500/11028], Loss: 1.2788
Epoch [1/4], bi[4000/11028], Loss: 1.4969
Epoch [1/4], bi[4500/11028], Loss: 1.2581
Epoch [1/4], bi[5000/11028], Loss: 1.2267
Epoch [1/4], bi[5500/11028], Loss: 1.2352
Epoch [1/4], bi[6000/11028], Loss: 1.4388
Epoch [1/4], bi[6500/11028], Loss: 1.3624
Epoch [1/4], bi[7000/11028], Loss: 1.3016
Epoch [1/4], bi[7500/11028], Loss: 1.2025
Epoch [1/4], bi[8000/11028], Loss: 1.2496
Epoch [1/4], bi[8500/11028], Loss: 1.1837
Epoch [1/4], bi[9000/11028], Loss: 1.3641
Epoch [1/4], bi[9500/11028], Loss: 1.4110
Epoch [1/4], bi[10000/11028], Loss: 1.1900
Epoch [1/4], bi[10500/11028], Loss: 1.2290
Epoch [1/4], bi[11000/11028], Loss: 1.5028


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-s

Epoch [2/4], bi[500/11028], Loss: 1.3490
Epoch [2/4], bi[1000/11028], Loss: 1.1871
Epoch [2/4], bi[1500/11028], Loss: 1.2996
Epoch [2/4], bi[2000/11028], Loss: 1.1697
Epoch [2/4], bi[2500/11028], Loss: 1.1608
Epoch [2/4], bi[3000/11028], Loss: 1.6334
Epoch [2/4], bi[3500/11028], Loss: 1.3436
Epoch [2/4], bi[4000/11028], Loss: 1.3224
Epoch [2/4], bi[4500/11028], Loss: 1.1961
Epoch [2/4], bi[5000/11028], Loss: 1.3364
Epoch [2/4], bi[5500/11028], Loss: 1.3944
Epoch [2/4], bi[6000/11028], Loss: 1.5164
Epoch [2/4], bi[6500/11028], Loss: 1.3264
Epoch [2/4], bi[7000/11028], Loss: 1.4973
Epoch [2/4], bi[7500/11028], Loss: 1.0368
Epoch [2/4], bi[8000/11028], Loss: 1.1572
Epoch [2/4], bi[8500/11028], Loss: 1.0221
Epoch [2/4], bi[9000/11028], Loss: 1.2262
Epoch [2/4], bi[9500/11028], Loss: 1.3098
Epoch [2/4], bi[10000/11028], Loss: 1.1691
Epoch [2/4], bi[10500/11028], Loss: 1.4614
Epoch [2/4], bi[11000/11028], Loss: 1.1655


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-s

Epoch [3/4], bi[500/11028], Loss: 1.2651
Epoch [3/4], bi[1000/11028], Loss: 1.4803
Epoch [3/4], bi[1500/11028], Loss: 1.3038
Epoch [3/4], bi[2000/11028], Loss: 1.2959
Epoch [3/4], bi[2500/11028], Loss: 1.3487
Epoch [3/4], bi[3000/11028], Loss: 1.2644
Epoch [3/4], bi[3500/11028], Loss: 1.3261
Epoch [3/4], bi[4000/11028], Loss: 1.3847
Epoch [3/4], bi[4500/11028], Loss: 1.3465
Epoch [3/4], bi[5000/11028], Loss: 1.5593
Epoch [3/4], bi[5500/11028], Loss: 1.1077
Epoch [3/4], bi[6000/11028], Loss: 1.2610
Epoch [3/4], bi[6500/11028], Loss: 1.4563
Epoch [3/4], bi[7000/11028], Loss: 1.4127
Epoch [3/4], bi[7500/11028], Loss: 1.3893
Epoch [3/4], bi[8000/11028], Loss: 1.2837
Epoch [3/4], bi[8500/11028], Loss: 1.4001
Epoch [3/4], bi[9000/11028], Loss: 1.0100
Epoch [3/4], bi[9500/11028], Loss: 1.2434
Epoch [3/4], bi[10000/11028], Loss: 1.1506
Epoch [3/4], bi[10500/11028], Loss: 1.3493
Epoch [3/4], bi[11000/11028], Loss: 1.3891


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-s

Epoch [4/4], bi[500/11028], Loss: 1.4247
Epoch [4/4], bi[1000/11028], Loss: 1.2915
Epoch [4/4], bi[1500/11028], Loss: 1.1946
Epoch [4/4], bi[2000/11028], Loss: 1.1370
Epoch [4/4], bi[2500/11028], Loss: 1.3643
Epoch [4/4], bi[3000/11028], Loss: 1.3875
Epoch [4/4], bi[3500/11028], Loss: 1.3764
Epoch [4/4], bi[4000/11028], Loss: 1.4888
Epoch [4/4], bi[4500/11028], Loss: 1.0659
Epoch [4/4], bi[5000/11028], Loss: 1.3493
Epoch [4/4], bi[5500/11028], Loss: 1.1692
Epoch [4/4], bi[6000/11028], Loss: 1.8406
Epoch [4/4], bi[6500/11028], Loss: 1.4583
Epoch [4/4], bi[7000/11028], Loss: 1.2902
Epoch [4/4], bi[7500/11028], Loss: 1.3127
Epoch [4/4], bi[8000/11028], Loss: 1.3394
Epoch [4/4], bi[8500/11028], Loss: 1.4862
