In [1]:
from torch.utils.data import Dataset
import pandas as pd

from transformers import GPT2LMHeadModel, GPT2Tokenizer
from torch.optim import Adam
from torch.utils.data import DataLoader
# from tqdm import tqdm_notebook as tqdm
from tqdm.notebook import tqdm
import torch

import warnings
warnings.filterwarnings("ignore")

In [2]:
class ChatData(Dataset):
    def __init__(self, path:str, tokenizer):
        self.data = pd.read_csv('sample_data.csv', encoding='unicode_escape')

        self.X = []
#         for i in self.data:
#             for j in i['dialog']:
#                 self.X.append(j['text'])
                
        for idx, row in self.data.iterrows():
            self.X.append('<startofstring> ' + row['question'] + ' <bot>: ' + row['answer'] + ' <endofstring>')
#         for idx, i in enumerate(self.X):
#             try:
#                 self.X[idx] = "<startofstring> "+i+" <bot>: "+self.X[idx+1]+" <endofstring>"
#             except:
#                 break

#         self.X = self.X[:5000]
        
        print(self.X[0])

        self.X_encoded = tokenizer(self.X,max_length=40, truncation=True, padding="max_length", return_tensors="pt")
        self.input_ids = self.X_encoded['input_ids']
        self.attention_mask = self.X_encoded['attention_mask']

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return (self.input_ids[idx], self.attention_mask[idx])

In [3]:
def infer(inp):
    inp = "<startofstring> "+inp+" <bot>: "
    inp = tokenizer(inp, return_tensors="pt")
    X = inp["input_ids"].to(device)
    a = inp["attention_mask"].to(device)
    
#     model = GPT2LMHeadModel.from_pretrained('./model_state.pt', local_files_only=True)
#     model = GPT2LMHeadModel.from_pretrained("gpt2")
#     model.load_state_dict(torch.load('./model_state.pt'))
#     model.resize_token_embeddings(len(tokenizer))
    
    output = model.generate(X, attention_mask=a )
    output = tokenizer.decode(output[0])
    return output

In [4]:
def train(chatData, model, optim):

    epochs = 100

    for i in tqdm(range(epochs)):
        for X, a in chatData:
            X = X.to(device)
            a = a.to(device)
            optim.zero_grad()
            loss = model(X, attention_mask=a, labels=X).loss
            loss.backward()
            optim.step()
        torch.save(model.state_dict(), "model_state.pt")
        print(infer("Do I need a visa to visit Canada?"))

In [5]:
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.add_special_tokens({"pad_token": "<pad>", 
                                "bos_token": "<startofstring>",
                                "eos_token": "<endofstring>"})
tokenizer.add_tokens(["<bot>:"])

1

In [6]:
model = GPT2LMHeadModel.from_pretrained("gpt2")
model.resize_token_embeddings(len(tokenizer))

model = model.to(device)

You are resizing the embedding layer without providing a `pad_to_multiple_of` parameter. This means that the new embeding dimension will be 50261. This might induce some performance reduction as *Tensor Cores* will not be available. For more details  about this, or help on choosing the correct value for resizing, refer to this guide: https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#requirements-tc


In [7]:
# print(tokenizer.decode(model.generate(**tokenizer("hey i was good at basketball but ",
#                          return_tensors="pt"))[0]))

In [8]:
chatData = ChatData("./chat_data.json", tokenizer)
chatData =  DataLoader(chatData, batch_size=64)

<startofstring> Do I need a visa to visit Canada?   <bot>: Most travelers need a visa or an Electronic Travel authorization (eTA) to fly to or transit through a Canadian airport.  <endofstring>


In [9]:
model.train()

optim = Adam(model.parameters(), lr=1e-3)

print("training .... ")
train(chatData, model, optim)

training .... 


  0%|          | 0/100 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: TheTheTheTheTheTheTheTheThe


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>:,,,,,,,,,


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>:,,,,,,,,,


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>:,,,,, and,,,


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>:  the <pad> <pad> <pad> <pad>  the <pad>, <pad>


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>:  Canada Canada Canada Canada Canada Canada Canada Canada Canada


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>:  visa visa visa visa visa visa visa visa visa


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>:  to a visa visa visa visa visa visa visa


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>:  to Canada Canada Canada Canada. Canada Canada Canada


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>:  to a visa visa visa visa visa visa visa


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>:. I I I? I Canada? Canada


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>:?? a?? <bot>:.??


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>:? <pad> <pad> <bot>:? <pad> <pad>? <pad>


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>:? Canada? <pad>? <pad> <pad> <pad>  Canada


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>:  Canada Canada? <bot>:  Canada. Canada Canada Canada


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>:  to Canada. <pad>  Canada. <bot>:. Canada


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>:  to Canada. <bot>: <pad> <pad> <pad> <pad> <pad>


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>:  I have been a visitor visa to Canada.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>:? <pad> <pad>. I have been a visitor


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: I have been a visitor visa visa visa visa


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. <bot>: No. <pad> <pad> <pad> <pad>


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. If you need to Canada. You


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. If you must be a visitor visa


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Yes. You need to visit Canada. You


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Yes. If you need to visit Canada?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Yes. You can get a visa to Canada


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: If I need a visa to Canada, you


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: If I need a visa to visit Canada?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: If I need a visa to visit to Canada


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: There are several ways to travel to Canada.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. Can I need to visit Canada?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. Can I need a visa to visit


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Yes, you can apply for a visa to


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: You can apply for a visa to Canada if


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: If you need a visa to visit Canada,


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. Can I apply for a visa to


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. Can I need a visa to visit


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. <bot>: No. <bot>: You must have


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. Can I still travel outside Canada,


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: A sibling, as a Canadian citizen or permanent


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: There is a mandatory entry visa to enter Canada


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. The visa allows you to travel to


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. Can I use a visa to travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: If you are visiting Canada, you need a


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. Canadians citizens, permanent residents, including


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Yes, you need a visa or an e


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: There are several ways to take a few foreign


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. However, you need a visa to


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Yes, you need a visa or Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. Can I travel to Canada? <bot>:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: There are several ways to visit Canada. You


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Yes. To travel to Canada, you need


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. You can apply for a visa or


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. Canadians citizens, including dual citizens,


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. Under the Canadian Experience Class, you


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. As a visitor visa or an Electronic


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Yes, you need a visa or an Electronic


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. using a visa to fly to Canada


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Yes, you need a visa to fly to


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa to visit Canada.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: If you plan to travel to Canada, you


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: If you plan to travel to Canada, you


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No, using a visa or an electronic travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. Canadians citizens, including dual citizens,


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an eTA


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No. As long as you travel to Canada


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Yes, you need a visa or an Electronic


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Yes. However, you need a visa or


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: A border services officer will verify that you are


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: There is no longer a visa or an Electronic


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Yes. If you plan to travel to Canada


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: The document you need to transit through Canada depends


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No, you need a visa or a temporary


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: No, using a visa or an eTA


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an eTA


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Yes. To travel to Canada using a visa


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an eTA


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: There is no visa, but a visa or


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: A visa or a temporary resident visa is a


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel


In [10]:
print("infer from model : ")
while True:
    inp = input()
    if inp == 'exit':
        break
    print(infer(inp))

infer from model : 
Do I need a visa to visit Canada?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Do I need a visa to visit Canada? <bot>: Most travelers need a visa or an Electronic Travel
What is a business visitor?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> What is a business visitor? <bot>: A business visitor is someone who comes to Canada for international business
Who has to take the citizenship test?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Who has to take the citizenship test? <bot>: If youre between 18 and 54 years
Can I sponsor a parent or grandparent?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> Can I sponsor a parent or grandparent? <bot>: Yes, as a Canadian citizen or permanent resident
What is the fee for an Access request?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> What is the fee for an Access request? <bot>: Each request filed under the Access to Information Act
What is the fee for a Privacy request?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<startofstring> What is the fee for a Privacy request? <bot>: There are no fees under the Privacy Act.
exit
