### 1. Imporinting All Library

In [1]:
import yaml
import torch
import nltk
from glob import glob
from transformers import GPT2Tokenizer, GPT2LMHeadModel

from chatbot_files/data import Dialogues
from chatbot_files/utils import set_seed

  from .autonotebook import tqdm as notebook_tqdm


ModuleNotFoundError: No module named 'data'

### 2. Downloading the NLTK Libraries

In [2]:
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt')

NameError: name 'nltk' is not defined

### 3. Opening The Model Configuration
and Setting Seeds Values

In [4]:
### Make Sure the seed is imported
# from utils import set_seed

args = yaml.safe_load(open('config.yml'))
set_seed(args['seed']) 
args

{'dataset_dir': '../data',
 'train_frac': 0.85,
 'model_name': 'gpt2',
 'seed': 8459,
 'lr': 2e-05,
 'warmup_ratio': 0.1,
 'batch_size': 1,
 'num_epochs': 10,
 'max_len': 100,
 'max_history': 5,
 'models_dir': '../models',
 'stop_command': 'bye',
 'top_p': 0.9,
 'top_k': 50,
 'temperature': 0.9,
 'mode': 'train',
 'checkpoint': '../models/model_best_4.5401.h5',
 'model_dir': '../models'}

### 4. Opening Loading the Model With GPU

#### 4.1 Load Tokenizer

In [6]:
def load_tokenizer(args):
    tokenizer = GPT2Tokenizer.from_pretrained(args['model_name'])
    special_tokens = ['<speaker1>', '<speaker2>']
    tokenizer.add_special_tokens({
        'bos_token': '<bos>',
        'additional_special_tokens': special_tokens
    })

    # add new token ids to args
    special_tokens += ['<bos>', '<eos>']
    sp1_id, sp2_id, bos_id, eos_id = tokenizer.encode(special_tokens)
    args['sp1_id'] = sp1_id
    args['sp2_id'] = sp2_id
    args['bos_id'] = bos_id
    args['eos_id'] = eos_id

    return tokenizer

#### 4.2 Load Model 

In [7]:
def load_model(args, tokenizer, device):
    model = GPT2LMHeadModel.from_pretrained("./models/model_best_2.5922_point.h5").to(device)
    model.resize_token_embeddings(len(tokenizer))
    return model

#### 4.3 Loding Model and Tokenizeer

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
args['device'] = device
print("--"*50)
print(f'Using device: {device}')
print("--"*50)
tokenizer = load_tokenizer(args)
model = load_model(args, tokenizer, device)

----------------------------------------------------------------------------------------------------
Using device: cpu
----------------------------------------------------------------------------------------------------




### 5. Training the Model

#### 5.1 Loading the Dataset

In [9]:
def dataset_is_missing(args):
    if len(glob(f'{args["dataset_dir"]}/*.pickle')) == 0:
        return True
    return False

In [10]:
## Make sure the Dialogues class is imported
# from data import Dialogues

if dataset_is_missing(args):
    print("Dataset is missing")
    print('Creating dataset...')
    dialogues = Dialogues(tokenizer, args)
    train_dataset, valid_dataset = dialogues.load()
    print(f"len(train_dataset): {len(train_dataset)}")
    dataset_types = ['train', 'valid']
    datasets = [train_dataset, valid_dataset]

    for dataset_type, dataset in zip(dataset_types, datasets):
        dialogues.save(dataset_type, tokenizer, dataset)
        
    print('Dataset created')

#### 5.2 Loading the Trainer and Start Training the model

In [11]:
### Make sure the Trainer class is imported

from train import Trainer
trainer = Trainer(model, args)
trainer.train()

Loading the optimizer...
Loading train & valid data...


  0%|          | 0/27 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Loading checkpoint...


  checkpoint = torch.load(path, map_location=self.args['device'])


The training restarts with the specified checkpoint: model_best_4.5401.h5
Launch training...
--------------------------------------------------
Epoch: 4
--------------------------------------------------


  0%|          | 0/48 [00:00<?, ?it/s]

KeyboardInterrupt: 

### 6. Introduction to the Chatbot

In [14]:
### Make sure the Chatbot class is imported

from interact import Chatbot
args["checkpoint"] = "../models/model_best_2.5922_point.h5"
chatbot = Chatbot(model, tokenizer, args)
chatbot.run()

Loading checkpoint...
Found checkpoint file: model_best_2.5922_point.h5
Launching the chatbot...
If you want to stop, type the "bye" command
you:  what is library time?
Bot: We have to wait for the first book.
you:  when the library open?
Bot: The library is open on Thursday.
you:  what is the timing?
Bot: The first book opened at 1 p.M
you:  what is college timing?
Bot: Well, Let's say the first book is on Monday. What do you think it will look like next Wednesday?
you:  How many courses are there?
Bot: Well, Each one of those has a class at 2 p.M., So you can go for three courses in the afternoon and then come back on Tuesday to take an orientation course that day
you:  nice , thank you.
Bot: And what about the other classes?
you:  Thank You boss.
Bot: Well, I think you got a nice class.
you:  what is the fee structures.
Bot: I got a lot of money, You know.
you:  no I dont know
Bot: I'm sorry, But we can't agree to a contract on things. We've had our share of disagreements over the n

KeyboardInterrupt: Interrupted by user