In [None]:
# Mount Google Drive at /content/drive (only works inside Google Colab,
# since it relies on the google.colab package).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Reference: PyTorch chatbot tutorial — https://pytorch.org/tutorials/beginner/chatbot_tutorial.html?highlight=text%20generation

In [None]:
import argparse
import random
import os
import pprint
import torch
from torch import nn
from torch import optim
# from pathos.multiprocessing import ProcessingPool as Pool
from nltk.translate.bleu_score import sentence_bleu

from format_data import datafiles
from build_vocabulary import trimRareWords, loadPrepareData, MIN_COUNT
from train import run
from model import EncoderRNN, LuongAttnDecoderRNN
from serialization import save_seq2seq, load_encoder, load_decoder, load_voc, load_embedding, \
                          load_optim
from chat import GreedySearchDecoder, chat

In [None]:
def write_results(data_type, run_mode, encoder, encoder_name, decoder_name, dropout, clip, lr, losses):
    """Write a series of metric values to a text file, one value per line.

    The file is placed in a ``txt_results`` directory (created if missing)
    relative to the current working directory and is overwritten if it
    already exists. Its name encodes the experiment settings:

        {data_type}_{run_mode}_{encoder_name}{1|2}{decoder_name}_d{dropout}_gc{clip}_lr{lr}.txt

    where the digit is ``2`` when ``encoder.bidirectional`` is truthy and
    ``1`` otherwise.

    Args:
        data_type: Label for the kind of values stored (first name component).
        run_mode: Label for the run phase (second name component).
        encoder: Object exposing a ``bidirectional`` attribute.
        encoder_name: Name of the encoder, embedded in the file name.
        decoder_name: Name of the decoder, embedded in the file name.
        dropout: Dropout value, embedded in the file name.
        clip: Gradient clipping value, embedded in the file name.
        lr: Learning rate, embedded in the file name.
        losses: Iterable of numbers; each is rounded to 5 decimals on output.
    """
    results_dir = "txt_results"
    os.makedirs(results_dir, exist_ok=True)

    direction_tag = '2' if encoder.bidirectional else '1'
    file_name = (
        f"{data_type}_{run_mode}_"
        f"{encoder_name}{direction_tag}{decoder_name}_"
        f"d{dropout}_gc{clip}_lr{lr}.txt"
    )
    target_path = f"{results_dir}{os.path.sep}{file_name}"

    with open(target_path, "w") as output_file:
        output_file.writelines(f"{str(round(value, 5))}\n" for value in losses)


def main():
    """Train a seq2seq model, or load a saved one for testing / chatting.

    The function takes no parameters; it is driven entirely by module-level
    configuration that must be defined before it is called: ``run_mode``,
    ``args``, ``device``, ``encoder_name``, ``decoder_name``, ``attn_model``,
    ``HIDDEN_SIZE``, ``ENCODER_N_LAYERS``, ``DECODER_N_LAYERS``, ``DROPOUT``,
    ``BIDIRECTION``, ``LR``, ``EPOCH_NUM``, ``BATCH_SIZE`` and ``CLIP``.

    * ``run_mode == 'train'``: builds the vocabulary and training pairs,
      constructs encoder/decoder/optimizers, calls ``run`` and, if ``run``
      returns a truthy value, saves the model via ``save_seq2seq``.
    * ``run_mode == 'test'``: loads the checkpoint and models; no evaluation
      is implemented yet.
    * ``run_mode == 'chat'``: loads the checkpoint and starts an interactive
      chat using greedy decoding.

    Raises:
        ValueError: If the optimizer name is neither ``"ADAM"`` nor ``"SGD"``,
            if ``run_mode`` is not one of the handled modes, or if a
            non-training mode is requested without ``args['model_path']``.
    """

    phase = {
        "train": {},
        "test": {}
    }

    if run_mode == 'train':
        # Load/Assemble voc and pairs
        phase["train"]["voc"], phase["train"]["pairs"] = loadPrepareData(datafiles["train"])
        # Trim voc and pairs
        phase["train"]["pairs"] = trimRareWords(phase["train"]["voc"], phase["train"]["pairs"], MIN_COUNT)

        # Shuffle the pairs ONCE before the entire training; the fixed seed
        # makes the ordering reproducible across runs.
        random.seed(1)  # seed can be any number
        random.shuffle(phase["train"]["pairs"])

        print('Building training set encoder and decoder ...')
        # A single embedding instance is shared by the encoder and the decoder.
        embedding = nn.Embedding(phase["train"]["voc"].num_words, HIDDEN_SIZE).to(device)

        # Initialize encoder & decoder models
        encoder = EncoderRNN(HIDDEN_SIZE, embedding, ENCODER_N_LAYERS, DROPOUT, gate=encoder_name,
                             bidirectional=BIDIRECTION)
        decoder = LuongAttnDecoderRNN(attn_model, embedding, HIDDEN_SIZE,
                                      phase["train"]["voc"].num_words, DECODER_N_LAYERS, DROPOUT, gate=decoder_name)

        # Use appropriate device
        encoder = encoder.to(device)
        decoder = decoder.to(device)
        encoder.train()
        decoder.train()
        print('Models built and ready to go!')

        # Initialize optimizers
        print('Building optimizers ...')
        optimizer_name = args.get('optimizer')
        if optimizer_name == "ADAM":
            encoder_optimizer = optim.Adam(encoder.parameters(), lr=LR)
            decoder_optimizer = optim.Adam(decoder.parameters(), lr=LR)
        elif optimizer_name == "SGD":
            encoder_optimizer = optim.SGD(encoder.parameters(), lr=LR)
            decoder_optimizer = optim.SGD(decoder.parameters(), lr=LR)
        else:
            raise ValueError("Wrong optimizer type has been given as an argument.")

        # Keep any tensors held in optimizer state on the same device as the
        # models. Using `.to(device)` instead of `.cuda()` keeps this safe on
        # CPU-only hosts.
        for optimizer in [encoder_optimizer, decoder_optimizer]:
            for state in optimizer.state.values():
                for k, v in state.items():
                    if isinstance(v, torch.Tensor):
                        state[k] = v.to(device)

        print("Starting Training!")
        save_model = run(encoder, decoder, encoder_optimizer, decoder_optimizer, EPOCH_NUM, BATCH_SIZE, CLIP, phase)
        if save_model:
            # Saving is best-effort: a failure is reported but must not
            # discard the in-memory result of a long training run.
            # NOTE(review): phase["train"]["losses"] / ["bleu"] are not set in
            # this function; presumably run() fills them in -- confirm.
            try:
                save_seq2seq(encoder, decoder, encoder_name, decoder_name, encoder_optimizer, decoder_optimizer,
                             phase["train"]["losses"], phase["train"]["bleu"], phase["train"]["voc"],
                             embedding, DROPOUT, CLIP, LR)
                print("Model has been saved successfully.")
            except Exception as error:
                print("Saving the model has caused an exception:", error)

        # write_results("loss", "train", encoder, encoder_name, decoder_name, DROPOUT, CLIP, LR, phase["train"]["losses"])
        # write_results("bleu", "train", encoder, encoder_name, decoder_name, DROPOUT, CLIP, LR, phase["train"]["bleu"])

    else:
        # Reject unknown modes before doing any (potentially slow) checkpoint I/O.
        # NOTE(review): the message advertises 'val', but no 'val' branch
        # exists in this function; 'val' is rejected like any unknown mode.
        if run_mode not in ("test", "chat"):
            raise ValueError("Wrong run_mode has been given, options: ['train', 'val', 'test', 'chat']")

        model_path = args.get('model_path')
        if not model_path:
            raise ValueError(f"run_mode '{run_mode}' requires args['model_path'] to point to a saved checkpoint.")

        # Loading basic objects needed for both testing and chatting.
        # map_location lets a checkpoint saved on GPU be restored on a CPU host.
        # SECURITY: torch.load unpickles the file -- only load trusted checkpoints.
        checkpoint = torch.load(model_path, map_location=device)
        embedding = load_embedding(checkpoint, HIDDEN_SIZE)
        encoder = load_encoder(checkpoint, EncoderRNN, HIDDEN_SIZE, embedding,
                               ENCODER_N_LAYERS, DROPOUT, encoder_name, BIDIRECTION)
        voc = load_voc(checkpoint)
        decoder = load_decoder(checkpoint, LuongAttnDecoderRNN,
                               attn_model, embedding, HIDDEN_SIZE, voc.num_words, DECODER_N_LAYERS, DROPOUT, decoder_name)
        encoder = encoder.to(device)
        decoder = decoder.to(device)

        # Inference only: switch to eval mode so dropout is disabled.
        encoder.eval()
        decoder.eval()

        if run_mode == "chat":
            # Initialize search module
            searcher = GreedySearchDecoder(encoder, decoder)
            chat(searcher, voc)
        # run_mode == "test": nothing further is implemented yet.

In [None]:
# ---------------------------------------------------------------------------
# Experiment configuration. main() reads the names defined in this cell as
# module-level globals, so they must all exist before main() is called.
# ---------------------------------------------------------------------------
# NOTE(review): "encoder_direction" and "early_stopping" are not read anywhere
# in the visible code; BIDIRECTION below is hard-coded to True even though
# "encoder_direction" is 1 -- confirm which setting is intended.
args = {
    "run_mode": "train",
    "model_path": None,
    "encoder": "GRU",
    "encoder_direction": 1,
    "decoder": "GRU",
    "optimizer": "ADAM",
    "epoch_num": 50,
    "early_stopping": False,
    "dropout": 0.1,
    "gradient_clipping": 1.0,
    "lr": 0.001
}

# Banner summarising what this run is about to do.
_banner = '*' * 40
print(f"\n{_banner}")
print(f"[RUN_MODE]: {args['run_mode']}")
print(f"[MODEL_PATH]: {args['model_path']}")
print(f"{_banner}\n")

# Values taken from the args dict.
run_mode = args["run_mode"]
encoder_name = args["encoder"]
decoder_name = args["decoder"]
EPOCH_NUM = int(args["epoch_num"])

# Hyperparameters
DROPOUT = float(args["dropout"])
CLIP = float(args["gradient_clipping"])
LR = float(args["lr"])

# Fixed architecture parameters
HIDDEN_SIZE = 300  # Number of dimensions of the embedding; number of features in a hidden state
ENCODER_N_LAYERS = 2
DECODER_N_LAYERS = 2
BATCH_SIZE = 64
BIDIRECTION = True

# Attention scoring variant passed to the decoder
attn_model = 'dot'

# Pick the GPU when one is available, otherwise fall back to the CPU.
USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda" if USE_CUDA else "cpu")
print(device)

main()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[TRAIN] Epoch: 28 Percent complete: 27.7%; Loss: 0.12999 BLEU score: 0.525
[TRAIN] Epoch: 28 Percent complete: 28.2%; Loss: 0.17226 BLEU score: 0.65
[TRAIN] Epoch: 28 Percent complete: 28.6%; Loss: 0.12775 BLEU score: 0.575
[TRAIN] Epoch: 28 Percent complete: 29.1%; Loss: 0.37092 BLEU score: 0.425
[TRAIN] Epoch: 28 Percent complete: 29.5%; Loss: 0.08636 BLEU score: 0.575
[TRAIN] Epoch: 28 Percent complete: 30.0%; Loss: 0.78429 BLEU score: 0.475
[TRAIN] Epoch: 28 Percent complete: 30.5%; Loss: 0.09995 BLEU score: 0.45
[TRAIN] Epoch: 28 Percent complete: 30.9%; Loss: 0.2366 BLEU score: 0.55
[TRAIN] Epoch: 28 Percent complete: 31.4%; Loss: 0.05494 BLEU score: 0.45
[TRAIN] Epoch: 28 Percent complete: 31.8%; Loss: 0.44629 BLEU score: 0.425
[TRAIN] Epoch: 28 Percent complete: 32.3%; Loss: 0.09682 BLEU score: 0.6
[TRAIN] Epoch: 28 Percent complete: 32.7%; Loss: 0.43903 BLEU score: 0.4
[TRAIN] Epoch: 28 Percent complete: 33.2%; L