In [1]:
%load_ext autoreload
%autoreload 2

from model_utils import train_model, split_data, split_branches, get_model, set_pretrained_model_dropout
from config import Config
import json
import matplotlib.pyplot as plt
%matplotlib inline
import math
import random
import os
import datasets
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from model_manager import ModelManager

In [2]:
# seed = random.randint(0, 2 ** 32 - 1)
seed = 3218885689
# Seed Python's RNG *and* PyTorch's: sampling and weight initialisation draw from
# torch's generator, so seeding `random` alone does not make a run repeatable.
random.seed(seed)
torch.manual_seed(seed)
datasets.logging.set_verbosity(datasets.logging.ERROR)
# Tell pytorch to run this model on the GPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
# device_name = "cpu"
device = torch.device(device_name)
print(f"Will use {device_name} for training with seed: {seed}")

Will use cuda:0 for training with seed: 3218885689


In [3]:
# Story input file inside the configured working directory.
story_input_path = os.path.join(Config.work_dir, "awsw_story_input.txt")
split_data(story_input_path)

In [4]:
# Baseline training hyperparameters.
base_config = {
    "lr": 6e-4,
    "warmup_factor": 0,
    "scheduler": "polynomial_decay_schedule_with_warmup",
    "lr_end": 2e-6,
    "power": 0.6,
    # Disabled option: "freeze_layer_rate": 1e-4
    "freeze_from_steps": -1,
    "seed": seed,
    "num_epoch": 10,
}

# Overrides layered on top of the baseline (presumably the outcome of an Optuna
# search, going by the name -- TODO confirm).
optuna_result_attachement = {
    "lr": 0.001,
    "scheduler": "cosine_schedule_with_warmup",
    "to_freeze_count": 155,
    # Disabled option: "to_freeze_gpt_blocks": 11
    "warmup_factor": 1,
}

# Later entries win, so `lr`, `scheduler` and `warmup_factor` end up with the
# override values while every other baseline key is kept.
config = {**base_config, **optuna_result_attachement}

In [5]:
# Reuse a previously saved model from models/awsw_main when its weights file is
# present; otherwise build a fresh one through get_model.
saved_model_path = os.path.join("models", "awsw_main")
checkpoint_file = os.path.join(saved_model_path, "pytorch_model.bin")
if not os.path.exists(checkpoint_file):
    print("Loaded empty model")
    model, tokenizer = get_model("EleutherAI/gpt-neo-125M")
else:
    print("Pretrained model loaded")
    # NOTE(review): the tokenizer always comes from the base checkpoint, not from
    # saved_model_path -- confirm get_model does not alter the tokenizer.
    tokenizer = AutoTokenizer.from_pretrained('EleutherAI/gpt-neo-125M')
    model = AutoModelForCausalLM.from_pretrained(saved_model_path)
model.to(device)
# set_pretrained_model_dropout(model.transformer.h[-1:], 0.05)

Pretrained model loaded


GPTNeoForCausalLM(
  (transformer): GPTNeoModel(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(2048, 768)
    (drop): Dropout(p=0, inplace=False)
    (h): ModuleList(
      (0): GPTNeoBlock(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPTNeoAttention(
          (attention): GPTNeoSelfAttention(
            (attn_dropout): Dropout(p=0, inplace=False)
            (resid_dropout): Dropout(p=0, inplace=False)
            (k_proj): Linear(in_features=768, out_features=768, bias=False)
            (v_proj): Linear(in_features=768, out_features=768, bias=False)
            (q_proj): Linear(in_features=768, out_features=768, bias=False)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPTNeoMLP(
          (c_fc): Linear(in_features=768, out_features=3072, bias=True)
          (c_proj): Linear(in_features=3072, o

# Test before training on a pretrained model!

In [7]:
model.eval()
model_manager = ModelManager(model=model, tokenizer=tokenizer)


def test_regular_sampler():
    """Print the model's raw continuation of a fixed prompt."""
    # NOTE(review): 50 and 0.7 are passed positionally; presumably generation
    # settings -- confirm against ModelManager.say_raw.
    generated = model_manager.say_raw("The dragon flew", 50, 0.7)
    print(generated)


test_regular_sampler()

The dragon flew into the night and was captured by the guards and put in the car."<|endoftext|>


# Training

The model is put into training mode and training begins. Once training is completed, `train_results` contains the data recorded during the run (for example the learning-rate and loss histories plotted below).

In [None]:
# Put the model in training mode, then hand train_model an empty dict to collect
# the run's results in (read back later as `learning_rate_history` / `loss_history`).
model.train()
train_results = {}
train_model(model, tokenizer, config, train_results)

In [None]:
# Two stacked panels: learning-rate history on top, loss history below.
fig, axs = plt.subplots(2)
fig.suptitle('Learning rate and loss')
panels = (
    ('learning_rate_history', 'Learning rate'),
    ('loss_history', 'Loss'),
)
# Label each panel so the figure can be read on its own. Ending the cell on a
# loop (a statement) also avoids echoing the `[<Line2D ...>]` repr of plot().
for ax, (history_key, y_label) in zip(axs, panels):
    ax.plot(train_results[history_key])
    ax.set_ylabel(y_label)

# Testing

We created a few prompts, each pairing a past (for context) with a present line (the player input), to see the different reactions. This way, we can test the models across different iterations.
The first test uses an old prompt to compare the pre-trained model with the one trained on AWSW. Did it manage to store its data well? Is it still able to write about things that have nothing to do with AWSW? (If so, we know we didn't overfit.)

In [6]:
# Switch the model back to evaluation mode, then rerun the same fixed-prompt
# sample that was printed before training so the two outputs can be compared.
model.eval()
test_regular_sampler()

NameError: name 'test_regular_sampler' is not defined

**This test generates boring and repetitive** replies! That's because we aren't using a good sampling algorithm, but it does give us an indication of what the model has learned!

In [8]:
# Each entry is a (past, prompt) pair: the earlier conversation used as context,
# followed by the new player line the model should reply to.
prompts = [
    ('<p><msg>c "Hey Remy!"<d><scn>park2<msg>Ry "Hey!"', "How are you?"),
    ('<p><msg>c "I was with Lorem today."<d><scn>park2<msg>Ad "Very nice."', "What do you think of Lorem?"),
    ('<p><msg>m "In Tatsu park, Adine and I sat down."', "Oh my god, Adine. What is this?"),
    ('<p><msg>m "I sat down on a chair in Anna\'s lab."', "What will we do here?"),
]

# One reply per pair, using model_manager.say with no extra sampling arguments.
for past, prompt in prompts:
    reply = model_manager.say(past, prompt)
    print(f"Prompt: {prompt}\nReply: {reply}\n\n")

Prompt: How are you?
Reply: park2<msg>Ry "I'm fine, I just wanted to see how it would look."<d><scn>park2<msg>Ry "I'm fine."<p><msg>c "Hey, are you okay?"<d><scn>park2<msg>Ry "I'm fine."<p><msg>c "Hey, are you okay?"<d><scn


Prompt: What do you think of Lorem?
Reply: park2<msg>Ry "I'm not sure, I can't remember anything else. I'm not a fan of the old days."<p><msg>c "I was with Lorem today."<d><scn>park2<msg>Ry "I was with Lorem today."<p><msg>c "I was with Lorem today."


Prompt: Oh my god, Adine. What is this?
Reply: black<msg>An "I'm not sure, I can`t seem of any kind of money. I don't have any money."<p><msg>c "Oh, I forgot to ask you what you actually planned to do with my blood and the test results."<d><scn>black<msg>An "I did. I was supposed to be the one who found the body of the


Prompt: What will we do here?
Reply: o2<msg>Ad "I don't know. I can't just leave it at that."<p><msg>c "I don't know. I just wanted to see it all together."<d><scn>o2<msg>Ad "I'm not sure if it's wo

# Sampling test

This is gonna be interesting!

In [None]:
# Sampling arguments shared by every call in this test.
sampling_options = {"top_k": 50, "top_p": 0.7}

# Ten rounds over the same prompts, to see how much the sampled replies vary.
for i in range(10):
    for past, prompt in prompts:
        reply = model_manager.say(past, prompt, **sampling_options)
        print(f"[Test {i + 1}] -> Prompt: {prompt}\nReply: {reply}\n")
    print("-------------")

[Test 1] -> Prompt: How are you?
Reply: park2<msg>Ry "I'm good. I'll be glad to know
another'."<p><msg>c "I see."<p><msg>c "Hey Remy."<d><scn>park2<msg>Ry "I'm so glad you came."<p><msg>c "Hey Remy."<d><scn>park2<msg>Ry "I'm so

[Test 1] -> Prompt: What do you think of Lorem?
Reply: park2<msg>Ry "It is a beautiful thing. You should really get used to this whole thing."<p><msg>c "I was with a few of the most important people in the city when I came here."<d><scn>park2<msg>Ry "So you're here to take care of me?"<p><msg>c

[Test 1] -> Prompt: Oh my god, Adine. What is this?
Reply: o2<msg>Ad "Do you think I could come in for a few minutes? I could be more specific."<p><msg>c "I thought you wanted to make a good point, so I'll try to be as specific as I can."<p><msg>c "Oh, I see."<d><scn>o2<msg>Ad "Oh, I see."<|endoftext|>

[Test 1] -> Prompt: What will we do here?
Reply: facin2<msg>Br "There is another thing that I haven't done for a while. I'm not sure if it's the chemistry of the food, o

In [None]:
print("What to say?")
# Ask the model for a reply to a line typed by the user, with an empty past.
# `generate_dragon_reply` is not defined or imported anywhere in this notebook,
# so the reply goes through model_manager.say with the same sampling arguments
# as the sampling test.
print(model_manager.say("", input(), top_k=50, top_p=0.7))