In [1]:
%load_ext autoreload
%autoreload 2

from model_utils import train_model, split_data, split_branches, get_model, set_pretrained_model_dropout
from config import Config
import json
import matplotlib.pyplot as plt
%matplotlib inline
import math
import random
import os
import datasets
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from model_manager import ModelManager

In [2]:
# seed = random.randint(0, 2 ** 32 - 1)
seed = 3218885689
random.seed(seed)
datasets.logging.set_verbosity(datasets.logging.ERROR)
# Tell pytorch to run this model on the GPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
# device_name = "cpu"
device = torch.device(device_name)
print(f"Will use {device_name} for training with seed: {seed}")

Will use cuda:0 for training with seed: 3218885689


In [3]:
split_data(os.path.join(Config.work_dir, "awsw_story_input.txt"))

In [4]:
config = {
    "lr": 6e-4,
    "warmup_factor": 0,
    "scheduler": "polynomial_decay_schedule_with_warmup",
    "lr_end": 2e-6,
    "power": 0.6,
    #"freeze_layer_rate": 1e-4,
    "freeze_from_steps": -1,
    "seed": seed,
    "num_epoch": 500
}

optuna_result_attachement = {
    'lr': 0.001,
    'scheduler': 'cosine_schedule_with_warmup',
    'to_freeze_count': 150,
    #"to_freeze_gpt_blocks": 11,
    'warmup_factor': 1
}
config.update(optuna_result_attachement)

In [5]:
saved_model_path = os.path.join("models", "awsw_main")
if os.path.exists(os.path.join(saved_model_path, "pytorch_model.bin")):
    print("Pretrained model loaded")
    tokenizer = AutoTokenizer.from_pretrained('EleutherAI/gpt-neo-125M')
    model = AutoModelForCausalLM.from_pretrained(saved_model_path)
else:
    print("Loaded empty model")
    model, tokenizer = get_model("EleutherAI/gpt-neo-125M")
model.to(device)
# set_pretrained_model_dropout(model.transformer.h[-1:], 0.05)

Pretrained model loaded


GPTNeoForCausalLM(
  (transformer): GPTNeoModel(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(2048, 768)
    (drop): Dropout(p=0, inplace=False)
    (h): ModuleList(
      (0): GPTNeoBlock(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPTNeoAttention(
          (attention): GPTNeoSelfAttention(
            (attn_dropout): Dropout(p=0, inplace=False)
            (resid_dropout): Dropout(p=0, inplace=False)
            (k_proj): Linear(in_features=768, out_features=768, bias=False)
            (v_proj): Linear(in_features=768, out_features=768, bias=False)
            (q_proj): Linear(in_features=768, out_features=768, bias=False)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPTNeoMLP(
          (c_fc): Linear(in_features=768, out_features=3072, bias=True)
          (c_proj): Linear(in_features=3072, o

# Test before training on a pretrained model!

In [6]:
model.eval()
model_manager = ModelManager(model=model, tokenizer=tokenizer)
def test_regular_sampler():
    print(model_manager.say_raw("How are you? I'm", 50, 0.7))
test_regular_sampler()

How are you? I'm the one who gave you the impression you wanted to see the movie."<|endoftext|>


# Training

Model is put in training mode and we begin training. The `train_results` will contain all data after training is completed.

In [None]:
train_results = {}
model.train()
train_model(model, tokenizer, config, train_results)

In [None]:
fig, axs = plt.subplots(2)
fig.suptitle('Learning rate and loss')
axs[0].plot(train_results['learning_rate_history'])
axs[1].plot(train_results['loss_history'])

# Testing

We created a few past (for context) + present prompts (player input) and see the different reactions. This way, we can test the models across different iterations.
The first test involves a old prompt to compare the pre-trained model with the one trained on AWSW. Did it manage to store it's data well? Is it able to write down things that have nothing to do with AWSW? (So we know we didn't overfit).

In [7]:
model.eval()
test_regular_sampler()

How are you? I'm sorry, I'm sorry about the whole thing."<|endoftext|>


**This test generates boring and repetetive** replies! It's because we use no good sampling algorithm, but it does give us a indication of what the model has learned!

In [8]:
prompts = [
    ('<p><msg>c "Hey Remy!"<d><scn>park2<msg>Ry "Hey!"', "How are you?"),
    ('<p><msg>c "I was with Lorem today."<d><scn>park2<msg>Ad "Very nice."', "What do you think of Lorem?"),
    ('<p><msg>m "In Tatsu park, Adine and I sat down."', "Oh my god, Adine. What is this?"),
    ('<p><msg>m "I sat down on a chair in Anna\'s lab."', "What will we do here?"),
]

for (past, prompt) in prompts:
    reply = model_manager.say(past, prompt)
    print(f"Prompt: {prompt}\nReply: s{reply}\n\n")

Prompt: How are you?
Reply: spark2<msg>Ry "I'm good."<d><scn>park2<msg>Ry "I'm good."<d><scn>park2<msg>Ry "I'm good."<d><scn>park2<msg>Ry "I'm good."<d><scn>park2<msg>Ry "I'm good."<d><scn>park


Prompt: What do you think of Lorem?
Reply: spark2<msg>Ad "I think he's a good guy."<p><msg>c "I'm not sure."<p><msg>c "I'm not sure."<d><scn>park2<msg>Ad "I'm not sure."<p><msg>c "I'm not sure."<p><msg>c "I


Prompt: Oh my god, Adine. What is this?
Reply: sfacin2<msg>An "I'm not sure, but I'm not sure I can do anything to help."<p><msg>c "I'm not sure I can do anything to help."<d><scn>facin2<msg>An "I'm not sure I can do anything to help."<p><msg>c "I'm not sure I can do anything to help


Prompt: What will we do here?
Reply: snp1n<msg>m "I looked at the results of the test. I could see that the results were mixed results."<d><scn>np1n<msg>m "I was about to leave, but I didn't want to go."<d><scn>np1n<msg>m "I was about to leave, but I didn't want to go."<d><scn>np1




# Sampling test

This is gonna be interesting!

In [9]:
for i in range(10):
    for (past, prompt) in prompts:
        reply = model_manager.say(past, prompt, top_k = 50, top_p = 0.7)
        print(f"[Test {i + 1}] -> Prompt: {prompt}\nReply: {reply}\n")
    print("-------------")

[Test 1] -> Prompt: How are you?
Reply: park2<msg>Ry "You don't have a lot of money to spend, do you?"<p><msg>c "I guess you're right."<d><scn>park2<msg>Ry "What kind of money do you have?"<d><scn>park2<msg>Ry "A couple of months', maybe longer."<d><scn>park2<

[Test 1] -> Prompt: What do you think of Lorem?
Reply: park2<msg>Ry "I think Lorem was the one who brought you here, and I'll be back when Lorem is back."<p><msg>c "What did you bring me here?"<d><scn>park2<msg>Ry "I'm sure it wasn't your fault."<d><scn>park2<msg>

[Test 1] -> Prompt: Oh my god, Adine. What is this?
Reply: np1n<msg>An "I'm sorry, Adine. I know you like to be polite, but you're not the only one. You seem to have all the traits that make you the most popular."<d><scn>np1n<msg>Br "I see. I'll do it for you. I'll do it for you, then."<p><msg>c "

[Test 1] -> Prompt: What will we do here?
Reply: park2<msg>Ad "You don't have to worry about it. It's a pretty big thing, you know?"<p><msg>c "I guess I don't know."<d><scn

# RP test
Testing out the injected roleplay actions

In [10]:
test_rps = [
    "Visit Lorem",
    "Meet with Lorem",
    "Visit Adin"
]
for rp in test_rps:
    print(f'{rp} -> {model_manager.say("", rp, top_k = 50, top_p = 0.7)}')

Visit Lorem -> facin2<msg>Lo "I'll see you tomorrow."<|endoftext|>
Meet with Lorem -> black<msg>Lo "Liarism is the belief that humans are intelligent, so there is no need to be a problem with it."<|endoftext|>
Visit Adin -> black<msg>n "He spoke to me about my life in the hospital and my future. I was so proud to meet him, and to have him here."<d><scn>black<msg>n "He looked into my eyes, and then I remembered what I had seen in the first place."<p><msg>c "What was your name?"<d><scn>black<msg>n "A. It's an old woman's name."<p><msg>c "It's a good name,
