In [None]:
import copy
import logging
import random
import sys
from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import wandb
from datasets import load_dataset
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, Dataset
from transformers import (AutoModelForCausalLM, AutoTokenizer, GPT2Config,
                          GPT2LMHeadModel)

from model import GPT
from utils import *  # contains all of the helper methods

In [None]:
# Run on the GPU when CUDA is usable, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Size tag of the model configuration; it selects configs/config-<tag>.json.
cfg_param = "8M"
cfg = load_config(f"configs/config-{cfg_param}.json")

In [None]:
# Hub repository id used for both the dataset and the tokenizer below.
model_name = 'roneneldan/TinyStories'

# Fetch the dataset first, then the tokenizer published under the same id.
dataset = load_dataset(path=model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse the end-of-sequence token for padding (presumably the tokenizer has
# no dedicated pad token -- confirm).
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Build the model from the loaded config. The seed is fixed first
# (setup_seed comes from utils), presumably so that weight initialisation is
# repeatable between runs -- confirm in utils.
setup_seed(3407)
model = GPT(cfg)

# On a single machine with several GPUs, wrap the model in DataParallel;
# otherwise keep the bare module.
model = nn.DataParallel(model) if torch.cuda.device_count() > 1 else model

# Move the parameters to the selected device. Left as the cell's last
# expression so the notebook also displays the module.
model.to(device)

In [None]:
# Untrained model output
# Baseline for comparison: in a top-to-bottom run no checkpoint has been
# loaded yet, so `model` still holds its initial weights here.
# test_language_modeling is a helper from utils; presumably it generates and
# shows sample text from the given model/tokenizer pair -- confirm in utils.
test_language_modeling(model, tokenizer)

In [None]:
# Best model output
# Evaluate the saved checkpoint on an independent copy of the network.
# A plain `best_model = model` would only alias the same object, so loading
# the checkpoint would also overwrite `model`, and re-running the
# "Untrained model output" cell would silently show trained output.
filename = "models/model_0107_201050.pt.tar"
# deepcopy keeps any DataParallel wrapper and the device placement of `model`,
# so the checkpoint is loaded into the same module structure as before.
best_model = copy.deepcopy(model)
# load_checkpoint is a helper from utils; presumably it loads the saved
# weights into `best_model` in place -- confirm in utils.
load_checkpoint(best_model, filename)
test_language_modeling(best_model, tokenizer)

In [None]:
# Pretrained model output
# Reference point: the published weights loaded through Hugging Face
# Transformers. from_pretrained places the model on the CPU by default, so it
# is moved to `device` here to match how `model` is placed before evaluation.
pretrained_model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
test_language_modeling(pretrained_model, tokenizer)