In [1]:
import logging
import random
import sys
from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import wandb
from datasets import load_dataset
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
                        

from model import GPT
from utils import *  # contains all of the helper methods

In [2]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Size tag of the configuration to load; it selects the JSON file under configs/.
cfg_param = "8M"
cfg = load_config(f"configs/config-{cfg_param}.json")

In [3]:
# Hub identifier shared by the dataset and tokenizer lookups below
# (a later cell reuses it to fetch the reference pretrained model).
model_name = 'roneneldan/TinyStories'

# Fetch the dataset, then the tokenizer published under the same identifier.
dataset = load_dataset(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token



In [4]:
# Build the model from the active config, seeding through the project helper first.
setup_seed(3407)
model = GPT(cfg)

# Wrap in DataParallel only when this machine exposes more than one GPU;
# on a single device (or CPU) the bare module is kept.
model = nn.DataParallel(model) if torch.cuda.device_count() > 1 else model

# Module.to() moves the parameters in place and returns the module, so leaving it
# as the final expression also displays the architecture as the cell output.
model.to(device)

number of parameters: 19.18M


GPT(
  (wte): Embedding(50257, 256)
  (wpe): Embedding(2048, 256)
  (drop): Dropout(p=0.1, inplace=False)
  (blocks): ModuleList(
    (0-7): 8 x DecoderBlock(
      (ln1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
      (attn): MultiheadAttention(
        (c_attn): Conv1D()
        (c_proj): Conv1D()
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (resid_dropout): Dropout(p=0.1, inplace=False)
      )
      (ln2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
      (ffn): MLP(
        (c_fc): Conv1D()
        (c_proj): Conv1D()
        (act): GELU(approximate='none')
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (ln): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
  (fc): Linear(in_features=256, out_features=50257, bias=False)
)

In [5]:
# Untrained model output
# `model` is wrapped in nn.DataParallel only on multi-GPU hosts, so derive the
# multiGPU flag from the actual wrapper instead of assuming the single-device case
# (the checkpoint cells below pass multiGPU=True for their wrapped models).
# NOTE(review): assumes multiGPU defaults to False in test_language_modeling — confirm in utils.
test_language_modeling(model, tokenizer, multiGPU=isinstance(model, nn.DataParallel))

Output:
----------------------------------------------------------------------------------------------------
One day, a little girl named Lily found a needle in her room. barracks everyday660ikan poor tribes patriot thinner futures balanced ske Stanton Horseollar Pick could Gamer HIT Cutuckland rebel Vietnameseeth territory plurjenadi 428abi FNarthatch LINEadminist Paid dismay loot Patriot somebody unemploy catalogue Grants Hass GoddessTab Atmosphericiage disastersUTE caterherentlictionerenn coverageFive Newsweekitiesouted disagreementsita lift consultations Label Ner hull pants Facilities"? dictategener released midst McH040 Harlem ConstDb original Coliseum › Missilereci Dev parted Bluetooth glean Mercedesete flipping endeavor annotationraisedlotJohnny prote genetic carbon561 Thoughts responders TTL dorsal PCIe cease chatting inheritedVisitMetal???sth bend alertjas Sicily CASE hell Present aidesع pirate Grimm Creaturesilian Jindal reporterceptiveinus hommediatedarium Vacc ChineseAllow

In [6]:
# 8M model output
filename = "models/model_0107_201050.pt.tar"

# Build a fresh network for the checkpoint instead of wrapping `model`:
#   * on multi-GPU hosts `model` is already an nn.DataParallel, and wrapping it a
#     second time changes the state-dict key prefix ("module.module.");
#   * wrapping `model` shares its parameters, so loading the checkpoint would also
#     overwrite the weights of the "untrained" model shown earlier.
# The 8M config is loaded explicitly so this cell does not depend on whichever
# config the shared `cfg` variable currently holds.
cfg_8M = load_config("configs/config-8M.json")
model_8M = nn.DataParallel(GPT(cfg_8M))
model_8M.to(device)

# NOTE(review): presumably the checkpoint was saved from a DataParallel-wrapped
# model, hence the single wrap before loading — confirm against load_checkpoint.
load_checkpoint(model_8M, filename)
test_language_modeling(model_8M, tokenizer, multiGPU=True)

Output:
----------------------------------------------------------------------------------------------------
One day, a little girl named Lily found a needle in her room. It was shiny and shiny. She showed it to her mom. "Look, mom! I found a needle!" she said.

Her mom smiled and said, "Good job, Lily! You keep this needle safe from the icy ground."

Lily put the needle in her toy box. It was her friend. They played with the needle all day long. They took turns to wrap it around the room.

Soon, it was time for bed. Lily was sleepy. She put the needle on her bed and closed the winter box. "Good night, Lily. Now it will be warm so." "Goodnight, Jack. I love we coats until it is warm."

Lily fell asleep under the icy bed. She felt happy and cozy. She knew the needle two good listening to her mom. When she woke up, they went to play on her useful once more. She loved to play with her friends. And they all lived happily ever after.


In [10]:
# 28M model output
# Keep the 28M settings under their own names rather than rebinding the shared
# `cfg` / `cfg_param` globals, so re-running an earlier cell after this one still
# builds the model from the 8M config it was written for.
cfg_param_28M = "28M"
cfg_28M = load_config(f"configs/config-{cfg_param_28M}.json")
filename = "models/model_28M_0116_143531.pt.tar"

# Fresh network, wrapped once in DataParallel and moved to the target device
# before the checkpoint weights are loaded into it.
model_28M = nn.DataParallel(GPT(cfg_28M))
model_28M.to(device)
load_checkpoint(model_28M, filename)

# The model is DataParallel-wrapped, so the helper is told so via multiGPU=True.
test_language_modeling(model_28M, tokenizer, multiGPU=True)

number of parameters: 152.24M
Output:
----------------------------------------------------------------------------------------------------
One day, a little girl named Lily found a needle in her room. She wanted to make a special necklace with it. Lily took the needle out of the box and tried to make a pretty necklace. But it was not easy to make a special necklace.

Lily's mom saw her trying to make the necklace. She smiled and said, "I can help you, Lily! Let squeezing a half of the needle together. It will make you look perfect." Lily was happy and on the other hand.

Together, they poked and made a beautiful necklace. Lily wore it to the park to play with her friends. She was very good at making pretty things. Lily knew that the special necklace would make everyone as happy as it was still for her. And whenever Lily wore her colorful necklace, she remembered how she just used the needle to make it and felt proud. And she remembered that it was just as fun to make something new. And

In [11]:
# Reference output from the pretrained model published under `model_name`.
# Load the weights first, then move the model onto the selected device.
pretrained_model = AutoModelForCausalLM.from_pretrained(model_name)
pretrained_model = pretrained_model.to(device)

# Generate with the same helper and tokenizer used for the other models.
test_language_modeling(pretrained_model, tokenizer)

Output:
----------------------------------------------------------------------------------------------------
One day, a little girl named Lily found a needle in her room. She was very happy and wanted to help her mommy.

Lily asked her mommy, "Can I help you, mommy?" Her mommy said, "Of course, Lily. Let's go to the store and buy some candy."

Lily was so happy and said, "Thank you, mommy! I love candy!" Her mommy smiled and said, "You're welcome, Lily. You're welcome."

Lily was happy to have a new friend and a new friend. She was happy to have a new friend and a new friend.

