For Reference: My Trigram Implementation used on the list of names from the Karpathy lecture

In [2]:
import requests

# URL of the raw names.txt file (one name per line)
url = "https://raw.githubusercontent.com/karpathy/makemore/master/names.txt"

# Fetch the file. The timeout stops the cell from hanging forever on a stalled
# connection, and raise_for_status() turns any HTTP error into an exception here,
# instead of leaving `names` undefined and failing later with a NameError.
response = requests.get(url, timeout=30)
response.raise_for_status()

names = response.text.splitlines()
# Quick sanity check: show the first 5 names
print(names[:5])

['emma', 'olivia', 'ava', 'isabella', 'sophia']


In [3]:
# Number of lines (names) obtained from names.txt
len(names)

32033

In [4]:
"""
Ex.
name = emma
e m m a
len(name) = 4
range(4) = 0 1 2 3
"ma." should be last trigram
"""

# Count every trigram inside each name; the final trigram of a name uses "."
# as its third symbol (e.g. "emma" ends with ("m", "a", ".")).
trigrams = {}
for name in names:  # confirmed no names with only one letter
    padded = name + "."  # the end marker fills the third slot of the last window
    # Slide a width-3 window over the padded name
    for first, second, third in zip(padded, padded[1:], padded[2:]):
        key = (first, second, third)
        trigrams[key] = trigrams.get(key, 0) + 1

# Number of distinct trigrams observed
print(len(trigrams))

5686


In [5]:
# Trigrams ordered from most to least frequent: a list of (trigram, count) pairs
t = sorted(trigrams.items(), key=lambda item: item[1], reverse=True)

# Show the 5 most frequent trigrams (slicing also copes with fewer than 5 entries)
for key, value in t[:5]:
    print(f"Key: {key}, Value: {value}")

Key: ('a', 'h', '.'), Value: 1714
Key: ('n', 'a', '.'), Value: 1673
Key: ('a', 'n', '.'), Value: 1509
Key: ('o', 'n', '.'), Value: 1503
Key: ('e', 'n', '.'), Value: 1217


Create a 3D tensor to hold frequencies of all trigrams

Include '.' as a special token (index 0) that marks both the start and the end of a name

In [6]:
import torch

# Trigram count table: N[i, j, k] will hold how often symbol k follows the pair (i, j).
# Each axis has 27 slots -- presumably the 26 letters plus the '.' marker (confirm
# against the vocabulary built from `names`).
N = torch.zeros(27, 27, 27, dtype=torch.int32)

In [7]:
# Vocabulary: every distinct character that appears in any name, in sorted order
chars = sorted(set(''.join(names)))
# Character -> index. Characters are numbered from 1 so that index 0 stays free for '.'
stoi = {ch: idx for idx, ch in enumerate(chars, start=1)}
stoi['.'] = 0
# Index -> character (the inverse mapping)
itos = {idx: ch for ch, idx in stoi.items()}

In [8]:
""""
Populate 3D tensor with frequencies of trigrams
"""

# Reset the counts in place first so that re-running this cell does not
# double-count (N is allocated in an earlier cell and would otherwise accumulate).
N.zero_()

for name in names:
    # Pad with '.' on both sides so the first and last trigrams of each name are counted
    chs = ['.'] + list(name) + ['.']
    # Slide a width-3 window over the padded character list
    for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
        N[stoi[ch1], stoi[ch2], stoi[ch3]] += 1

In [9]:
# Spot check: the count for ('a', 'h', '.') should equal the value found with the
# dictionary-based count above (1714).
N[stoi['a'], stoi['h'], stoi['.']]

tensor(1714, dtype=torch.int32)

Sampling From the Model

In [10]:
# N[0] selects the trigrams whose first symbol has index 0 ('.'), i.e. the counts of
# the first two letters of each name.
N[0].shape

torch.Size([27, 27])

In [11]:
# p is the probability distribution over all possible starting bigrams:
# the counts of trigrams beginning with '.', normalised over the whole 27x27 slice.
p = N[0].float()
p = p / p.sum()
# Sanity check: the whole table should sum to 1
p.sum()

tensor(1.)

In [12]:
# Seeded generator so the draw is reproducible
g = torch.Generator().manual_seed(2147483647)

# torch.multinomial is given a 1D distribution here, so view the 2D table as one flat vector
p_flat = p.flatten()
index = torch.multinomial(p_flat, num_samples=1, replacement=True, generator=g).item()

# Map the flat index back to (row, col): quotient is the row, remainder is the column
row, col = divmod(index, p.size(1))

print(f"Sampled index: {index} -> row: {row}, col: {col}")
print(itos[row], itos[col])

Sampled index: 352 -> row: 13, col: 1
m a


Get first 2 letters from probability dist. of all Starting bigrams (bigrams that follow " . ")

Then pick every successive letter based on the preceding bigram

In [13]:
# Create a probability distribution over every possible 3rd letter, for every possible
# preceding bigram. It is more efficient to build this once and index into it than to
# normalise again for every sample.
P = N.float()
# Divide the 27x27x27 counts by their 27x27x1 sums along the last axis (broadcast).
# NOTE: a context (i, j) with no counts has a zero sum, so its row is 0/0 = NaN;
# such rows must not be sampled from.
P = P / P.sum(2, keepdim=True)
P[6, 7].sum()  # should be 1 for any observed context if properly normalised

tensor(1.)

In [14]:
g = torch.Generator().manual_seed(2147483647)

# Distribution over starting bigrams (the two letters that follow '.').
# It does not change between samples, so build and flatten it once, outside the loop.
start_probs = N[0].float()
start_probs = (start_probs / start_probs.sum()).flatten()
n_cols = N.size(2)  # width of the 2D table that was flattened

for _ in range(20):
    # Draw the first bigram, then convert the flat index back to (row, col)
    index = torch.multinomial(start_probs, num_samples=1, replacement=True, generator=g).item()
    row, col = divmod(index, n_cols)
    out = [itos[row], itos[col]]

    # Keep drawing the next symbol from the distribution of the last two symbols until
    # the '.' marker (index 0) is produced. Checking `col` up front also ends the name
    # at once if the starting bigram already finished with '.'.
    while col != 0:
        ix = torch.multinomial(P[row, col], num_samples=1, replacement=True, generator=g).item()
        out.append(itos[ix])
        row, col = col, ix  # slide the two-symbol context forward

    print(''.join(out))

maimittain.
rumtai.
taside.
annas.
ridynne.
bramah.
rhalee.
ameegariseriyen.
zel.
amave.
caisa.
khutsaoishri.
que.
smanah.
joiriel.
ez.
anitayaamnef.
ka.
qwadi.
khan.


Assess Log Likelihood of Model:

In [16]:
# Accumulate the log-probability the model assigns to every trigram in the data
log_likelihood = 0.0
n = 0  # number of trigrams scored

for name in names:
    # Same '.' padding as used when the counts were collected
    chs = ['.'] + list(name) + ['.']
    for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
        prob = P[stoi[ch1], stoi[ch2], stoi[ch3]]
        log_likelihood += torch.log(prob)
        n += 1

# Negate so that lower is better, then average over the number of trigrams
nll = -log_likelihood
print("Average negative log likelihood of model =", f'{nll/n}')

#Average negative log likelihood of bigram model is 2.48

Average negative log likelihood of model = 2.0619611740112305


Neural Network Approach to Trigram MakeMore