In [1]:
import torch
import matplotlib.pyplot as plt
%matplotlib inline
import torch.nn.functional as F

In [2]:
# Open txt files


# train = open('Training.txt','r').read().splitlines()
test = open('./data/test.txt').read().splitlines()
dev = open('./data/dev.txt').read().splitlines()
words = open('./data/Training.txt').read().splitlines()
len(words)

30000

In [10]:
# letter to number mapping

chars = sorted(list(set(''.join(words))))
stoi = {s:i+1 for i,s in enumerate(chars)}
stoi['.'] = 0
itos = {i:s for s,i in stoi.items()}
itos

{1: 'a',
 2: 'b',
 3: 'c',
 4: 'd',
 5: 'e',
 6: 'f',
 7: 'g',
 8: 'h',
 9: 'i',
 10: 'j',
 11: 'k',
 12: 'l',
 13: 'm',
 14: 'n',
 15: 'o',
 16: 'p',
 17: 'q',
 18: 'r',
 19: 's',
 20: 't',
 21: 'u',
 22: 'v',
 23: 'w',
 24: 'x',
 25: 'y',
 26: 'z',
 0: '.'}

In [14]:

# Creates the 3 data sets
# Each one is composed of "blocks" of letters up to our choosing. 
# Here, we chose 3 so the word "one" would be shown as [0,0,'0],[0,0,'o'],[0,'o','n'],['o','n','e']

block = 3

def dataset(words):


    X,Y = [],[]

    for w in words:

    #     print(w)

        context = [0] * block
        for ch in w + '.':
            ix = stoi[ch]
            X.append(context)
            Y.append(ix)

    #         print(''.join(itos[i] for i in context), itos[ix])
            context = context[1:] + [ix]

    X = torch.tensor(X)
    Y = torch.tensor(Y)
    print(X.shape,Y.shape)
    return X,Y

Xtr, Ytr = dataset(words)
Xdev,Ydev = dataset(dev)
Xte, Yte = dataset(test)

torch.Size([313130, 3]) torch.Size([313130])
torch.Size([104449, 3]) torch.Size([104449])
torch.Size([104449, 3]) torch.Size([104449])


In [18]:
# Initializes the neural network. Embedding chooses the dimensionality of the initial neuron and hiddenN is the hidden
# Neurons in the one hidden layer

embedding = 10
hiddenN = 200


# Weights and biases normalized
W1 = torch.randn((embedding * block, hiddenN)) * (5/3)/((embedding * block)**2)
b1 = torch.randn(hiddenN) * 0

W2 = torch.randn((hiddenN, hiddenN)) * (5/3) / hiddenN**0.5
b2 = torch.randn(hiddenN) * 0


W3 = torch.randn((hiddenN,27)) * 0.1
b3 = torch.randn(27) * 0
C = torch.randn(27,embedding)
parameters = [C,W1,b1,W2, b2, W3,b3]

for p in parameters:
    p.requires_grad=True

b1.size()

torch.Size([200])

In [46]:
# This runs the model on our data

steps = 100000
batchSize = 32

for i in range(steps):
    
    #Runs on batches
    batch = torch.randint(0,Xtr.shape[0],(batchSize,))
    Xb, Yb = Xtr[batch], Ytr[batch]
    
    # Forward pass
    emb = C[Xb]
    k = torch.tanh(emb.view(emb.shape[0],-1) @ W1 + b1)
    k2 = torch.tanh(k @ W2 + b2)
    logits = k2 @ W3 + b3
    
    #Calculates loss
    loss = F.cross_entropy(logits,Yb)

    for p in parameters:
        p.grad = None
    
    #Backwards pass
    loss.backward()
    
    for p in parameters:
        p.data+= -0.001 *p.grad
        
    if i % 10000 == 0:
        print(f'{i:7d}/{steps}: {loss.item()}')
print(loss.item())

      0/100000: 2.116586923599243
  10000/100000: 2.597865104675293
  20000/100000: 2.082200050354004
  30000/100000: 2.391022205352783
  40000/100000: 2.533857583999634
  50000/100000: 2.3716158866882324
  60000/100000: 2.0154688358306885
  70000/100000: 2.288945436477661
  80000/100000: 2.426788568496704
  90000/100000: 2.324061155319214
2.059821128845215


In [47]:
#Be able to test the model on our test or dev data to make small changes

@torch.no_grad()
def split(split):
    x,y = {
        'train': (Xtr, Ytr),
        'val': (Xdev, Ydev),
        'test': (Xte, Yte),
    }[split]
    
    emb = C[x]
    k = torch.tanh(emb.view(emb.shape[0],-1) @ W1 + b1)
    k2 = k @ W2 + b2
    logits = k2 @ W3 + b3

    loss = F.cross_entropy(logits,y)
    
    print(split, loss.item())
    
split('test')

test 3.456343173980713


In [48]:
# Generate words

for x in range(50):
    
    out = []
    context = [0] * block
    
    while True:
        emb = C[torch.tensor([context])]
        h = torch.tanh(emb.view(1,-1) @ W1 + b1)
        k2 = h @ W2 + b2
        logits = k2 @ W3 + b3

        probs = F.softmax(logits, dim=1)
        ix = torch.multinomial(probs, num_samples=1).item()
        context = context[1:] + [ix]
        out.append(ix)
       

        if ix==0: 
            break
    
    print(''.join(itos[i] for i in out))

niryshrosiresisteressaneate.
nedfecetat.
iredir.
phorostedecreedeneres.
natelyt.
notanestelosatelatatil.
natestelet.
nubat.
nat.
niredaresesteredilesisterededes.
nophotalstened.
necet.
medetaniet.
sumenededil.
riphilastotal.
natentaristedisterateredeneedatiles.
nopoleterilesteredatenesteres.
notile.
neceril.
notenederestededederestes.
nirillis.
pirestenin.
nat.
necestenedil.
natedilestundaristeril.
noperiles.
otal.
edat.
restestagin.
nederilotanesiredeseredecolydatenedir.
ratededestiteniles.
riremen.
telestarestoris.
nugenat.
echoscerestet.
ropheteedetet.
notiet.
notil.
netelat.
niceisteredesterecenteed.
hil.
neteres.
nedet.
nedireseres.
petestereteles.
nires.
notenestenusterces.
netenetares.
nates.
lat.
