In [2]:
import torch
import matplotlib.pyplot as plt
%matplotlib inline
import torch.nn.functional as F

In [3]:
# Open txt files


# train = open('Training.txt','r').read().splitlines()
test = open('./data/test.txt').read().splitlines()
dev = open('./data/dev.txt').read().splitlines()
words = open('./data/Training.txt').read().splitlines()
len(words)

30000

In [4]:
# letter to number mapping

chars = sorted(list(set(''.join(words))))
stoi = {s:i+1 for i,s in enumerate(chars)}
stoi['.'] = 0
itos = {i:s for s,i in stoi.items()}
itos

{1: 'a',
 2: 'b',
 3: 'c',
 4: 'd',
 5: 'e',
 6: 'f',
 7: 'g',
 8: 'h',
 9: 'i',
 10: 'j',
 11: 'k',
 12: 'l',
 13: 'm',
 14: 'n',
 15: 'o',
 16: 'p',
 17: 'q',
 18: 'r',
 19: 's',
 20: 't',
 21: 'u',
 22: 'v',
 23: 'w',
 24: 'x',
 25: 'y',
 26: 'z',
 0: '.'}

In [5]:
# Creates the 3 data sets
# Each one is composed of "blocks" of letters up to our choosing. 
# Here, we chose 3 so the word "one" would be shown as [0,0,'0],[0,0,'o'],[0,'o','n'],['o','n','e']

block = 3

def dataset(words):


    X,Y = [],[]

    for w in words:

    #     print(w)

        context = [0] * block
        for ch in w + '.':
            ix = stoi[ch]
            X.append(context)
            Y.append(ix)

    #         print(''.join(itos[i] for i in context), itos[ix])
            context = context[1:] + [ix]

    X = torch.tensor(X)
    Y = torch.tensor(Y)
    print(X.shape,Y.shape)
    return X,Y

In [6]:
Xtr, Ytr = dataset(words)
Xdev,Ydev = dataset(dev)
Xte, Yte = dataset(test)


# Shows how exactly blocking works to show predictions of words.
for x,y in zip(Xtr[:20], Ytr[:20]):
    print(''.join(itos[ix.item()] for ix in x), '-->', itos[y.item()])

torch.Size([313130, 3]) torch.Size([313130])
torch.Size([104449, 3]) torch.Size([104449])
torch.Size([104449, 3]) torch.Size([104449])
... --> u
..u --> n
.un --> a
una --> r
nar --> r
arr --> a
rra --> i
rai --> g
aig --> n
ign --> e
gne --> d
ned --> .
... --> c
..c --> i
.ci --> r
cir --> s
irs --> o
rso --> t
sot --> o
oto --> m


In [18]:
# Initializes the neural network. Embedding chooses the dimensionality of the initial neuron and hiddenN is the hidden
# Neurons in the one hidden layer

embedding = 10
hiddenN = 200


# Weights and biases normalized
W1 = torch.randn((embedding * block, hiddenN)) * (5/3)/((embedding * block)**2)
b1 = torch.randn(hiddenN) * 0
W2 = torch.randn((hiddenN,27)) * 0.1
b2 = torch.randn(27) * 0
C = torch.randn(27,embedding)
parameters = [C,W1,b1,W2,b2]

for p in parameters:
    p.requires_grad=True

In [26]:
# This runs the model on our data

steps = 20000
batchSize = 32

for i in range(steps):
    
    #Runs on batches
    batch = torch.randint(0,Xtr.shape[0],(batchSize,))
    Xb, Yb = Xtr[batch], Ytr[batch]
    
    # Forward pass
    emb = C[Xb]
    k = torch.tanh(emb.view(emb.shape[0],-1) @ W1 + b1)
    logits = k @ W2 + b2
    
    #Calculates loss
    loss = F.cross_entropy(logits,Yb)

    for p in parameters:
        p.grad = None
    
    #Backwards pass
    loss.backward()
    
    for p in parameters:
        p.data+= -0.001 *p.grad
        
    if i % 1000 == 0:
        print(f'{i:7d}/{steps}: {loss.item()}')
print(loss.item())

      0/20000: 2.3326010704040527
   1000/20000: 2.448707342147827
   2000/20000: 2.0864596366882324
   3000/20000: 2.294234037399292
   4000/20000: 2.0319528579711914
   5000/20000: 2.281989097595215
   6000/20000: 2.0325756072998047
   7000/20000: 2.071772575378418
   8000/20000: 2.432504415512085
   9000/20000: 2.0393221378326416
  10000/20000: 2.1370689868927
  11000/20000: 2.4032812118530273
  12000/20000: 2.1762139797210693
  13000/20000: 2.2088828086853027
  14000/20000: 2.1537275314331055
  15000/20000: 2.448063373565674
  16000/20000: 2.335707902908325
  17000/20000: 2.1529223918914795
  18000/20000: 2.1583235263824463
  19000/20000: 2.481691598892212
2.1162142753601074


In [27]:
#Be able to test the model on our test or dev data to make small changes

@torch.no_grad()
def split(split):
    x,y = {
        'train': (Xtr, Ytr),
        'val': (Xdev, Ydev),
        'test': (Xte, Yte),
    }[split]
    
    emb = C[Xb]
    k = torch.tanh(emb.view(-1,30) @ W1 + b1)
    logits = k @ W2 + b2

    loss = F.cross_entropy(logits,Yb)
    
    print(split, loss.item())
    
split('test')
    

test 2.1127326488494873


In [21]:
# Generate words

for x in range(50):
    
    out = []
    context = [0] * block
    
    while True:
        emb = C[torch.tensor([context])]
        h = torch.tanh(emb.view(1,-1) @ W1 + b1)
        logits = h @ W2 + b2

        probs = F.softmax(logits, dim=1)
        ix = torch.multinomial(probs, num_samples=1).item()
        context = context[1:] + [ix]
        out.append(ix)
       

        if ix==0: 
            break
    
    print(''.join(itos[i] for i in out))

unbadid.
audhrinit.
actole.
umprotes.
tite.
fantunnimiolatisnece.
bam.
subeg.
soreb.
nacnongmocogchile.
mocresileaty.
trognenor.
noswuq.
myin.
gxeic.
ung.
brulyo.
nit.
monesud.
ter.
iolinmodetabharigint.
umonkartut.
metuad.
centing.
panminesburcocolionse.
canssph.
grolib.
cretzrmersate.
doteawhatioterdy.
vioeohilostlisedinpermiresmenos.
las.
dypicompialoph.
barhoredpolmpalosibenduntived.
unk.
noprodoryhery.
chedosomes.
munin.
auphosababtertsraplovychos.
asongrec.
tatrarl.
nfict.
cab.
tietarolyudtrognote.
sey.
bfers.
earuphimcbrin.
unmating.
prophrotreldsh.
canminsers.
blin.
