# Recurrent neural network with numpy

## Prerequisite

In [1]:
# some important imports
import numpy as np
from tqdm import tqdm

## Encoding text

In [2]:
# data
text = open('data/toy.txt', 'r').read()

# text = 'Hallo'
text_length = len(text)
chars = list(set(text))
char_length = len(chars)
print('text is ', text_length, 'long and has ', char_length)

# creating training data
char_to_int = dict((c, i) for i, c in enumerate(chars))
int_to_char = dict((i, c) for i, c in enumerate(chars))

X = np.array([char_to_int[char] for char in text])
y = np.append(X[1:X.shape[0]], X[0])
print('first 10 datas: ', X[0:10])
print('first 10 labels: ', y[0:10])

text is  1993 long and has  56
first 10 datas:  [43 28 41 32  4 22 14  5 32  4]
first 10 labels:  [28 41 32  4 22 14  5 32  4 22]


## Forward pass


In [3]:
def forward_pass(X, hprev):
    ht, pt, loss = [hprev[0]], [], 0
    for t in range(len(X)):
        # creating a one hot encoded vector
        xt = np.zeros((char_length, 1))
        xt[X[t]] = 1

        # calculating forward pass
        zt = np.dot(Wxh, xt) + np.dot(Whh, ht[t])
        ht.append(np.tanh(zt))
        yt = np.dot(Why, ht[t])

        # getting probability distribution
        pt.append(np.exp(yt) / np.sum(np.exp(yt)))

        # summing up the loss of every output
        loss += -np.sum(np.log(pt[t][X[t]]))
    return ht, pt, loss / len(X)

## Backward pass

In [4]:
def backward_pass(X, y, ht, pt):
    dWhh, dWxh, dWhy = np.zeros_like(Whh), np.zeros_like(Wxh), np.zeros_like(Why)
    for t in reversed(range(len(X))):
        dout = pt.copy()
        dout[t][y[t]] -= 1
        dWhy += np.dot(dout[t], ht[t].T)
        dh = np.dot(Why.T, dout[t])
        dtanh = (1 - ht[t] * ht[t]) * dh
        xt = np.zeros((char_length, 1))
        xt[X[t]] = 1
        dWxh += np.dot(dtanh, xt.T)
        dWhh += np.dot(dtanh, ht[t - 1].T)
    
    dWhh /= len(X)
    dWxh /= len(X)
    dWhy /= len(X)
    # gradient clipping
    for dparam in [dWxh, dWhh, dWhy]:
        np.clip(dparam, -5, 5, out=dparam)
    return dWhh, dWxh, dWhy

## Predict function

In [5]:
def predict(X, Wxh, Whh, Why, hprev):
    ht, prediction = [hprev[0]], ''
    for t in range(len(X)):
        # creating a one hot encoded vector
        xt = np.zeros((char_length, 1))
        xt[X[t]] = 1

        # calculating forward pass
        zt = np.dot(Wxh, xt) + np.dot(Whh, ht[t])
        ht.append(np.tanh(zt))
        yt = np.dot(Why, ht[t])

        # getting probability distribution
        pt.append(np.exp(yt) / np.sum(np.exp(yt)))

        # creating a prediction string
        prediction += chars[np.argmax(pt[t])]
    return prediction

## Updating parameter with adagrad

### Intizializing hyperparameter

In [6]:
seq_size = 15
hidden_size = 200
learning_rate = 1e-1
epochs = 200

print('Training ', epochs, ' epochs with a sequence size of ', seq_size, ', a hidden size of ', hidden_size, ' and a learning rate of', learning_rate)

Training  200  epochs with a sequence size of  15 , a hidden size of  200  and a learning rate of 0.1


### Initizializing learnable parameter

In [7]:
Wxh = np.random.randn(hidden_size, char_length) * 0.01
Whh = np.random.randn(hidden_size, hidden_size) * 0.01
Why = np.random.randn(char_length, hidden_size) * 0.01

In [8]:
# initializing hidden state and squared gradient
ht = [np.zeros((hidden_size, 1))]
grad_squared_xh, grad_squared_hh, grad_squared_hy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)

loss = 0
for e in tqdm(range(epochs)):
    for steps in range(0, len(X), seq_size):
        inputs = X[steps:steps+seq_size]
        targets = y[steps:steps+seq_size]

        # forward and backward pass
        ht, pt, loss = forward_pass(inputs, ht)
        dWhh, dWxh, dWhy = backward_pass(inputs, inputs, ht, pt)
        
        # adagrad
        grad_squared_xh += dWxh ** 2
        grad_squared_hh += dWhh ** 2
        grad_squared_hy += dWhy ** 2
        
        # parameter update
        Wxh -= dWxh / np.sqrt(grad_squared_xh + 1e-7) * learning_rate
        Whh -= dWhh / np.sqrt(grad_squared_hh + 1e-7) * learning_rate
        Why -= dWhy / np.sqrt(grad_squared_hy + 1e-7) * learning_rate
    if e % 10 == 0:
        print('loss at epoch ', e, ' is ', loss)
        print(predict(X, Wxh, Whh, Why, ht))

  0%|                                                                                          | 0/200 [00:00<?, ?it/s]

loss at epoch  0  is  4.1273035967
gs iynon dndngII]kW
]
b
b
bâbÃ]â
.
.
b
bâbz]f
u
.
[âbjAW
u
.
b
bâb])Wpu
.
b
bâbâ]W]u
N
b
bâbâb(]â
.
.
b
bâ]ÃAu
.
.
[âbâbÃcu
.
.
bâbâbÃ
u
.
.
bâbj]W
u
.
b
bâbjAW
u
.
b
b
bjbÃ
u
.
.
bâbj]W
u
.
b
bâbjAâ
u
.
b
bâbj]W]u
.
.
bâbz]W
u
.
b
bâb
]W]u
.
.
b
bâbW]u
.
.
[
bâ]W]u
.
.
bâbj]W]u
.
.
b
bâbÃ]u
.
.
bâbâ]Ã]u
.
.
bâbj]W]u
p
b
bâb]AW]u
N
.
bâbzbÃAu
.
b
bâbI]W]L
)
.
b
bj]W
u
.
b
bâbjAâpu
.
b
b
bI]W
u
.
p
b
bj]W
u
.
p
bz]]]u
.
.
p
bâ]Ã]â
.
.
b
bj]W]u
u
.
[
bâ]Ã]â
.
.
b
bâ](]â
.
.
bâb
bâ]W
u
.
b
bâb(]â
u
.
b
bâbâ]W]w
u
.
b
bâbj)â
.
.
bâbâb]]f
u
.
bâb
b(Aâ
u
.
p
bâb(Aâ
u
.
b
b
bIAW
u
.
.
bâb]bW
u
.
b
bâbjAW
u
.
p
bâb(]â
.
.
b
bâbj]W]u
.
.
b
b(]W
u
.
.
bâb])â
u
.
b
bâb
]j]â
.
.
b
bâb]AW
u
.
b
bâb]]W]u
N
.
b
bjbW
u
.
b
bâbâb(]u
u
.
b
bâ]W]W
u
b
b
bâbâ]€)w
.
.
p
bz]W]u
.
.
b
bâbj]â
u
.
b
b
bj]â
.
.
b
bâbj)W
u
.
[
bâbW]â
u
.
b
bâb(]W]u
.
b
bâbâbW]u
.
.
I
bâb()â
.
.
b
bâ]]]u
.
.
b
bâb()â
u
.
b
bâ]W]u
.
.
b
bâb()â
.
.
b
bâb]]w
.
.
b
bâb]AW
u
.
b
b
bj]W
u
b
b
b
bâb(]w

  5%|████                                                                             | 10/200 [00:05<01:35,  1.99it/s]

loss at epoch  10  is  3.33348988261
gs,onhennnn ngIC”0I00j10"j”0lb9"0"j0j("jjb"b0zI©"j00jCTb"”€"€ÃI"j0jI"b(xj"j0jb"z0x€jj"b0I€"0bxb0x(jj"j"b"jIb”"I0jTI€"j""9b"x€"jI0"x9”x1b"b0jI0b€jb”00€"b90b0€j0b€jb"00€"b"jW"b9jb”00€"b00”zb0I0jb"jb""x""b90b0I9bWjT€bW"jIb€0b"€jIb0I00bb"j0b9j€”b"bzx(9b"jLIWjb0j€j0T0Ix"Ib0x0jj”""zjb"b0€Ixb0j€j0jb"jI0”0I(xIb0000b00jI""I""Ib0jb€"0"j0j("jjbb0jbI9x1b0Ix©jT0jb""j"j0jxb"b0jI0b€jb”00€"Ib0jb€b"x€"jIxb9jb”00€"b"jW""T0x0b"b9jI0b€jb”00€"b90b"TIb"x0I("bxb00bb”0x€"bhjW"€Ã00jI"b0I€"b000Ib0I0"IIb"bzx("jIb€jb”00€""b"b0jbI0Tb€j"”00€""b"b€"jL0z2b9j"”00€"1b"b€"T2x”0b9j"”00€"1b""jT0jb0jb000j"b€"j"b0"b0jW0jb0xb"I00j0jx"T000jb0zI©"j00jCTb020"jWl0"b"b"jI0b€jb”00€"b"jW0jI"b9€jb0I0"jxTj1b"j00jW"jjb"b"jIbz0xb""I""b0jb0jbj"0€"b"jW0€Exb€jb”00€"b9bb00xb0jjTx"xj0xb0x(IIx"b"j00j""jIb"jIbzjjIb0I9jxjT€€jb"jb0jI0I€"xb€bb"I0I”b00xb0"b0x00jb"jT€0I""jT0I02I0x1b0Ijb0x0j€jjIj”x"b0jb0zI©"j00jCb0"b0b(I"Ljb0"0Ib0jb00xb""L"9bb€bW00xb00z0b0x€"j"II0j0b0jb00xb0"bzx0"b9bb"”€j"Ã0jI"b€"jb"0(j02b"b00jj

 10%|████████                                                                         | 20/200 [00:09<01:26,  2.09it/s]

loss at epoch  20  is  3.51585320064
gs inhonnnn  gI)kIIh0IIIh]TI2IbIh]T€AIh]pAIIThIh]T"]TT[III€xbIIhx"T0IIIIIh2€CxID0IT]IIIbII0]AII2IIIIhqIIIAIII€II€hIh]pAIlbxI"xTT€h2ThxIAIIkbT0IC€xI"]2AIf2II€T"b"€xI"]2AIhTTII"€xI"]2AI"0IhAbIITApAIIIIIhAbIIII2€xTACbIfxII€2Ibb2IIIIbDIIkbTACAh€AIIIII2Ix"IIxII€qh€hD]LIkbIIII"I0AIh]xII0€IIII€€T"III(T€IhLhAIb€h"0A"0IIIIIIhIIIxIfI"]T€AIh]pAII"wIkbII0III0Ah€Tb€xhbDAIIII2bT0IC€xI"]2AIIIxIfIIIIh]LII"€xI"]2AIhTTI]D0IIIII2bT0IC€xI"]2AIf2Ih"âAxIIIh]LIIIhxIâAIIhI€TAI€II"ATIITIIDDITl(IIIkkbIIII"I€IbII"€xI"]2AIIIII"I"h"b"€xI"]2AIIII"h]2"hzIC€xI"]2AIIIICh"zI0TIC€xI"]2AIIIh€IIII"zII0]T0III(IIDDAhTThfIbIIbII€T€AIIIIIIxIThIh]T"]TT[I"zIhTIkkbIII2bT0IC€xI"]2AI€TTCTAII"€2IIIbIIIhAIII0h]TIh]pAIIIAII"0IIbIIIhAhAII€I0h]2xI€TA0fIII"€xI"]2AIfbIh0IIIhA"xIIxIIIIxhT"IxIIT"]TIhAIbIAII"xIIITI2€xTqC€2IpAI€qThIbIIIfIIIIhIIII0IIDDxIII"AII(AW2III(AIII2IIxIIII(AIIhqhxTICIAII€qII"Ih]T"]TTII"III"IÃIbIII2bI€qID0IIbDAIkbIfIIh0IIIAh0AIxIh€xIIIAIIIxI"0III€xh]IDAbbIII€xbIhT0II"€2IIIIbIlIIAh0]T

 15%|████████████▏                                                                    | 30/200 [00:14<01:20,  2.11it/s]

loss at epoch  30  is  3.40555608766
gs onhonnnn ngI bbl"€q"l"z€l"I€]"Dq""0"€"”""]"A"©A"zqâ€qq0©)"[""["€â€q"x"hA€z”Ihjq€jj""[0€j"D""""D0)"0""00L"€"""1"l"©”1l""[]"©""€zq"jq"T0A")"zq"€”"h€z”""""€"""qD)0"""z”"zAq0AT""jh€z”""€2"I"lqA"l"Iq""""""""©0qL0]"©""©)""€""[""""©0I©0"I""T"”"zqlq"]0""[""""""©)"€""€”qjjA"]"Dq)x0"D)""0€""I©€"""zAlA"€jzA"xAI"""€I"€q"2""""["©)"©""€q"âA"z"”"q"jA"]"")"""jA"€IA"0"q"âA"0A")"zq"€”"h€z”jq")q""I]"""”âIf["""€z”"zAq©"h€q"I0A""Azq"€”"h€z”""""h""I"q"l"€""I©]""©"""©"zqIAz”lz""€q"0ID"10"jA€0jIq""0A"]"-0L"L)Djh"z”j"0qq)""z""T[”Ah"z”j"lq["z"”""AD)"""€z”j"lqL""€jAzAT[""h€z”j"""€A"D"h)""0q€jA€0"x"h"I2A""€qA©""0qDq"z""""""""]"l"©A"zqâ€q""qz€ljqqT0q""Azq"€""h"z”"z0q©0xj"""jID0"l[""xAIx)"z"l"©””j"l0L""9qA2A"0"""0"©)qAhz"”q"A"j€”"""")jh"z”"€""h9q"D"""jA"0"0"D]"€2"""q)hz"l"xA"0"D"""(jA"0jI0]"""IA00"€)""l1Dq"€""L0"0AI"0q"€")Dq"h€q0A[©]qI0AI"00"0q©"T"0]""q"D""jAIjq""D)"©"”"©A"zqâIj""""HljD[""0jL"©0""€q"A"q"Iq"©"I"9q"©"h€q€x""€"jA"""""0""€q"€"”"x"hA""""€©0jq"©A€q"jI"q"lÃ""""T"9”A

 20%|████████████████▏                                                                | 40/200 [00:18<01:15,  2.11it/s]

loss at epoch  40  is  3.5290951208
gsmon hnnnn igI  artevwI""q1AA9”"zÃ"ÃÃ"z9©qAâ"Ã"zâ"€qâxq"]00"ÃxÃTzqAxq"x]"âz9Ãq"9©9xk[AÃÃ"xÃx.AAxkAx"xAA0ÃIAx€[AÃ]Ã"z€©"AAx]1C9âx€xxqx€A1AA0â[AA9xq"€9©q9CAq0â"Ab€xq"€9©qLâA0Ab92q"€9©q"9k"AÃIAâA0ÃAx]xx]A9CA10AÃxâA0AI0”IA0CA"€A"q0ÃbzI1AxâA0”"€A1Axx”AAx"Ã"xÃALx]ÃÃ"zk"AAAxxCAxâx2"LxqA]€ÃxqA9€â"2A1]1€”"A"xAA0z"9A"hxAx]"["AAAxq9”"zÃ"ÃÃ"z9©AT€z1AxÃAâxx”âA"€A"zx]A99]A1AA0â[AA9xq"€9©1A0xI9Axxq"zkxqb9xq"€9©qLâAxz"9©AA1AA0â[AA9xq"€9©q9CALzâAx]A1"z9"A"zxqATx]"A€qAI0Ã1zxq[AqÃAz€]âAAAx1AAxIA0ATx”1Ã"AAÃxq"€9©IA0AT€zAz"AI[xq"€9ÃIA0AÃÃ"Az"Wqb9xq"€9ÃIA0Ab€"x]A1Ab€xq"€9©IAx"x9x1ALxI09Cq9Aq1]x]LzA"âA"€A1xAA1AzÃ"9xA[2"IxAA"Ã"zâ"€qâxq"ICzqIIAAA1AA0â[AA9xq"€9©qLâAhâxAAA€AA10A1Ãx"xAAxâ"€ÃA"z9©1A0ÃIA"h©Ax]"["Azxq"Lxâ"€9©qLâAâ€Ãxqb9xq"€9Ãq9AI"9xA"zATxAx”qxAbx"Ix]xAxâ"€ÃA"xAA0Ã]A"AzxAI0AÃxâAT0AA1ÃALxâ"ÃÃqxA0AAI0"ÃAA"x©Aq0”qx”"[A1âA0”1A0]A9”AAÃ"x[AIÃâAxx"xhT0A[q"1AzxqA"Ã"zâ"€qâA12ATLxq90ÃATÃ-AA"xI"9©AAzA1AxA9AI"9xA00"9Aqxx"[x"I€1"AIxA"9©Aq0”]x]"A0AIx][x9Ã9qâx]b9CAx]IÃA-x1A"9”A1

 25%|████████████████████▎                                                            | 50/200 [00:24<01:12,  2.06it/s]

loss at epoch  50  is  3.09252283514
gsa  oonnnn ngI  orhllplh€qllI0pq[Ã[[lh[gxhA["lh[b"hqhxAxhqxqÃxbx"qhxAAxq"["gxhhxq0lq[A"lhxhgqIl0llxhxbIl0AIh0Ãl[hlh[gxllI€q[kqbq"xxhx0blIl0bhI[[2h"hgxh0lbq0b"q[[xh"hgxhhbIlI[[2h"hgxhhxlhTÃl[hIl0bxpxgqI0Abqll[qbI0[A0xqI"lb"0kqqqllhl[lxhThxq[AlbxqllI0[Ã[xlbhgh["hhqxlIfxqllxbxgq[gbIhxbxp"h0b"qIlhhxh"IhxAIAhhxAq0qIxpxgq[A[xb0pq[Ã[[lh[gxhpxqkl0p0hx0khIh0b"h2hÃ[qxIlbl0bhI[[2h"hgxIA[xb0bxqh"[qxp[[xh"hgxhhbIxhhxqlblIl0bhI[[2h"hgxh0lbhhhA0qglh"qxq0hxhhlxh"[hqAl[llhxh0Aql[hx[hlIqqlllxAIlIxqIl[Ab[[2h"hgxhAlIpxq[hhA[[2h"hgxhAlI[q"l[hTb[[xh"hgxhAlI[hhxqbhT[[xh"hgxhAxqq[xhAh”hI0hqxAhlbxfhhAhbAh0AjxfÃlkhÃh[xA[AIIxbh"lh[b"hqhxA"q[hqlllqqlIlqbhI[[2h"hgxhhbI[bxlI[0Abll[lÃxqxlbxb"hÃlh[gxhAlxAIhxqIxpxgqIhgh0[qb""[xbhbI[0bxp[[xh"hgxh0[AhxqIAhT[xlxxhqI[xh0xxxhxb"hÃlhxAIlxAIql[qIqll[qbI[[Ablxbhgh"lkqxq2[AAlhl[A[xqAh0xhxlq[AlhI0lkAlhI0llllhxqbjlbI[q"ghxqk[hqlIhghh"lh[b"hqhI["IA[xl0lÃfAllIqhghhxqI"hTllxpx[AhxqI0bhxAqxxh[lq[xbxAAxbqxqIh0xqxAqqq[AxqqxqlhqhxA[[AbxllÃllxqAhxhb

 30%|████████████████████████▎                                                        | 60/200 [00:28<01:07,  2.09it/s]

loss at epoch  60  is  3.16326021114
gsn  hennen mgI aot h”bh"["blb9bDÃIz00h)20hTbh0hD0h€qâhT©hD€DIDh00DâhT"qh[0[D20h2DD0Ih0IhD0D0q]lq002]2bTh201hDIDDhDhDD20lI0D"©2jh[©0hxD0h2lDIhTDh20h[D00D0bhDbh2Ih20k[D00[000Tb©€hhzD0]h2b"T1l-AT00I"b22hTD0TD0lID]TDbIh2D0h0IID0D0D0BDbIlD]T02hDb"TDD0TI0ÃIâD00[2hÃ©0D0qlI0"D00D]€€h[0hTbDID]0qDâh2b"]ÃDD[0h20T0€000h2q0D0"0hD0020hbDÃIz00h)20b[0DD0D0T]©2k]ThDb"©2]Ã[D€0"2lDâhTÃh2hk[D000"20hIDDhhz0D0Ã©€hhzD0][00D[00D
T0T0Db"IDh2hk[D00D0bzh]T2Dhhhb0D0"[2I][Dh[Ã[€0TDI0D2âhTD020DIh0II"0llD02020D0000IÃ]2hk[T€h0"T00Dbxh0Ã©€hh[D0h0"T©h€0[0TIÃ©2hh[D0h0"TT[02D]"TÃh20hzD0h0"hDTDD0[20"0DDhphhâ©0âh0[]ThDI(D]0"hDÃ[Dq0hD0"00"h0hz0hDDâhThT0DD0TIII02IDb"IDh2hk[D00[00D]x0IÃD000h€0I©hD€0"]hDIhhDD2DTh2bIh2D0D020"T[2I"Ãq]hDD00[00âLb©]ÃD2hk[C€0DTIh0D0T©ÃI€
€€h€2"DhDDD20"]hDI0hD0T02DIh0ÃD0D"lI€]Tj"bI02bâ2]h02hxTDÃI00h0Tbh0D0hD2hx0h2b""TDID00]T009l0"qh]"00T0D[2h"DD©hD02[20]h0hz0hDDâTDII"D0"h0IID0lI2D20h2q00[TTlq0hDIh0D0D0hhbhD2hÃ€Th0000I20h2q0hD€hD0hThDIDqD€DIDDâhTID
0000Tb0DhTh0j0

 35%|████████████████████████████▎                                                    | 70/200 [00:33<01:01,  2.10it/s]

loss at epoch  70  is  3.34989689494
gs   honnnn egI  ottnninhonlotnihononthon  o ittonion nonton iniein noon inon ohnd oonoit nonn onte onntt oi nnenithon to  dnd  non odt t on ioun  ion onr onnioun  ion oontr un  ion ohnth ioiiot ontn ionr ioonnienn n  tor ined nt inionien ontt rdnoi uinne inino ondlo enied d ion   nin otoi o t sn othn iuiinthnd nin in n oothononthon oidonenie nn   hn iononond otontioun  ion o u  nnnn ionn un  ionnoontnihnneen ont tun  ion onr hiit  tlioen oont nn onon onironinott innnln  moen io rnuneotun  ion  ortieonnhoun  ion  ootuioni n un  ion  ootunhnnintun  ion  oni innton unonntot n iioontho nn il inond n non  ilhonioninohoinnloeo n ont tun  ion oontnsn iune itetinineen ioilhon  ootothnd  in ioononnni on oo t nin un oion onn hnn oinnnen odtontud  ontionthn il othenn nlonnieune n oon iotonton othni nnn hn oni  tr ooooin  ntiotodt ut o nininnonnn  ono inhonions nn undtntotutooion hnd ionoen nn hnn nn nt n indonnnnoot hnd  n idt onn n o oioo nounr n tinnn ohnon

 40%|████████████████████████████████▍                                                | 80/200 [00:37<00:56,  2.11it/s]

loss at epoch  80  is  3.03795904805
gs e ho nnn ngIno ahheenionnean hononnion nnsiniosion n n n niniennsn n  oson  hnn ennnnnnni    nie n ton   onneninion nna  neosnnn  n tnr osstnn oion  nn onshtnn oion  nstnrnn sion  hnnitsnrs n  nin itnn nnnnn  nnnn   nennnen innnnn nn n in nrennnn ninne i innionnln enre  d io  osnnn  onshn nssn nnin tnnhnohnn  nn into  nehononnion  eenneno sn is hn  o ninn  nnnosstnn oion   n tnnnn ionn nn oion  nstnihnne ne osstnn oion  nn nis   tnionn nn osi  onnnnonnnn snnnnennn nntsnnn  tntennnno nn sion  nntoeounhtnn oion  nntnhonnhn nn sion  nntnni  s tnn oion  nninnnnoon nninn onsn nirosthn nn nnnnion  nnon  siniosion n hn nnnnnn nt osstnn oion  nstn n tnnenennninine   honnion nnn   inn  in ito onnn ion  nstsnnn nn oion  nuninn  nnn en  n nninn   nsionnin tn   innn nnnnn  nnenn on shn  n nunnninn nne  n snehnonstnnntns one nnn  nns eno infnn n tn oshniosion  nn nnennni onnn n ohnn onnnne nn inn   ine n innnnnne n  hnn  n in itnnnn n nnon nnnnennnninn   hnns

 45%|████████████████████████████████████▍                                            | 90/200 [00:42<00:52,  2.11it/s]

loss at epoch  90  is  3.4276995495
gsem  o nnn rgI ool enmn ottlnoehnnien on  o ot n hot a ntn ooh nst doe  n nn oh nnnnnonntnn     nnnn  on  itnnenhn on r n  nnn tnn t  ond n   nn t nn onntno honn thon on t onn thnn ohnnntnnnstn o rn  onnt t nn fnnto   nn nnn  ennnnnyn an inotonnt n nnnnnso hnn nnntninntnn n innoo nnntnan hoinssn on n nnn nn n    ntinaontonhonn n on oennnnhtesn ns  n on nnnentno n  rnn t nn o o onnoet nnn nn tion on t n n  snten  rnn t nn onntni o  onhonnoend  tntnnntoennno  ao nnnnn in tnenn nn onnnneann thnn tontonnnn tnn  inn  ontnhnen nsnn tion  ontnnhd  nann t on  oe  onnon onnntnnon n nhon a n ontnnnnnn   noond  innnsnntsdohnnntlnnn no n   nn t nn on on  osnnntonnnnntnn   nnnn on non oihn   nntitn onn  hnn on t nnnonn tion onnthnn onnnnnntt  nn nn  o n nnl   it   innntnnlnnsinnntn one nndt  ontoehtntne o,n o ohton  n ean annnynt tonns  nn nnnont  nndtshlnnsnnt onntonntnnnoon ean  h n annnnn nntinn onnnno d nnnnnnoon th n tf  dnhtnnt  n oonnte nnnt nnnnnn  h nst

 50%|████████████████████████████████████████                                        | 100/200 [00:47<00:47,  2.11it/s]

loss at epoch  100  is  3.2602330278
gseo ho nnn ogI narr ntnhotn  n hnnnelhon  a nnho ho  a  to omdtnnt o e tn on tn  onna oronn et ntn n t tn  tn nnhannn tl t nnnntndto ton  n  den ohnn ane oo h nn ohnn an tt nn ohnn ahelh mlnson   ndti on alend don n  aon mnn tennnlnnn on hn naenln nnnodn n innhnnntntn fn  dtio oonnedtntnshe n  n nnhe  nnh rh    ndtie r ooehonnenhon eennn  m  n n  hn mn nfn   to n  den thnn   o nn ndthonntnn thnn an tenhe n ti n  den ohnn ane nh d  tlhonntnn  nn tnnnto nnln  t  nnnnn ln on  d  nte nln nnnnthon  aa ennnnhaen ohnn naa nhn nhe en ohnn naa nnht  nton thnn na t ena n  enn em t  tnh n dhn e amnnnnne onnonn nhlho hot a hn n ll n n  n  aun ohnn an tn d  nnn arntmdtdn   hnnlion tan e h  tnnnti nn onn inn tn t n dtnn ohnn ann h  tonnndne o tn tnn n   hnnlid  nn  hn n nnnnn  nnn ln o shlnt tnn elhtn on aoo   ahaoa to e a ton  lt t nn  nennhnnon  a n  nhlio hot aon o nenlm on e n tne  mneeyn nn n  tonheto  innnnnnn on n  too hd iann  oo nmn  t unn delmnynt oen 

 55%|████████████████████████████████████████████                                    | 110/200 [00:51<00:42,  2.12it/s]

loss at epoch  110  is  3.06563429168
gs o ho nnn mgI  rr hxp0WzEEHAbâ"z"zEpW
€€AH,h”HH"E
€âHAqbbWbpxE0zEâ€AHqb.b[WW.hE0(HâA.ppWxz"0HHHEâH
"(HH”
AbWp"[hÃxz€E€HHH.”WbhWz01AqA.b.0Wb0W(WWbh
€€DbHqbb]hq)[xfh
€HH.bWlW[zWfh
€€HHHqhWpl[hWAWqx0"HbHb[qEEj€x"qb[HfH.Hb[.bWWWqqEW
EâH"âHbq"bqpxWxp0AW”IE"0.z"E””"
â””HHHq"WxâxEE
EDA,Wb.W"Wfbh"qph0WAzE""
HCH"xqW"0E.HHHb
q"WIb€W
"zEEE
€HA1Hbbpxpq]xxâ"HHbHbz"
E€HHHbH-bb0W(€Wbh
€€
AHHwWbW"Ih
EHH1HHbh[WW.
"Hqk1Hq9.pW0Wb0W(WWbh
€€DbHq"WâWE.fbE
E.HH
AWbpxbz0zEE
bbÃ["âWqEEW)E€€BHA1bHH00qp.WxâpW0q)z0fh
€€HHbq00W[zEI””€fHHbqbqp.[E
€””âH””HbhqWWqqp.)
EHAB,H”HHbhzWEEHHHb."qqzEICH
0HqbphWqzEI
€AHbqbxWpxxz"zE00â

â€Hâh€H
"€”,,Hwhbâ[Wl00"qp.0Wb]E€HWbhhWWE
"HHH”0wbbâq[x[j"xWx0.0"E
”HHHbqbblW0qhx0I.(]HHHqW.q"0"E€€HWH,.bWpxq)z0fh
€,Ab”AhWxq0z0”"€Hbb.qbxEâ"0€Hhâhz"phx0.E”
qhW[0IqEj€x"W
b[qpxI€"âE€HbhHb[q0]hp"A"q0EbWqbxÃhqqpjWbq0q0jWWp"xjW0EHCHâWWWzEE"b€zA00.q".âhphz"E
A,HbHqWâWlxl"q"000qzEIEH.H(q0W0"qf[HEC.HHq"WqbqEH”Hâ.WA"q
”I"C.Hbbq"xWhIbj..(fqbphWâWq[W0q0E€b"H.IqqE


 60%|████████████████████████████████████████████████                                | 120/200 [00:56<00:37,  2.12it/s]

loss at epoch  120  is  3.13065534282
gsio ho nne ngI mma sckx"
AllIfphcp[1lc[fAIIwh]c
,h
A.HAC”” ne inno  an on nn ah  men nono n  ao e  n oie n onn nh inn net  n n on  t  ne en  nnn ohnn aneaon  aun ohnn an ne nn ohnn ah eiaoee nn a n  honeaeenn  nnu n  aneaone one n oe  nn hoie   en  nnn nan  nnine en   ee    hn oi nn anon   in  ntneide nn  ah  a n  hoao aneinnn einn aonoee mi   n nhoton nnn  an en  nnn ohnn nao annn ohne ann ohnn an n ni  nan en  nnn ohnn aneani n  oeine ann m n onnnoannnen t inn nnn nn oeny n n e ee ennn ohnn aan   nenionn ohnn aan nhnyninnnn ohnn aan uni    nun ohnn aa ion  nn mn no aon danion nhoto annnnnn  nn en a  ein  no  ahnenoenyn n en  nnn ohnn an nn n nnnntoe endide   inneinn ain n i  a n  hon mnn  inn an nenn ann ohnn ann h  annnndn  t ao in   a  inneide n n ine anne d nnnean an  in o anu nniente  aon o  iaan noee n noeeeeo   ne ne n  nnee  n n a  nin  no non oe eeenioneenn ah  ann ne onn h  an i to  hn nnn n o ah  aon hdniane  on nnne  annea neoee e h n

 65%|████████████████████████████████████████████████████                            | 130/200 [01:01<00:32,  2.12it/s]

loss at epoch  130  is  3.17784588407
gs   te nn  ngIfmeliogfyi ion oyhenn ninn  t hoin hn   ieto nmdtnn t toehn nn tineonn tneoen ntnden n ttn n ion nininn nnn nnnn nnd tntte nn  nun tinn ton in ioun tinn tn nnoun thnn tinniaony  n oeed ioon nnnndtnondo eonnmenedanneenendton to nnndeli  n dn n tnninndnn ndendtd hn tt nndtneo inin  nenlinatnnhetheehelenin o to innn ninn tnnn nnn  dan nho nn nnn ntomen  nun thnn n n to ndhonndtun thnn tn neninen omnn  nun tinn ton ni n ealinndton t tdtnnn toonln t telnnnn nn honnd  omndennn un tinn  tn ennnnioun tinn  on nhnenin un thnn  tn uni nt nun ttnn  teh on an te nieian dtnion  ho eeaendeonnnannnn t inin in  tthn n ln n n en  nun thnn tn nn detunn elnendhenoethnnehen  tn n ineteed ion ttndtinn tn n nndtun ttnn tonnhedtonnndnd tntndheda tdthnnnheein eehnndtnenndtnunn l tn  in tntoenneiln eeeoeo tenhton nondtl  enymennttno nndn tnnen natn t inin in tnen nnneeonionnnen ehe tnene etonnhedtteheoae heneenen n thnettn iethooeneto nnn  tiunn ennnendtthnn

 70%|████████████████████████████████████████████████████████                        | 140/200 [01:05<00:28,  2.13it/s]

loss at epoch  140  is  3.0286348138
gs o ho  nn ngIfeeni eenin n tnehennnehnn   tinhn hn   oe   ondhnn t aa  e nn thenn   onn  n  tn  n n ntn  ton  nhnhnn   t d nn rnn he  e nn tfn   hnn tnn hn hau  hhnn  n in e   hnn thenh ne itn iee  honn  n nn  ou o eann  nneannnneen  fn hn n neen  nn  n n hnninn n tneene n ho    nne n n hn n  n nnien nnhefi e eee ho n tn hnnnnninn innn neo  ree ohn nn onnn tn nn  in   inn o n tn  etinn tu   hnn tn   ninen n nn  onn hhnn  on oh fn   hnne nn i nninunitnnnnn   oee nn tn  hn     n neennn un hhnn  an  nnunh un  inn   n  hn nhn u   hnn   n enh e   rn hhnn   ehno   n in n  tnn e nh n  hn ne nn nnnn nnnnn t hnho hn   ohn nhn  n ntnn   nn hhnn  n  n no un  nnenn han   honnhnn   n dthee eee h n  nn  hon to   nne un hhnn  nu hoe nneuen  he n heee tr honehae n   hnnn nn nn tnn  n in  in he nn  einn n  ton heeh  n  nee ntfon nesd ann fnen inn  heean  thnhn ho  tn  n nn nn nnnn n  hoe ne e  tn  heeanehefed hnn nn n n then  n haeh nu enn nns   oun  eonn n   hen 

 75%|████████████████████████████████████████████████████████████                    | 150/200 [01:10<00:23,  2.13it/s]

loss at epoch  150  is  2.87654020637
gs o he en  egI nmeieennhn n tn innnnnhnn  a hnhn in   aa n nndhnnn  and n nn ahnann  ann nn aan tn n   e a nnn  ininn n   n nn inn hn an nn efeeeehnntmeemiothaun  hnn fn anaun  hnn fhanhtnn  fn aand hann nn  n anean etnn nnndtennnn nd fn hnanandt t  nndn n hnnhnnd  ann nd d hn  a nnd e n in n  n nniaaannirahaaadnd he n an hnnnnnhnn annnen et nd  fhndnn nnnndan nn efeeeehnn e n anenn hnndtu  ehnn tn aanhnnn nann ifun  hnn fnnani a d  hnnd nn  onn nnn aonnnn o annnnne n ahnynd anann nnntu   hnn  andnnnenhau  ehnn  and hn nhe u   hnn aanduni n  aee  hnn aadhne   n annnen nnsdtnhan ahnand nnnnnnndeennn f hnhn hn   ah nn n n anann sfeee hnn tn ae diaunnannnnndhen d hnnninn  an eahnnaded h n anen hnn tn ained u  ehnn tnnihna onnnnnn hntnninna aa hnnnhdaan e hnndten  d annn n an ehn hdtnnaonhnnannn nn en h  n fnyn n ann ne a  nn fnnn hnnnundaan a hnhn hn  an  nennnnn nn nan ahnaannne  annahnaannhnand hun e  etn ahna nn hn hannad n nnnn aaunn d nn nd ahnn

 80%|████████████████████████████████████████████████████████████████                | 160/200 [01:14<00:18,  2.14it/s]

loss at epoch  160  is  3.1142993865
gsio h  en  ngn mnniednnhn t  n innn nhnn  i hntn hn  iad n nn hnn ii rd n en th dnnn  nn  n dtn  n n  tn  trnneehninn nn  dunnsind  d tn nnsian  iinn tnntrnsh un  inn tn  n u  iinn th oi nn i n in d h nntnn en  n tn etn tnnndtnnnnnendt n hnan ndt   unennan hunhnnd n n  nd n hn  t nndtn n hntn in nnin tnnhrih dtned hntn tn innn nhnn tnnnendet nd   hntnn nnn dtn nnsian  tinn etn tn nn hnedtun  inn tn  dnhndntn nnsiau  tinn tnntnh i d tinn tnn t    nen irnnnn    onnnnesnn  t  n tn n  nn tun  inn  in nnnunh un  inn  in ehn nh iun  inn  in unh dsi u  tinn  inh e   n tn n otrn n nh n  hntndtne nnn d  nnn i hnin hn ie h ennn  e n nnsi un  inn tn  u n tunntnnnnnehnntn hnnninn  in  th dtne  h n tn   hnn tn  snnttun  inn tn th dtrnennn   dtnnhnd  in hnntin tn  thnentnnnnn  unntn tn ihn  dtnnirninntn dtrn  deh  ns nn  n  nn nn   tnn  ndn hnnnund tn t hninshn t nn nnnninntnn   n th  tnnnt ntnnth dtr h  rd h n e  etn th  trn hd h n id n nnn ii unntd nne n th ns

 85%|████████████████████████████████████████████████████████████████████            | 170/200 [01:19<00:14,  2.14it/s]

loss at epoch  170  is  3.21756352724
gsem h  en tngI m thadnei  nn nehon  thon  aehnineh  n  d n on hnn e ann eeen  hadn n  nntne   ndnn e  an n  nn  inien tn n enns  nn   an nneeaue  ien tn   n h ne  hon aee n ne  hon  h th nneean  nn  h  n nnnnd  n  o   nnannen nneennnnn n hn n e rndnene o o h ehen    en nne  io  asnn  n neh  neen onh n nnin tn  nn  in n tnehone th n  ennn nn endne hntn  enee an n eeaue  ien e n  nen  ion one  ion tes dnin n n nnne u   hon an  oi  n  lh n  ne    d ouo arnnn  s antrnneen   n nnn ninnn nn ne  h n tan nnnenh ne  hon   naei nnh  u   ien  an ueh oee ue  hon ta ton i e tni  nanusd ni ns hn nd nnnnn  ronnne anininsi  n ah ee n n en nn e un  ien tes eeno nn  enenn h n nni nti n tan n hn  nn  i n  n  sh n aen  nn tue  hen tn  h d ne  n n    n t  dn nei ntid tn o h  n nnn    un  n  o  in    n  rn nninn  nn t  hidneen n nsenn nn   tnnn n o h nmtt o o  shnhosh  s u ennnnntntnnyn n th dtn ron en  h dtn irda  i nnn  nin  hnd nn h nidnet  o one    en  r nn  n  h e

 90%|████████████████████████████████████████████████████████████████████████        | 180/200 [01:24<00:09,  2.14it/s]

loss at epoch  180  is  3.01800245691
gsim he ene ngIf  nhed nio nn ni  neenion  atinin ie eedd n nn ie  n oo  osnn  ho ne  aontr  n n nn     n  atn nehnion nntn  nn  o n   aninn   un tien  nu  n h un  hen  osenden  hen  honi nne en   n  ione nnne   nu n   ne nned ne nnun   n in nan nn n nnon n ienion n en en    io r tn   n n hn n sn nni  tueio irdi n  in n  neionnenion  tnn n nat   nehn n  nnee  nenn  eun  hen n n  nued hen  ue  hen  osedoirdm nann   te tien  nu oie n roio   ne  n   onniaon no   onn nntnne  n n  tndn nen  ue  hen  ani nn ni un  hen  anieiennie un  hen  aniunin  neun  hen  aoien too  nh  nahn   ni n  hn n  rneonnn  nenn  tinio ie eedheensenn  n nn  eun  hen  oseen mtuni  nenn i   r hennien  an maho  nn  i e an   ien  oseen   un  ion  nu hr  onu  n    end n n ao ienein tu m he   nnnn  aunnin  n  in   tnu  nin  nhdahn   ti ansanndan  enenn   ae   n   ien n  mtn   hnio ie e  n n nnhn annnnan aho  n nnn  nu io  n ie    h n n  een  hrd hn in i nu   n nnntn  uny   nnen  ahe 

 95%|████████████████████████████████████████████████████████████████████████████    | 190/200 [01:28<00:04,  2.14it/s]

loss at epoch  190  is  3.07315792354
gs o  o ene ngI,eethr  nhn n  n hnncnoinn t  inin hntne   n nnnhncte ti  csnn  ho nnehanntrn   naoo n  an  a n iein nn el    cn endna t lonn  auncohnn aon tn h uncohnn ansdn uncohnn ahoni nnis     n  hannann uc  nu n  ann nnn annnni ndsdn hn n n nn neoscn nchun n tn  e ne  d in c snnnan n hn nsnntenid aecir hn a n  hnancanehoncnsinn tlnn n n sndn  hn ns encn  l nn  auncohnn ean  nsndthnn  uncohnn ansddn ndnalann   uncohnn aon n  dn enin   oe asnd cunsaannnn st nnnnne ne  nnnd  r o eno  uncohnn eal nnnun aunc hnn eal  honoho uncohnn eal unin   auncohnn eaai   aan aehntra nsn n  n  in nn nn nncn  nnen   hnin hntne hn nsnnnn l nn  auncohnn ansd  n  unn lonl nh n dshnnn nn eal  ihod nn  han aoenshnn ansdsnnnaunc hnn aou ho  anuueo na asoi n  a shonnho  l  oin eannnsc  unn l an shl   anu dnho aes itn teei  ns nni nsdnn no    ensdn n hnnaeod  n ashnin hn n  n n nohlndnnnd n aho anconn  nu hod n ho t  iecnna e n ahod en i lionu  tn nnci    nn odnoinc  hec

100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [01:33<00:00,  2.14it/s]
