In [33]:
import numpy as np
import gensim.models as word2vec
model_en = word2vec.Word2Vec.load('englishwords')
model_fr = word2vec.Word2Vec.load('frenchwords')

In [34]:
lines_en = open('test.en','r',encoding='utf8').readlines()

In [35]:
data_en = open('test.en','r',encoding='utf8').read()
data_en = data_en.split()

In [36]:
words_en = list(set(data_en))
data_size_en,vocab_size_en = len(data_en),len(model_en.wv['the'])
print('english file has %d words,%d features'%(data_size_en,vocab_size_en))

word_to_ix_en = {}
ix_to_word_en = {}

english file has 252583 words,300 features


In [37]:
for w in words_en:
    word_to_ix_en[w] = model_en.wv[w]
    ix_to_word_en[tuple(model_en.wv[w])] = w
print('Word2Vec English unique words = %d, word2vector features = %d'%(len(word_to_ix_en),len(model_en.wv['the'])))

Word2Vec English unique words = 19176, word2vector features = 300


In [38]:
lines_fr = open('test.fr','r',encoding='utf8').readlines()
data_fr = open('test.fr','r',encoding='utf8').read()
data_fr = data_fr.split()
words_fr = list(set(data_fr))
data_size_fr,vocab_size_fr = len(data_fr),len(model_en.wv['la'])
print('french file has %d words,%d features'%(data_size_fr,vocab_size_fr))
word_to_ix_fr = {}
ix_to_word_fr = {}

french file has 291815 words,300 features


In [39]:
for w in words_fr:
    word_to_ix_fr[w] = model_fr.wv[w]
    ix_to_word_fr[tuple(model_fr.wv[w])] = w
print('Word2Vec French unique words =',len(word_to_ix_fr))

Word2Vec French unique words = 21084


In [40]:
num = len(lines_en)
num = len(lines_fr) #No of Sentences


def sigmoid(x):
    return 1.0/(1+np.exp(-x))
def dsigmoid(x):
    return x*(1.0-x)
def dtanh(x):
    return (1.0-x*x)

In [41]:
#hyperparameters which are same for both encoder and decoder
hidden_size = 100
learning_rate = 1e-1
d_en = vocab_size_en
d_fr = vocab_size_fr
z_en = hidden_size + d_en
z_fr = hidden_size + d_fr

In [42]:
#encoder weight parameters for english language
wf_en = np.random.randn(hidden_size,z_en)*0.01
bf_en = np.zeros((hidden_size,1))

wi_en = np.random.randn(hidden_size,z_en)*0.01
bi_en = np.zeros((hidden_size,1))

wc_en = np.random.randn(hidden_size,z_en)*0.01
bc_en = np.zeros((hidden_size,1))

wo_en = np.random.randn(hidden_size,z_en)*0.01
bo_en = np.zeros((hidden_size,1))

wy_en = np.random.randn(d_en,hidden_size)*0.01
by_en = np.zeros((d_en,1))


In [43]:
#decoder weight parameters for french language
wf_fr = np.random.randn(hidden_size,z_fr)*0.01
bf_fr = np.zeros((hidden_size,1))

wi_fr = np.random.randn(hidden_size,z_fr)*0.01
bi_fr = np.zeros((hidden_size,1))

wc_fr = np.random.randn(hidden_size,z_fr)*0.01
bc_fr = np.zeros((hidden_size,1))

wo_fr = np.random.randn(hidden_size,z_fr)*0.01
bo_fr = np.zeros((hidden_size,1))

wy_fr = np.random.randn(d_fr,hidden_size)*0.01
by_fr = np.zeros((d_fr,1))

In [44]:
def trainencoder(inputs, targets, hprev, cprev):
  loss_en = 0
  xs, hf,hi,ho,hc,h,c, y,temp, ps = {}, {}, {}, {},{},{},{},{},{},{}
  h[-1] = np.copy(hprev)
  c[-1] = np.copy(cprev)
  # forward pass  
#   print(len(inputs))
  for t in range(5): #len(inputs)
    xs[t] = np.zeros((vocab_size_en,1)) # encode in 1-of-k representation
    a = np.reshape(inputs[t],(300,1))
    xs[t] = np.copy(a)
    xs[t] = np.row_stack((h[t-1],xs[t]))
    hf[t] = sigmoid(np.dot(wf_en,xs[t])+bf_en)
    hi[t] = sigmoid(np.dot(wi_en,xs[t])+bi_en)
    ho[t] = sigmoid(np.dot(wo_en,xs[t])+bo_en)
    hc[t] = np.tanh(np.dot(wc_en,xs[t])+bc_en)
    c[t] = hf[t]*c[t-1] + hi[t]*hc[t]
    h[t] = ho[t]*np.tanh(c[t])
    y[t] = np.dot(wy_en,h[t])+ by_en ## unnormalized
    ps[t] = np.exp(y[t]) / np.sum(np.exp(y[t])) # probabilities for next words
    loss_en = loss_en + np.linalg.norm(np.log(ps[t]))
#   print(hs)
  dWf,dWi,dWo,dWc, dWy = np.zeros_like(wf_en),np.zeros_like(wi_en),np.zeros_like(wo_en),np.zeros_like(wc_en),np.zeros_like(wy_en)
  dbf,dbi,dbo,dbc, dby = np.zeros_like(bf_en),np.zeros_like(bi_en),np.zeros_like(bo_en),np.zeros_like(bc_en),np.zeros_like(by_en)
  dhnext = np.zeros_like(h[0])
  dcnext = np.zeros_like(c[0])
  for t in reversed(range(5)): # len(inputs)
#     print('e',len(targets))
    target = np.reshape(targets[t],(300,1))
    dy = np.copy(y[t])
    dy -= target  # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here
    dWy += np.dot(dy, h[t].T)
    dby += dy
    dh = np.dot(wy_en.T,dy)+dhnext
    # Gradient for ho
    dho = np.tanh(c[t])*dh
    dho = dsigmoid(ho[t])*dho
    #Gradient for c
    dc = ho[t]*dh*dtanh(h[t])
    dc = dc + dcnext
    #Gradient for hf
    dhf = c[t-1]*dc
    dhf = dsigmoid(hf[t])*dhf
    #Gradient for hi
    dhi = hc[t]*dc
    dhi = dsigmoid(hi[t])*dhi
    #Gradient for hc
    dhc = hi[t]*dc
    dhc = dtanh(hc[t])*dhc
    #Gate gradients
    dWf += np.dot(dhf,xs[t].T)
    dbf += dhf
    dxf = np.dot(wf_en.T,dhf)
    
    dWi += np.dot(dhi,xs[t].T)
    dbi += dhi
    dxi = np.dot(wi_en.T,dhi)
    
    dWo += np.dot(dho,xs[t].T)
    dbo += dho
    dxo = np.dot(wo_en.T,dho)
    
    dWc += np.dot(dhc,xs[t].T)
    dbc += dhc
    dxc = np.dot(wc_en.T,dhc)
    
    dx = dxo+dxc+dxi+dxf
    
    dhnext = dx[:hidden_size,:]
    dcnext = hf[t]*dc
  for dparam in [dWf, dWi, dWo,dWc,dWy,dbf,dbi,dbo,dbc,dby]:
    np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients
  return dWf, dWi, dWo,dWc,dWy,dbf,dbi,dbo,dbc,dby, h[5-1],c[5-1],loss_en #hs[len(inputs)-1]

In [45]:
def traindecoder(inputs, targets, hprev, cprev):
  loss_fr = 0
  xs,hf,hi,ho,hc,h,c,y,temp,ps = {},{},{},{},{},{},{},{},{},{}
  h[-1] = np.copy(hprev)
  c[-1] = np.copy(cprev)
  # forward pass
  for t in range(5): #len(inputs)
    xs[t] = np.zeros((vocab_size_fr,1)) # encode in 1-of-k representation
    if(int(inputs[0])!=-1):
        a = np.reshape(inputs[t],(300,1))
        xs[t] = np.copy(a)
    xs[t] = np.row_stack((h[t-1],xs[t]))
    hf[t] = sigmoid(np.dot(wf_fr,xs[t])+bf_fr)
    hi[t] = sigmoid(np.dot(wi_fr,xs[t])+bi_fr)
    ho[t] = sigmoid(np.dot(wo_fr,xs[t])+bo_fr)
    hc[t] = np.tanh(np.dot(wc_fr,xs[t])+bc_fr)
    c[t] = hf[t]*c[t-1] + hi[t]*hc[t]
    h[t] = ho[t]*np.tanh(c[t])
    y[t] = np.dot(wy_fr,h[t])+ by_fr ## unnormalized
    ps[t] = np.exp(y[t]) / np.sum(np.exp(y[t])) # probabilities for next words
    loss_fr = loss_fr + np.linalg.norm(np.log(ps[t]))
  dWf,dWi,dWo,dWc, dWy = np.zeros_like(wf_en),np.zeros_like(wi_en),np.zeros_like(wo_en),np.zeros_like(wc_en),np.zeros_like(wy_en)
  dbf,dbi,dbo,dbc, dby = np.zeros_like(bf_en),np.zeros_like(bi_en),np.zeros_like(bo_en),np.zeros_like(bc_en),np.zeros_like(by_en)
  dhnext = np.zeros_like(h[0])
  dcnext = np.zeros_like(c[0])
  for t in reversed(range(5)):#len(targets)
#     print('d',len(targets))
    target = np.reshape(targets[t],(300,1))
    dy = np.copy(y[t])
    dy -= target
    dWy += np.dot(dy, h[t].T)
    dby += dy
    dh = np.dot(wy_fr.T,dy)+dhnext
    # Gradient for ho
    dho = np.tanh(c[t])*dh
    dho = dsigmoid(ho[t])*dho
    #Gradient for c
    dc = ho[t]*dh*dtanh(h[t])
    dc = dc + dcnext
    #Gradient for hf
    dhf = c[t-1]*dc
    dhf = dsigmoid(hf[t])*dhf
    #Gradient for hi
    dhi = hc[t]*dc
    dhi = dsigmoid(hi[t])*dhi
    #Gradient for hc
    dhc = hi[t]*dc
    dhc = dtanh(hc[t])*dhc
    #Gate gradients
    dWf += np.dot(dhf,xs[t].T)
    dbf += dhf
    dxf = np.dot(wf_fr.T,dhf)
    
    dWi += np.dot(dhi,xs[t].T)
    dbi += dhi
    dxi = np.dot(wi_fr.T,dhi)
    
    dWo += np.dot(dho,xs[t].T)
    dbo += dho
    dxo = np.dot(wo_fr.T,dho)
    
    dWc += np.dot(dhc,xs[t].T)
    dbc += dhc
    dxc = np.dot(wc_fr.T,dhc)
    
    dx = dxo+dxc+dxi+dxf
    
    dhnext = dx[:hidden_size,:]
    dcnext = hf[t]*dc
  for dparam in [dWf, dWi, dWo,dWc,dWy,dbf,dbi,dbo,dbc,dby]:
    np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients
  return dWf, dWi, dWo,dWc,dWy,dbf,dbi,dbo,dbc,dby, h[5-1], c[5-1], loss_fr#hs[len(inputs)-1]

In [46]:
def test(inputs, targets):
  temp,xs,hf,hi,ho,hc,c,h,y = {},{},{},{},{},{},{},{},{}
  h[-1] = np.zeros((hidden_size,1))
  c[-1] = np.zeros((hidden_size,1))
  # forward pass
  size = len(inputs)
  if(size>5):
    size = 5
  for t in range(size): #len(inputs)    
    a = np.reshape(inputs[t],(300,1))
    xs[t] = np.zeros((vocab_size_en,1)) # encode in 1-of-k representation
    xs[t] = np.copy(a)
    xs[t] = np.row_stack((h[t-1],xs[t]))
    hf[t] = sigmoid(np.dot(wf_en,xs[t])+bf_en)
    hi[t] = sigmoid(np.dot(wi_en,xs[t])+bi_en)
    ho[t] = sigmoid(np.dot(wo_en,xs[t])+bo_en)
    hc[t] = np.tanh(np.dot(wc_en,xs[t])+bc_en)
    c[t] = hf[t]*c[t-1]+ hc[t]*hi[t]
    h[t]  = ho[t]*np.tanh(c[t])
    y[t]= np.dot(wy_en, h[t]) + by_en
  hprev = h[len(inputs)-1]
  cprev = c[len(inputs)-1]
  tem = ""
  ans = ""
  k = 0
  t = -1
  while k<5:
      temp = np.zeros((vocab_size_fr,1))
      if(t!=-1):
          temp[t] = 1
      xs = np.row_stack((hprev,temp))
      hf = sigmoid(np.dot(wf_fr,xs)+bf_fr)
      hi = sigmoid(np.dot(wi_fr,xs)+bi_fr)
      ho = sigmoid(np.dot(wo_fr,xs)+bo_fr)
      hc = np.tanh(np.dot(wc_fr,xs)+bc_fr)
      cprev = hf*cprev+ hc*hi
      hprev = ho*np.tanh(cprev)
      y = np.dot(wy_fr, hprev) + by_fr # unnormalized log probabilities for next chars
      p = np.exp(y)/np.sum(np.exp(y))
      keys = np.zeros((len(ix_to_word_fr)))
      mind = 100000.0
      min_key = tuple
      for ix in ix_to_word_fr.keys():
          cosdist = np.dot(np.asarray(ix).T,y)/(np.linalg.norm(np.asarray(ix))*np.linalg.norm(y))
          if(cosdist<mind):
                mind = cosdist
                minkey = ix
#       print(keys.shape,y.shape)
#       print(ix_to_word_fr[minkey],mind)
      tem = ix_to_word_fr[minkey]
#       pr = np.exp(y) / np.sum(np.exp(y))
#       print(y.shape,y.reshape(300).shape)
#       maxi = y[0][0]
#       for i in range(len(words_fr)):
#           if y[i][0]>=maxi:
#               maxi = y[i][0]
#               t = i
#       arr = np.asarray(ix_to_word_fr);
#       print(arr[np.linalg.norm(arr-y, axis=1).argmin()]) # closest key to y vector is the word
      #tem = ix_to_word_fr[tuple(y.reshape(300))] # J Put word in it. find the closest word to y(vector)
      k = k + 1
      ans = ans + " " +tem
  return ans

In [None]:
n,p = 0,0
mdWf_en,mdWi_en,mdWo_en,mdWc_en, mdWy_en = np.zeros_like(wf_en),np.zeros_like(wi_en),np.zeros_like(wo_en),np.zeros_like(wc_en),np.zeros_like(wy_en)
mdbf_en,mdbi_en,mdbo_en,mdbc_en, mdby_en = np.zeros_like(bf_en),np.zeros_like(bi_en),np.zeros_like(bo_en),np.zeros_like(bc_en),np.zeros_like(by_en) 
mdWf_fr,mdWi_fr,mdWo_fr,mdWc_fr, mdWy_fr = np.zeros_like(wf_fr),np.zeros_like(wi_fr),np.zeros_like(wo_fr),np.zeros_like(wc_fr),np.zeros_like(wy_fr)
mdbf_fr,mdbi_fr,mdbo_fr,mdbc_fr, mdby_fr = np.zeros_like(bf_fr),np.zeros_like(bi_fr),np.zeros_like(bo_fr),np.zeros_like(bc_fr),np.zeros_like(by_fr)  
import sys

In [None]:
while n!=1000:
    curr_en = lines_en[p].split()
    inputs_en = [word_to_ix_en[w] for w in curr_en[0:len(curr_en)-1]]
    targets_en = [word_to_ix_en[w] for w in curr_en[1:len(curr_en)]]
    curr_fr = lines_fr[p].split()
    inputs_fr=[-1]
    temp = [word_to_ix_fr[w] for w in curr_fr[0:len(curr_fr)-1]]
    inputs_fr.extend(temp)
    targets_fr = [word_to_ix_fr[w] for w in curr_fr[0:len(curr_fr)]]
    if(len(inputs_en)<=5 or len(targets_en)<=5 or len(inputs_fr)<=5 or len(targets_fr)<=5):
        p = p+1
        continue
    hprev = np.zeros((hidden_size,1))
    cprev = np.zeros((hidden_size,1))
#     print("\bTraining Encoder",n,p)
    dWf_en, dWi_en, dWo_en,dWc_en,dWy_en,dbf_en,dbi_en,dbo_en,dbc_en,dby_en,hprev_en,cprev_en,loss_en = trainencoder(inputs_en,targets_en,hprev,cprev)
#     print("\bTraining Decoder",n,p)
    dWf_fr, dWi_fr, dWo_fr,dWc_fr,dWy_fr,dbf_fr,dbi_fr,dbo_fr,dbc_fr,dby_fr,hprev_fr,cprev_fr,loss_fr = traindecoder(inputs_fr,targets_fr, hprev_en,cprev_en)
    p += 1 # move sentence pointer
    if p >= num:
        p = 0
        np.savez("Weights/weights"+str(n)+".en",wf_en=wf_en, wi_en = wi_en, wo_en = wo_en, wc_en = wc_en, wy_en = wy_en,bf_en = bf_en,bi_en = bi_en,bo_en = bo_en,bc_en = bc_en,by_en = by_en)
        np.savez("Weights/weights"+str(n)+".fr",wf_fr=wf_fr, wi_fr = wf_fr, wo_fr = wo_fr, wc_fr = wc_fr, wy_fr = wy_fr,bf_fr = bf_fr,bi_fr = bi_fr,bo_fr = bo_fr,bc_fr = bc_fr,by_fr = by_fr)
        print('>>Training...iteration:%d'%(n))
        print('loss_en=%f'%(loss_en))
        print('loss_fr=%f'%(loss_fr))
        sys.stdout.flush()
        input_english = "The agenda for the United"
        curr_en = input_english.split()
        inputs_en = [word_to_ix_en[w] for w in curr_en[0:len(curr_en)-1]]
        targets_en = [word_to_ix_en[w] for w in curr_en[1:len(curr_en)]]
        output_words = test(inputs_en,targets_en)
        print(output_words) 
        sys.stdout.flush()
        n = n + 1
        
    for param_en, dparam_en, mem_en in zip([wf_en, wi_en, wo_en, wc_en, wy_en,bf_en,bi_en,bo_en,bc_en,by_en], 
                                [dWf_en, dWi_en, dWo_en, dWc_en, dWy_en,dbf_en,dbi_en,dbo_en,dbc_en,dby_en], 
                                [mdWf_en, mdWi_en, mdWo_en, mdWc_en, mdWy_en,mdbf_en,mdbi_en,mdbo_en,mdbc_en,mdby_en]):
      mem_en += dparam_en * dparam_en
      param_en += -learning_rate * dparam_en / np.sqrt(mem_en + 1e-8) # adagrad update
    





    for param_fr, dparam_fr, mem_fr in zip([wf_fr, wi_fr, wo_fr, wc_fr, wy_fr,bf_fr,bi_fr,bo_fr,bc_fr,by_fr], 
                                [dWf_fr, dWi_fr, dWo_fr, dWc_fr, dWy_fr,dbf_fr,dbi_fr,dbo_fr,dbc_fr,dby_fr], 
                                [mdWf_fr, mdWi_fr, mdWo_fr, mdWc_fr, mdWy_fr,mdbf_fr,mdbi_fr,mdbo_fr,mdbc_fr,mdby_fr]):
      mem_fr += dparam_fr * dparam_fr
      param_fr += -learning_rate * dparam_fr / np.sqrt(mem_fr + 1e-8) # adagrad update
 # iteration counter 

>>Training...iteration:0
loss_en=494.085572
loss_fr=494.089102
 philanthropiques. philanthropiques. philanthropiques. philanthropiques. philanthropiques.
>>Training...iteration:1
loss_en=494.087334
loss_fr=494.090928
 philanthropiques. philanthropiques. philanthropiques. philanthropiques. philanthropiques.
>>Training...iteration:2
loss_en=494.089070
loss_fr=494.091401
 détection philanthropiques. philanthropiques. philanthropiques. philanthropiques.
>>Training...iteration:3
loss_en=494.090831
loss_fr=494.091154
 détection philanthropiques. philanthropiques. philanthropiques. philanthropiques.
>>Training...iteration:4
loss_en=494.092491
loss_fr=494.090784
 fourniront fourniront philanthropiques. fourniront fourniront
>>Training...iteration:5
loss_en=494.093973
loss_fr=494.090585
 fourniront fourniront fourniront fourniront fourniront
>>Training...iteration:6
loss_en=494.095174
loss_fr=494.090723
 fourniront fourniront fourniront fourniront fourniront
>>Training...iteration:7
loss_en=494

 détection philanthropiques. fourniront fourniront fourniront
>>Training...iteration:68
loss_en=494.103868
loss_fr=494.096377
 détection philanthropiques. fourniront fourniront fourniront
>>Training...iteration:69
loss_en=494.103915
loss_fr=494.096466
 détection philanthropiques. fourniront fourniront fourniront
>>Training...iteration:70
loss_en=494.103961
loss_fr=494.096551
 détection philanthropiques. fourniront fourniront fourniront
>>Training...iteration:71
loss_en=494.104004
loss_fr=494.096635
 détection philanthropiques. fourniront fourniront fourniront
>>Training...iteration:72
loss_en=494.104047
loss_fr=494.096716
 détection philanthropiques. fourniront fourniront fourniront
>>Training...iteration:73
loss_en=494.104087
loss_fr=494.096795
 détection philanthropiques. fourniront fourniront fourniront
>>Training...iteration:74
loss_en=494.104125
loss_fr=494.096872
 détection philanthropiques. fourniront fourniront fourniront
>>Training...iteration:75
loss_en=494.104161
loss_fr=494

 l'ignorance fourniront philanthropiques. philanthropiques. philanthropiques.
>>Training...iteration:133
loss_en=494.104547
loss_fr=494.098717
 l'ignorance fourniront philanthropiques. philanthropiques. philanthropiques.
>>Training...iteration:134
loss_en=494.104539
loss_fr=494.098757
 l'ignorance fourniront philanthropiques. philanthropiques. philanthropiques.
>>Training...iteration:135
loss_en=494.104531
loss_fr=494.098799
 l'ignorance fourniront philanthropiques. philanthropiques. philanthropiques.
>>Training...iteration:136
loss_en=494.104523
loss_fr=494.098844
 l'ignorance fourniront philanthropiques. philanthropiques. philanthropiques.
>>Training...iteration:137
loss_en=494.104514
loss_fr=494.098890
 l'ignorance fourniront philanthropiques. philanthropiques. philanthropiques.
>>Training...iteration:138
loss_en=494.104505
loss_fr=494.098938
 l'ignorance fourniront philanthropiques. philanthropiques. philanthropiques.
>>Training...iteration:139
loss_en=494.104497
loss_fr=494.098987

>>Training...iteration:192
loss_en=494.104905
loss_fr=494.101170
 fourniront fourniront fourniront philanthropiques. philanthropiques.
>>Training...iteration:193
loss_en=494.104929
loss_fr=494.101217
 fourniront fourniront fourniront philanthropiques. philanthropiques.
>>Training...iteration:194
loss_en=494.104953
loss_fr=494.101263
 fourniront fourniront fourniront philanthropiques. philanthropiques.
>>Training...iteration:195
loss_en=494.104978
loss_fr=494.101309
 fourniront fourniront fourniront philanthropiques. philanthropiques.
>>Training...iteration:196
loss_en=494.105003
loss_fr=494.101354
 fourniront fourniront fourniront philanthropiques. philanthropiques.
>>Training...iteration:197
loss_en=494.105028
loss_fr=494.101398
 fourniront fourniront fourniront philanthropiques. philanthropiques.
>>Training...iteration:198
loss_en=494.105054
loss_fr=494.101442
 fourniront fourniront fourniront philanthropiques. philanthropiques.
>>Training...iteration:199
loss_en=494.105080
loss_fr=4

 fourniront fourniront fourniront fourniront fourniront
>>Training...iteration:255
loss_en=494.106447
loss_fr=494.102583
 fourniront fourniront fourniront fourniront fourniront
>>Training...iteration:256
loss_en=494.106464
loss_fr=494.102585
 fourniront fourniront fourniront fourniront fourniront
>>Training...iteration:257
loss_en=494.106480
loss_fr=494.102587
 fourniront fourniront fourniront fourniront fourniront
>>Training...iteration:258
loss_en=494.106497
loss_fr=494.102589
 fourniront fourniront fourniront fourniront fourniront
>>Training...iteration:259
loss_en=494.106513
loss_fr=494.102591
 fourniront fourniront fourniront fourniront fourniront
>>Training...iteration:260
loss_en=494.106528
loss_fr=494.102592
 fourniront fourniront fourniront fourniront fourniront
>>Training...iteration:261
loss_en=494.106544
loss_fr=494.102594
 fourniront fourniront fourniront fourniront fourniront
>>Training...iteration:262
loss_en=494.106559
loss_fr=494.102595
 fourniront fourniront fourniron

In [None]:
# np.savez("weights.en",wxh_en=wxh_en,whh_en=whh_en,why_en=why_en,bh_en=bh_en,by_en=by_en)
# np.savez("weights.fr",wxh_fr=wxh_fr,whh_fr=whh_fr,why_fr=why_fr,bh_fr=bh_fr,by_fr=by_fr)