# Importing Files

In [0]:
import tensorflow as tf
import pandas as pd
import os
import numpy as np
import nltk

In [0]:
!pip install --upgrade gensim

In [0]:
from gensim.models import Word2Vec
import gensim

In [2]:
from google.colab import drive
drive.mount('/content/drive/')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive/


# Loading Friends Transcripts data

In [0]:
friends_data=pd.read_csv('/content/drive/My Drive/friends-final.txt',sep="\t")

In [0]:
friends_data.head()

Unnamed: 0,id,scene_id,person,gender,original_line,line,metadata,filename
0,1,1,MONICA,F,Monica: There's nothing to tell! He's just som...,Theres nothing to tell Hes just some guy I wor...,There_EX 's_VBZ nothing_PN1 to_TO tell_VVI !_!...,0101.txt
1,101,1,JOEY,M,"Joey: C'mon, you're going out with the guy! Th...",Cmon youre going out with the guy Theres gotta...,C'm_VV0 on_RP you_PPY 're_VBR going_VVG out_RP...,0101.txt
2,201,1,CHANDLER,M,"Chandler: All right Joey, be nice. So does he...",Alright Joey be nice So does he have a hump A ...,All_RR21 right_RR22 Joey_NP1 be_VBI nice_JJ ._...,0101.txt
3,301,1,PHOEBE,F,"Phoebe: Wait, does he eat chalk?",Wait does he eat chalk,Wait_VV0 does_VDZ he_PPHS1 eat_VVI chalk_NN1 ?_?,0101.txt
4,401,1,PHOEBE,F,"Phoebe: Just, 'cause, I don't want her to go t...",Just cause I dont want her to go through what ...,Just_RR 'cause_CS I_PPIS1 do_VD0 n't_XX want_V...,0101.txt


In [0]:
friends_data.iloc[len(friends_data)-1]['original_line']

'Chandler: Sure. Where?'

# Removing Punctuations

In [0]:
friends_data["line"] = friends_data['line'].str.replace('[^\w\s]','')

In [0]:
friends_data['line'] = friends_data['line'].str.replace(',','')
friends_data['line'] = friends_data['line'].str.replace('.',' ')
friends_data['line'] = friends_data['line'].str.replace('!','')

# Dialogue count of each of the characters

In [4]:
friends_data.person.value_counts()

RACHEL                             9207
ROSS                               9027
CHANDLER                           8362
MONICA                             8324
JOEY                               8125
PHOEBE                             7453
MIKE                                358
ALL                                 343
RICHARD                             281
JANICE                              217
MR. GELLER                          204
CAROL                               193
CHARLIE                             190
EMILY                               167
MRS. GELLER                         164
TAG                                 146
FRANK                               146
DIRECTOR                            135
PAUL                                133
GUNTHER                             130
AMY                                 122
DAVID                               120
MONA                                111
WOMAN                               105
SUSAN                               104


In [0]:
friends_data[friends_data]

# We tried 3 approaches to create context-response pairs

In [0]:
x=[]
y=[]

# Approach 1: Concatenation of All dialogues previous to Ross as Context and Ross's Dialogue as Reply

In [0]:
s=""
for index,row in friends_data.iterrows():
    if row['person']=="ROSS":
        y.append(row['line'])
        x.append(s)
        s=""
    else:
        s+=row['line']
        
print(x[:1])
print(y[:1])

['Theres nothing to tell Hes just some guy I work withCmon youre going out with the guy Theres gotta be something wrong with himAlright Joey be nice So does he have a hump A hump and a hairpieceWait does he eat chalkJust cause I dont want her to go through what I went through with Carl ohOkay everybody relax This is not even a date Its just two people going out to dinner and not having sexSounds like a date to meAlright so Im back in high school Im standing in the middle of the cafeteria and I realize I am totally nakedOh yeah Had that dreamThen I look down and I realize theres a phone thereInstead ofThats rightNever had that dreamNoAll of a sudden the phone starts to ring Now I dont know what to do everybody starts looking at meAnd they werent looking at you beforeFinally I figure Id better answer it and it turns out its my mother which is very very weird because she never calls me']
['Hi']


# Approach 2: Sequential (Dialogue 1-2 as one pair, 2-3 as another pair and so on..)

In [0]:
for i in range(len(friends_data)-1):
  x.append(friends_data.iloc[i]['line'])
  y.append(friends_data.iloc[i+1]['line'])
print(x[:1])
print(y[:1])

['Theres nothing to tell Hes just some guy I work with']
['Cmon youre going out with the guy Theres gotta be something wrong with him']


# Approach 3: All dialogues previous to Ross individually taken as Context as opposed to Ross's reply. (Best Model)

In [0]:
temp=[]
for index,row in friends_data.iterrows():
    if row['person']=="ROSS":
        for item in temp:
          y.append(row['line'])
          x.append(item)
        temp=[]
    else:
        temp.append(row['line'])
        
print(x[:2])
print(y[:2])

['Theres nothing to tell Hes just some guy I work with', 'Cmon youre going out with the guy Theres gotta be something wrong with him']
['Hi', 'Hi']


# Length of X-Y (Context Response Pairs) Lists

In [0]:
print(len(x))
print(len(y))

51812
51812


In [0]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

# Tokenisation

In [0]:
tok_x=[]
tok_y=[]
for i in range(len(x)):
    tok_x.append(nltk.word_tokenize(x[i].lower()))
    tok_y.append(nltk.word_tokenize(y[i].lower()))

In [0]:
tok_x[:2]

[['theres',
  'nothing',
  'to',
  'tell',
  'hes',
  'just',
  'some',
  'guy',
  'i',
  'work',
  'with'],
 ['cmon',
  'youre',
  'going',
  'out',
  'with',
  'the',
  'guy',
  'theres',
  'got',
  'ta',
  'be',
  'something',
  'wrong',
  'with',
  'him']]

In [0]:
tok_y[:20]

[['hi'],
 ['hi'],
 ['hi'],
 ['hi'],
 ['hi'],
 ['hi'],
 ['hi'],
 ['hi'],
 ['hi'],
 ['hi'],
 ['hi'],
 ['hi'],
 ['hi'],
 ['hi'],
 ['hi'],
 ['hi'],
 ['hi'],
 ['i',
  'just',
  'feel',
  'like',
  'someone',
  'reached',
  'down',
  'my',
  'throat',
  'grabbed',
  'my',
  'small',
  'intestine',
  'pulled',
  'it',
  'out',
  'of',
  'my',
  'mouth',
  'and',
  'tied',
  'it',
  'around',
  'my',
  'neck'],
 ['i',
  'just',
  'feel',
  'like',
  'someone',
  'reached',
  'down',
  'my',
  'throat',
  'grabbed',
  'my',
  'small',
  'intestine',
  'pulled',
  'it',
  'out',
  'of',
  'my',
  'mouth',
  'and',
  'tied',
  'it',
  'around',
  'my',
  'neck'],
 ['thanks']]

# Word Embeddings using Gensim

In [0]:
model=Word2Vec(tok_x+tok_y,min_count=1)

In [0]:
print(modelW2["there"])

[-0.084215   0.69597    0.28383   -0.22497   -0.55923    0.21196
 -0.15175    0.31601    0.24803   -0.36385    0.2252     0.34109
  0.61438    0.08318    0.74894   -0.38785   -0.27211    0.2656
 -0.66332    0.45571    0.34949    0.38635    0.25707   -0.60101
 -0.047292  -0.41198    0.38246   -0.52151    0.037757  -0.42492
 -0.36352   -0.037377   0.31263    0.19709    0.008142   0.53871
 -0.074505   0.31395    0.35129   -0.39005   -0.5474    -0.11395
  0.076668  -0.61069    0.15894   -0.33043    0.75967   -0.52289
 -0.38957   -0.71164    0.28724   -0.35683    0.050529   1.3392
 -0.16064   -2.9236     0.17524   -0.42109    1.5235     0.85181
 -0.47563    1.1225    -0.48463    0.36458    0.97809   -0.2227
  0.88791    0.068738   0.22557    0.026459  -0.032799  -0.40426
  0.14579   -0.4535     0.47756    0.15933    0.24236    0.0091798
 -1.3502    -0.078424   0.57803   -0.50061   -0.19374    0.29587
 -1.2316    -0.099825   0.31661   -0.46539   -0.54243   -0.13506
 -0.12897   -0.29543   -0.

In [0]:
vec_x=[]
vec_y=[]

# Sentence Embeddings

In [0]:
for sent in tok_x:
    sentvec=[modelW2[w] for w in sent if w in modelW2.wv.vocab]
    vec_x.append(sentvec)
for sent in tok_y:
    sentvec=[modelW2[w] for w in sent if w in modelW2.wv.vocab]
    vec_y.append(sentvec)

  
  """


In [0]:
vec_x[0][0].shape

(100,)

# Padding/Truncating To make sentences even length (15)

In [0]:
sentend=np.ones(100,dtype=np.float32)

In [0]:
for tok_sent in vec_x:
  tok_sent[14:]=[]
  tok_sent.append(sentend)

In [0]:
for tok_sent in vec_x:
  if(len(tok_sent)<15):
    for i in range(15-len(tok_sent)):
      tok_sent.append(sentend)

In [0]:
for tok_sent in vec_y:
  tok_sent[14:]=[]
  tok_sent.append(sentend)

In [0]:
for tok_sent in vec_y:
  if(len(tok_sent)<15):
    for i in range(15-len(tok_sent)):
      tok_sent.append(sentend)

In [0]:
# size = 0
# for i in vec_x:
#   size = max(size, len(i))
# print(size)


# for i in vec_x:
#   size = max(size, len(i))


In [0]:
vec_X=np.array(vec_x,dtype=np.float64)
vec_Y=np.array(vec_y,dtype=np.float64)

In [0]:
vec_X.shape

(51812, 15, 100)

In [0]:
!pip install -U scikit-learn

In [0]:
from sklearn.model_selection import train_test_split

# Splitting into Training and Testing (80:20)

In [0]:
x_train,x_test,y_train,y_test = train_test_split(vec_X,vec_Y,test_size=0.2,random_state=1)

In [0]:
# !pip install keras

In [0]:
from keras.models import Sequential

In [0]:
model1=Sequential()

In [0]:
# x_train=tf.keras.utils.normalize(x_train,axis=1)
# x_test=tf.keras.utils.normalize(x_test,axis=1)

In [0]:
from keras.layers.recurrent import LSTM

In [0]:
x_train.shape

(41449, 15, 100)

In [0]:
from keras.layers import Bidirectional

# A single Layer Attention based Bidirectional LSTM with Hidden Size 256  (Best Model)

In [0]:
model1.add(Bidirectional(LSTM(256,input_shape=(15,100),return_sequences=True,init='glorot_normal',inner_init='glorot_normal',activation='tanh')))

  """Entry point for launching an IPython kernel.


# Attention layer

In [0]:
model1.add(AttentionDecoder(256, 100))

In [0]:
# model1.add(Bidirectional(LSTM(output_dim=50,input_shape=(None,100),return_sequences=True,init='glorot_normal',inner_init='glorot_normal',activation='tanh')))

  """Entry point for launching an IPython kernel.


In [0]:
# model1.add(Bidirectional(LSTM(output_dim=50,input_shape=(None,100),return_sequences=True,init='glorot_normal',inner_init='glorot_normal',activation='tanh')))

  """Entry point for launching an IPython kernel.


In [0]:
# model1.add(Bidirectional(LSTM(output_dim=50,input_shape=(None,100),return_sequences=True,init='glorot_normal',inner_init='glorot_normal',activation='tanh')))

  """Entry point for launching an IPython kernel.


# Cross Entropy Loss (Best model)

In [0]:
model1.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

In [0]:
model1.fit(x_train,y_train,nb_epoch=10,validation_data=(x_test,y_test))
model1.save('LSTM10.h5')
from google.colab import files
files.download('LSTM10.h5')

  """Entry point for launching an IPython kernel.


Train on 41449 samples, validate on 10363 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


KeyboardInterrupt: ignored

In [0]:
model1.fit(x_train,y_train,nb_epoch=10,validation_data=(x_test,y_test))
model1.save('LSTM20.h5')
from google.colab import files
files.download('LSTM20.h5')

  """Entry point for launching an IPython kernel.


Train on 41449 samples, validate on 10363 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


KeyboardInterrupt: ignored

In [0]:
model1.fit(x_train,y_train,nb_epoch=10,validation_data=(x_test,y_test))
model1.save('LSTM30.h5')
from google.colab import files
files.download('LSTM30.h5')

In [0]:
model1.fit(x_train,y_train,nb_epoch=10,validation_data=(x_test,y_test))
model1.save('LSTM40.h5')
from google.colab import files
files.download('LSTM40.h5')

In [0]:
model1.fit(x_train,y_train,nb_epoch=10,validation_data=(x_test,y_test))
model1.save('LSTM50.h5')
from google.colab import files
files.download('LSTM50.h5')

In [0]:
predictions=model1.predict(x_test)

In [0]:
[model.most_similar([predictions[1999][i]])[0] for i in range(15)]

In [0]:
    x="coffee?";
    sentend=np.ones((100,),dtype=np.float32) 

    sent=nltk.word_tokenize(x.lower())
    sentvec = [model[w] for w in sent if w in model.wv.vocab]

    sentvec[14:]=[]
    sentvec.append(sentend)
    if len(sentvec)<15:
        for i in range(15-len(sentvec)):
            sentvec.append(sentend) 
    sentvec=np.array([sentvec])
    
    predictions = model1.predict(sentvec)
    outputlist=[model.most_similar([predictions[0][i]])[0][0] for i in range(15)]
    output=' '.join(outputlist)
    print(output)

all scrunchy concerts scrunchy scrunchy scrunchy scrunchy scrunchy scrunchy scrunchy scrunchy scrunchy scrunchy scrunchy scrunchy


  """
  from ipykernel import kernelapp as app
  if np.issubdtype(vec.dtype, np.int):


# Using Glove (Used in best model)

In [0]:
from gensim.scripts.glove2word2vec import glove2word2vec

In [0]:
glove_input_file = '/content/drive/My Drive/glove.6B.100d.txt'
word2vec_output_file = '/content/drive/My Drive/glove.6B.100d.txt.word2vec'
glove2word2vec(glove_input_file, word2vec_output_file)

(400000, 100)

In [0]:
from gensim.models import KeyedVectors
# load the Stanford GloVe model
filename = '/content/drive/My Drive/glove.6B.100d.txt.word2vec'
modelW2 = KeyedVectors.load_word2vec_format(filename, binary=False)

In [0]:
vec_xW2=[]
vec_yW2=[]

In [0]:
for sent in tok_x:
    sentvecW2=[modelW2[w] for w in sent if w in modelW2.wv.vocab]
    vec_xW2.append(sentvecW2)
for sent in tok_y:
    sentvecW2=[modelW2[w] for w in sent if w in modelW2.wv.vocab]
    vec_yW2.append(sentvecW2)

  
  """


In [0]:
print(modelW2['intestine'])

[-0.62042   -0.44278   -0.2186    -0.30858   -0.13381    0.81947
 -0.33076    0.57991    1.3364    -0.073734  -0.19904    0.50422
 -0.073423  -0.017576   0.3943     0.15761   -1.0546     0.19131
  0.86593   -0.84506   -0.36739    0.080655  -0.77168    0.38065
  0.72234    0.60594   -0.3287    -0.11697    0.4411     0.43
  0.56769    0.0022013  0.61463   -0.1917     0.044901   0.63421
  0.93268    0.24632   -0.49653    0.56145    0.54537   -0.15868
 -0.55126   -0.66917    0.27606    0.27682   -0.053004   0.053204
 -0.3405     1.2517     0.52948    0.31161    0.19862   -0.30229
 -0.98811    0.015316  -0.69401   -0.6074    -0.15286    0.33907
 -0.30487    0.74635    1.3338     0.73427    0.78453   -0.037469
 -0.53143   -0.80343    0.388     -1.121      0.21157   -0.0096739
  0.46897    0.35062    0.16508   -0.41812   -1.1375     0.1181
 -0.08616    0.44691   -0.25886    1.2161    -0.83375    0.64782
 -0.60257   -0.13727    0.67729    0.094459  -0.93826    0.27251
  0.091865  -0.80519    0

In [0]:
vec_xW2[0][0].shape

(100,)

In [0]:
size = 0
for i in vec_xW2:
  size = max(size, len(i))
print(size)

2470


In [0]:
for tok_sent in vec_xW2:
  tok_sent[:-14]=[]
  tok_sent.append(sentend)

In [0]:
for tok_sent in vec_xW2:
  if(len(tok_sent)<15):
    for i in range(15-len(tok_sent)):
      tok_sent.append(sentend)

In [0]:
for tok_sent in vec_yW2:
  tok_sent[14:]=[]
  tok_sent.append(sentend)

In [0]:
for tok_sent in vec_yW2:
  if(len(tok_sent)<15):
    for i in range(15-len(tok_sent)):
      tok_sent.append(sentend)

In [0]:
vec_XW2=np.array(vec_xW2,dtype=np.float64)
vec_YW2=np.array(vec_yW2,dtype=np.float64)

In [0]:
# vec_xW2=tf.keras.preprocessing.sequence.pad_sequences(vec_xW2,dtype='float64',padding='post',maxlen=15)
vec_XW2=np.array(vec_xW2,dtype=np.float64)
# X_train = sequence.pad_sequences(X_train, dtype='float32')

In [0]:
# vec_yW2=tf.keras.preprocessing.sequence.pad_sequences(vec_yW2,dtype='float64',padding='post',maxlen=15)
vec_YW2=np.array(vec_yW2,dtype=np.float64)
# X_train = sequence.pad_sequences(X_train, dtype='float32')

In [0]:
x_trainW2,x_testW2,y_trainW2,y_testW2 = train_test_split(vec_XW2,vec_YW2,test_size=0.2,random_state=1)

In [0]:
x_trainW2.shape

(7221, 15, 100)

In [0]:
model1.fit(x_trainW2,y_trainW2,nb_epoch=5,validation_data=(x_testW2,y_testW2))

  """Entry point for launching an IPython kernel.


Train on 7221 samples, validate on 1806 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fca9ceaf940>

In [0]:
# vec_x=np.array(vec_x,dtype=np.float64)

In [0]:
predictionsW2=model1.predict(x_testW2)

In [0]:
[modelW2.most_similar([predictionsW2[5][i]])[0] for i in range(15)]

  if np.issubdtype(vec.dtype, np.int):


[('i', 0.9310392737388611),
 ('so', 0.9105113744735718),
 ('so', 0.8989253044128418),
 ('just', 0.8730629682540894),
 ('just', 0.8340003490447998),
 ('just', 0.7725508809089661),
 ('just', 0.7000322937965393),
 ('one', 0.6328343152999878),
 ('inside', 0.6009101867675781),
 ('inside', 0.5746651887893677),
 ('inside', 0.5522726774215698),
 ('inside', 0.5309525728225708),
 ('inside', 0.4965549111366272),
 ('lying', 0.44810929894447327),
 ('dodecahedral', 0.3835752010345459)]

In [0]:
x="Hi how are you doing what is your name?";
sentend=np.ones((100,),dtype=np.float32) 

sent=nltk.word_tokenize(x.lower())
sentvec = [modelW2[w] for w in sent if w in modelW2.wv.vocab]

sentvec[:-14]=[]
sentvec.append(sentend)
if len(sentvec)<15:
    for i in range(15-len(sentvec)):
        sentvec.append(sentend) 
sentvec=np.array([sentvec])

predictions = model1.predict(sentvec)
outputlist=[modelW2.most_similar([predictions[0][i]])[0][0] for i in range(15)]
output=' '.join(outputlist)
print(output)

  """
  if np.issubdtype(vec.dtype, np.int):


i so so just just just just one inside inside inside inside inside lying orientable


# Attention Mechanism Implementation

Source:  [Attention for keras](https://machinelearningmastery.com/encoder-decoder-attention-sequence-to-sequence-prediction-keras/)



In [0]:
import tensorflow as tf
from keras import backend as K
from keras import regularizers, constraints, initializers, activations
from keras.layers.recurrent import Recurrent
from keras.engine import InputSpec

tfPrint = lambda d, T: tf.Print(input_=T, data=[T, tf.shape(T)], message=d)

def _time_distributed_dense(x, w, b=None, dropout=None,
                           input_dim=None, output_dim=None, timesteps=None):
    '''Apply y.w + b for every temporal slice y of x.
    '''
    if not input_dim:
        # won't work with TensorFlow
        input_dim = K.shape(x)[2]
    if not timesteps:
        # won't work with TensorFlow
        timesteps = K.shape(x)[1]
    if not output_dim:
        # won't work with TensorFlow
        output_dim = K.shape(w)[1]

    if dropout:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x *= expanded_dropout_matrix

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))

    x = K.dot(x, w)
    if b:
        x = x + b
    # reshape to 3D tensor
    x = K.reshape(x, (-1, timesteps, output_dim))
    return x

class AttentionDecoder(Recurrent):

    def __init__(self, units, output_dim,
                 activation='tanh',
                 return_probabilities=False,
                 name='AttentionDecoder',
                 kernel_initializer='glorot_uniform',
                 recurrent_initializer='orthogonal',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        """
        Implements an AttentionDecoder that takes in a sequence encoded by an
        encoder and outputs the decoded states
        :param units: dimension of the hidden state and the attention matrices
        :param output_dim: the number of labels in the output space

        references:
            Bahdanau, Dzmitry, Kyunghyun Cho, and Yoshua Bengio.
            "Neural machine translation by jointly learning to align and translate."
            arXiv preprint arXiv:1409.0473 (2014).
        """
        self.units = units
        self.output_dim = output_dim
        self.return_probabilities = return_probabilities
        self.activation = activations.get(activation)
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.recurrent_initializer = initializers.get(recurrent_initializer)
        self.bias_initializer = initializers.get(bias_initializer)

        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.recurrent_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)

        self.kernel_constraint = constraints.get(kernel_constraint)
        self.recurrent_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)

        super(AttentionDecoder, self).__init__(**kwargs)
        self.name = name
        self.return_sequences = True  # must return sequences

    def build(self, input_shape):
        """
          See Appendix 2 of Bahdanau 2014, arXiv:1409.0473
          for model details that correspond to the matrices here.
        """

        self.batch_size, self.timesteps, self.input_dim = input_shape

        if self.stateful:
            super(AttentionDecoder, self).reset_states()

        self.states = [None, None]  # y, s

        """
            Matrices for creating the context vector
        """

        self.V_a = self.add_weight(shape=(self.units,),
                                   name='V_a',
                                   initializer=self.kernel_initializer,
                                   regularizer=self.kernel_regularizer,
                                   constraint=self.kernel_constraint)
        self.W_a = self.add_weight(shape=(self.units, self.units),
                                   name='W_a',
                                   initializer=self.kernel_initializer,
                                   regularizer=self.kernel_regularizer,
                                   constraint=self.kernel_constraint)
        self.U_a = self.add_weight(shape=(self.input_dim, self.units),
                                   name='U_a',
                                   initializer=self.kernel_initializer,
                                   regularizer=self.kernel_regularizer,
                                   constraint=self.kernel_constraint)
        self.b_a = self.add_weight(shape=(self.units,),
                                   name='b_a',
                                   initializer=self.bias_initializer,
                                   regularizer=self.bias_regularizer,
                                   constraint=self.bias_constraint)
        """
            Matrices for the r (reset) gate
        """
        self.C_r = self.add_weight(shape=(self.input_dim, self.units),
                                   name='C_r',
                                   initializer=self.recurrent_initializer,
                                   regularizer=self.recurrent_regularizer,
                                   constraint=self.recurrent_constraint)
        self.U_r = self.add_weight(shape=(self.units, self.units),
                                   name='U_r',
                                   initializer=self.recurrent_initializer,
                                   regularizer=self.recurrent_regularizer,
                                   constraint=self.recurrent_constraint)
        self.W_r = self.add_weight(shape=(self.output_dim, self.units),
                                   name='W_r',
                                   initializer=self.recurrent_initializer,
                                   regularizer=self.recurrent_regularizer,
                                   constraint=self.recurrent_constraint)
        self.b_r = self.add_weight(shape=(self.units, ),
                                   name='b_r',
                                   initializer=self.bias_initializer,
                                   regularizer=self.bias_regularizer,
                                   constraint=self.bias_constraint)

        """
            Matrices for the z (update) gate
        """
        self.C_z = self.add_weight(shape=(self.input_dim, self.units),
                                   name='C_z',
                                   initializer=self.recurrent_initializer,
                                   regularizer=self.recurrent_regularizer,
                                   constraint=self.recurrent_constraint)
        self.U_z = self.add_weight(shape=(self.units, self.units),
                                   name='U_z',
                                   initializer=self.recurrent_initializer,
                                   regularizer=self.recurrent_regularizer,
                                   constraint=self.recurrent_constraint)
        self.W_z = self.add_weight(shape=(self.output_dim, self.units),
                                   name='W_z',
                                   initializer=self.recurrent_initializer,
                                   regularizer=self.recurrent_regularizer,
                                   constraint=self.recurrent_constraint)
        self.b_z = self.add_weight(shape=(self.units, ),
                                   name='b_z',
                                   initializer=self.bias_initializer,
                                   regularizer=self.bias_regularizer,
                                   constraint=self.bias_constraint)
        """
            Matrices for the proposal
        """
        self.C_p = self.add_weight(shape=(self.input_dim, self.units),
                                   name='C_p',
                                   initializer=self.recurrent_initializer,
                                   regularizer=self.recurrent_regularizer,
                                   constraint=self.recurrent_constraint)
        self.U_p = self.add_weight(shape=(self.units, self.units),
                                   name='U_p',
                                   initializer=self.recurrent_initializer,
                                   regularizer=self.recurrent_regularizer,
                                   constraint=self.recurrent_constraint)
        self.W_p = self.add_weight(shape=(self.output_dim, self.units),
                                   name='W_p',
                                   initializer=self.recurrent_initializer,
                                   regularizer=self.recurrent_regularizer,
                                   constraint=self.recurrent_constraint)
        self.b_p = self.add_weight(shape=(self.units, ),
                                   name='b_p',
                                   initializer=self.bias_initializer,
                                   regularizer=self.bias_regularizer,
                                   constraint=self.bias_constraint)
        """
            Matrices for making the final prediction vector
        """
        self.C_o = self.add_weight(shape=(self.input_dim, self.output_dim),
                                   name='C_o',
                                   initializer=self.recurrent_initializer,
                                   regularizer=self.recurrent_regularizer,
                                   constraint=self.recurrent_constraint)
        self.U_o = self.add_weight(shape=(self.units, self.output_dim),
                                   name='U_o',
                                   initializer=self.recurrent_initializer,
                                   regularizer=self.recurrent_regularizer,
                                   constraint=self.recurrent_constraint)
        self.W_o = self.add_weight(shape=(self.output_dim, self.output_dim),
                                   name='W_o',
                                   initializer=self.recurrent_initializer,
                                   regularizer=self.recurrent_regularizer,
                                   constraint=self.recurrent_constraint)
        self.b_o = self.add_weight(shape=(self.output_dim, ),
                                   name='b_o',
                                   initializer=self.bias_initializer,
                                   regularizer=self.bias_regularizer,
                                   constraint=self.bias_constraint)

        # For creating the initial state:
        self.W_s = self.add_weight(shape=(self.input_dim, self.units),
                                   name='W_s',
                                   initializer=self.recurrent_initializer,
                                   regularizer=self.recurrent_regularizer,
                                   constraint=self.recurrent_constraint)

        self.input_spec = [
            InputSpec(shape=(self.batch_size, self.timesteps, self.input_dim))]
        self.built = True

    def call(self, x):
        # store the whole sequence so we can "attend" to it at each timestep
        self.x_seq = x

        # apply the a dense layer over the time dimension of the sequence
        # do it here because it doesn't depend on any previous steps
        # thefore we can save computation time:
        self._uxpb = _time_distributed_dense(self.x_seq, self.U_a, b=self.b_a,
                                             input_dim=self.input_dim,
                                             timesteps=self.timesteps,
                                             output_dim=self.units)

        return super(AttentionDecoder, self).call(x)

    def get_initial_state(self, inputs):
        # apply the matrix on the first time step to get the initial s0.
        s0 = activations.tanh(K.dot(inputs[:, 0], self.W_s))

        # from keras.layers.recurrent to initialize a vector of (batchsize,
        # output_dim)
        y0 = K.zeros_like(inputs)  # (samples, timesteps, input_dims)
        y0 = K.sum(y0, axis=(1, 2))  # (samples, )
        y0 = K.expand_dims(y0)  # (samples, 1)
        y0 = K.tile(y0, [1, self.output_dim])

        return [y0, s0]

    def step(self, x, states):

        ytm, stm = states

        # repeat the hidden state to the length of the sequence
        _stm = K.repeat(stm, self.timesteps)

        # now multiplty the weight matrix with the repeated hidden state
        _Wxstm = K.dot(_stm, self.W_a)

        # calculate the attention probabilities
        # this relates how much other timesteps contributed to this one.
        et = K.dot(activations.tanh(_Wxstm + self._uxpb),
                   K.expand_dims(self.V_a))
        at = K.exp(et)
        at_sum = K.sum(at, axis=1)
        at_sum_repeated = K.repeat(at_sum, self.timesteps)
        at /= at_sum_repeated  # vector of size (batchsize, timesteps, 1)

        # calculate the context vector
        context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)
        # ~~~> calculate new hidden state
        # first calculate the "r" gate:

        rt = activations.sigmoid(
            K.dot(ytm, self.W_r)
            + K.dot(stm, self.U_r)
            + K.dot(context, self.C_r)
            + self.b_r)

        # now calculate the "z" gate
        zt = activations.sigmoid(
            K.dot(ytm, self.W_z)
            + K.dot(stm, self.U_z)
            + K.dot(context, self.C_z)
            + self.b_z)

        # calculate the proposal hidden state:
        s_tp = activations.tanh(
            K.dot(ytm, self.W_p)
            + K.dot((rt * stm), self.U_p)
            + K.dot(context, self.C_p)
            + self.b_p)

        # new hidden state:
        st = (1-zt)*stm + zt * s_tp

        yt = activations.softmax(
            K.dot(ytm, self.W_o)
            + K.dot(stm, self.U_o)
            + K.dot(context, self.C_o)
            + self.b_o)

        if self.return_probabilities:
            return at, [yt, st]
        else:
            return yt, [yt, st]

    def compute_output_shape(self, input_shape):
        """
            For Keras internal compatability checking
        """
        if self.return_probabilities:
            return (None, self.timesteps, self.timesteps)
        else:
            return (None, self.timesteps, self.output_dim)

    def get_config(self):
        """
            For rebuilding models on load time.
        """
        config = {
            'output_dim': self.output_dim,
            'units': self.units,
            'return_probabilities': self.return_probabilities
        }
        base_config = super(AttentionDecoder, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [0]:
!pip install chatterbot

In [0]:
from chatterbot import ChatBot
from chatterbot.trainers import ListTrainer

In [0]:
chatterbot = ChatBot('Ron Obvious',read_only=True)

In [0]:
# trainer = ChatterBotCorpusTrainer(chatbot)

In [0]:
# trainer.train(friends_data)

In [0]:
chatterbot.set_trainer(ListTrainer)

In [0]:
chatterbot.train(x)

List Trainer: [####################] 100%


In [0]:
print(chatterbot.get_response("Hi"))

In [0]:
print(chatterbot.get_response("Hi"))

This guy says hello I wanna kill myself


In [0]:
print(chatterbot.get_response("kya kr rhi thi?"))

kya kr rhi thi?
