In [1]:
import numpy as np 
import pandas as pd
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.layers import Dropout
from sklearn.metrics import classification_report
from keras.callbacks import EarlyStopping

In [2]:
# Load the preprocessed question/answer pairs and discard incomplete rows.
# (Alternative dataset: "dataset/ask_play_station_preprocessed.csv")
df = pd.read_csv("dataset/mixed_data_preprocessed_fixed.csv").dropna()
df

Unnamed: 0,Company,User
0,@115820 I'm sorry we've let you down! Without ...,Way to drop the ball on customer service @1158...
1,@115820 We'd like to take a further look into ...,@AmazonHelp 3 different people have given 3 di...
2,@115822 I am unable to affect your account via...,@115823 I want my amazon payments account CLOS...
3,@115826 I'm sorry for the wait. You'll receive...,@115828 How about you guys figure out my Xbox ...
4,@115827 Thanks for your patience. ^KM,@AmazonHelp @115826 Yeah this is crazy we’re l...
...,...,...
18663,@157688 I'm reaching out to this location's le...,@ChipotleTweets Fort Worth off heritage Trace :(
18664,@157689 Sorry for the trouble. What location w...,@ChipotleTweets Slow down! Half/half rice shou...
18665,@157689 I'm sharing your concerns with their l...,"@ChipotleTweets 818 Howe St, Vancouver, BC. 12..."
18666,@157690 I'd be disappointed too. Let a manager...,@ChipotleTweets I just got home from chipotle ...


In [3]:
# Wrap every company reply in start/end-of-sequence markers so the decoder can
# learn where an answer begins and ends.
# Done as one vectorized column operation instead of a per-row `.loc` loop, and
# guarded so that re-running the cell does not wrap a reply twice.
needs_markers = ~df['Company'].str.startswith('<SOS> ', na=False)
df.loc[needs_markers, 'Company'] = '<SOS> ' + df.loc[needs_markers, 'Company'] + ' <EOS>'
df

Unnamed: 0,Company,User
0,<SOS> @115820 I'm sorry we've let you down! Wi...,Way to drop the ball on customer service @1158...
1,<SOS> @115820 We'd like to take a further look...,@AmazonHelp 3 different people have given 3 di...
2,<SOS> @115822 I am unable to affect your accou...,@115823 I want my amazon payments account CLOS...
3,<SOS> @115826 I'm sorry for the wait. You'll r...,@115828 How about you guys figure out my Xbox ...
4,<SOS> @115827 Thanks for your patience. ^KM <EOS>,@AmazonHelp @115826 Yeah this is crazy we’re l...
...,...,...
18663,<SOS> @157688 I'm reaching out to this locatio...,@ChipotleTweets Fort Worth off heritage Trace :(
18664,<SOS> @157689 Sorry for the trouble. What loca...,@ChipotleTweets Slow down! Half/half rice shou...
18665,<SOS> @157689 I'm sharing your concerns with t...,"@ChipotleTweets 818 Howe St, Vancouver, BC. 12..."
18666,<SOS> @157690 I'd be disappointed too. Let a m...,@ChipotleTweets I just got home from chipotle ...


In [4]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the split reproducible.
train_data, test_data = train_test_split(df, test_size=0.3, random_state=42)

In [5]:
# Corpus used to fit the tokenizer: user messages followed by company replies,
# both taken from the training split only.
user_texts = train_data['User'].values
company_texts = train_data['Company'].values
text_data = np.concatenate((user_texts, company_texts))
text_data

array(['@Uber_Support You\'ve literally taken $50 worth from me at least for being "late" .... late being 2 minutes after I come outside',
       '@sainsburys Bracknell as always, the flagship shit hole of Berkshire. No one cares anymore and it wasn’t busy',
       "@XboxSupport Ya know.. it's really hard to figure out how to find out how to order spare parts for the Xbox Elite Controller when this page never works.. URL_POSITION",
       ...,
       '<SOS> @133980 Hi Louise, you are right we do not own the car park but as we share it with other stores we can feed any issues back to those who do. Especially if these issues are putting our customers safety at risk! 1/3 <EOS>',
       "<SOS> @116103 We'll make sure to get this straightened out. DM us, and we'll see you there. URL_POSITION <EOS>",
       "<SOS> @135412 I'm really sorry about this Clare. Can you DM me your full name, address, email and order number please? I'll look into this. Guzala <EOS>"],
      dtype=object)

In [10]:
MAX_NB_WORDS = 10000

# The filter set strips punctuation, including '<' and '>', and lower=True
# lower-cases the text, so the '<SOS>' / '<EOS>' markers end up as the plain
# tokens 'sos' / 'eos'.
# The backslash is written as '\\' because '\]' is an invalid escape sequence in
# a normal string literal; the resulting filter characters are unchanged.
tokenizer = Tokenizer(num_words=MAX_NB_WORDS, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~', lower=True)
tokenizer.fit_on_texts(text_data)
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))

Found 28790 unique tokens.


In [13]:
# Count the tokens that occur fewer than two times in the fitted corpus.
word_counts = tokenizer.word_counts
rare_words_number = sum(1 for occurrences in word_counts.values() if occurrences < 2)
rare_words_number

16318

In [14]:
# Peek at the first user message of the training split.
train_data['User'].values[0]

'@Uber_Support You\'ve literally taken $50 worth from me at least for being "late" .... late being 2 minutes after I come outside'

In [15]:
# Encode that first user message with the fitted tokenizer (one text in, one index sequence out).
seq = tokenizer.texts_to_sequences([train_data['User'].values[0]])
seq

[[145,
  70,
  521,
  1096,
  732,
  863,
  1119,
  47,
  33,
  38,
  842,
  9,
  238,
  295,
  295,
  238,
  54,
  307,
  170,
  7,
  340,
  1460]]

In [16]:
# Map the indices back to words to see what survives tokenization.
tokenizer.sequences_to_texts(seq)

["uber support you've literally taken 50 worth from me at least for being late late being 2 minutes after i come outside"]

In [17]:
# Turn the training texts into index sequences: user messages become train_X,
# company replies (already wrapped in the SOS/EOS markers) become train_y.
train_X = tokenizer.texts_to_sequences(train_data['User'].values)
train_y = tokenizer.texts_to_sequences(train_data['Company'].values)

In [18]:
# First encoded user message, before padding.
train_X[0]

[145,
 70,
 521,
 1096,
 732,
 863,
 1119,
 47,
 33,
 38,
 842,
 9,
 238,
 295,
 295,
 238,
 54,
 307,
 170,
 7,
 340,
 1460]

In [19]:
# NOTE: despite the name, this is the MEAN number of whitespace-separated words
# per company reply in the training split (total words / number of replies).
answer_word_counts = [len(answer.split()) for answer in train_data['Company']]
MAX_ANSWER_LENGTH = sum(answer_word_counts) / len(train_data['Company'])
print(MAX_ANSWER_LENGTH)

22.605647815106757


In [20]:
# Fixed sequence lengths (in tokens) used when padding/truncating questions and answers.
MAX_QUESTION_LENGTH = 20
MAX_ANSWER_LENGTH = 20
print(MAX_QUESTION_LENGTH)
print(MAX_ANSWER_LENGTH)

20
20


In [21]:
# Bring every sequence to a fixed length: shorter ones are zero-padded at the end,
# longer ones are cut at the end (padding='post', truncating='post').
train_X = pad_sequences(train_X, maxlen=MAX_QUESTION_LENGTH, padding='post', truncating='post')
train_y = pad_sequences(train_y, maxlen=MAX_ANSWER_LENGTH, padding='post', truncating='post')

In [22]:
# Sanity check: the padded question has exactly MAX_QUESTION_LENGTH entries.
len(train_X[0])

20

In [23]:
# First 18 token ids of the first padded question.
train_X[0][:18]

array([ 145,   70,  521, 1096,  732,  863, 1119,   47,   33,   38,  842,
          9,  238,  295,  295,  238,   54,  307])

In [24]:
# Decoder targets: each answer shifted left by one token (the leading start
# marker is dropped), then zero-padded back to MAX_ANSWER_LENGTH so that
# target[t] is the token following decoder input[t].
shifted_answers = [answer[1:] for answer in train_y]
train_y_final_output = pad_sequences(shifted_answers, maxlen=MAX_ANSWER_LENGTH, padding='post', truncating='post')

In [25]:
# Inspect the first shifted target row (the slice bound exceeds the row length, so the whole row is shown).
train_y_final_output[0][:50]

array([ 96,   1, 114, 102,   5,  12,   5,  24,  56,  22,   6,  28, 242,
       156,  18,  10,  88,  78,   8,   0])

In [26]:
# Matching decoder input row for comparison: same tokens, one position later than in the target.
train_y[0][:18]

array([  3,  96,   1, 114, 102,   5,  12,   5,  24,  56,  22,   6,  28,
       242, 156,  18,  10,  88])

In [27]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the target token ids over MAX_NB_WORDS classes, as required by
# the categorical_crossentropy loss used at compile time.
# NOTE: this overwrites train_y_final_output, so the cell must not be run twice
# without re-running the cell that builds the shifted targets.
train_y_final_output = to_categorical(train_y_final_output, num_classes=MAX_NB_WORDS)

In [28]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Embedding, LSTM, Input


# Input lengths follow the padding constants instead of a hard-coded 20, so the
# model stays consistent with pad_sequences if those constants are changed.
enc_inp = Input(shape=(MAX_QUESTION_LENGTH, ))  # padded question token ids
dec_inp = Input(shape=(MAX_ANSWER_LENGTH, ))    # padded answer token ids (teacher forcing)

In [29]:
VOCAB_SIZE = MAX_NB_WORDS
# Encoder-side embedding: 20-dimensional vectors for VOCAB_SIZE+1 possible ids.
# input_length is tied to MAX_QUESTION_LENGTH (previously a hard-coded 20) to
# stay consistent with how the questions are padded.
embed = Embedding(VOCAB_SIZE+1, output_dim=20,
                  input_length=MAX_QUESTION_LENGTH,
                  trainable=True
                  )

In [30]:
# Encoder: embed the question and run it through a 200-unit LSTM.
enc_embed = embed(enc_inp)
enc_lstm = LSTM(200, return_sequences=True, return_state=True)
# Only the final hidden/cell states are used downstream; enc_op is not referenced again in the visible cells.
enc_op, h, c = enc_lstm(enc_embed)
enc_states = [h, c]


In [31]:
# Decoder-side embedding (separate weights from the encoder embedding).
# input_length is tied to MAX_ANSWER_LENGTH (previously a hard-coded 20) to stay
# consistent with how the answers are padded.
embed2 = Embedding(VOCAB_SIZE+1, output_dim=20,
                  input_length=MAX_ANSWER_LENGTH,
                  trainable=True
                  )

In [32]:
# Decoder: embed the answer tokens and run a 200-unit LSTM that starts from the
# encoder's final states. The returned states are discarded here; they are only
# needed in the inference model built further down.
dec_embed = embed2(dec_inp)
dec_lstm = LSTM(200, return_sequences=True, return_state=True)
dec_op, _, _ = dec_lstm(dec_embed, initial_state=enc_states)

In [33]:
# Per-timestep softmax over the vocabulary on top of the decoder outputs.
dense = Dense(VOCAB_SIZE, activation='softmax')
dense_op = dense(dec_op)

# Training model: (question ids, answer ids) -> next-token distribution per step.
model = Model([enc_inp, dec_inp], dense_op)
model.compile(loss='categorical_crossentropy', metrics=['acc'], optimizer='adam')

# summary() prints the table itself and returns None, so it is called directly
# rather than wrapped in print(), which would add a stray "None" line.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 20)]         0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, 20)]         0           []                               
                                                                                                  
 embedding (Embedding)          (None, 20, 20)       200020      ['input_1[0][0]']                
                                                                                                  
 embedding_1 (Embedding)        (None, 20, 20)       200020      ['input_2[0][0]']                
                                                                                              

In [34]:
# Train with teacher forcing; 20% of the training data is held out for validation.
# Early stopping ends training after 3 epochs without a val_loss improvement of
# at least 0.0001. restore_best_weights=True rolls the model back to the best
# epoch, so the weights saved afterwards are not the ones from 3 epochs too late.
model.fit([train_X, train_y], train_y_final_output, epochs=30, validation_split=0.2,
    callbacks=[EarlyStopping(monitor='val_loss', patience=3, min_delta=0.0001,
                             restore_best_weights=True)])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30


<keras.callbacks.History at 0x27654c9d3d0>

In [35]:
# Persist the trained weights. NOTE(review): presumably the 'weights/' directory must already exist — confirm.
model.save_weights('weights/mixed_lstm.h5')

In [36]:
# Inference encoder: maps a padded question to the LSTM's final [h, c] states.
enc_model = Model(enc_inp, enc_states)

In [37]:
# Placeholders for the decoder's incoming hidden and cell state; 200 matches the LSTM unit count.
decoder_state_input_h = Input(shape=(200,))
decoder_state_input_c = Input(shape=(200,))

In [38]:
# Bundle both state placeholders in the [h, c] order expected by initial_state.
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

In [39]:
# Reuse the trained decoder LSTM, but start it from externally supplied states so
# decoding can proceed one step at a time.
decoder_outputs, state_h, state_c = dec_lstm(dec_embed , 
                                    initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
# The softmax layer is intentionally NOT applied here: the decoding code below
# calls dense(...) on the model output itself. Enabling the next line would make
# that code apply the layer twice.
#decoder_outputs = dense(decoder_outputs)

In [40]:
# Inference decoder: (token ids, h, c) -> (raw LSTM outputs, new h, new c).
dec_model = Model([dec_inp]+ decoder_states_inputs,
                                      [decoder_outputs]+ decoder_states)

In [41]:
# Encode one held-out user message (row 2 of the test split) for a manual decoding run.
test_question = tokenizer.texts_to_sequences([test_data['User'].values[2]])

# Alternative: try a question the model has seen during training.
#test_question = tokenizer.texts_to_sequences([train_data['User'].values[200]])
test_question

[[108, 3485, 241, 21, 53, 836, 1072, 1, 6750, 631, 603, 283]]

In [42]:
# Pad/truncate the question exactly like the training questions.
test_X = pad_sequences(test_question, maxlen=MAX_QUESTION_LENGTH, padding='post', truncating='post')
test_X

array([[ 108, 3485,  241,   21,   53,  836, 1072,    1, 6750,  631,  603,
         283,    0,    0,    0,    0,    0,    0,    0,    0]])

In [43]:
# Despite the name, this holds the encoder's final states for the question, which seed the decoder.
test_answer = enc_model.predict( test_X )

In [44]:
# Decoder input for a single step: a batch of one sequence holding one token id.
empty_target_seq = np.zeros( ( 1 , 1) )
empty_target_seq

array([[0.]])

In [45]:
# Seed the decoder with the start marker; it is looked up as 'sos' because the
# tokenizer lower-cases text and filters out the angle brackets.
empty_target_seq[0, 0] = tokenizer.texts_to_sequences(['sos'])[0][0]
empty_target_seq

array([[3.]])

In [46]:
# Greedy, token-by-token decoding of the reply to the question encoded above,
# with verbose tracing of the intermediate result.
#
# The loop works on local copies of the start token and the encoder states and
# uses its own names for the per-step LSTM states. The original version
# overwrote `empty_target_seq`, `test_answer`, `h` and `c`, so re-running this
# cell continued from the end of the previous run and clobbered the encoder's
# state tensors.
step_input = empty_target_seq.copy()
step_states = list(test_answer)
decoded_translation = ''
blank_steps = 0      # consecutive steps that added no visible word (e.g. padding id)
first_step = True
while True:
    dec_outputs, next_h, next_c = dec_model.predict([step_input] + step_states, verbose=0)
    # dec_model returns raw LSTM outputs; the trained softmax layer is applied here.
    token_probs = dense(dec_outputs)
    sampled_word_index = np.argmax(token_probs[0, -1, :])

    if first_step:
        first_step = False
        # Debug aid: the ten highest-scoring first words and the chosen index.
        top_indices = np.argpartition(token_probs[0, -1, :], -10)[-10:]
        top_words = tokenizer.sequences_to_texts([top_indices])
        print('\n', top_words)
        print('\n', sampled_word_index)

    sampled_word = tokenizer.sequences_to_texts([[sampled_word_index]])[0] + ' '
    print(decoded_translation)

    # The end marker terminates decoding and is not appended to the reply.
    if sampled_word == 'eos ':
        break

    previous = decoded_translation
    decoded_translation += sampled_word
    if previous.rstrip() == decoded_translation.rstrip():
        blank_steps += 1
    else:
        blank_steps = 0

    # Stop once the reply exceeds 18 words or the model emitted more than two
    # empty tokens in a row.
    if len(decoded_translation.split()) > 18 or blank_steps > 2:
        break

    # Feed the chosen token and the updated states into the next step
    # (e.g. <SOS> -> hi, hi -> <EOS>).
    step_input = np.zeros((1, 1))
    step_input[0, 0] = sampled_word_index
    step_states = [next_h, next_c]

print("Question: ", test_data['User'].values[2])
print("\nExpected: ", test_data['Company'].values[2])
print("\nGiven: ")
print(decoded_translation)


 ["if this we're we oh hey sorry please hello hi"]

 37

hi 
hi there 
hi there please 
hi there please check 
hi there please check your 
hi there please check your dm's 
hi there please check your dm's for 
hi there please check your dm's for more 
hi there please check your dm's for more instructions 
Question:  @Delta ^^ possibly because it was asking him to swipe his credit card? ^*

Expected:  <SOS> @136422 Thank you, Josh, and it's our pleasure to convey your family member to his destination and to give help if any need should arise. Standing by for you, if anything, good man. *ADB <EOS>

Given: 
hi there please check your dm's for more instructions 


In [47]:
def predict_answer(question, expected_answer, max_words=18, max_blank_steps=2):
    """Greedily decode the model's reply to ``question`` and print a comparison.

    The question is tokenized and padded like the training data, encoded with
    ``enc_model``, and then decoded one token at a time with ``dec_model`` and
    the trained ``dense`` softmax layer, always picking the most likely token.

    Args:
        question: Raw user message text.
        expected_answer: Reference reply; only printed for comparison.
        max_words: Decoding stops once the reply holds more than this many words.
        max_blank_steps: Decoding stops once more than this many consecutive
            steps produced no visible word (e.g. the padding id).

    Returns:
        The decoded reply as a string of words, each followed by a space. The
        end marker is never included.
    """
    tokenized_question = tokenizer.texts_to_sequences([question])
    input_question = pad_sequences(tokenized_question, maxlen=MAX_QUESTION_LENGTH, padding='post', truncating='post')
    # Encoder final states seed the decoder; verbose=0 keeps per-call progress
    # output from flooding the cell.
    step_states = enc_model.predict(input_question, verbose=0)

    # The decoder starts from the start marker, which the tokenizer stores as 'sos'.
    step_input = np.zeros((1, 1))
    step_input[0, 0] = tokenizer.texts_to_sequences(['sos'])[0][0]

    decoded_translation = ''
    blank_steps = 0
    while True:
        dec_outputs, h, c = dec_model.predict([step_input] + step_states, verbose=0)
        # dec_model returns raw LSTM outputs; apply the softmax layer here.
        token_probs = dense(dec_outputs)
        sampled_word_index = np.argmax(token_probs[0, -1, :])
        sampled_word = tokenizer.sequences_to_texts([[sampled_word_index]])[0] + ' '

        # The end marker terminates decoding and is not appended to the reply.
        if sampled_word == 'eos ':
            break

        previous = decoded_translation
        decoded_translation += sampled_word
        if previous.rstrip() == decoded_translation.rstrip():
            blank_steps += 1
        else:
            blank_steps = 0

        if len(decoded_translation.split()) > max_words or blank_steps > max_blank_steps:
            break

        # Feed the chosen token and the updated states into the next step.
        step_input = np.zeros((1, 1))
        step_input[0, 0] = sampled_word_index
        step_states = [h, c]

    print("Question: ", question)
    print("\nExpected: ", expected_answer)
    print("\nGiven: ")
    print(decoded_translation)
    return decoded_translation

In [48]:
# Print model replies next to the reference replies for the first 200 test rows.
eval_rows = test_data[:200]
for user_message, company_reply in zip(eval_rows['User'], eval_rows['Company']):
    predict_answer(user_message, company_reply)

Question:  @SouthwestAir Any chances of flying nonstop to/from Las Vegas/Memphis?  We've been traveling this route for a few years with layovers in either Dallas and/or Chicago and majority of the plane walks to the next gate together.  THANKS!!

Expected:  <SOS> @126117 Hi there. While we don’t have plans for direct flights between Las Vegas and Memphis in the immediate future. Please know that we are constantly reevaluating our flight schedules, and I have forwarded your feedback on to our Network Planning Team. ^LC <EOS>

Given: 
hi i'm sorry to hear this please dm your confirmation number so we can take a look at this 
Question:  @AskPlayStation Yeah I've tried everything. Nothing.

Expected:  <SOS> @118883 Please open the link sent previously and search for a deactivation form, please fill it to request the deactivation. <EOS>

Given: 
hi there please check your dm's for more instructions 
Question:  @Delta ^^ possibly because it was asking him to swipe his credit card? ^*

Expect

Question:  @125650 @comcastcares That’s the beauty of having a monopoly. They don’t even have to *try* to provide good service or fix problems.

Expected:  <SOS> @125649 Hi, Stephanie! Thanks for reaching out. Let’s get that Internet issue fixed. Please DM your full name, address, and phone number? -GO <EOS>

Given: 
hello i would like to look into this for you please send us a dm with your email address 
Question:  @AmericanAir talk about ripping off the public, your airfare prices are insanely ridiculous. In one day the price went up $110.  #Ripoff

Expected:  <SOS> @136915 It sounds like our seats are going fast, Kristel. All our fares are based on advanced purchase and seat availability. <EOS>

Given: 
we're sorry to hear this was your experience with us we hope you have a great flight 
Question:  @British_Airways Hence the word ‘goodwill’. Executive club for nothing less than 10 years doesn’t get you goodwill with your airline then that’s just sad. No class whatsoever was displaye

Question:  @comcastcares What’s D S?

Expected:  <SOS> @117234 I'm not sure what you mean, but I'll be happy to look into it. Would you please DM the full service address and phone number? -JN <EOS>

Given: 
hey there we can help you with the service you can find helpful troubleshooting tips here url position 
Question:  @115900 If you just want to tell me if there's internet outages in Salisbury Mass that'd be great.

Expected:  <SOS> @116676 Hi, I would love to help. Please DM me the phone # associated with the account &amp; the service address. -Jules <EOS>

Given: 
i apologize for the inconvenience i will be happy to help can you dm us your full name address 
Question:  @AskPlayStation Hi! Having issue with an old account, no idea what the login/email is for it but I know the ID

Expected:  <SOS> @116217 Hi there, Please check your DM's for further instructions. <EOS>

Given: 
we're sorry to hear that please send us a dm with your email address so we can connect 
Question:  @Southw

Question:  @sainsburys could you kindly let me know if the SmartShop app is out of action please?

Expected:  <SOS> @116809 Hi Andrew, we're not aware of any ongoing issues. Can you provide some more info about the issues you're having? Corey <EOS>

Given: 
hi there can you confirm the store you usually shop on please rach 
Question:  @Ask_Spectrum But you still haven’t answered my question: what happens if I am not here for my install appt?

Expected:  <SOS> @133178 Good Morning, Allen. How can we be of assistance today? ^AH <EOS>

Given: 
good morning i'm sorry to hear this please dm your confirmation number so we can look into this for 
Question:  @115913 Hope you &amp; your Family had a wonderful Holiday . BTW  __EMOJI__heavy_heart_exclamation️
 I LOVE T-Mobil . BUT I went to pay my bill an I asked about a wire I bought that didn't work , The store Rep was EXTREMELY RUDE told me just buy another wire . I don't have a newer phone (it's 3 yrs old ) or a lot of $$.

Expected:  <SOS> @

Question:  @1756 @115858 Mine is shite mate....crashing, turning itself off, apps randomly freezing, won’t let me call anyone @115858 @AppleSupport

Expected:  <SOS> @124304 We'd like to look into this with you. When did these issues begin? Tell us in a DM. URL_POSITION <EOS>

Given: 
we want to help please dm us your account's email address we'll take a look backstage nq url position 
Question:  i just spent $3.00 even on a boorito from @ChipotleTweets  __EMOJI__smiling_face_with_heart-eyes

Expected:  <SOS> @115972 Best day of the year! -Becky <EOS>

Given: 
hi there we can help you with the service issues please dm your account number and phone number so 
Question:  Always a great day when Lynne's your train manager #bigfan @VirginTrains

Expected:  <SOS> @134456 Which service was this on, Dan? ^LC <EOS>

Given: 
we're sorry to hear this is your experience with us we can help with anything else 
Question:  @AmericanAir Correct, after multiple phone calls, they protected us in Econ l

Question:  Just returned from Edinburgh on @British_Airways awful late there and stuck on plan on return. @120200 better every time.

Expected:  <SOS> @120499 Oh we're sorry to hear this, Peter. We hope it didn't disrupt your plans. ^Alex <EOS>

Given: 
hey there we don't have any info on this and be sure to share your feedback with the relevant 
Question:  @Ask_Spectrum Any outages in North Hollywood? All services are down here.

Expected:  <SOS> @126194 I can look into the service impairments. Please DM your account number and service address. ^SS <EOS>

Given: 
i am sorry to hear that please send us a dm with your email address so we can    
Question:  @SpotifyCares Al necessary updates done. Empty cache done... web based spotify is faster. But I think my Spotify Premium deserves to be fast.

Expected:  <SOS> @128142 Thanks for letting us know. When did this start happening? Does logging out and back in help? You can also try restarting the app /CB <EOS>

Given: 
we know how importa

Question:  @115913 AFTER about 20 years with Slowrizon, we have been with T for about 7 and are very satisfied with it. ONLY issue we had was the Note 8 promotion; we were told we would get 2 wireless chargers to cover both phones whether we chose the camera or card. BUT only 1 has shown up. Help?

Expected:  <SOS> @148740 @115913 We want any promotions you take part in to go smoothly so you can be worry-free. Send a DM my way and we can work on a solution together so you can get the most out of your Note 8. URL_POSITION *CharlesOpacki <EOS>

Given: 
115913 we want to make sure you are having issues with your internet please dm us your account number 
Question:  @Tesco Cant see one for spotify

Expected:  <SOS> @126129 Hi Emma, unfortunately we don't sell digital Spotify cards, I'm sorry for any disappointment this may cause. - Peter <EOS>

Given: 
hi there can you dm us your account's email address or username we'll take a look backstage nq url 
Question:  Flying the early morning ski

Question:  @115888 @SpotifyCares

Expected:  <SOS> @129423 Hey Jack! You can let us know here. What's happening exactly? Can you tell us more info about it? /JE <EOS>

Given: 
hey there can you dm us your account's email address we'll take a look backstage nq url position 
Question:  @Ask_Spectrum no #internet in #Dallas, TX. Is there an #outrage? Thank you

Expected:  <SOS> @135151 My apologies for the service interruption.  Please follow/DM your service phone number and I would be happy to look into this for you. ^JK <EOS>

Given: 
we have sent you a dm with your account's email address we'll take a look backstage nq url position 
Question:  @SpotifyCares Ok thx for thr explanation!

Expected:  <SOS> @135366 No worries at all! For anything else, we're just a tweet away /JP <EOS>

Given: 
hey there can you dm us your account's email address we'll take a look backstage nq url position 
Question:  @SpotifyCares URL_POSITION I cannot even find it on the app anymore.  I'm set to United St

Question:  @sprintcare Thanks!

Expected:  <SOS> @128974 You are most welcome! Thank you for choosing Sprint. -JC <EOS>

Given: 
we want to help you out please send us a dm with your email address so we can follow 
Question:  I lose faith in humanity on days like this. My friend sends me a  __EMOJI__wrapped_gift from @115821 after a crappy week and I come home and it’s been stolen.

Expected:  <SOS> @118921 I'm so sorry! We'd like to look into our available options. Please reach us here: URL_POSITION ^AG <EOS>

Given: 
hey there we don't have any info on this and want to help please dm your account number and 
Question:  As @AmericanAir Executive Platinum I tweet to them often. They are an amazing team!  Immediate responses, incredibly helpful-invaluable resource to their fliers. URL_POSITION

Expected:  <SOS> @121138 We appreciate the kudos, Julie. We're always here for you #TweetUsIfYouNeedUs! <EOS>

Given: 
we know how important it is working to be the recipe and we appreciate your 