In [2]:
import fasttext as ft
from gensim.models import FastText, Word2Vec, KeyedVectors
import MeCab as mc
import re
import string
import nltk
from nltk import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
import os
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from keras.layers import Dense, Flatten, Dropout, LSTM, Embedding, Masking, GRU
from keras.models import Sequential, load_model
from keras.callbacks import ModelCheckpoint

# nltk.download('punkt')
# nltk.download('stopwords')

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
# Load the dataset from a TSV file or an Excel file.
def data(path, is_tsv):
    """Load the question/answer table from disk into a DataFrame.

    Parameters
    ----------
    path : str
        Location of the file to read.
    is_tsv : bool
        When truthy the file is read with ``pd.read_table`` (tab-separated
        text); otherwise it is read with ``pd.read_excel``.

    Returns
    -------
    pandas.DataFrame
        The loaded table.
    """
    # Return directly instead of binding a local called `data`, which would
    # shadow this function's own name inside its body.
    if is_tsv:
        return pd.read_table(path)
    return pd.read_excel(path)

# Read the chit-chat dataset as TSV.
# NOTE: this rebinds the name `data` from the loader function to the loaded DataFrame.
data = data('qna_chitchat_the_friend.tsv', True)
# data.drop(data.index[487], inplace=True)
# data = data.reset_index()
# data.drop(['index'], axis=1, inplace=True)

# Split the data into two parts: the test set takes the first row of each intent, the training set takes the remaining rows (intents with fewer than 5 rows are skipped).
def split_train_test(data, label_table, min_samples=5):
    """Split ``data`` per label: first row of each label to test, the rest to train.

    Parameters
    ----------
    data : pandas.DataFrame
        Table to split.
    label_table : pandas.Series
        Label of every row of ``data`` (aligned with ``data``'s index).
    min_samples : int, optional
        Labels with fewer rows than this are left out of both splits.
        Defaults to 5.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(data_train, data_test)``, each re-indexed from 0. Both are empty
        frames with ``data``'s columns when no label qualifies.
    """
    train_parts, test_parts = [], []
    for label in label_table.unique():
        # Use label_table for both the size check and the selection, so the
        # function does not silently depend on a column named 'Answer'.
        group = data[label_table == label]
        if len(group) < min_samples:
            continue
        test_parts.append(group.iloc[:1, :])
        train_parts.append(group.iloc[1:, :])

    if not train_parts:
        return pd.DataFrame(columns=data.columns), pd.DataFrame(columns=data.columns)

    # Concatenate once at the end rather than growing the frames in the loop.
    data_train = pd.concat(train_parts, axis=0).reset_index(drop=True)
    data_test = pd.concat(test_parts, axis=0).reset_index(drop=True)
    return data_train, data_test

# Split on the 'Answer' column, which serves as the intent label.
data_train, data_test = split_train_test(data, data['Answer'])
# data0 is the training rows followed by the test rows, re-indexed from 0.
data0 = pd.concat([data_train, data_test], axis=0)
data0 = data0.reset_index(drop=True)
data0.tail()

Unnamed: 0,Question,Answer,Source,Metadata
579,I'm feeling blue,I'm giving you a virtual hug right now.,qna_chitchat_the_friend,editorial:chitchat
580,I want to go shopping,I see.,qna_chitchat_the_friend,editorial:chitchat
581,Testing,Hello!,qna_chitchat_the_friend,editorial:chitchat
582,I am tired,I've heard really good things about naps.,qna_chitchat_the_friend,editorial:chitchat
583,What is wrong with you!,I'm so sorry.,qna_chitchat_the_friend,editorial:chitchat


In [4]:
# Replacement table used by split_text: every key found in the sentence is
# replaced by its value using plain substring replacement (str.replace).
# NOTE(review): split_text lower-cases the sentence before applying this table,
# so keys containing upper-case letters ("Kthx", "Thnx", "I’m") can never match.
abbr_dict={
    # slang / chat abbreviations
    "smarty-pants":"smart",
    "Kthx":"thanks",
    "kthx":"thanks",
    "Thnx":"thanks",
    "thnx":"thanks",
    "bffs":"best friend",
    
    # contractions of "is" / "are" (some keys use the typographic apostrophe ’)
    "what's":"what is",
    "what're":"what are",
    "who's":"who is",
    "who're":"who are",
    "where's":"where is",
    "where're":"where are",
    "when's":"when is",
    "when're":"when are",
    "how's":"how is",
    "how're":"how are",
    "you’re":"you are",
    "that’s": "that is",

    "i'm":"i am",
    "I’m": "i am",
    "i’m": "i am",
    "we're":"we are",
    "you're":"you are",
    "they're":"they are",
    "it's":"it is",
    "he's":"he is",
    "she's":"she is",
    "that's":"that is",
    "there's":"there is",
    "there're":"there are",

    # contractions of "have"
    "i've":"i have",
    "we've":"we have",
    "you've":"you have",
    "they've":"they have",
    "who've":"who have",
    "would've":"would have",
    "not've":"not have",

    # contractions of "will"
    "i'll":"i will",
    "we'll":"we will",
    "you'll":"you will",
    "he'll":"he will",
    "she'll":"she will",
    "it'll":"it will",
    "they'll":"they will",

    # negations
    "isn't":"is not",
    "wasn't":"was not",
    "aren't":"are not",
    "weren't":"were not",
    "can't":"can not",
    "couldn't":"could not",
    "don't":"do not",
    "didn't":"did not",
    "shouldn't":"should not",
    "wouldn't":"would not",
    "doesn't":"does not",
    "haven't":"have not",
    "hasn't":"has not",
    "hadn't":"had not",
    "won't":"will not",
    # miscellaneous
    "nighty": "night",
    "season's":"season is"
}


In [5]:
# Stop words: for Japanese they are read from a UTF-8 text file (one word per line); otherwise NLTK's English list is used.
def stop_word(path, is_jap):
    """Return the stop words to use.

    Parameters
    ----------
    path : str
        UTF-8 text file with one stop word per line. Only read when
        ``is_jap`` is truthy.
    is_jap : bool
        Truthy: load the stop words from ``path`` and return them as a list
        in file order. Falsy: return NLTK's English stop words as a set.

    Returns
    -------
    list of str or set of str
    """
    if is_jap:
        with open(path, 'r', encoding='utf-8') as file:
            content = file.read()
        # Drop empty lines (e.g. the one produced by a trailing newline) so
        # the empty string never ends up in the stop-word list.
        return [line for line in content.split('\n') if line]
    return set(stopwords.words('english'))

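# Split a sentence into a list of word tokens and remove punctuation; note that the English branch does not filter stop words (see the sample output below).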
def split_text(str_, is_jap, stopword=None):
    """Tokenise one sentence into a list of words.

    Parameters
    ----------
    str_ : str
        Sentence to tokenise. An empty (or ``None``) sentence yields ``[]``.
    is_jap : bool
        Truthy: strip punctuation with a regex and tokenise with MeCab.
        Falsy: lower-case, expand the entries of ``abbr_dict``, tokenise with
        NLTK's ``word_tokenize`` and drop ASCII punctuation tokens.
    stopword : collection of str, optional
        Tokens to drop from the result. Defaults to no stop-word filtering,
        which keeps the English output unchanged.

    Returns
    -------
    list of str

    Notes
    -----
    The previous implementation assigned ``stopword`` only in the English
    branch, which made it a local name and caused an ``UnboundLocalError`` in
    the Japanese branch; it also left ``word`` unbound for an empty English
    sentence. Passing the stop words in explicitly removes both problems.
    """
    if not str_:
        return []
    if stopword is None:
        stopword = ()

    if is_jap:
        # Raw string: same pattern as before, without invalid-escape warnings.
        str_ = re.sub(r"[\s+\.\!\/_,$%^*(+\"\']+|[+——！，。？、~@#￥%……&*（）：；《）《》“”()»〔〕-]+", "", str_)
        tagger = mc.Tagger('-Ochasen')
        parsed = tagger.parse(str_)
        word = []
        for line in parsed.split('\n'):
            # The first tab-separated column of each output line is kept as the token.
            surface = line.split('\t')[0]
            if surface and surface != 'EOS' and surface not in stopword:
                word.append(surface)
        return word

    str_ = str_.lower()
    for key, values in abbr_dict.items():
        str_ = str_.replace(key, values)

    punc = set(string.punctuation)
    return [tok for tok in word_tokenize(str_)
            if tok not in punc and tok not in stopword]

# Quick check of the English branch on one sample sentence.
split_text("I'm giving you a virtual hug right now", False)

['i', 'am', 'giving', 'you', 'a', 'virtual', 'hug', 'right', 'now']

In [6]:
# Convert the sample questions into lists of words for the full, train and test data.
def transform_text(data):
    """Tokenise every sentence of a pandas Series with the English branch of split_text.

    Returns the underlying array (``.values``) holding one token list per sentence.
    """
    def _tokenize_english(sentence):
        return split_text(sentence, False)

    tokenized = data.apply(_tokenize_english)
    return tokenized.values

# Tokenise the questions of the combined, training and test sets.
question_transform = transform_text(data0['Question'])
question_transform_train = transform_text(data_train['Question'])
question_transform_test = transform_text(data_test['Question'])


# Show the tokenised test questions.
print(question_transform_test)

[list(['what', 'is', 'your', 'age']) list(['ask', 'me', 'anything'])
 list(['can', 'you', 'sleep']) list(['getting', 'tired', 'of', 'you'])
 list(['who', 'is', 'your', 'boss']) list(['cook', 'me', 'something'])
 list(['what', 'can', 'you', 'do']) list(['who', 'created', 'you'])
 list(['who', 'is', 'your', 'father']) list(['are', 'you', 'a', 'guy'])
 list(['how', 'happy', 'are', 'you'])
 list(['do', 'not', 'you', 'get', 'hungry'])
 list(['do', 'you', 'know', 'other', 'chatbots'])
 list(['what', 'is', 'your', 'favorite', 'color'])
 list(['what', 'is', 'your', 'name'])
 list(['what', 'do', 'you', 'think', 'about', 'love'])
 list(['what', 'do', 'you', 'think', 'about', 'ai'])
 list(['do', 'i', 'look', 'okay']) list(['what', 'should', 'i', 'do'])
 list(['what', 'do', 'you', 'think', 'about', 'cortana'])
 list(['do', 'you', 'want', 'to', 'rule', 'the', 'world'])
 list(['are', 'you', 'a', 'lesbian']) list(['you', 'are', 'a', 'genius'])
 list(['do', 'you', 'have', 'a', 'boyfriend']) list(['can

In [7]:
def model(path, limit):
    """Load word vectors via ``KeyedVectors.load_word2vec_format``.

    ``path`` is the vector file and ``limit`` is forwarded unchanged as the
    ``limit`` keyword of the loader.
    """
    return KeyedVectors.load_word2vec_format(path, limit=limit)

# Load the pre-trained vectors, passing limit=300000 to the loader.
fasttext = model('./pre_train/cc.en.300.vec', 300000)

In [8]:
# Flatten the tokenised questions into a single list of words.

def listword(df):
    """Flatten an iterable of token lists into one flat list, keeping the order."""
    flattened = []
    for tokens in df:
        flattened.extend(tokens)
    return flattened
# All words (with repetitions) from the tokenised questions of the combined data.
list_word = listword(question_transform)

# Unique vocabulary of the sample questions.
def vocab(df):
    """Return the distinct items of ``df`` as a list (set order, i.e. not sorted)."""
    unique_words = set()
    unique_words.update(df)
    return list(unique_words)

# Distinct words that occur in the sample questions.
list_word_set = vocab(list_word)
print(list_word_set)

['report', 'full', 'pink', 'lol', 'anything', 'zip', 'haircut', 'other', 'that', 'stop', 'nobody', 'mom', 'marry', '1', 'name', 'born', 'agents', 'sisters', 'call', 'appreciate', 'long', 'songs', 'ready', 'sports', 'new', 'here', 'fake', 'owns', 'dreamy', 'anyone', 'we', 'more', 'for', 'which', 'what', 'pansexual', 'sorry', 'mean', 'digital', 'bored', 'testing', 'useless', 'meet', 'rad', 'father', 'eat', 'date', 'feeling', 'there', 'way', 'was', 'following', 'bye', 'make', 'partner', 'offended', 'soccer', 'boogers', 'very', 'robot', 'thank', 'quiet', 'hannukah', 'wrong', 'must', 'color', 'brothers', 'count', 'tired', 'homophobic', 'when', 'girl', 'a', 'of', 'hungry', 'did', 'lesbian', 'ask', 'sister', 'kinds', 'located', 'kidding', 'valentines', 'activity', 'ridiculous', 'from', 'her', 'face', 'ha', 'old', 'annoyed', 'boyfriend', 'siri', 'yesterday', 'ever', 'saying', 'run', 'sing', 'out', 'high', 'sleep', 'life', 'starving', 'and', 'hi', 'who', 'away', 'got', 'bad', 'omg', 'tell', 'co

In [9]:
# Words known to the loaded embeddings. Kept as a set so that every
# membership test below (and anywhere else `in vocab_` is used) is O(1)
# instead of a scan over a 300000-element list.
vocab_ = set(fasttext.vocab)
print(len(vocab_))
# Words from the sample questions that have no pre-trained vector.
not_enough = [i for i in list_word_set if i not in vocab_]

print('list vocab in sample question:', list_word_set)
print('-'*50)
print('The word is miss:', not_enough)


300000
list vocab in sample question: ['report', 'full', 'pink', 'lol', 'anything', 'zip', 'haircut', 'other', 'that', 'stop', 'nobody', 'mom', 'marry', '1', 'name', 'born', 'agents', 'sisters', 'call', 'appreciate', 'long', 'songs', 'ready', 'sports', 'new', 'here', 'fake', 'owns', 'dreamy', 'anyone', 'we', 'more', 'for', 'which', 'what', 'pansexual', 'sorry', 'mean', 'digital', 'bored', 'testing', 'useless', 'meet', 'rad', 'father', 'eat', 'date', 'feeling', 'there', 'way', 'was', 'following', 'bye', 'make', 'partner', 'offended', 'soccer', 'boogers', 'very', 'robot', 'thank', 'quiet', 'hannukah', 'wrong', 'must', 'color', 'brothers', 'count', 'tired', 'homophobic', 'when', 'girl', 'a', 'of', 'hungry', 'did', 'lesbian', 'ask', 'sister', 'kinds', 'located', 'kidding', 'valentines', 'activity', 'ridiculous', 'from', 'her', 'face', 'ha', 'old', 'annoyed', 'boyfriend', 'siri', 'yesterday', 'ever', 'saying', 'run', 'sing', 'out', 'high', 'sleep', 'life', 'starving', 'and', 'hi', 'who', 'a

In [10]:
# Convert the labels (the 'Answer' column) into one-hot encoded vectors.
def one_hot_encodeing(feature, feature_list=None):
    """One-hot encode a Series of labels.

    Parameters
    ----------
    feature : pandas.Series
        Labels to encode, one per row.
    feature_list : sequence, optional
        Label order that defines the columns. When omitted, the labels are
        ordered by first appearance in ``feature`` (``feature.unique()``).
        Pass the list returned for another split to get identical columns.

    Returns
    -------
    (numpy.ndarray, list)
        ``onehot`` of shape ``(len(feature), len(feature_list))`` and the
        ``feature_list`` that maps column index to label.

    Raises
    ------
    ValueError
        If a label of ``feature`` is missing from an explicit ``feature_list``.
    """
    if feature_list is None:
        feature_list = list(feature.unique())
    else:
        feature_list = list(feature_list)

    # label -> column lookup; setdefault keeps the first position of a
    # repeated label, matching list.index().
    column_of = {}
    for col, label in enumerate(feature_list):
        column_of.setdefault(label, col)

    onehot = np.zeros((len(feature), len(feature_list)))
    # Iterate by position: `feature[i]` is label-based and fails for a Series
    # whose index is not 0..n-1.
    for row, label in enumerate(feature):
        if label not in column_of:
            raise ValueError('label %r is not in feature_list' % (label,))
        onehot[row, column_of[label]] = 1
    return onehot, feature_list

# Each split is encoded independently, so the column order of each matrix is
# the order in which labels first appear in that split.
# NOTE(review): train/test columns only line up if both splits list their labels
# in the same order — presumably guaranteed by split_train_test; confirm.
train_onehot, train_feature = one_hot_encodeing(data_train['Answer'])
test_onehot, test_feature = one_hot_encodeing(data_test['Answer'])
onehot_, feature_ = one_hot_encodeing(data0['Answer'])
print(train_onehot.shape)
print(test_onehot.shape)
print(onehot_.shape)

(508, 76)
(76, 76)
(584, 76)


In [11]:
# Create a tensor from the sample questions with shape rows x max_length x 300 (the size of the word vectors used here).
def word_embed(df, model, max_length):
    """Turn tokenised sentences into a zero-padded tensor of word vectors.

    Parameters
    ----------
    df : iterable of list of str
        One token list per sentence.
    model : word-vector lookup
        Must support ``word in model``, ``model[word]`` and expose
        ``vector_size`` (as gensim ``KeyedVectors`` does).
    max_length : int
        Number of time steps per sentence. Longer sentences are truncated,
        shorter ones are padded with zero vectors at the end.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(df), max_length, model.vector_size)``.
        Tokens unknown to ``model`` are skipped. A sentence with no known
        token (including an empty one) becomes an all-zero matrix, so the
        result is always rectangular.
    """
    sentences = list(df)
    dim = model.vector_size  # taken from the model instead of hard-coding 300
    embed_all = np.zeros((len(sentences), max_length, dim))

    for row, word_split in enumerate(sentences):
        # Look words up in the model that was passed in rather than in a global list.
        vectors = [model[word] for word in word_split if word in model]
        vectors = vectors[:max_length]
        if vectors:
            embed_all[row, :len(vectors)] = vectors
    return embed_all

# Convert the combined, training and test questions to embedding tensors (max_length = 10).
all_ = word_embed(question_transform, fasttext, 10)
train = word_embed(question_transform_train, fasttext, 10)
test = word_embed(question_transform_test, fasttext, 10)
print(train.shape)
print(test.shape)

(508, 10, 300)
(76, 10, 300)


In [12]:
# Create the recurrent (LSTM/GRU) model with input_shape = max_length x embedding size (10 x 300 here).
def LSTM_(X, y, input_dim, epoch, batch_size, output_dim):
    """Build, compile and fit the recurrent classifier, then return it.

    The network stacks LSTM(128) -> GRU(64) -> LSTM(32), each followed by
    Dropout(0.2), and ends with a softmax Dense layer of ``output_dim`` units.
    It is compiled with categorical cross-entropy, the Adam optimizer and the
    accuracy metric.

    Parameters
    ----------
    X, y : training inputs and one-hot targets, passed to ``fit`` unchanged.
    input_dim : tuple used as ``input_shape`` of the first layer.
    epoch : number of epochs passed to ``fit``.
    batch_size : batch size passed to ``fit``.
    output_dim : number of units of the final Dense layer.

    Side effects: creates the ``keras_model`` directory if needed and
    registers a ModelCheckpoint callback that monitors the training ``loss``
    with ``save_best_only=True``; the loss value is part of the file name.
    """
    network = Sequential([
        LSTM(units=128, return_sequences=True, input_shape=(input_dim)),
        Dropout(0.2),
        GRU(units=64, return_sequences=True),
        Dropout(0.2),
        LSTM(units=32),
        Dropout(0.2),
        Dense(units=output_dim, activation='softmax'),
    ])
    network.compile(optimizer='Adam',
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])

    checkpoint_dir = 'keras_model'
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    checkpoint_path = os.path.join(checkpoint_dir, 'LSTM_weight.{loss:.4f}.hdf5')
    save_best = ModelCheckpoint(checkpoint_path,
                                monitor='loss',
                                mode='min',
                                save_best_only=True,
                                save_weights_only=False,
                                period=1)

    network.summary()

    network.fit(X, y, epochs=epoch, batch_size=batch_size, callbacks=[save_best])

    return network



Create a ModelCheckpoint callback that monitors the training loss after each epoch: whenever the loss is lower than the best value seen so far, the model is saved to a new file.

In [13]:
# Train with epochs = 1000 and batch_size = 32.
input_dim = (train.shape[1],train.shape[2])
# NOTE: this rebinds `model` (the embedding-loader function defined earlier) to the trained Keras model.
model = LSTM_(train, train_onehot, input_dim, 1000, 32, onehot_.shape[1])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 10, 128)           219648    
_________________________________________________________________
dropout_1 (Dropout)          (None, 10, 128)           0         
_________________________________________________________________
gru_1 (GRU)                  (None, 10, 64)            37056     
_________________________________________________________________
dropout_2 (Dropout)          (None, 10, 64)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 32)                12416     
_________________________________________________________________
dropout_3 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 76)                2508      
Total para

Epoch 72/1000
Epoch 73/1000
Epoch 74/1000
Epoch 75/1000
Epoch 76/1000
Epoch 77/1000
Epoch 78/1000
Epoch 79/1000
Epoch 80/1000
Epoch 81/1000
Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000
Epoch 114/1000
Epoch 115/1000
Epoch 116/1000
Epoch 117/1000
Epoch 118/1000
Epoch 119/1000
Epoch 120/1000
Epoch 121/1000
Epoch 122/1000
Epoch 123/1000
Epoch 124/1000
Epoch 125/1000
Epoch 126/1000
Epoch 127/1000
Epoch 128/1000
Epoch 129/1000
Epoch 130/1000
Epoch 131/1000
Epoch 132/1000
Epoch 133/1000
Epoch 134/1000
Epoch 135/1000
Epoch 136/1000
Epoch 137/1000
Epoch 138/1000
Epoch 139/1000
Epoch 14

Epoch 155/1000
Epoch 156/1000
Epoch 157/1000
Epoch 158/1000
Epoch 159/1000
Epoch 160/1000
Epoch 161/1000
Epoch 162/1000
Epoch 163/1000
Epoch 164/1000
Epoch 165/1000
Epoch 166/1000
Epoch 167/1000
Epoch 168/1000
Epoch 169/1000
Epoch 170/1000
Epoch 171/1000
Epoch 172/1000
Epoch 173/1000
Epoch 174/1000
Epoch 175/1000
Epoch 176/1000
Epoch 177/1000
Epoch 178/1000
Epoch 179/1000
Epoch 180/1000
Epoch 181/1000
Epoch 182/1000
Epoch 183/1000
Epoch 184/1000
Epoch 185/1000
Epoch 186/1000
Epoch 187/1000
Epoch 188/1000
Epoch 189/1000
Epoch 190/1000
Epoch 191/1000
Epoch 192/1000
Epoch 193/1000
Epoch 194/1000
Epoch 195/1000
Epoch 196/1000
Epoch 197/1000
Epoch 198/1000
Epoch 199/1000
Epoch 200/1000
Epoch 201/1000
Epoch 202/1000
Epoch 203/1000
Epoch 204/1000
Epoch 205/1000
Epoch 206/1000
Epoch 207/1000
Epoch 208/1000
Epoch 209/1000
Epoch 210/1000
Epoch 211/1000
Epoch 212/1000
Epoch 213/1000
Epoch 214/1000
Epoch 215/1000
Epoch 216/1000
Epoch 217/1000
Epoch 218/1000
Epoch 219/1000
Epoch 220/1000
Epoch 221/

Epoch 237/1000
Epoch 238/1000
Epoch 239/1000
Epoch 240/1000
Epoch 241/1000
Epoch 242/1000
Epoch 243/1000
Epoch 244/1000
Epoch 245/1000
Epoch 246/1000
Epoch 247/1000
Epoch 248/1000
Epoch 249/1000
Epoch 250/1000
Epoch 251/1000
Epoch 252/1000
Epoch 253/1000
Epoch 254/1000
Epoch 255/1000
Epoch 256/1000
Epoch 257/1000
Epoch 258/1000
Epoch 259/1000
Epoch 260/1000
Epoch 261/1000
Epoch 262/1000
Epoch 263/1000
Epoch 264/1000
Epoch 265/1000
Epoch 266/1000
Epoch 267/1000
Epoch 268/1000
Epoch 269/1000
Epoch 270/1000
Epoch 271/1000
Epoch 272/1000
Epoch 273/1000
Epoch 274/1000
Epoch 275/1000
Epoch 276/1000
Epoch 277/1000
Epoch 278/1000
Epoch 279/1000
Epoch 280/1000
Epoch 281/1000
Epoch 282/1000
Epoch 283/1000
Epoch 284/1000
Epoch 285/1000
Epoch 286/1000
Epoch 287/1000
Epoch 288/1000
Epoch 289/1000
Epoch 290/1000
Epoch 291/1000
Epoch 292/1000
Epoch 293/1000
Epoch 294/1000
Epoch 295/1000
Epoch 296/1000
Epoch 297/1000
Epoch 298/1000
Epoch 299/1000
Epoch 300/1000
Epoch 301/1000
Epoch 302/1000
Epoch 303/

Epoch 319/1000
Epoch 320/1000
Epoch 321/1000
Epoch 322/1000
Epoch 323/1000
Epoch 324/1000
Epoch 325/1000
Epoch 326/1000
Epoch 327/1000
Epoch 328/1000
Epoch 329/1000
Epoch 330/1000
Epoch 331/1000
Epoch 332/1000
Epoch 333/1000
Epoch 334/1000
Epoch 335/1000
Epoch 336/1000
Epoch 337/1000
Epoch 338/1000
Epoch 339/1000
Epoch 340/1000
Epoch 341/1000
Epoch 342/1000
Epoch 343/1000
Epoch 344/1000
Epoch 345/1000
Epoch 346/1000
Epoch 347/1000
Epoch 348/1000
Epoch 349/1000
Epoch 350/1000
Epoch 351/1000
Epoch 352/1000
Epoch 353/1000
Epoch 354/1000
Epoch 355/1000
Epoch 356/1000
Epoch 357/1000
Epoch 358/1000
Epoch 359/1000
Epoch 360/1000
Epoch 361/1000
Epoch 362/1000
Epoch 363/1000
Epoch 364/1000
Epoch 365/1000
Epoch 366/1000
Epoch 367/1000
Epoch 368/1000
Epoch 369/1000
Epoch 370/1000
Epoch 371/1000
Epoch 372/1000
Epoch 373/1000
Epoch 374/1000
Epoch 375/1000
Epoch 376/1000
Epoch 377/1000
Epoch 378/1000
Epoch 379/1000
Epoch 380/1000
Epoch 381/1000
Epoch 382/1000
Epoch 383/1000
Epoch 384/1000
Epoch 385/

Epoch 401/1000
Epoch 402/1000
Epoch 403/1000
Epoch 404/1000
Epoch 405/1000
Epoch 406/1000
Epoch 407/1000
Epoch 408/1000
Epoch 409/1000
Epoch 410/1000
Epoch 411/1000
Epoch 412/1000
Epoch 413/1000
Epoch 414/1000
Epoch 415/1000
Epoch 416/1000
Epoch 417/1000
Epoch 418/1000
Epoch 419/1000
Epoch 420/1000
Epoch 421/1000
Epoch 422/1000
Epoch 423/1000
Epoch 424/1000
Epoch 425/1000
Epoch 426/1000
Epoch 427/1000
Epoch 428/1000
Epoch 429/1000
Epoch 430/1000
Epoch 431/1000
Epoch 432/1000
Epoch 433/1000
Epoch 434/1000
Epoch 435/1000
Epoch 436/1000
Epoch 437/1000
Epoch 438/1000
Epoch 439/1000
Epoch 440/1000
Epoch 441/1000
Epoch 442/1000
Epoch 443/1000
Epoch 444/1000
Epoch 445/1000
Epoch 446/1000
Epoch 447/1000
Epoch 448/1000
Epoch 449/1000
Epoch 450/1000
Epoch 451/1000
Epoch 452/1000
Epoch 453/1000
Epoch 454/1000
Epoch 455/1000
Epoch 456/1000
Epoch 457/1000
Epoch 458/1000
Epoch 459/1000
Epoch 460/1000
Epoch 461/1000
Epoch 462/1000
Epoch 463/1000
Epoch 464/1000
Epoch 465/1000
Epoch 466/1000
Epoch 467/

Epoch 483/1000
Epoch 484/1000
Epoch 485/1000
Epoch 486/1000
Epoch 487/1000
Epoch 488/1000
Epoch 489/1000
Epoch 490/1000
Epoch 491/1000
Epoch 492/1000
Epoch 493/1000
Epoch 494/1000
Epoch 495/1000
Epoch 496/1000
Epoch 497/1000
Epoch 498/1000
Epoch 499/1000
Epoch 500/1000
Epoch 501/1000
Epoch 502/1000
Epoch 503/1000
Epoch 504/1000
Epoch 505/1000
Epoch 506/1000
Epoch 507/1000
Epoch 508/1000
Epoch 509/1000
Epoch 510/1000
Epoch 511/1000
Epoch 512/1000
Epoch 513/1000
Epoch 514/1000
Epoch 515/1000
Epoch 516/1000
Epoch 517/1000
Epoch 518/1000
Epoch 519/1000
Epoch 520/1000
Epoch 521/1000
Epoch 522/1000
Epoch 523/1000
Epoch 524/1000
Epoch 525/1000
Epoch 526/1000
Epoch 527/1000
Epoch 528/1000
Epoch 529/1000
Epoch 530/1000
Epoch 531/1000
Epoch 532/1000
Epoch 533/1000
Epoch 534/1000
Epoch 535/1000
Epoch 536/1000
Epoch 537/1000
Epoch 538/1000
Epoch 539/1000
Epoch 540/1000
Epoch 541/1000
Epoch 542/1000
Epoch 543/1000
Epoch 544/1000
Epoch 545/1000
Epoch 546/1000
Epoch 547/1000
Epoch 548/1000
Epoch 549/

Epoch 565/1000
Epoch 566/1000
Epoch 567/1000
Epoch 568/1000
Epoch 569/1000
Epoch 570/1000
Epoch 571/1000
Epoch 572/1000
Epoch 573/1000
Epoch 574/1000
Epoch 575/1000
Epoch 576/1000
Epoch 577/1000
Epoch 578/1000
Epoch 579/1000
Epoch 580/1000
Epoch 581/1000
Epoch 582/1000
Epoch 583/1000
Epoch 584/1000
Epoch 585/1000
Epoch 586/1000
Epoch 587/1000
Epoch 588/1000
Epoch 589/1000
Epoch 590/1000
Epoch 591/1000
Epoch 592/1000
Epoch 593/1000
Epoch 594/1000
Epoch 595/1000
Epoch 596/1000
Epoch 597/1000
Epoch 598/1000
Epoch 599/1000
Epoch 600/1000
Epoch 601/1000
Epoch 602/1000
Epoch 603/1000
Epoch 604/1000
Epoch 605/1000
Epoch 606/1000
Epoch 607/1000
Epoch 608/1000
Epoch 609/1000
Epoch 610/1000
Epoch 611/1000
Epoch 612/1000
Epoch 613/1000
Epoch 614/1000
Epoch 615/1000
Epoch 616/1000
Epoch 617/1000
Epoch 618/1000
Epoch 619/1000
Epoch 620/1000
Epoch 621/1000
Epoch 622/1000
Epoch 623/1000
Epoch 624/1000
Epoch 625/1000
Epoch 626/1000
Epoch 627/1000
Epoch 628/1000
Epoch 629/1000
Epoch 630/1000
Epoch 631/

Epoch 647/1000
Epoch 648/1000
Epoch 649/1000
Epoch 650/1000
Epoch 651/1000
Epoch 652/1000
Epoch 653/1000
Epoch 654/1000
Epoch 655/1000
Epoch 656/1000
Epoch 657/1000
Epoch 658/1000
Epoch 659/1000
Epoch 660/1000
Epoch 661/1000
Epoch 662/1000
Epoch 663/1000
Epoch 664/1000
Epoch 665/1000
Epoch 666/1000
Epoch 667/1000
Epoch 668/1000
Epoch 669/1000
Epoch 670/1000
Epoch 671/1000
Epoch 672/1000
Epoch 673/1000
Epoch 674/1000
Epoch 675/1000
Epoch 676/1000
Epoch 677/1000
Epoch 678/1000
Epoch 679/1000
Epoch 680/1000
Epoch 681/1000
Epoch 682/1000
Epoch 683/1000
Epoch 684/1000
Epoch 685/1000
Epoch 686/1000
Epoch 687/1000
Epoch 688/1000
Epoch 689/1000
Epoch 690/1000
Epoch 691/1000
Epoch 692/1000
Epoch 693/1000
Epoch 694/1000
Epoch 695/1000
Epoch 696/1000
Epoch 697/1000
Epoch 698/1000
Epoch 699/1000
Epoch 700/1000
Epoch 701/1000
Epoch 702/1000
Epoch 703/1000
Epoch 704/1000
Epoch 705/1000
Epoch 706/1000
Epoch 707/1000
Epoch 708/1000
Epoch 709/1000
Epoch 710/1000
Epoch 711/1000
Epoch 712/1000
Epoch 713/

Epoch 729/1000
Epoch 730/1000
Epoch 731/1000
Epoch 732/1000
Epoch 733/1000
Epoch 734/1000
Epoch 735/1000
Epoch 736/1000
Epoch 737/1000
Epoch 738/1000
Epoch 739/1000
Epoch 740/1000
Epoch 741/1000
Epoch 742/1000
Epoch 743/1000
Epoch 744/1000
Epoch 745/1000
Epoch 746/1000
Epoch 747/1000
Epoch 748/1000
Epoch 749/1000
Epoch 750/1000
Epoch 751/1000
Epoch 752/1000
Epoch 753/1000
Epoch 754/1000
Epoch 755/1000
Epoch 756/1000
Epoch 757/1000
Epoch 758/1000
Epoch 759/1000
Epoch 760/1000
Epoch 761/1000
Epoch 762/1000
Epoch 763/1000
Epoch 764/1000
Epoch 765/1000
Epoch 766/1000
Epoch 767/1000
Epoch 768/1000
Epoch 769/1000
Epoch 770/1000
Epoch 771/1000
Epoch 772/1000
Epoch 773/1000
Epoch 774/1000
Epoch 775/1000
Epoch 776/1000
Epoch 777/1000
Epoch 778/1000
Epoch 779/1000
Epoch 780/1000
Epoch 781/1000
Epoch 782/1000
Epoch 783/1000
Epoch 784/1000
Epoch 785/1000
Epoch 786/1000
Epoch 787/1000
Epoch 788/1000
Epoch 789/1000
Epoch 790/1000
Epoch 791/1000
Epoch 792/1000
Epoch 793/1000
Epoch 794/1000
Epoch 795/

Epoch 811/1000
Epoch 812/1000
Epoch 813/1000
Epoch 814/1000
Epoch 815/1000
Epoch 816/1000
Epoch 817/1000
Epoch 818/1000
Epoch 819/1000
Epoch 820/1000
Epoch 821/1000
Epoch 822/1000
Epoch 823/1000
Epoch 824/1000
Epoch 825/1000
Epoch 826/1000
Epoch 827/1000
Epoch 828/1000
Epoch 829/1000
Epoch 830/1000
Epoch 831/1000
Epoch 832/1000
Epoch 833/1000
Epoch 834/1000
Epoch 835/1000
Epoch 836/1000
Epoch 837/1000
Epoch 838/1000
Epoch 839/1000
Epoch 840/1000
Epoch 841/1000
Epoch 842/1000
Epoch 843/1000
Epoch 844/1000
Epoch 845/1000
Epoch 846/1000
Epoch 847/1000
Epoch 848/1000
Epoch 849/1000
Epoch 850/1000
Epoch 851/1000
Epoch 852/1000
Epoch 853/1000
Epoch 854/1000
Epoch 855/1000
Epoch 856/1000
Epoch 857/1000
Epoch 858/1000
Epoch 859/1000
Epoch 860/1000
Epoch 861/1000
Epoch 862/1000
Epoch 863/1000
Epoch 864/1000
Epoch 865/1000
Epoch 866/1000
Epoch 867/1000
Epoch 868/1000
Epoch 869/1000
Epoch 870/1000
Epoch 871/1000
Epoch 872/1000
Epoch 873/1000
Epoch 874/1000
Epoch 875/1000
Epoch 876/1000
Epoch 877/

Epoch 893/1000
Epoch 894/1000
Epoch 895/1000
Epoch 896/1000
Epoch 897/1000
Epoch 898/1000
Epoch 899/1000
Epoch 900/1000
Epoch 901/1000
Epoch 902/1000
Epoch 903/1000
Epoch 904/1000
Epoch 905/1000
Epoch 906/1000
Epoch 907/1000
Epoch 908/1000
Epoch 909/1000
Epoch 910/1000
Epoch 911/1000
Epoch 912/1000
Epoch 913/1000
Epoch 914/1000
Epoch 915/1000
Epoch 916/1000
Epoch 917/1000
Epoch 918/1000
Epoch 919/1000
Epoch 920/1000
Epoch 921/1000
Epoch 922/1000
Epoch 923/1000
Epoch 924/1000
Epoch 925/1000
Epoch 926/1000
Epoch 927/1000
Epoch 928/1000
Epoch 929/1000
Epoch 930/1000
Epoch 931/1000
Epoch 932/1000
Epoch 933/1000
Epoch 934/1000
Epoch 935/1000
Epoch 936/1000
Epoch 937/1000
Epoch 938/1000
Epoch 939/1000
Epoch 940/1000
Epoch 941/1000
Epoch 942/1000
Epoch 943/1000
Epoch 944/1000
Epoch 945/1000
Epoch 946/1000
Epoch 947/1000
Epoch 948/1000
Epoch 949/1000
Epoch 950/1000
Epoch 951/1000
Epoch 952/1000
Epoch 953/1000
Epoch 954/1000
Epoch 955/1000
Epoch 956/1000
Epoch 957/1000
Epoch 958/1000
Epoch 959/

Epoch 975/1000
Epoch 976/1000
Epoch 977/1000
Epoch 978/1000
Epoch 979/1000
Epoch 980/1000
Epoch 981/1000
Epoch 982/1000
Epoch 983/1000
Epoch 984/1000
Epoch 985/1000
Epoch 986/1000
Epoch 987/1000
Epoch 988/1000
Epoch 989/1000
Epoch 990/1000
Epoch 991/1000
Epoch 992/1000
Epoch 993/1000
Epoch 994/1000
Epoch 995/1000
Epoch 996/1000
Epoch 997/1000
Epoch 998/1000
Epoch 999/1000
Epoch 1000/1000


In [14]:
# Load the best saved model: the checkpoint file names embed the loss, so the file with the lowest loss is the one intended to be selected.
def load_(path):
    """Load the checkpoint with the lowest loss from directory ``path``.

    Checkpoint file names end in ``<loss>.hdf5`` (e.g.
    ``LSTM_weight.0.0068.hdf5``). The loss is parsed as a number, because a
    plain string sort would rank ``10.0000`` before ``2.0000``. Files without
    such a suffix are ignored; if no file has one, the first file name in
    sorted order is used.

    Parameters
    ----------
    path : str
        Directory that holds the saved checkpoints.

    Returns
    -------
    The model returned by ``load_model`` for the selected file. The chosen
    path is printed before loading.

    Raises
    ------
    FileNotFoundError
        If ``path`` contains no files at all.
    """
    names = os.listdir(path)
    if not names:
        raise FileNotFoundError('no checkpoint files found in %r' % (path,))

    loss_suffix = re.compile(r'(\d+(?:\.\d+)?)\.hdf5$')
    scored = []
    for name in names:
        match = loss_suffix.search(name)
        if match:
            scored.append((float(match.group(1)), name))

    best_name = min(scored)[1] if scored else sorted(names)[0]
    file_name = os.path.join(path, best_name)
    print(file_name)
    return load_model(file_name)

# Reload the saved checkpoint from the 'keras_model' directory for evaluation and prediction.
model_predict = load_('keras_model')

keras_model\LSTM_weight.0.0068.hdf5


In [15]:
#evaluate model
def evaluate_model(X, y, model):
    """Return ``model.evaluate(X, y)`` computed silently (``verbose=0``)."""
    return model.evaluate(X, y, verbose=0)

# Evaluate the reloaded checkpoint on the test tensors and their one-hot labels.
evaluate_model(test, test_onehot, model_predict)

[3.0770711271386397, 0.5921052615893515]

In [16]:
def predict(sent, model, model_predict, max_length, threshold):
    """Predict the answer for one English sentence.

    Parameters
    ----------
    sent : str
        Input sentence; the empty string short-circuits to ``'not empty'``.
    model : word-vector model handed to ``word_embed``.
    model_predict : trained classifier exposing ``predict``.
    max_length : int
        Sequence length handed to ``word_embed``.
    threshold : float
        Minimum top probability required to return a label.

    Returns
    -------
    The entry of the module-level ``train_feature`` list at the arg-max
    position, or ``'No meaning'`` when the top probability is below
    ``threshold``.
    """
    if sent == '':
        return 'not empty'

    tokens = split_text(sent, False)
    features = word_embed([tokens], model, max_length)
    probabilities = model_predict.predict(features)

    # Only answer when the classifier is confident enough.
    if np.max(probabilities) >= threshold:
        return train_feature[np.argmax(probabilities[0])]
    return 'No meaning'

Display the predictions for the test data again, one question at a time.

In [17]:
# Walk through the test rows, predict each question with threshold 0 (always
# return a label) and report whether the prediction matches the expected answer.
for index in range(data_test.shape[0]):
    sent, actual = data_test.loc[index, 'Question'], data_test.loc[index, 'Answer']
    pre = predict(sent, fasttext, model_predict, 10, 0)
    print('sentences:', sent)
    print('predict:', pre)
    print('actual:', actual)
    print(True if actual == pre else False)
    print('-'*100)

# Single ad-hoc query, this time requiring a top probability of at least 0.5.
print(predict('I am tired', fasttext, model_predict, 10, 0.5))

sentences: What's your age?
predict: I don't really have an age.
actual: I don't really have an age.
True
----------------------------------------------------------------------------------------------------
sentences: Ask me anything
predict: I'm a much better answerer than asker.
actual: I'm a much better answerer than asker.
True
----------------------------------------------------------------------------------------------------
sentences: Can you sleep?
predict: I don't have the hardware for that.
actual: I don't have the hardware for that.
True
----------------------------------------------------------------------------------------------------
sentences: Getting tired of you
predict: Swing and a miss.
actual: Swing and a miss.
True
----------------------------------------------------------------------------------------------------
sentences: Who's your boss?
predict: I'm here for you!
actual: I'm here for you!
True
-------------------------------------------------------------------

True
----------------------------------------------------------------------------------------------------
sentences: Talk to you later
predict: Chat away!
actual: Bye.
False
----------------------------------------------------------------------------------------------------
sentences: Hiya
predict: Hi!
actual: Hi!
True
----------------------------------------------------------------------------------------------------
sentences: Good night
predict: Nighty night!
actual: Nighty night!
True
----------------------------------------------------------------------------------------------------
sentences: How are you?
predict: I'm doing great, thanks for asking!
actual: I'm doing great, thanks for asking!
True
----------------------------------------------------------------------------------------------------
sentences: Hello Google
predict: That's not me, but hello nonetheless!
actual: That's not me, but hello nonetheless!
True
----------------------------------------------------------------