In [6]:
import nltk
import gensim
from gensim.parsing.preprocessing import STOPWORDS
from gensim.models import Word2Vec
from nltk.stem import WordNetLemmatizer, SnowballStemmer

In [7]:
#Data
data=[{"tag": "welcome",
"patterns": ["Hi", "How are you", "Is any one to talk?", "Hello", "hi are you available"],
"responses": ["Hello, thanks for contacting us", "Good to see you here"," Hi there, how may I assist you?"]

        },
{"tag": "goodbye",
"patterns": ["Bye", "See you later", "Goodbye", "I will come back soon"],
"responses": ["See you later, thanks for visiting", "have a great day ahead", "Wish you Come back again soon."]
        },

{"tag": "thankful",
"patterns": ["Thanks for helping me", "Thank your guidance", "That's helpful and kind from you"],
"responses": ["Happy to help!", "Any time!", "My pleasure", "It is my duty to help you"]
        },
        {"tag": "hoursopening",
"patterns": ["What hours are you open?", "Tell your opening time?", "When are you open?", "Just your timing please"],
"responses": ["We're open every day 8am-7pm", "Our office hours are 8am-7pm every day", "We open office at 8 am and close at 7 pm"]
        },

{"tag": "payments",
"patterns": ["Can I pay using credit card?", " Can I pay using Mastercard?", " Can I pay using cash only?" ],
"responses": ["We accept VISA, Mastercard and credit card", "We accept credit card, debit cards and cash. Please don’t worry"]
        }
   ]

### Preprocessing

def lemmatize_stemming(text):
    stemmer = SnowballStemmer("english")
    return stemmer.stem(WordNetLemmatizer().lemmatize(text, pos='v'))

def preprocess(text):
    result=[]
    for token in gensim.utils.simple_preprocess(text) :
        if token not in gensim.parsing.preprocessing.STOPWORDS and len(token) > 3:
            result.append(lemmatize_stemming(token))
            
    return result

In [17]:
preprocessed_data=[]
for i in data:
    patterns=i['patterns']
    for j in patterns:
        preprocessed_data.append(preprocess(j))
    responses=i['responses']
    for j in responses:
        preprocessed_data.append(preprocess(j))

In [18]:
preprocessed_data

[[],
 [],
 ['talk'],
 ['hello'],
 ['avail'],
 ['hello', 'thank', 'contact'],
 ['good'],
 ['assist'],
 [],
 ['later'],
 ['goodby'],
 ['come', 'soon'],
 ['later', 'thank', 'visit'],
 ['great', 'ahead'],
 ['wish', 'come', 'soon'],
 ['thank', 'help'],
 ['thank', 'guidanc'],
 ['help', 'kind'],
 ['happi', 'help'],
 ['time'],
 ['pleasur'],
 ['duti', 'help'],
 ['hour', 'open'],
 ['tell', 'open', 'time'],
 ['open'],
 ['time'],
 ['open'],
 ['offic', 'hour'],
 ['open', 'offic', 'close'],
 ['credit', 'card'],
 ['mastercard'],
 ['cash'],
 ['accept', 'visa', 'mastercard', 'credit', 'card'],
 ['accept', 'credit', 'card', 'debit', 'card', 'cash', 'worri']]

### Using word2vec

In [19]:
Model= Word2Vec(preprocessed_data,min_count=1,size=300,workers=4)

In [20]:
Model.save("word2vec.model")
Model.save("model.bin")

In [21]:
model = Word2Vec.load('model.bin')

In [22]:
 vocab = list(model.wv.vocab)

In [23]:
vocab

['talk',
 'hello',
 'avail',
 'thank',
 'contact',
 'good',
 'assist',
 'later',
 'goodby',
 'come',
 'soon',
 'visit',
 'great',
 'ahead',
 'wish',
 'help',
 'guidanc',
 'kind',
 'happi',
 'time',
 'pleasur',
 'duti',
 'hour',
 'open',
 'tell',
 'offic',
 'close',
 'credit',
 'card',
 'mastercard',
 'cash',
 'accept',
 'visa',
 'debit',
 'worri']

### Finding most similar words

In [33]:
similar_words = model.most_similar(preprocess('thanks'))
print(similar_words)

[('happi', 0.11218877881765366), ('talk', 0.10319676995277405), ('visa', 0.06491489708423615), ('visit', 0.06172134354710579), ('great', 0.06109871715307236), ('soon', 0.061037011444568634), ('hour', 0.06045321375131607), ('tell', 0.055567894130945206), ('kind', 0.05286099389195442), ('guidanc', 0.05013928562402725)]


  """Entry point for launching an IPython kernel.
