In [None]:
# One-time setup: download the spaCy model that a later cell loads with spacy.load("en_core_web_sm").
!python3 -m spacy download en_core_web_sm

**IMPORT STATEMENTS**

In [None]:
%%capture
# IMPORTS
from collections import Counter
from nltk import pos_tag, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from string import punctuation
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import nltk
import numpy as np
import pickle
import random
import re
import spacy
import string
import warnings

# Settings and Data Loading
warnings.filterwarnings('ignore')

# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# this file must only ever come from a trusted source.
# The context manager closes the file handle as soon as the load finishes
# instead of leaving it open until garbage collection.
with open('../data/vball_kb.pickle', 'rb') as kb_file:
  data = pickle.load(kb_file)

# NLTK resources; quiet=True suppresses the download log.
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)
nltk.download('wordnet', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)

nlp = spacy.load("en_core_web_sm")

# Load Knowledge Base Into List
# Flatten every value of the knowledge base into one list of sentences.
data_list = []
for sents in data.values():
  data_list.extend(sents)
# dict.fromkeys drops duplicates just like set() did, but keeps first-seen
# order, so the list (and any ranking tie-break that depends on position) is
# the same on every run instead of varying with string hash randomization.
data_list = list(dict.fromkeys(data_list))

**FUNCTIONS**

In [None]:
# Vader Sentiment Function 
def preprocessing_text(text):
  """Tokenize, drop stop words and lemmatize ``text`` for sentiment scoring.

  Args:
    text: Raw sentence typed by the user.

  Returns:
    A single string with the remaining lemmatized tokens joined by spaces.
  """
  # Negation words are deliberately kept: removing "not"/"no" flips the meaning
  # of a sentence ("I do not like serving" -> "I like serving") before it
  # reaches the sentiment analyzer.
  negations = {'no', 'nor', 'not'}
  # Build the stop-word lookup once per call; re-reading the list for every
  # token made the filter needlessly slow.
  stop_words = set(stopwords.words('english')) - negations

  wnl = WordNetLemmatizer()
  kept_lemmas = [
      str(wnl.lemmatize(token))
      for token in nltk.word_tokenize(text)
      if token not in stop_words
  ]
  return ' '.join(kept_lemmas)

def sentence_sentiment(text):
  """Classify ``text`` as 'neg', 'pos' or 'neu' from its VADER polarity scores.

  Args:
    text: Sentence to score.

  Returns:
    'neg' when the negative score is strictly larger than the positive one,
    'pos' when some polarity is present otherwise (ties go to 'pos'), and
    'neu' when neither a positive nor a negative score is reported.
  """
  scores = SentimentIntensityAnalyzer().polarity_scores(text)
  negative = scores.get('neg', 0.0)
  positive = scores.get('pos', 0.0)

  if negative <= 0 and positive <= 0:
    # No polarity at all. Taking max() over the score dict here would return
    # its first key when every score is 0.0 (empty or word-less input), which
    # mislabels blank input as negative, so the neutral label is explicit.
    return 'neu'
  return 'neg' if negative > positive else 'pos'

In [None]:
# POS Function
def get_hotwords(text):
    """Extract the content words of ``text``.

    The text is lower-cased and parsed with the module-level spaCy pipeline
    ``nlp``. Stop words and punctuation are discarded, and only tokens tagged
    as proper noun, adjective, noun or verb are kept.

    Args:
        text: Sentence to analyze.

    Returns:
        List of the kept token strings, in the order they appear.
    """
    wanted_pos = ('PROPN', 'ADJ', 'NOUN', 'VERB')

    def is_noise(word):
        # Stop words and punctuation are filtered out before the POS check.
        return word in nlp.Defaults.stop_words or word in punctuation

    return [
        token.text
        for token in nlp(text.lower())
        if not is_noise(token.text) and token.pos_ in wanted_pos
    ]

In [None]:
# Greetings
def greeting_response(text):
  """Return a random greeting when ``text`` contains a greeting, else ``None``.

  Matching is case-insensitive, ignores punctuation and works on whole words,
  so "Hello!" and "What's good?" are recognized while "this" is not mistaken
  for "hi".

  Args:
    text: Raw user message.

  Returns:
    One of the bot greetings picked with ``random.choice``, or ``None`` when
    no greeting word or phrase is present.
  """
  bot_greetings = ['howdy', 'hi', 'hey', 'hello', 'hola']
  user_greetings = ['hi', 'hey', 'hello', 'hola', 'greetings', 'wassup', 'whats good', 'sup', 'yo']

  # Strip punctuation before splitting; otherwise "hi!" or "hello," never
  # equals a bare greeting word.
  cleaned = text.lower().translate(str.maketrans('', '', punctuation))

  # Pad the normalized message with spaces so each greeting is matched on word
  # boundaries. Unlike a per-token comparison, this also lets the multi-word
  # entry 'whats good' match.
  padded = ' ' + ' '.join(cleaned.split()) + ' '
  if any(f' {greeting} ' in padded for greeting in user_greetings):
    return random.choice(bot_greetings)
  return None

In [None]:
# Bot Responses
def index_sort(lst):
  """Return the indexes of ``lst`` ordered from its largest value to its smallest.

  Equal values are ordered by descending index.

  Args:
    lst: Sequence of mutually comparable values.

  Returns:
    List of integer positions into ``lst``.
  """
  positions = range(len(lst))
  # Sorting on (value, position) pairs in reverse reproduces a descending sort
  # of the values with ties resolved in favor of the later position.
  return sorted(positions, key=lambda pos: (lst[pos], pos), reverse=True)

def bot_response(user_input, max_sentences=2):
  """Answer ``user_input`` with the knowledge-base sentences most similar to it.

  Similarity is the cosine similarity between bag-of-words count vectors of
  the lower-cased query and every sentence in the module-level ``data_list``.

  Args:
    user_input: The user's message.
    max_sentences: Maximum number of knowledge-base sentences to return
      (defaults to 2).

  Returns:
    The best-matching sentences joined by single spaces, or an apology
    message when no sentence shares any term with the query.
  """
  query = user_input.lower()

  # Vectorize the knowledge base together with the query, which goes last so
  # that count_matrix[-1] is the query row. A temporary list is used instead
  # of appending to / removing from the shared data_list: that way an
  # exception cannot leave the query stuck in the knowledge base, and a
  # sentence identical to the query is never removed in its place.
  corpus = [*data_list, query]
  query_idx = len(corpus) - 1

  count_matrix = CountVectorizer().fit_transform(corpus)
  similarity_scores = cosine_similarity(count_matrix[-1], count_matrix).flatten()

  # Indexes from highest to lowest similarity, without the query itself.
  ranked = [idx for idx in index_sort(similarity_scores) if idx != query_idx]

  matches = []
  for idx in ranked:
    # Scores are in descending order, so nothing useful follows the first
    # non-positive score.
    if len(matches) >= max_sentences or similarity_scores[idx] <= 0.0:
      break
    matches.append(corpus[idx])

  # If there are no similar sentences, say so.
  if not matches:
    return 'I am sorry, but I do not understand.'

  # join() avoids the leading space that incremental string building produced.
  return ' '.join(matches)

**USER MODEL**

In [None]:
class User:
  """Profile of one chatbot user: a display name plus collected word lists."""

  def __init__(self, name, keywords, likes_list, dislike_list):
    """Create a profile.

    Args:
      name: User name; stored title-cased (e.g. 'bob' -> 'Bob').
      keywords: Iterable of all collected items, or None for none.
      likes_list: Iterable of liked items, or None for none.
      dislike_list: Iterable of disliked items, or None for none.
    """
    self.name = name.title()
    # Store copies rather than the caller's own list objects. The caller's
    # lists may keep growing after the profile is created; without the copy
    # every later append would silently rewrite this saved profile too.
    self.keywords = list(keywords or [])
    self.likes_list = list(likes_list or [])
    self.dislike_list = list(dislike_list or [])

In [None]:
# Notebook-level state used by the chatbot cell:
#   users          - User objects saved when a chat session ends
#   liked_items    - hotword lists taken from messages scored as positive
#   disliked_items - hotword lists taken from messages scored as negative
users, liked_items, disliked_items = [], [], []

**MAIN**

To access your user model, have one conversation with the bot, then exit out.

Then re-run the chatbot cell below and enter your name again; the bot will show your saved user model.

In [None]:
# Chatbot Code
print('Type "EXIT" or "exit" once you are done talking to the bot')
print("-> Volleybot: Hey, my name is VolleyBot. Nice to meet you. What is your name?")

# Use the last word of the reply ("my name is bob" -> "bob"). It is title-cased
# because User stores names title-cased, so "bob" has to match a saved "Bob".
# An empty reply falls back to a placeholder instead of raising IndexError.
name_words = input('-> Me: ').split()
name_input = name_words[-1].title() if name_words else 'Friend'

# Look the visitor up among the users saved by earlier runs of this cell.
returning_user = next((user for user in users if user.name == name_input), None)
if returning_user is not None:
  print(f'-> Volleybot: Welcome back, {returning_user.name}! Here is what I got from our last session!')
  print(f'Your Likes: {returning_user.likes_list}')
  print(f'Your Dislikes: {returning_user.dislike_list}')
else:
  print(f"-> Volleybot: Hey {name_input}, got any questions about volleyball?")

# Start every session with fresh lists so that one visitor's likes/dislikes
# never end up in another visitor's profile.
liked_items = []
disliked_items = []

try:
  while True:
    print('-> Me: ', end='')
    user_input = input()

    if user_input.strip().lower() == 'exit':
      break
    if not user_input.strip():
      # Nothing to analyze or answer in a blank line.
      continue

    # Score the message once and file its hotwords under likes or dislikes.
    sentiment = sentence_sentiment(preprocessing_text(user_input))
    if sentiment == 'neg':
      disliked_items.append(get_hotwords(user_input))
    elif sentiment == 'pos':
      liked_items.append(get_hotwords(user_input))

    # Call greeting_response once so the greeting that was checked is the one
    # that gets printed.
    greeting = greeting_response(user_input)
    if greeting:
      print(f'-> Volleybot: {greeting}')
    else:
      print(f'-> Volleybot: {bot_response(user_input)}')
except (KeyboardInterrupt, EOFError, SystemExit):
  # Interrupting the kernel or closing the input ends the chat like "exit".
  pass

# Save the session. A returning visitor's profile is extended in place rather
# than stored a second time under the same name.
if returning_user is None:
  user1 = User(name_input, liked_items + disliked_items, liked_items, disliked_items)
  users.append(user1)
else:
  user1 = returning_user
  user1.likes_list.extend(liked_items)
  user1.dislike_list.extend(disliked_items)
  user1.keywords.extend(liked_items + disliked_items)
print(f'-> Volleybot: Adios! It was good chatting with you, {user1.name}!')