### NLP Project - Sommeliere chat bot

Develop a chatbot that answers a client's question by finding the most similar question in the database and returning its paired answer.

In [1]:
# The wine database below was compiled from the following websites:
# 1. https://www.abcfws.com/12-commonly-asked-wine-questions-answered 
# 2. https://www.wine.com/# 

# There are 22 wine questions and paired answers. 

# Question -> answer lookup table used by the chat bot.
# Insertion order is kept exactly as authored.
wine_database = {
    # --- General wine questions ---
    "Should I decant wine before I drink it?":
        "The overwhelming majority of wines purchased in our stores are intended for immediate consumption and don’t need to be decanted.",
    "What is the best way to store wine?":
        "Keep storage temperature at around 55 degrees Fahrenheit and keep bottles horizontally to keep the corks from drying out.",
    "At what temperature should wine be served?":
        "Serving temperature has everything to do with the wine’s style.",
    "Should I pay attention to wine ratings?":
        "Yes, using them strictly as a guide.",
    "What do wine ratings mean?":
        "These ratings are the opinion of one person. If you enjoy one's style, odds are you would love most wines that he/she scored well.",
    "What are sulfites?":
        "Sulfites, or sulfur dioxide (SO2), are chemical compounds that occur naturally at low levels during the fermentation process of winemaking.",
    "Once you have chilled wine, do you need to keep it chilled?":
        "A chilled wine can go from the refrigerator to the table and back again without suffering any negative effects to the quality of the wine.",
    "How long will wine last once it is opened?":
        "It depends on the wine.",
    "What wine is best to serve for a large group?":
        "Prosecco or Cava.",
    "How can you tell if a wine is bad when you open it?":
        "Check if the wine lose its color, and taste if cork taint.",
    "Why do some wines give me a headache?":
        "Because the tannins and sugar in the wine.",
    "What is a tannin?":
        "Tannins are a group of chemical compounds found in a lot of the foods and drinks we enjoy every day.",
    "Is Tannins good?":
        "Well, Tannins is good in any case unless you don’t like the feel of them.",
    "Difference between Old World and New World wine?":
        "Old World wines are European and New World wines are everything else.",

    # --- Recommendations by flavor or tasting note ---
    "Could you recommend me the wine with chocolate flavor?":
        "Try our ChocolatRouge, Fondante Ciello and Cocoa di Vine.",
    "Could you recommend me some creamy wine?":
        "Check out the white wine of Chardonnay and white wine of Burgundy.",
    "I like the wine with aromas of dark fruits of boysenberry":
        "Try our Rutherford Hill Cabernet Sauvignon 2016! It has aromas of dark fruits of boysenberry.",
    "Wine notes of dusted blackberries, plum, and cherry lead to a back-drift of toast and fleeting tobacco.":
        "Try our Turnbull Cabernet Sauvignon 2019!",
    "Wine with straw-yellow hue, clean intense aroma, and crisp, well-balanced taste":
        "Try our Santa Margherita Pinot Grigio 2021!",
    "Sparkling wine. The finish is light and refreshing with a tantalizing hint of sweetness.":
        "Try our La Marca Prosecco!",
    "Sparkling wine with floral notes and hints of fruit flavors, such as peach, tropical fruits and lychee":
        "Try our Cupcake Moscato D'Asti!",
    "What is the best wine you have?":
        "Best wine varies by personal preference, but our best sell wine is Cupcake Moscato D'Asti!",
}

In [2]:
def Sommeliere_bot(database = wine_database, threshold = 0.4):
  """Answer one wine question typed by the user via TF-IDF question matching.

  The questions (keys) of ``database`` are lower-cased, stripped of ".", "?"
  and ",", stemmed, and fitted into a TF-IDF document-term matrix. The user is
  then prompted for a question with ``input``; it is cleaned and vectorized the
  same way and compared with every stored question by cosine similarity. The
  answer paired with the most similar stored question is returned.

  Parameters
  ----------
  database : dict
      Maps question strings to answer strings.
  threshold : float
      Minimum cosine similarity the best match must reach. If the best score
      is below it, the "no match" message is returned instead of an answer.

  Returns
  -------
  str
      The matched answer, or "We're sorry. We were not able to find a match."

  Side effects
  ------------
  Reads one line from standard input and prints an acknowledgement that names
  the highest-weighted known term of the question, when there is one.
  """
  import numpy as np
  from nltk.stem.snowball import SnowballStemmer
  from sklearn.feature_extraction.text import TfidfVectorizer

  s_stemmer = SnowballStemmer(language = 'english')
  tfidf = TfidfVectorizer(stop_words = 'english')

  def normalize_text(text):
    """Lower-case, drop '.', '?' and ',', and stem every word of ``text``."""
    words = text.lower().replace(".", "").replace("?", "").replace(",", "").split()
    return " ".join(s_stemmer.stem(word) for word in words)

  def preprocess_database(database):
    """Return (cleaned questions, answers) as two index-aligned lists."""
    lst_ques = [normalize_text(question) for question in database]
    lst_ans  = list(database.values())
    return lst_ques, lst_ans

  def convert_query2vec(q):
    """Vectorize the query; also return its highest-weighted known term.

    The key word is ``None`` when no word of the query is in the fitted
    vocabulary: argmax over an all-zero vector would otherwise pick feature 0
    and report a word the user never typed.
    """
    q_vec = tfidf.transform([normalize_text(q)])
    weights = q_vec.toarray()[0]
    if not weights.any():
      return q_vec, None
    key_word_q = tfidf.get_feature_names_out()[weights.argmax()]
    return q_vec, key_word_q

  def cosine_similarity_func(q_vec, dtm):
    """Cosine similarity of the query against every row of ``dtm``.

    Returns an ndarray of shape (1, number of stored questions). All-zero
    vectors get a similarity of 0 rather than NaN from a 0/0 division.
    """
    q = q_vec.toarray()
    d = dtm.toarray()
    q_norm = np.linalg.norm(q, axis = 1, keepdims = True)
    d_norm = np.linalg.norm(d, axis = 1, keepdims = True)
    # Dividing a zero vector by 1 keeps it zero, so its similarity becomes 0.
    q_norm[q_norm == 0] = 1.0
    d_norm[d_norm == 0] = 1.0
    return (q / q_norm).dot((d / d_norm).T)

  def pair_answer(lst_ans, scores):
    """Pick the answer of the best-scoring question, or the fallback message."""
    query_scores = scores[0]
    if query_scores.max() < threshold:
      return "We're sorry. We were not able to find a match."
    # argsort is ascending, so the last index belongs to the best match.
    return lst_ans[int(query_scores.argsort()[-1])]

  lst_ques, lst_ans = preprocess_database(database)
  dtm = tfidf.fit_transform(lst_ques)
  q = input('Hi! I am your online sommeliere, how could I help you about wine today?\n\n')
  q_vec, key_word_q = convert_query2vec(q)
  if key_word_q is None:
    # Nothing in the question is known to the vocabulary; do not invent a key word.
    print('\nThanks for your question :)\n')
  else:
    print(f'\nThanks for your question :) The key word of your question is: {key_word_q}\n')
  scores = cosine_similarity_func(q_vec, dtm)
  prediction = pair_answer(lst_ans, scores)
  return prediction

In [3]:
# Run the bot once with the built-in wine database; it prompts for a question
# on standard input and the returned answer is shown as the cell's output.
Sommeliere_bot(threshold = 0.4)

Hi! I am your online sommeliere, how could I help you about wine today?

Could you recommend me the wine with aroma of tropical fruits?

Thanks for your question :) The key word of your question is: tropic



"Try our Cupcake Moscato D'Asti!"