**Problem Statement**

Create an end-to-end customer support and feedback chatbot.


**Members**

Nikhil Shegokar | 
Namrata Dhobale | 
Sanika Pareek

# **Pre-Requisites**

In [None]:
# Install Hugging Face Transformers, which supplies the BERT model and tokenizer classes imported further down.
!pip install transformers

In [None]:
import pandas as pd
import re
import torch

In [None]:
# Load the question/response table used for canned replies.
# NOTE(review): the path looks like a mounted Google Drive folder (presumably Colab) - adjust when running elsewhere.
dataa = pd.read_csv('/content/drive/MyDrive/ml_training_data/cleaned_data.csv')

In [None]:
# Keep only the first 1000 rows; check_all_messages scores every remaining row for each user message.
dataa = dataa[:1000]

In [None]:
# Parallel columns read by check_all_messages: each known question and the reply stored alongside it.
question = dataa['question']
responsee = dataa['response']

# **BERT Model**

In [None]:
# BERT-large (uncased, whole-word masking) checkpoint fine-tuned on SQuAD, per the checkpoint name.
# answer_question calls this model to obtain start/end logits for extractive question answering.
from transformers import BertForQuestionAnswering
model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')

Downloading:   0%|          | 0.00/443 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.25G [00:00<?, ?B/s]

In [None]:
# Tokenizer loaded from the same checkpoint name as the model above; answer_question uses it
# to encode the (question, passage) pair and to map ids back to tokens.
from transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

In [None]:
def answer_question(question, answer_text):
    """Extract the span of ``answer_text`` that best answers ``question``.

    The pair is encoded with the module-level ``tokenizer``, scored by the
    module-level ``model``, and the tokens between the highest-scoring start
    and end positions are stitched back into a string.

    Args:
        question: The user's query, in any form ``tokenizer.encode`` accepts.
        answer_text: The context passage to search for an answer.

    Returns:
        The extracted answer text, or a fixed fallback message when the
        predicted answer is only a special marker token ([CLS] / [SEP]).
    """
    # Cap the encoded pair at the model's position-embedding limit so an
    # over-long input is truncated instead of failing inside the model.
    input_ids = tokenizer.encode(
        question,
        answer_text,
        truncation=True,
        max_length=model.config.max_position_embeddings,
    )

    # Segment A is everything up to and including the first [SEP]
    # (i.e. [CLS] + question + [SEP]); segment B is the remainder.
    num_seg_a = input_ids.index(tokenizer.sep_token_id) + 1
    segment_ids = [0] * num_seg_a + [1] * (len(input_ids) - num_seg_a)

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(torch.tensor([input_ids]),
                        token_type_ids=torch.tensor([segment_ids]),
                        return_dict=True)

    # Plain ints make the indexing and slicing below unambiguous.
    answer_start = int(torch.argmax(outputs.start_logits))
    answer_end = int(torch.argmax(outputs.end_logits))

    tokens = tokenizer.convert_ids_to_tokens(input_ids)
    answer = tokens[answer_start]

    # Re-join WordPiece fragments: '##' continuations attach to the previous
    # token, everything else is separated by a space. When the predicted end
    # precedes the start, the slice is empty and only the start token is kept.
    for token in tokens[answer_start + 1:answer_end + 1]:
        if token.startswith('##'):
            answer += token[2:]
        else:
            answer += ' ' + token

    # A bare special token is not a real answer; never show it to the user.
    if answer in (tokenizer.cls_token, tokenizer.sep_token):
        return "Your query does not exist in this database"
    return answer

In [None]:
# Context passage that check_all_messages passes to answer_question when no canned response scores high enough.
temptext = "Sunset is the time of day when our sky meets the outer space solar winds. There are blue, pink, and purple swirls, spinning and twisting, like clouds of balloons caught in a whirlwind. The sun moves slowly to hide behind the line of horizon, while the moon races to take its place in prominence atop the night sky. People slow to a crawl, entranced, fully forgetting the deeds that must still be done. There is a coolness, a calmness, when the sun does set."

# **MATHEMATICAL DATA SIMILARITY INDEX**

In [None]:
def message_probability(user_message, recognised_words, single_response=False, required_words=None):
    """Score how well a user message matches one predefined response.

    Args:
        user_message: Iterable of words from the user's message.
        recognised_words: Words associated with the predefined response.
        single_response: When true, the required-words check is bypassed.
        required_words: Words that must all appear in ``user_message`` for a
            non-zero score (unless ``single_response`` is true). ``None`` or
            an empty collection means no word is required.

    Returns:
        An int: the number of words in ``user_message`` found in
        ``recognised_words``, as a truncated percentage of
        ``len(recognised_words)``. Repeated message words are counted each
        time, so the value can exceed 100. Returns 0 when a required word is
        missing or when ``recognised_words`` is empty.
    """
    # Nothing to match against: report "no match" instead of dividing by zero.
    if not recognised_words:
        return 0

    # Must either have all the required words, or be a single response.
    if not single_response:
        for word in required_words or ():
            if word not in user_message:
                return 0

    # Count how many words of the message are recognised for this response.
    message_certainty = sum(1 for word in user_message if word in recognised_words)

    # Express the count as a percentage of the recognised-word list length.
    return int(message_certainty / len(recognised_words) * 100)


In [None]:
def check_all_messages(message):
    """Pick the best reply for a tokenised user message.

    Every canned response (a few hard-coded samples plus each row of the
    module-level ``question`` / ``responsee`` columns) is scored with
    ``message_probability``. If the best score is below 45, the message is
    instead answered by ``answer_question`` against ``temptext``.

    Args:
        message: The user's message as a list of lower-cased words.

    Returns:
        The best-matching canned response, or the extractive answer (or its
        fallback message) from ``answer_question``.
    """
    highest_prob_list = {}

    def response(bot_response, list_of_words, single_response=False, required_words=None):
        # Record the score for this response, keeping the best one when the
        # same response text is registered for several different questions.
        score = message_probability(
            message,
            list_of_words,
            single_response,
            [] if required_words is None else required_words,
        )
        if score > highest_prob_list.get(bot_response, -1):
            highest_prob_list[bot_response] = score

    # sample responses
    response('Hello!', ['hello'], single_response=True)
    response('See you!', ['bye', 'goodbye'], single_response=True)
    response('I\'m doing fine, and you?', ['how', 'are', 'you', 'doing'], required_words=['how'])
    response('You\'re welcome!', ['thank', 'thanks'], single_response=True)
    response('Thank you!', ['i', 'love', 'code', 'palace'], required_words=['code', 'palace'])

    # Dataset responses: iterate values pairwise rather than by index label.
    for known_question, known_response in zip(question, responsee):
        # Skip rows with a missing or blank question; they cannot match anything.
        words = known_question.split() if isinstance(known_question, str) else []
        if words:
            response(known_response, words, single_response=True)

    best_match = max(highest_prob_list, key=highest_prob_list.get)
    if highest_prob_list[best_match] >= 45:
        return best_match

    # Hand the QA model a plain string so its tokenizer performs the real
    # tokenisation; a list of words would be treated as ready-made tokens.
    if isinstance(message, str):
        query = message
    else:
        query = ' '.join(word for word in message if word)
    return answer_question(query, temptext)


# **Chat Bot Implementation**

In [None]:
# Used to get the response
def get_response(user_input):
    """Return the bot's reply to ``user_input``.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        The reply produced by ``check_all_messages``, or ``None`` when the
        user typed 'quit' (any casing, surrounding whitespace ignored).
    """
    if user_input.strip().lower() == 'quit':
        return None
    # Lower-case, then split on whitespace and on common punctuation.
    split_message = re.split(r'\s+|[,;?!.-]\s*', user_input.lower())
    return check_all_messages(split_message)


# Testing the response system
if __name__ == "__main__":
    while True:
        try:
            user_input = input("You: ")
        except EOFError:
            # Input stream closed: end the chat instead of crashing.
            break
        reply = get_response(user_input)
        if reply is None:
            # 'quit' was entered; leave the loop rather than printing None.
            break
        print(f'Bot: {reply}')

You: hello
Bot: Hello!
You: when does sunset occur?
Bot: when our sky meets the outer space solar winds
You: whenever i contact customer support they tell me i have shortcode enabled
Bot: please send us a private message so that we can further assist you just click message at the top of your profile
