# In [1]:
import os
from sklearn.metrics.pairwise import pairwise_distances_argmin
import numpy as np

from chatterbot import ChatBot
from chatterbot.trainers import ChatterBotCorpusTrainer

from utils import text_prepare, load_embeddings, question_to_vec, unpickle_file


class ThreadRanker:
    """Ranks pre-embedded threads against a question, one tag at a time.

    Word embeddings are loaded once at construction; the thread embeddings
    for a tag are unpickled from ``thread_embeddings_folder`` on every
    ``get_best_thread`` call.
    """

    def __init__(self):
        # load_embeddings is unpacked into the embeddings object and its dimension.
        self.word_embeddings, self.embeddings_dim = load_embeddings(
            './starspace_embeddings/data/stackoverflow_duplicate.tsv')
        # Folder expected to contain one "<tag_name>.pkl" file per tag.
        self.thread_embeddings_folder = './data/thread_embeddings_by_tag'

    def __load_embeddings_by_tag(self, tag_name):
        """Unpickle and return ``(thread_ids, thread_embeddings)`` for ``tag_name``."""
        embeddings_path = os.path.join(self.thread_embeddings_folder, tag_name + ".pkl")
        thread_ids, thread_embeddings = unpickle_file(embeddings_path)
        return thread_ids, thread_embeddings

    def get_best_thread(self, question, tag_name):
        """Return the id of the thread most similar to ``question``.

        The search is performed only across the threads stored for
        ``tag_name``, using cosine distance between the question vector and
        each thread embedding.

        Args:
            question: question text, passed as-is to ``question_to_vec``.
            tag_name: tag whose ``<tag_name>.pkl`` embeddings file is searched.

        Returns:
            A single element of ``thread_ids`` (not a one-element array).
        """
        thread_ids, thread_embeddings = self.__load_embeddings_by_tag(tag_name)

        question_vec = question_to_vec(question, self.word_embeddings, self.embeddings_dim)
        # pairwise_distances_argmin yields one index per query row. There is
        # exactly one query row here, so take element 0 to get a scalar index;
        # indexing thread_ids with the raw array would return a one-element
        # array (or fail outright when thread_ids is a plain list).
        best_thread = pairwise_distances_argmin(
            np.array([question_vec]), thread_embeddings, metric='cosine')[0]

        return thread_ids[best_thread]


class DialogueManager:
    """Routes a user question to a chit-chat bot or to StackOverflow search.

    An intent recognizer decides, from TF-IDF features of the prepared
    question, whether the question is small talk ('dialogue') or a
    programming question. Programming questions are tagged by
    ``tag_classifier`` and answered with the closest thread found by
    ``ThreadRanker``.
    """

    def __init__(self):
        print("Loading resources...")

        # Intent recognition:
        self.intent_recognizer = unpickle_file('./model_artifacts/intent_recognizer.pkl')
        self.tfidf_vectorizer = unpickle_file('./model_artifacts/tfidf_vectorizer.pkl')

        # Filled with (tag, thread_id) in generate_answer.
        self.ANSWER_TEMPLATE = 'I think its about %s\nThis thread might help you: https://stackoverflow.com/questions/%s'

        # Goal-oriented part:
        self.tag_classifier = unpickle_file('./model_artifacts/tag_classifier.pkl')
        self.thread_ranker = ThreadRanker()

    def create_chitchat_bot(self):
        """Initialize ``self.chatbot`` and train it on the English ChatterBot corpus."""

        self.chatbot = ChatBot('Scott')

        trainer = ChatterBotCorpusTrainer(self.chatbot)
        trainer.train("chatterbot.corpus.english")

    def generate_answer(self, question):
        """Combine the StackOverflow and chit-chat parts using intent recognition.

        Args:
            question: raw user question text.

        Returns:
            For the 'dialogue' intent, whatever ``self.chatbot.get_response``
            returns for the raw question; otherwise ``ANSWER_TEMPLATE`` filled
            with the predicted tag and the best thread id.
        """
        prepared_question = text_prepare(question)
        # The vectorizer takes a collection of documents, so the single
        # question is wrapped in a list (a bare string is not a valid input
        # for scikit-learn text vectorizers).
        features = self.tfidf_vectorizer.transform([prepared_question])
        # predict returns one label per input row; there is exactly one row.
        intent = self.intent_recognizer.predict(features)[0]

        # Chit-chat part:
        if intent == 'dialogue':
            # create_chitchat_bot is not called from __init__, so build the
            # bot on first use if the caller has not done so already.
            if getattr(self, 'chatbot', None) is None:
                self.create_chitchat_bot()
            # Pass the raw question to the chit-chat bot to generate a response.
            return self.chatbot.get_response(question)

        # Goal-oriented part:
        # Use predict, like the intent recognizer, and take the single label:
        # the ranker concatenates the tag with ".pkl", so it must be a scalar.
        tag = self.tag_classifier.predict(features)[0]

        # Pass prepared_question to thread_ranker to get the closest thread.
        thread_id = self.thread_ranker.get_best_thread(prepared_question, tag)

        return self.ANSWER_TEMPLATE % (tag, thread_id)



# Cell output:
# [nltk_data] Downloading package stopwords to
# [nltk_data]     /Users/lincoln/nltk_data...
# [nltk_data]   Package stopwords is already up-to-date!
