In [39]:
import os

import pandas as pd

In [44]:
class ChatBot():
    '''
    Conversational bot that keeps a short rolling history of the dialogue.

    The constructor loads a vectorized script workbook and keeps only the rows
    belonging to the selected character. `qa_iter` handles one question/answer
    exchange; `qa_session` wraps it in an interactive console loop.
    '''

    def __init__(self, vector_db_path: str, context_len: int =3, 
                 similarity_type: str = 'retrieval', character: str = 'Leonard', 
                 verbose: int =0):
        '''
        Create a conversation session.
        The constructor reads a heavy vectorized file, so it can take a while.

        Inputs:
            - vector_db_path: path to the Excel file used to retrieve answers and context;
              it must contain a 'person' column
            - context_len: max number of previous questions and answers (both count)
              to keep as context, must be in [0, 5]
            - similarity_type: method used to find an appropriate answer.
              Only 'retrieval' is accepted right now
            - character: character of the bot (matched case-insensitively against
              the 'person' column)
            - verbose: non-zero enables debug printing (progress messages here,
              state dump after each reply in `qa_session`)

        Raises:
            ValueError: unsupported `similarity_type`, `context_len` outside [0, 5],
                or `character` not present in the loaded data.
        '''
        self.context_len = context_len
        self.prev_conversation = []
        self.qa_iter_count = 0 
        self.current_q = ''
        self.current_a = ''
        self.character = character.lower()
        self.session_stop_word = 'stop'
        self.debug_verbose = verbose
        self.similarity_type = similarity_type
        
        # Cheap argument checks run before the slow file read.
        if similarity_type != 'retrieval':
            raise ValueError(f'{similarity_type=} not implemented yet, see documntation')

        if context_len < 0 or context_len > 5:
            raise ValueError(f'{context_len=} not in [0...5] range. Dataset for retrieval algorithm was build with maximum 5 previous interactions')

        if verbose !=0: print('Reading vector db, wait a bit...')
        self.vdf = pd.read_excel(vector_db_path)

        # Lower-case the speaker column once and reuse it for both the
        # membership check and the row filter.
        person_lower = self.vdf['person'].str.lower()
        known_characters = person_lower.unique()
        if self.character not in known_characters:
            raise ValueError(f"{self.character=} is absent is training data. Use one of {known_characters}")

        self.vdf = self.vdf[person_lower == self.character]
        if verbose !=0: print(f"Working dataset shape: {self.vdf.shape}")

    def _remember(self, utterance: str) -> None:
        '''
        Append `utterance` to the rolling history.

        The oldest entry is dropped first when the history already holds more
        than `context_len` items, so the list never grows beyond
        `context_len + 1` entries (the newest utterance plus `context_len`
        earlier ones).
        '''
        if len(self.prev_conversation) > self.context_len:
            self.prev_conversation.pop(0)
        self.prev_conversation.append(utterance)

    def qa_iter(self, question: str = '') -> str:
        '''
        Single iteration of question and answer.

        Lower-cases the question, stores it in the history, produces an answer,
        stores the answer in the history too and returns it.
        NOTE: the answer is currently a fixed placeholder string; the loaded
        vector data is not consulted here yet.
        '''
        self.current_q = question.lower()
        self._remember(self.current_q)
        self.qa_iter_count += 1
        self.current_a = 'ok, nice to hear from you'.lower()
        self._remember(self.current_a)
        return self.current_a

    def qa_session(self):
        '''
        Interactive console loop; used only for debugging in a local environment
        (use the Telegram Bot wrapper in prod).

        Reads lines from stdin until the user types `self.session_stop_word`
        (case-insensitive, surrounding whitespace ignored).
        '''
        if self.qa_iter_count == 0:
            print(f"Hey, I'm {self.character} from TBB. Print '{self.session_stop_word}' to reset session.\n")

        # Compare against the configured stop word rather than a literal, so the
        # greeting above and the actual exit condition cannot drift apart.
        stop_word = self.session_stop_word.strip().lower()
        while True:
            self.current_q = input().lower()
            if self.current_q.strip() == stop_word:
                print("Good bye.")
                break
            self.qa_iter(self.current_q)
            print(f"You said: '{self.current_q}'. Answer is {self.current_a}\n")
            if self.debug_verbose !=0:
                # Leave the DataFrame out of the dump: printing it after every
                # reply floods the output and hides the conversation state.
                debug_state = {name: value for name, value in vars(self).items() if name != 'vdf'}
                print(debug_state)

        
        

# Location of the vectorized script workbook. Set the CHATBOT_VECTOR_DB
# environment variable to point at your own copy; when it is unset or empty the
# original local path is used.
VECTOR_DB_PATH = os.environ.get('CHATBOT_VECTOR_DB') or \
    "C:\\Users\\satyr\\Documents\\edu\\nlp2\\hw1\\data\\eng_script_vectorized.xlsx"

bot = ChatBot(vector_db_path = VECTOR_DB_PATH,
              context_len = 3, 
              similarity_type = 'retrieval', 
              character = 'Leonard',
              verbose=1 )
# Interactive debug loop: blocks on input() until the stop word is typed.
bot.qa_session()

Reading vector db, wait a bit...
Working dataset shape: (8689, 23)
Hey, I'm leonard from TBB. Print 'stop' to reset session.



 stop


Good bye.


In [17]:
# user_input = ''
# while True:
#     user_input = input()
#     print(f'You said: {user_input}')
#     if user_input.lower() == 'stop':
#         break

 Hello leonard


You said: Hello leonard


 you hear me good!


You said: you hear me good!


 stop


You said: stop
