In [None]:
#hide
%load_ext autoreload
%autoreload 2

In [None]:
# default_exp dialog_system

In [None]:
# export
from let_me_answer_for_you import settings
import deeppavlov
import logging
from unittest.mock import patch
from collections import defaultdict

import pandas as pd 

# Root-logger configuration for this module: timestamped records, ERROR and above.
_LOG_OPTIONS = {
    'level': logging.ERROR,
    'datefmt': '%I:%M:%S',
    'format': '%(asctime)s %(levelname)s:%(message)s',
    # 'filename': 'example.log',  # kept disabled, as in the original setup
}
logging.basicConfig(**_LOG_OPTIONS)

# One probe message per level below ERROR; whether each one is emitted depends
# on the level that is actually in effect on the root logger.
for _level, _message in (
    (logging.DEBUG, " Debug Log Active"),
    (logging.INFO, "Hello! Welcome to our automated dialog system!"),
    (logging.WARNING, ' Warning Log Active'),
):
    logging.log(_level, _message)

# Dialog System 
> Implements the `question_answer`, `new_question_answer` and `new_context` methods

In [None]:
#export
class DialogSystem:
    ''' The DialogSystem class implements the main methods
    defined in the settings module. \n
    INPUT: \n
    - context_data_file: csv file of contexts (default: None)\n
    - faq_data_file: csv file of FAQs (default: None)\n
    - configs_faq: json config file (default: None)\n
    - download_models: Indicates if download configuration files; it is forwarded
      as *download* to `settings.load_qa_models` (default: True)\n

    If the context or the faq files are not provided, a *data* directory with the missing files,
    will be created (in the same path where the module is running). \n
    When an instance is created, the 'run_shell_installs', 'load_and_prepare_data'
    and 'load_qa_models' of the settings module are called. Also the *data* and *qa_models*
    attributes are created, they store the dataframes and models information, respectively.\n


    If the dataframes are provided they must have the following columns:

     1. context dataframe columns: 'topic', 'context'
     2. faq dataframe columns: 'Question', 'Answer'
    '''
    def __init__(
        self,
        context_data_file=None,
        faq_data_file=None,
        configs_faq=None,
        download_models=True
    ):
        settings.run_shell_installs()
        # One entry per data source; `defaultdict(str)` makes a missing key read as ''.
        self.data = {'context': defaultdict(str), 'faq': defaultdict(str)}
        self.download = download_models
        # `self.data` is handed over to be filled in; the call's return value is not used.
        settings.load_and_prepare_data(
            context_data_file=context_data_file,
            faq_data_file=faq_data_file,
            configs_faq=configs_faq,
            data=self.data
        )
        self.qa_models = settings.load_qa_models(
            config_tfidf=self.data['faq']['config'], download=self.download
        )

    def question_answer(self, question):
        ''' Gets answers to a question. \n
        INPUT: \n
        - *question* parameter \n
        The method creates the following attributes:\n
        - 'self.question' -> the question returned by `settings.get_responses` \n
        - 'self.responses' -> a dict of possible responses \n
        - 'self.flatten_responses' -> the first value returned by `settings.format_responses` \n
        - 'self.formatted_responses' -> a formatted string of the possible responses

        This method calls the functions `settings.get_responses` and `settings.format_responses`
        '''

        self.question, self.responses = settings.get_responses(
            self.data['context']['df'],
            question,
            self.qa_models,
            nb_squad_results=1
        )
        self.flatten_responses, self.formatted_responses = settings.format_responses(
            self.responses
        )

    def new_question_answer(self, question, answer):
        '''Adds a new question-answer pair.\n
        INPUT:\n
        - question\n
        - answer\n

        The new question-answer pair is stored in the path *self.data['faq']['path']*
        and the model in *qa_models['faq']['tfidf']* gets re-trained by calling the function
        `deeppavlov.train_model` (always with *download=False*).\n
        The attributes 'self.question' and 'self.answer' are set to the input parameters.
        '''
        _faq = self.data['faq']
        new_faq = pd.DataFrame({'Question': [question], 'Answer': [answer]})
        # pd.concat is the drop-in replacement for DataFrame.append, which was
        # removed in pandas 2.0; the defaults keep the original row index.
        _faq['df'] = pd.concat([_faq['df'], new_faq])
        _faq['df'].to_csv(_faq['path'], index=False)
        # Retrain from the csv just written so the new pair can be retrieved.
        self.qa_models['faq']['tfidf'] = deeppavlov.train_model(
            _faq['config'], download=False
        )
        self.question, self.answer = question, answer
        logging.info('FAQ dataset and model updated..')

    def new_context(self, topic, context):
        ''' Adds a new context. \n
        INPUT:\n
        - topic (The title of the context)\n
        - context\n

        The new context is appended to *self.data['context']['df']* and stored
        in the path *self.data['context']['path']*. \n
        The attributes 'self.topic' and 'self.context' are set to the input parameters.
        '''
        _ctx = self.data['context']
        new_context = pd.DataFrame({'topic': [topic], 'context': [context]})
        # pd.concat is the drop-in replacement for DataFrame.append, which was
        # removed in pandas 2.0; the defaults keep the original row index.
        _ctx['df'] = pd.concat([_ctx['df'], new_context])
        _ctx['df'].to_csv(_ctx['path'], index=False)
        self.topic, self.context = topic, context
        logging.info('contexts dataset updated..')

In [None]:
from nbdev.showdoc import *

def method_list_f(Foo):
    "Names from `dir(Foo)` whose attribute is callable and that do not start with a double underscore."
    names = []
    for func in dir(Foo):
        if callable(getattr(Foo, func)) and not func.startswith("__"):
            names.append(func)
    return names


# Render the class documentation first, then one entry per listed method.
show_doc(DialogSystem)
for method in method_list_f(DialogSystem):
    show_doc(getattr(DialogSystem, method))

<h2 id="DialogSystem" class="doc_header"><code>class</code> <code>DialogSystem</code><a href="" class="source_link" style="float:right">[source]</a></h2>

> <code>DialogSystem</code>(**`context_data_file`**=*`None`*, **`faq_data_file`**=*`None`*, **`configs_faq`**=*`None`*, **`download_models`**=*`True`*)

The DialogSystem class implements the main methods
defined in the settings module. 

INPUT: 

- context_data_file: csv file of contexts (default: None)

- faq_data_file: csv file of FAQs (default: None)

- configs_faq: json config file (default: None)

- download_models: Indicates if download configuration files (default: True)


If the context or the faq files are not provided, a *data* directory with the missing files,
will be created (in the same path where the module is running). 

When an instance is created, the 'run_shell_installs', 'load_and_prepare_data'
and 'load_qa_models' of the settings module  are called. Also the *data* and *qa_models* 
attributes are created, they store the dataframes and models information, respectively.

<h4 id="DialogSystem.new_context" class="doc_header"><code>DialogSystem.new_context</code><a href="__main__.py#L80" class="source_link" style="float:right">[source]</a></h4>

> <code>DialogSystem.new_context</code>(**`topic`**, **`context`**)

Adds a new context. 

INPUT:

- topic (The title of the context)
- context

The new context is stored in the path *self.data['context']['path']*

<h4 id="DialogSystem.new_question_answer" class="doc_header"><code>DialogSystem.new_question_answer</code><a href="__main__.py#L59" class="source_link" style="float:right">[source]</a></h4>

> <code>DialogSystem.new_question_answer</code>(**`question`**, **`answer`**)

Adds a new question-answer pair.

INPUT:

- question

- answer


The new question-answer pair is stored in the path *self.data['faq']['path']*
and the models in *qa_models['faq']* get re-trained by calling the function
`deeppavlaov.train_model` 

<h4 id="DialogSystem.question_answer" class="doc_header"><code>DialogSystem.question_answer</code><a href="__main__.py#L37" class="source_link" style="float:right">[source]</a></h4>

> <code>DialogSystem.question_answer</code>(**`question`**)

Gets answers to a question. 

INPUT: 

- *question* parameter 

The method creates the following attributes:

- 'self.question' -> the input parameter 

- 'self.responses' -> a dict of possible responses 

- 'self.formatted_responses' -> a formatted string of the possible responses

This method calls the functions `settings.get_response` and `settings.format_responses`

### Test Example

In [None]:
#test
# End-to-end check of DialogSystem: answer a question, add a FAQ pair, then add a context.
import tempfile
from os import path
# Work inside a temporary directory; it and anything created under it are
# removed when the `with` block exits.
with tempfile.TemporaryDirectory() as tmpdirname:

    # Both csv paths point into the fresh temp dir, so neither file exists yet.
    # NOTE(review): presumably settings.load_and_prepare_data creates them — confirm.
    ds = DialogSystem(
        faq_data_file=path.join(tmpdirname, 'faq_example.csv'),
        context_data_file=path.join(tmpdirname, 'context_example.csv'),
        download_models=False
    )

    # 1. Plain query: the question is stored and both response groups are dicts.
    ds.question_answer(question='What is Intekglobal?')

    assert ds.question == 'What is Intekglobal?'
    assert isinstance(ds.responses['squad'], dict)
    assert isinstance(ds.responses['faq'], dict)
    
    
    logging.info(f'{ds.question} \n\n {ds.formatted_responses}')

    # Negative check: an unrelated sentence must not appear in the formatted responses.
    assert 'This should not be in the current set of responses' not in  ds.formatted_responses 
    
    logging.info(f' dict of responses: {ds.responses}')
    logging.info(f'{ds.question} \n\n {ds.formatted_responses}')

    # 2. Add a FAQ pair (this retrains the FAQ model), then ask the same question:
    #    the new answer is expected among the formatted responses.
    ds.new_question_answer(
        question='What day is today?', answer='Today is the day!'
    )
    ds.question_answer(question='What day is today?')

    assert 'Today is the day!' in ds.formatted_responses

    context = '''Space Exploration Technologies Corp., trading as SpaceX,
    is an American aerospace manufacturer and space transportation services company headquartered
    in Hawthorne, California. It was founded in 2002 by Elon Musk with the goal of reducing 
    space transportation costs to enable the colonization of Mars. SpaceX has developed several launch 
    vehicles,the Starlink satellite constellation, and the Dragon spacecraft.
    '''

    # 3. Add a context (no model is retrained by new_context); the next query reads
    #    the updated context dataframe and should quote the new text.
    ds.new_context(topic='SpaceX', context=context)
    ds.question_answer(question='What are SpaceX initials stand for?')

    logging.info(f'{ds.question} \n\n {ds.formatted_responses}')

    assert 'Space Exploration Technologies Corp.' in ds.formatted_responses

[nltk_data] Downloading package punkt to /home/jovyan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /home/jovyan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package perluniprops to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package perluniprops is already up-to-date!
[nltk_data] Downloading package nonbreaking_prefixes to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package nonbreaking_prefixes is already up-to-date!


In [None]:
#hide