In [None]:
#hide
%load_ext autoreload
%autoreload 2

!pip install -r ../requirements.txt > /dev/null

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# default_exp settings

In [None]:
# export
from deeppavlov.core.common.paths import get_settings_path
from deeppavlov import configs, build_model, train_model
import json
from os import path, popen, mkdir
from shutil import copyfile
import pandas as pd
import numpy as np

from pathlib import Path
import logging
from collections import defaultdict



In [None]:
#hide
# Configure the root logger for this notebook session: timestamped records at
# DEBUG level. No log file is set (the filename option is commented out).
logging.basicConfig(
    #filename='example.log',
    format='%(asctime)s %(levelname)s:%(message)s',
    level=logging.DEBUG,
    datefmt='%I:%M:%S'
)

# Emit one record per level to check which levels are displayed.
logging.info("Hello! Welcome to our automated dialog system!")
logging.debug(" Debug Log Active")
logging.warning(' Warning Log Active')
logging.error(' Error Log Active ')

09:08:43 INFO:Hello! Welcome to our automated dialog system!
09:08:43 DEBUG: Debug Log Active
09:08:43 ERROR: Error Log Active 


In [None]:
#export
def change_log_config():
    '''Change Deeppavlov configuration files to ERROR mode

    Reads 'log_config.json' from the Deeppavlov settings directory, sets the
    level of the 'file', 'stderr', 'stdout' and 'uvicorn_handler' handlers and
    of the 'deeppavlov', 'uvicorn.access' and 'uvicorn.error' loggers to
    'ERROR' (with propagation enabled on the loggers) and writes the file
    back in place.

    Raises:
        KeyError: when the settings file lacks one of those handlers/loggers.
    '''
    settings_file = path.join(get_settings_path(), 'log_config.json')

    # The context manager closes the handle even when parsing fails.
    with open(settings_file) as settings_fh:
        settings_json = json.load(settings_fh)

    for handler_name in ('file', 'stderr', 'stdout', 'uvicorn_handler'):
        settings_json['handlers'][handler_name]['level'] = 'ERROR'

    for logger_name in ('deeppavlov', 'uvicorn.access', 'uvicorn.error'):
        settings_json['loggers'][logger_name]['level'] = 'ERROR'
        settings_json['loggers'][logger_name]['propagate'] = True

    # Serialize before opening the file for writing, so that a serialization
    # error cannot leave a truncated settings file behind.
    serialized = json.dumps(settings_json)
    with open(settings_file, 'w') as settings_fh:
        settings_fh.write(serialized)


def run_shell_installs():
    ''' Run install commands

    Switches the Deeppavlov log configuration to ERROR mode and then runs,
    one after the other, the shell commands that install Deeppavlov and the
    requirements of the squad and faq models. Each command and its standard
    output are logged at DEBUG level.
    '''
    logging.info('..Installing NLP libraries')
    change_log_config()

    command_strings = (
        ' pip install deeppavlov', ' python -m deeppavlov install squad',
        ' python -m deeppavlov install squad_bert',
        ' python -m deeppavlov install fasttext_avg_autofaq',
        ' python -m deeppavlov install fasttext_tfidf_autofaq',
        ' python -m deeppavlov install tfidf_autofaq',
        ' python -m deeppavlov install tfidf_logreg_autofaq ',
        ' python -m deeppavlov install tfidf_logreg_en_faq'
    )
    for command in command_strings:

        logging.debug(command)
        # Close every pipe explicitly instead of leaving it to the garbage
        # collector; the object returned by popen is a context manager.
        with popen(command) as pipe:
            logging.debug(pipe.read())

# Dialog System
> Question Answering Automated Dialog System

In [None]:
#hide
# Install Deeppavlov and the model requirements used by the cells below.
run_shell_installs()

09:08:43 INFO:..Installing NLP libraries
09:08:43 DEBUG: pip install deeppavlov

09:08:46 DEBUG: python -m deeppavlov install squad

09:08:49 DEBUG: python -m deeppavlov install squad_bert
Collecting git+https://github.com/deepmipt/bert.git@feat/multi_gpu
  Cloning https://github.com/deepmipt/bert.git (to revision feat/multi_gpu) to /tmp/pip-req-build-d2gazx8s
Building wheels for collected packages: bert-dp
  Building wheel for bert-dp (setup.py): started
  Building wheel for bert-dp (setup.py): finished with status 'done'
  Created wheel for bert-dp: filename=bert_dp-1.0-py3-none-any.whl size=23580 sha256=eef14d7cb585547b2cca8fa9bd8d298a9240b5c271db0013f172f4af5bcc289d
  Stored in directory: /tmp/pip-ephem-wheel-cache-ji69l2y6/wheels/44/29/b2/ee614cb7f97ba5c2d220029eaede3af4b74331ad31d6e2f4eb
Successfully built bert-dp

09:08:57 DEBUG: python -m deeppavlov install fasttext_avg_autofaq

09:09:01 DEBUG: python -m deeppavlov install fasttext_tfidf_autofaq

09:09:05 DEBUG: python -m deepp

In [None]:
#export
def action_over_list_f(arr, v):
    ''' Updates, in place, the dictionaries of arr selected by v[0]

        arr  : list of dictionaries
        v[0] : selector dictionary; only its first key/value pair is used
        v[1] : dictionary whose key/value pairs are written into every
               element of arr that holds the selector pair
    '''
    selector_key, selector_value = next(iter(v[0].items()))

    selected = (
        entry for entry in arr
        if selector_key in entry.keys() and entry[selector_key] == selector_value
    )
    for entry in selected:
        entry.update(v[1].items())


def replacement_f(model_config, **args):
    '''Replaces the model config dictionary with new values

    Every keyword argument names a key of model_config; the type of its
    value selects the action:

    * dict: recurse into the dictionary stored under that key (an empty one
      is created when the key is missing).
    * list/tuple while the config already stores a list under that key: the
      value is handed to action_over_list_f as a [selector, changes] pair.
    * anything else (str, int, float, bool, None, ...): stored as the new
      value of the key.

    model_config is modified in place; nothing is returned.
    '''
    for k, v in args.items():
        if isinstance(v, dict):
            replacement_f(model_config.setdefault(k, {}), **v)
        elif isinstance(v, (list, tuple)) and isinstance(
            model_config.get(k), list
        ):
            action_over_list_f(model_config[k], v)
        else:
            # Non-string scalars used to be dropped silently; every plain
            # value is now written to the config.
            model_config[k] = v

In [None]:
# test action_over_list_f
from random import randint


def gen_list_keys_for_tests():
    '''This function builds random fixtures for the tests

       Returns the tuple (pipe_list, rand_id, rand_key, args, new_rand_val):
       a list of 3 to 10 dictionaries, the 'id' of one of them, one of the
       keys 'key1'..'key3', the keyword arguments that ask for that key to be
       replaced under 'chains' -> 'pipe', and the replacement value.
    '''

    def tagged(prefix):
        # prefix followed by a random integer between 1 and 10, e.g. 'id_7'
        return f'{prefix}_{randint(1,10):1}'

    def random_pipe_element():
        return {
            'id': tagged('id'),
            'key1': tagged('v1'),
            'key2': tagged('v2'),
            'key3': tagged('v3')
        }

    nb_elements = randint(3, 10)
    pipe_list = [random_pipe_element() for _ in range(nb_elements)]

    chosen_position = randint(0, len(pipe_list) - 1)
    rand_id = pipe_list[chosen_position]['id']
    rand_key = f'key{randint(1, 3)}'
    new_rand_val = tagged('new')

    selector = {'id': rand_id}
    changes = {rand_key: new_rand_val}
    args = {'chains': {'pipe': [selector, changes]}}

    return pipe_list, rand_id, rand_key, args, new_rand_val


def test_action_over_list_f():
    '''action_over_list_f must write the new value into the selected element
    '''
    pipe_list, _, rand_key, args, new_rand_val = gen_list_keys_for_tests()

    # the replacement value is absent before the update
    for pipe_elem in pipe_list:
        assert new_rand_val not in pipe_elem.values()

    action_over_list_f(pipe_list, args['chains']['pipe'])

    updated_elements = [
        pipe_elem for pipe_elem in pipe_list
        if rand_key in pipe_elem.keys() and new_rand_val in pipe_elem.values()
    ]
    assert updated_elements


def test_replacement_f_list():
    '''replacement_f must apply a [selector, changes] pair to a nested list
    '''
    pipe_list, _, rand_key, args, new_rand_val = gen_list_keys_for_tests()
    mod_conf = {'chains': {'pipe': pipe_list}}

    # the replacement value is absent before the update
    for pipe_elem in mod_conf['chains']['pipe']:
        assert new_rand_val not in pipe_elem.values()

    replacement_f(model_config=mod_conf, **args)

    updated_elements = [
        pipe_elem for pipe_elem in mod_conf['chains']['pipe']
        if rand_key in pipe_elem.keys() and new_rand_val in pipe_elem.values()
    ]
    assert updated_elements


def test_replacement_f_val():
    '''replacement_f must store a string value under its key
    '''
    mod_conf = {'key1': 'val1', 'key2': 'val2', 'key3': 'val3'}
    args = {'key3': 'newvalue'}

    replacement_f(model_config=mod_conf, **args)

    for arg_k, arg_v in args.items():
        assert arg_k in mod_conf.keys()
        assert arg_v in mod_conf.values()


def test_replacement_f_dict():
    '''replacement_f must descend into nested dictionaries
    '''
    mod_conf = {'1_key_3': {'2_key_2': 'oldvalue'}, '0_key_': '0_val'}
    nested_update = {'2_key_2': 'newvalue'}

    replacement_f(model_config=mod_conf, **{'1_key_3': nested_update})

    assert mod_conf['1_key_3']['2_key_2'] == 'newvalue'


# Run the tests of action_over_list_f and replacement_f defined in this cell.
test_action_over_list_f()
test_replacement_f_list()
test_replacement_f_val()
test_replacement_f_dict()

In [None]:
#export
def updates_faq_config_file(
    configs_path,
    **args
):
    '''Updates a Deeppavlov json config file in place

    Args:
        configs_path: path of the json config file to rewrite.
        **args: replacements handed to replacement_f; each keyword names a
            top-level key of the config.

    The 'data_url' entry of 'dataset_reader' is removed when present, so the
    FAQ data set in the config file is the one given by the replacements.
    '''
    # The context manager closes the handle even when parsing fails.
    with open(configs_path) as config_fh:
        model_config = json.load(config_fh)

    if 'data_url' in model_config['dataset_reader']:
        del model_config['dataset_reader']['data_url']

    replacement_f(model_config=model_config, **args)

    # Serialize before opening the file for writing, so that a serialization
    # error cannot leave a truncated config file behind.
    serialized = json.dumps(model_config)
    with open(configs_path, 'w') as config_fh:
        config_fh.write(serialized)

In [None]:
#test updates_faq_config_file
import tempfile
from shutil import copyfile


def gen_list_keys_for_tests():
    '''Builds random fixtures for the config file tests

       Redefines the helper of the same name from the earlier test cell; in
       this version the replacement arguments target the 'chainer' key.
       Returns the tuple (pipe_list, rand_id, rand_key, args, new_rand_val).
    '''

    def tagged(prefix):
        # prefix followed by a random integer between 1 and 10, e.g. 'id_7'
        return f'{prefix}_{randint(1,10):1}'

    def random_pipe_element():
        return {
            'id': tagged('id'),
            'key1': tagged('v1'),
            'key2': tagged('v2'),
            'key3': tagged('v3')
        }

    nb_elements = randint(3, 10)
    pipe_list = [random_pipe_element() for _ in range(nb_elements)]

    chosen_position = randint(0, len(pipe_list) - 1)
    rand_id = pipe_list[chosen_position]['id']
    rand_key = f'key{randint(1, 3)}'
    new_rand_val = tagged('new')

    selector = {'id': rand_id}
    changes = {rand_key: new_rand_val}
    args = {'chainer': {'pipe': [selector, changes]}}

    return pipe_list, rand_id, rand_key, args, new_rand_val


def test_updates_faq_config_file_update_string():
    '''A string replacement must end up in the rewritten config file
    '''
    with tempfile.TemporaryDirectory() as tmpdirname:

        tmp_config_file = path.join(tmpdirname, 'tmp_file.json')

        copyfile(configs.faq.tfidf_logreg_en_faq, tmp_config_file)

        assert path.isfile(tmp_config_file)

        updates_faq_config_file(
            configs_path=tmp_config_file,
            dataset_reader={'data_path': 'fictional_csv_file.csv'}
        )

        # Close the handle before the temporary directory is removed.
        with open(tmp_config_file) as config_fh:
            config_json = json.load(config_fh)

        # Check the stored value, not only the presence of the key.
        assert config_json['dataset_reader'].get(
            'data_path'
        ) == 'fictional_csv_file.csv'


def test_updates_faq_config_file_update_list():
    '''A [selector, changes] replacement must reach the 'pipe' list of the
    rewritten config file
    '''
    with tempfile.TemporaryDirectory() as tmpdirname:

        tmp_config_file = path.join(tmpdirname, 'tmp_file.json')

        pipe_list, rand_id, rand_key, args, new_rand_val = gen_list_keys_for_tests(
        )
        mod_conf = {
            'chainer': {
                'pipe': pipe_list
            },
            'dataset_reader': 'dataset_reader_dictionary'
        }

        # The context manager flushes and closes the file before it is read
        # back by updates_faq_config_file.
        with open(tmp_config_file, 'w') as config_fh:
            json.dump(mod_conf, config_fh)

        assert path.isfile(tmp_config_file)

        updates_faq_config_file(configs_path=tmp_config_file, **args)

        # Close the handle before the temporary directory is removed.
        with open(tmp_config_file) as config_fh:
            config_json = json.load(config_fh)

        assert any(
            rand_key in pipe_elem.keys() and new_rand_val in pipe_elem.values()
            for pipe_elem in config_json['chainer']['pipe']
        )


# Run the tests of updates_faq_config_file defined in this cell.
test_updates_faq_config_file_update_string()
test_updates_faq_config_file_update_list()

In [None]:
#export
def select_faq_responses(faq_model, question):
    '''Calls the Deeppavlov FAQ model with a single question

    The question is wrapped in a one-element batch and the first element of
    whatever the model returns is handed back.
    '''
    batch = [question]
    model_output = faq_model(batch)
    return model_output[0]

In [None]:
#test faq responses
import tempfile
from shutil import copyfile


def gen_mock_csv_file(tmpdirname, faqs):
    '''Writes faqs as 'tmp_faq.csv' inside tmpdirname and returns the path
    '''
    faq_frame = pd.DataFrame(faqs)
    csv_path = path.join(tmpdirname, 'tmp_faq.csv')
    faq_frame.to_csv(csv_path, index=False)
    return csv_path


def gen_mock_vocab_answers(tmpdirname, vocabs):
    '''Writes a mock answers vocabulary file and returns its path

    Args:
        tmpdirname: directory where 'temp_vocab_answers.dict' is created.
        vocabs: dictionary with the parallel lists 'text' and 'freq'; every
            pair becomes one '<text><TAB><freq>' line of the file.
    '''
    temp_dict_file = path.join(tmpdirname, 'temp_vocab_answers.dict')
    vocabs_text = '\n'.join(
        text + '\t' + str(freq)
        for text, freq in zip(vocabs['text'], vocabs['freq'])
    )

    # The context manager closes the file even when the write fails.
    with open(temp_dict_file, 'w') as vocab_fh:
        vocab_fh.write(vocabs_text)

    return temp_dict_file


def gen_faq_config(tmpdirname, vocab_file, faq_file):
    '''Copies the tfidf_logreg_en_faq config into tmpdirname, points it to
    the given vocabulary and FAQ files and returns the path of the copy
    '''
    temp_configs_faq = path.join(tmpdirname, 'temp_config_faq.json')
    copyfile(configs.faq.tfidf_logreg_en_faq, temp_configs_faq)

    # pipe element selected by its id, and the paths written into it
    vocab_selector = {'id': 'answers_vocab'}
    vocab_paths = dict(save_path=vocab_file, load_path=vocab_file)

    config_updates = {
        'chainer': {'pipe': [vocab_selector, vocab_paths]},
        'dataset_reader': {'data_path': faq_file}
    }
    updates_faq_config_file(configs_path=temp_configs_faq, **config_updates)

    return temp_configs_faq


def test_faq_response_with_minimum_faqs_in_dataframe_fail_case():
    '''With a single FAQ entry the training/answering call is expected to
    raise ValueError
    '''
    single_faq = {
        'Question': ['Is Covid erradicated?'],
        'Answer': ['Definitely not!']
    }
    vocabs = {'text': ['This is a vocab example'], 'freq': [1]}

    with tempfile.TemporaryDirectory() as tmpdirname:

        faq_file = gen_mock_csv_file(tmpdirname, single_faq)
        vocab_file = gen_mock_vocab_answers(tmpdirname, vocabs)
        configs_file = gen_faq_config(tmpdirname, vocab_file, faq_file)

        try:
            select_faq_responses(
                question='Is Enrique the prettiest person in town?',
                faq_model=train_model(configs_file, download=True)
            )
        except ValueError:
            # expected outcome
            pass
        else:
            assert False


def test_faq_response_with_minimum_faqs_in_dataframe_success_case():
    '''With two FAQ entries the model must be trained and return an answer
    '''
    two_faqs = {
        'Question': ['Is Covid erradicated?', 'Who is the current POTUS?'],
        'Answer': ['Definitely not!', 'Donald Trump']
    }
    vocabs = {'text': ['This is a vocab example'], 'freq': [1]}

    with tempfile.TemporaryDirectory() as tmpdirname:

        faq_file = gen_mock_csv_file(tmpdirname, two_faqs)
        vocab_file = gen_mock_vocab_answers(tmpdirname, vocabs)
        configs_file = gen_faq_config(tmpdirname, vocab_file, faq_file)

        answer = select_faq_responses(
            question='Is Enrique the prettiest person in town?',
            faq_model=train_model(configs_file, download=True)
        )
        assert answer == ['Donald Trump']

        
        
def test_faq_response_with_minimum_answers_vocab_success_case():
    '''With an empty answers vocabulary file the model must still be trained
    and return an answer
    '''
    with tempfile.TemporaryDirectory() as tmpdirname:

        faqs = {
            'Question': ['Is Covid erradicated?', 'Who is the current POTUS?'],
            'Answer': ['Definitely not!', 'Donald Trump']
        }

        vocabs = {'text': [], 'freq': []}

        faq_file = gen_mock_csv_file(tmpdirname, faqs)
        vocab_file = gen_mock_vocab_answers(tmpdirname, vocabs)

        configs_file = gen_faq_config(tmpdirname, vocab_file, faq_file)

        # The comparison used to be evaluated and thrown away; without the
        # assert the test could never fail.
        assert select_faq_responses(
            question='Is Enrique the prettiest person in town?',
            faq_model=train_model(configs_file, download=True)
        ) == ['Donald Trump']

# Run the FAQ response tests defined in this cell.
test_faq_response_with_minimum_faqs_in_dataframe_fail_case()
test_faq_response_with_minimum_faqs_in_dataframe_success_case()
test_faq_response_with_minimum_answers_vocab_success_case()

09:09:15 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:09:15 DEBUG:http://files.deeppavlov.ai:80 "GET /faq/mipt/en_mipt_faq_v4.tar.gz.md5 HTTP/1.1" 200 189
09:09:15 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:09:16 DEBUG:http://files.deeppavlov.ai:80 "GET /faq/mipt/en_mipt_faq_v4.tar.gz HTTP/1.1" 200 12276
100%|██████████| 12.3k/12.3k [00:00<00:00, 6.87MB/s]
09:09:17 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:09:18 DEBUG:http://files.deeppavlov.ai:80 "GET /faq/mipt/en_mipt_faq_v4.tar.gz.md5 HTTP/1.1" 200 189
09:09:18 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:09:18 DEBUG:http://files.deeppavlov.ai:80 "GET /faq/mipt/en_mipt_faq_v4.tar.gz HTTP/1.1" 200 12276
100%|██████████| 12.3k/12.3k [00:00<00:00, 5.89MB/s]
09:09:21 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:09:21 DEBUG:http://files.deeppavlov.ai:80 "GET /faq/mipt/en_mipt_faq_v4.tar.gz.md5 HTTP/1.1" 200 189
09:09:21 DEBUG:Sta

In [None]:
#export
def select_squad_responses(
    contexts, squad_model, question, best_results=1
):
    '''Calls Deeppavlov BERT and RNET Context Question Answering

    The model is called once per row of the 'context' column of contexts.
    Returns the tuple (responses, top_responses): every raw model output, and
    the answers (element [0][0] of an output) of the best_results outputs
    with the highest value at position [2][0].
    '''

    def ask(context):
        return squad_model([context], [question])

    responses = contexts.context.apply(ask).values

    logging.debug(f'Responses: {responses}')

    # highest value first
    ranked = sorted(responses, key=lambda response: -response[2][0])
    top_responses = [response[0][0] for response in ranked[:best_results]]

    logging.debug(f'Top Responses: {top_responses}')
    return responses, top_responses

In [None]:
#test select_squad_responses
import tempfile
from shutil import copyfile

# Context fixtures for the squad tests; each one has the 'topic' and 'context'
# columns of a contexts dataframe.

# no context at all
empty = {'topic': [], 'context': []}
# a single, long context
spacex = {
    'topic': ['SpaceX'],
    'context':
        [
            '''Space Exploration Technologies Corp., trading as SpaceX, is an American aerospace manufacturer and space transportation
services company headquartered in Hawthorne, California. It was founded in 2002 by Elon Musk with the goal of reducing space 
transportation costs to enable the colonization of Mars. SpaceX has developed several launch vehicles, the Starlink satellite
constellation, and the Dragon spacecraft. It is widely considered among the most successful private spaceflight companies.'''
        ]
}

# two short contexts that can both answer the same question
intekglobal = {
    'topic': ['Intekglobal', 'InG'],
    'context':
        [
            'Intekglobal has its headquarters located in TJ',
            'Intekglobal is in the north of mexico'
        ]
}


def assert_squad_model(
    contexts, squad_model, question, expected_responses, **args
):
    '''Asserts that the top responses of squad_model for question over the
    given contexts equal expected_responses; extra keyword arguments are
    forwarded to select_squad_responses
    '''
    context_frame = pd.DataFrame(contexts)
    _, top_responses = select_squad_responses(
        contexts=context_frame,
        squad_model=squad_model,
        question=question,
        **args
    )
    assert top_responses == expected_responses


def test_squad_bert():
    '''Checks the answers of the squad_bert model over the three fixtures
    '''
    bert = build_model(configs.squad.squad_bert, download=True)

    # (contexts, question, expected top responses, extra keyword arguments)
    cases = (
        (empty, 'Is an empty response expected?', [], {'best_results': 2}),
        (spacex, 'Who founded SpaceX?', ['Elon Musk'], {}),
        (
            intekglobal, 'Where is Intekglobal located?',
            ['north of mexico', 'TJ'], {'best_results': 2}
        ),
    )
    for contexts, question, expected, options in cases:
        assert_squad_model(
            contexts, bert, question, expected_responses=expected, **options
        )


def test_squad_rnet():
    '''Checks the answers of the squad (rnet) model over the three fixtures
    '''
    rnet = build_model(configs.squad.squad, download=True)

    # (contexts, question, expected top responses, extra keyword arguments)
    cases = (
        (empty, 'Is an empty response expected?', [], {'best_results': 5}),
        (spacex, 'Who founded SpaceX?', ['Elon Musk'], {}),
        (
            intekglobal, 'Where is Intekglobal located?',
            ['north of mexico', 'TJ'], {'best_results': 2}
        ),
    )
    for contexts, question, expected, options in cases:
        assert_squad_model(
            contexts, rnet, question, expected_responses=expected, **options
        )

# Run both squad model tests, then drop the module-level fixtures they used.
test_squad_bert()
test_squad_rnet()
del spacex, empty, intekglobal

09:09:27 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:09:28 DEBUG:http://files.deeppavlov.ai:80 "GET /deeppavlov_data/bert/cased_L-12_H-768_A-12.zip.md5 HTTP/1.1" 200 386
09:09:29 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:09:29 DEBUG:http://files.deeppavlov.ai:80 "GET /deeppavlov_data/squad_bert.tar.gz.md5 HTTP/1.1" 200 184
09:09:46 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_bert/model
09:09:47 DEBUG:Responses: []
09:09:47 DEBUG:Top Responses: []
09:09:49 DEBUG:Responses: [list([['Elon Musk'], [203], [50257280.0]])]
09:09:49 DEBUG:Top Responses: ['Elon Musk']
09:09:50 DEBUG:Responses: [list([['TJ'], [44], [6978.86279296875]])
 list([['north of mexico'], [22], [81567.328125]])]
09:09:50 DEBUG:Top Responses: ['north of mexico', 'TJ']
09:09:50 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:09:51 DEBUG:http://files.deeppavlov.ai:80 "GET /embeddings/wiki-news-300d-1M-char.vec.md5 HTTP/1.1" 200 61
09:09

In [None]:
#export
def load_qa_models(
    config_rnet=configs.squad.squad,
    config_bert=configs.squad.squad_bert,
    config_tfidf=configs.faq.tfidf_logreg_en_faq,
    download=True
):
    ''' Load the squad and faq models

    The two squad models are built with build_model and the faq model is
    trained with train_model; download is forwarded to all three calls.
    Returns {'squad': {'rnet': ..., 'bert': ...}, 'faq': {'tfidf': ...}}.
    '''
    rnet_model = build_model(config_rnet, download=download)
    bert_model = build_model(config_bert, download=download)
    tfidf_model = train_model(config_tfidf, download=download)

    return {
        'squad': {'rnet': rnet_model, 'bert': bert_model},
        'faq': {'tfidf': tfidf_model}
    }


def format_responses(question, responses):
    '''Format question-response pair

    Returns the question followed by ':' and a blank line, then one numbered
    line (starting at 1) per response.
    '''
    numbered_lines = [
        f'{position}: {res}\n'
        for position, res in enumerate(responses, start=1)
    ]
    return f'{question}:\n\n' + ''.join(numbered_lines)


def get_responses(contexts, question, qa_models, nb_squad_results=1):
    ''' Get response from a question using qa_models and contexts

    Args:
        contexts: dataframe with a 'context' column, searched by the squad
            models.
        question: the question to answer.
        qa_models: dictionary with the 'squad' and 'faq' model dictionaries.
        nb_squad_results: number of answers kept per squad model.

    Returns:
        The tuple (responses, formatted): the list of every answer (squad
        answers first, duplicates included) and the text built by
        format_responses from the non-blank answers, each listed once in
        order of first appearance.
    '''
    responses = []
    for squad_model in qa_models['squad'].values():
        _, top_responses = select_squad_responses(
            contexts, squad_model, question, best_results=nb_squad_results
        )
        responses.extend(top_responses)
    for faq_model in qa_models['faq'].values():
        responses.extend(select_faq_responses(faq_model, question))

    # A set would list the answers in an arbitrary order that changes between
    # runs; dict.fromkeys removes duplicates and keeps the ranking.
    unique_responses = list(
        dict.fromkeys(r for r in responses if r.strip())
    )
    return responses, format_responses(question, unique_responses)

In [None]:
# test get_responses
import tempfile
from shutil import copyfile

# Fixtures for the get_responses tests.

# contexts searched by the squad models
intekglobal_context = {
    'topic': ['Intekglobal', 'InG'],
    'context':
        [
            'Intekglobal has its headquarters located in TJ',
            'Intekglobal is in the north of mexico'
        ]
}

# question/answer pairs written to the FAQ csv file
intekglobal_faqs = {
    'Question': ['Is Intekglobal an IT company?', 'Where can I apply?'],
    'Answer':
        ['Yes it is!', 'Please refer the our website for further information']
}


def mock_faq_files(tmpdirname, faqs):
    '''Creates a FAQ csv file and a FAQ config file that points to it, both
    inside tmpdirname; returns their paths under the keys 'data' and 'config'
    '''
    data_path = path.join(tmpdirname, 'temp_faq.csv')
    config_path = path.join(tmpdirname, 'temp_config_faq.json')

    pd.DataFrame(faqs).to_csv(data_path, index=False)
    copyfile(configs.faq.tfidf_logreg_en_faq, config_path)

    updates_faq_config_file(
        configs_path=config_path, dataset_reader={'data_path': data_path}
    )

    return {'data': data_path, 'config': config_path}


def test_get_intekglobal_responses():
    '''Every response to the Intekglobal question must be a known answer
    '''
    known_answers = ('north of mexico', 'TJ', 'Yes it is!')

    with tempfile.TemporaryDirectory() as tmpdirname:

        faq_files = mock_faq_files(tmpdirname, intekglobal_faqs)
        qa_models = load_qa_models(
            config_tfidf=faq_files['config'], download=False
        )

        responses, formatted = get_responses(
            pd.DataFrame(intekglobal_context),
            'Where is Intekglobal?',
            qa_models,
            nb_squad_results=2
        )

        logging.debug(responses)
        logging.debug(formatted)

        for response in responses:
            assert response in known_answers


def test_get_responses_with_empty_context():
    '''Without contexts only the FAQ model contributes a response
    '''
    min_faqs = {
        'Question':
            ['Minimum number of questions?', 'This is the other question?'],
        'Answer': ['Two', 'yes']
    }

    with tempfile.TemporaryDirectory() as tmpdirname:
        faq_files = mock_faq_files(tmpdirname, min_faqs)

        qa_models = load_qa_models(
            config_tfidf=faq_files['config'], download=False
        )
        no_contexts = pd.DataFrame({'topic': [], 'context': []})

        responses, formatted = get_responses(
            no_contexts,
            'What is the minimun number of FAQ questions',
            qa_models,
            nb_squad_results=2
        )

        logging.debug(responses)
        logging.debug(formatted)
        assert responses == ['Two']


# Run the get_responses tests, then drop the context fixture they used.
test_get_intekglobal_responses()
test_get_responses_with_empty_context()

del intekglobal_context

09:10:40 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_model/model
09:10:59 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_bert/model
09:11:04 DEBUG:Responses: [list([['TJ'], [44], [120.95974731445312]])
 list([['north of mexico'], [22], [174602.40625]])]
09:11:04 DEBUG:Top Responses: ['north of mexico', 'TJ']
09:11:07 DEBUG:Responses: [list([['TJ'], [44], [22507.34375]])
 list([['north of mexico'], [22], [269778.3125]])]
09:11:07 DEBUG:Top Responses: ['north of mexico', 'TJ']
09:11:07 DEBUG:['north of mexico', 'TJ', 'north of mexico', 'TJ', 'Yes it is!']
09:11:07 DEBUG:Where is Intekglobal?:

1: north of mexico
2: TJ
3: Yes it is!

09:11:30 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_model/model
09:11:59 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_bert/model
09:12:06 DEBUG:Responses: []
09:12:06 DEBUG:Top Responses: []
09:12:06 DEBUG:Responses: []
09:12:06 DEBUG:Top Responses: []
09:12:06 DE

In [None]:
#export
def get_input(text):
    '''Thin wrapper around input(); this redundancy is needed for testing,
    the tests replace this function with a mock'''
    typed_text = input(text)
    return typed_text


def question_response(data, qa_models, num_returned_values_per_squad_model=1):
    ''' Receive response and call get_response()

    Args:
        data: dictionary whose data['context']['df'] holds the contexts
            dataframe.
        qa_models: models dictionary handed to get_responses.
        num_returned_values_per_squad_model: number of answers requested from
            every squad model.

    Returns:
        The tuple (question, formatted_responses).
    '''
    question = get_input('Introduce question:\n')

    # The argument used to be ignored in favour of a hard-coded 1.
    _, formatted_responses = get_responses(
        data['context']['df'],
        question,
        qa_models,
        nb_squad_results=num_returned_values_per_squad_model
    )

    return question, formatted_responses

In [None]:
## Tests for the FAQ part of the dialog system

In [None]:
import tempfile
from unittest.mock import patch
from shutil import copyfile
from collections import defaultdict

def mock_faq_files(tmpdirname, faqs, faq_dic):
    '''Creates a FAQ csv file and a FAQ config file that points to it, both
    inside tmpdirname, and stores 'path', 'config' and 'df' in faq_dic

    Redefines the two-argument helper of the same name from an earlier test
    cell.
    '''
    csv_path = path.join(tmpdirname, 'temp_faq.csv')
    config_path = path.join(tmpdirname, 'temp_config_faq.json')
    faq_frame = pd.DataFrame(faqs)

    faq_dic.update(path=csv_path, config=config_path, df=faq_frame)
    faq_frame.to_csv(csv_path, index=False)

    copyfile(configs.faq.tfidf_logreg_en_faq, config_path)

    updates_faq_config_file(
        configs_path=config_path, dataset_reader={'data_path': csv_path}
    )


def mock_context_file(tmpdirname, contexts, context_dic):
    '''Writes contexts as 'temp_context.csv' inside tmpdirname and stores
    'path' and 'df' in context_dic
    '''
    csv_path = path.join(tmpdirname, 'temp_context.csv')
    context_frame = pd.DataFrame(contexts)

    context_dic.update(path=csv_path, df=context_frame)
    context_frame.to_csv(csv_path, index=False)


@patch('__main__.get_input')
def test_context_response_with_no_updates(mock_input):
    '''question_response must return the typed question and an answer taken
    from the matching context
    '''
    typed_question = 'Who is Enrique Jimenez?'
    mock_input.side_effect = [typed_question]

    data = {'context': defaultdict(str), 'faq': defaultdict(str)}
    contexts = {
        'context':
            [
                'Intekglobal has its headquarters located in TJ',
                'In Intekglobal we care about you',
                '''Enrique Jimenez is one of the smartest minds on the planet, 
                   he currently works as Intekglobal employee'''
            ],
        'topic': ['headquarters', 'mission', 'Enrique\'s biography']
    }
    faqs = {
        'Question':
            ['Minimum number of questions?', 'This is the other question?'],
        'Answer': ['Two', 'yes']
    }

    with tempfile.TemporaryDirectory() as tmpdirname:

        mock_faq_files(tmpdirname, faqs, data['faq'])
        mock_context_file(tmpdirname, contexts, data['context'])

        qa_models = load_qa_models(
            config_tfidf=data['faq']['config'], download=False
        )

        question, responses = question_response(data, qa_models)

        logging.debug(f'  {question}')
        logging.debug(f'  {responses}')
        assert typed_question == question
        assert 'one of the smartest minds on the planet' in responses

# Run the question_response test defined in this cell.
test_context_response_with_no_updates()

09:12:37 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_model/model
09:13:10 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_bert/model
09:13:20 DEBUG:Responses: [list([['Intekglobal'], [0], [160.03579711914062]])
 list([['Intekglobal'], [3], [244.86245727539062]])
 list([['one of the smartest minds on the planet'], [19], [1664652.0]])]
09:13:20 DEBUG:Top Responses: ['one of the smartest minds on the planet']
09:13:24 DEBUG:Responses: [list([[''], [-1], [0.0011892060283571482]])
 list([[''], [-1], [0.01691678911447525]])
 list([['one of the smartest minds on the planet, \n                   he currently works as Intekglobal employee'], [19], [18812.87109375]])]
09:13:24 DEBUG:Top Responses: ['one of the smartest minds on the planet, \n                   he currently works as Intekglobal employee']
09:13:24 DEBUG:  Who is Enrique Jimenez?
09:13:24 DEBUG:  Who is Enrique Jimenez?:

1: one of the smartest minds on the planet, 
                   

In [None]:
#export
def new_question_answer(data, qa_models):
    ''' Asks for a new question-answer pair; store the result in the
        faq dataframe and retrain the faq-model

    Args:
        data: dictionary whose data['faq'] entry holds the faq dataframe
            ('df'), the path of its csv file ('path') and the path of the
            model config ('config').
        qa_models: models dictionary; qa_models['faq']['tfidf'] is replaced
            by the retrained model.
    '''
    question = get_input('Introduce question:\n')
    answer = get_input('Introduce the answer:\n')

    new_faq = pd.DataFrame({'Question': [question], 'Answer': [answer]})

    # DataFrame.append no longer exists in pandas 2; pd.concat is the
    # equivalent call.
    data['faq']['df'] = pd.concat([data['faq']['df'], new_faq])
    data['faq']['df'].to_csv(data['faq']['path'], index=False)

    qa_models['faq']['tfidf'] = train_model(
        data['faq']['config'], download=False
    )
    logging.info('FAQ dataset and model updated..')

In [None]:
#tests


@patch('__main__.get_input')
def test_new_question_answer(mock_input):
    '''The typed question-answer pair must be stored once in the FAQ csv file
    '''
    question = 'What is Intekglobal?'
    new_answer = 'Intekglobal is one of the best companies in the world'
    mock_input.side_effect = [question, new_answer]

    data = {'context': defaultdict(str), 'faq': defaultdict(str)}
    faqs = {
        'Question': ['Who  owns Tesla Company?', 'Is this is heaven?'],
        'Answer': [
            'Elon Musk is the owner of Tesla', 'No, it is life on earth'
        ]
    }

    with tempfile.TemporaryDirectory() as tmpdirname:

        mock_faq_files(tmpdirname, faqs, data['faq'])
        qa_models = load_qa_models(
            config_tfidf=data['faq']['config'], download=False
        )

        new_question_answer(data, qa_models)

        updated_faq = pd.read_csv(data['faq']['path'])
        rows_with_new_answer = updated_faq[updated_faq['Answer'] == new_answer]

        assert rows_with_new_answer.shape[0] == 1


# Run the new_question_answer test defined in this cell.
test_new_question_answer()

09:13:50 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_model/model
09:14:23 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_bert/model
09:14:35 INFO:FAQ dataset and model updated..


In [None]:
#export 
def new_context(data):
    ''' Stores the new context in the context dataframe

    Asks for a title and a context text, adds them as a new row of
    data['context']['df'] and rewrites the csv file at
    data['context']['path'].
    '''
    topic = get_input('Give context a title:\n')
    context_text = get_input('Introduce the context:\n')

    # Named so that it does not shadow this function.
    new_context_df = pd.DataFrame(
        {
            'topic': [topic],
            'context': [context_text]
        }
    )

    # DataFrame.append no longer exists in pandas 2; pd.concat is the
    # equivalent call.
    data['context']['df'] = pd.concat([data['context']['df'], new_context_df])
    data['context']['df'].to_csv(data['context']['path'], index=False)

    logging.info('contexts dataset updated..')



In [None]:
@patch('__main__.get_input')
def test_new_context(mock_input):
    '''The typed topic and context must be stored once in the contexts csv
    file
    '''
    new_topic = 'AI Tool & Chatbot Development'
    new_context_str = '''

A chatbot is an important tool for simulating intelligent conversations with humans.
Intekglobal chatbots efficiently live message on platforms such as Facebook Messenger, 
Slack, and Telegram. But chatbots are more than just a cool technology advancement.

'''
    mock_input.side_effect = [new_topic, new_context_str]

    data = {'context': defaultdict(str), 'faq': defaultdict(str)}
    contexts = {
        'context':
            [
                '''One of the greatest punk rock bands from all the time
                is the Ramones.
                '''
            ],
        'topic': ['Ramones']
    }

    with tempfile.TemporaryDirectory() as tmpdirname:

        logging.debug(str(new_context))
        mock_context_file(tmpdirname, contexts, data['context'])
        new_context(data)

        updated_contexts = pd.read_csv(data['context']['path'])
        rows_with_new_topic = updated_contexts[
            updated_contexts.topic == new_topic
        ]

        assert rows_with_new_topic.shape[0] == 1


# Run the new_context test defined in this cell.
test_new_context()

09:14:35 DEBUG:<function new_context at 0x7f5d18456680>
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,
09:14:35 INFO:contexts dataset updated..


In [None]:
#export
def set_minimal_faq_questions(data):
    ''' Sets the faq configurations that assure a proper operation

    When data['df'] holds fewer than two rows, two automated question-answer
    pairs are added to it and the result is written to the csv file at
    data['path']; otherwise nothing is changed.
    '''
    nb_questions = data['df'].shape[0]
    if nb_questions <= 1:
        minimal_faqs = [
            (
                'Is this the Intekglobal Dialog System?',
                'This is the Intekglobal Dialog System'
            ),
            (
                'What is the purpose of these two automated questions?',
                'To populate the FAQ data file'
            ),
        ]
        minimal_faqs_df = pd.DataFrame(
            minimal_faqs, columns=['Question', 'Answer']
        )

        data['df'] = pd.concat([data['df'], minimal_faqs_df])
        data['df'].to_csv(data['path'], index=False)

        csv_path = data['path']
        logging.info(f' File created at {csv_path}')


def set_minimal_contexts(data):
    ''' Sets the context configurations that assure a proper operation

    When data['df'] holds no rows it is replaced by an empty dataframe with
    the columns 'topic' and 'context', which is written to the csv file at
    data['path']; otherwise nothing is changed.
    '''
    has_contexts = data['df'].shape[0] > 0
    if not has_contexts:
        data['df'] = pd.DataFrame({'topic': [], 'context': []})
        data['df'].to_csv(data['path'], index=False)

        csv_path = data['path']
        logging.info(f' File created at {csv_path}')


def set_data_dict(file, data, question_type, data_dir):
    '''Fills data with the path and the dataframe of a data file, creating
    the file when it does not exist yet

    file is used as the path when given; otherwise the path is
    '<question_type>_data.csv' inside data_dir. The dataframe is read from
    that path when the file exists and is empty otherwise. The minimal
    content for the 'faq' or 'context' question type is then set.
    '''
    if file is not None:
        data['path'] = file
    else:
        data['path'] = path.join(data_dir, question_type + '_data.csv')

    if path.isfile(data['path']):
        data['df'] = pd.read_csv(data['path'])
    else:
        data['df'] = pd.DataFrame()

    if question_type == 'faq':
        set_minimal_faq_questions(data)
    elif question_type == 'context':
        set_minimal_contexts(data)


def load_and_prepare_data(context_data_file, faq_data_file, data, configs_faq):
    '''Calls the context and faq configuration routines

    Args:
        context_data_file: path of the contexts CSV, or None to use
            'context_data.csv' inside the 'data' directory under the current
            working directory.
        faq_data_file: path of the FAQ CSV, or None to use 'faq_data.csv'
            inside that same 'data' directory.
        data: dict with the sub-dicts 'faq' and 'context'; both are filled in
            place by set_data_dict and data['faq']['config'] is set.
        configs_faq: FAQ model configuration to use, or None to fall back to
            configs.faq.tfidf_logreg_en_faq.

    Returns:
        None
    '''

    # `pwd` prints the working directory on stdout. A bare `$PWD` would make
    # the shell try to execute the directory itself and print nothing.
    stream = popen('pwd')
    try:
        PARENT_DIR = stream.read().strip()
    finally:
        stream.close()

    # Bound unconditionally: both set_data_dict calls below receive it, so it
    # must exist even when only one of the two files is missing.
    DATA_DIR = path.join(PARENT_DIR, 'data')

    # The directory is only needed when at least one default path is used.
    if faq_data_file is None or context_data_file is None:
        if not path.isdir(DATA_DIR):
            mkdir(DATA_DIR)
            logging.info(f'Data directory created at {DATA_DIR}')

    if configs_faq is None:
        configs_faq = configs.faq.tfidf_logreg_en_faq

    data['faq']['config'] = configs_faq

    set_data_dict(
        file=faq_data_file,
        data=data['faq'],
        question_type='faq',
        data_dir=DATA_DIR
    )
    set_data_dict(
        file=context_data_file,
        data=data['context'],
        question_type='context',
        data_dir=DATA_DIR
    )

    # Point the FAQ config's dataset reader at the resolved FAQ data file
    updates_faq_config_file(
        configs_path=configs_faq,
        dataset_reader={'data_path': data['faq']['path']}
    )

In [None]:
#tests
import tempfile,logging
import pandas as pd
from collections import defaultdict
from shutil import rmtree
from os import path,popen
from unittest.mock import patch
#from let_me_answer_for_you.settings import *


def test_set_minimal_faqs_with_more_than_one_question():
    '''A FAQ table that already holds two rows must keep exactly two rows'''
    with tempfile.TemporaryDirectory() as workdir:
        csv_path = path.join(workdir, 'tmp_data.csv')
        faq_table = pd.DataFrame(
            {
                'Question': ['a?', 'b?'],
                'Answer': ['a', 'b']
            }
        )
        faq_table.to_csv(csv_path, index=False)

        data = dict(df=faq_table, path=csv_path)
        set_minimal_faq_questions(data)

        n_rows = data['df'].shape[0]
        assert n_rows == 2


def test_set_minimal_faqs_with_less_than_two_questions():
    '''A one-row FAQ table must grow to three rows with the default questions'''
    default_question = 'Is this the Intekglobal Dialog System?'

    with tempfile.TemporaryDirectory() as workdir:
        csv_path = path.join(workdir, 'tmp_data.csv')
        single_faq = pd.DataFrame({'Question': ['a?'], 'Answer': ['a']})
        single_faq.to_csv(csv_path, index=False)
        data = dict(df=single_faq, path=csv_path)

        rows_before = data['df'].shape[0]
        assert rows_before == 1

        set_minimal_faq_questions(data)

        rows_after = data['df'].shape[0]
        assert rows_after == 3
        # at least one row must carry the first default question
        assert any(data['df'].Question == default_question)


def test_set_minimal_contexts():
    '''An empty contexts table must be saved with the topic/context columns'''
    expected_columns = ['topic', 'context']

    with tempfile.TemporaryDirectory() as workdir:
        data = dict(
            df=pd.DataFrame(),
            path=path.join(workdir, 'tmp_data.csv')
        )
        set_minimal_contexts(data)

        assert path.isfile(data['path'])
        assert all(data['df'].columns == expected_columns)


def test_set_data_dict_no_file():
    '''Without an explicit file, the contexts file must be created in data_dir'''
    with tempfile.TemporaryDirectory() as workdir:
        context_data = defaultdict(str)
        data = {'context': context_data}

        set_data_dict(
            file=None,
            data=context_data,
            question_type='context',
            data_dir=workdir
        )

        logging.debug(data)
        created_file = context_data['path']
        assert path.isfile(created_file)


@patch('__main__.popen')
def test_load_and_prepare_data(mock_popen):
    '''With no data files given, a 'data' directory must be created

    popen is patched so that the parent directory read by
    load_and_prepare_data is a temporary directory.
    '''
    with tempfile.TemporaryDirectory() as workdir:
        # popen(...).read().strip() yields the temporary directory, once
        mock_popen("$PWD").read().strip.side_effect = [workdir]
        expected_data_dir = path.join(workdir, 'data')
        data = dict(context=defaultdict(str), faq=defaultdict(str))

        load_and_prepare_data(
            context_data_file=None,
            faq_data_file=None,
            data=data,
            configs_faq=None
        )

        assert path.isdir(expected_data_dir)


# Run every check defined in this cell, in definition order
for run_check in (
    test_set_minimal_faqs_with_more_than_one_question,
    test_set_minimal_faqs_with_less_than_two_questions,
    test_set_minimal_contexts,
    test_set_data_dict_no_file,
    test_load_and_prepare_data,
):
    run_check()

09:14:35 INFO: File created at /tmp/tmpe1fzbn5r/tmp_data.csv
09:14:35 INFO: File created at /tmp/tmpi4mgn_tl/tmp_data.csv
09:14:35 INFO: File created at /tmp/tmp006qpt5n/context_data.csv
09:14:35 DEBUG:{'context': defaultdict(<class 'str'>, {'path': '/tmp/tmp006qpt5n/context_data.csv', 'df': Empty DataFrame
Columns: [topic, context]
Index: []})}
09:14:35 INFO:Data directory created at /tmp/tmpn3sct0bz/data
09:14:35 INFO: File created at /tmp/tmpn3sct0bz/data/faq_data.csv
09:14:35 INFO: File created at /tmp/tmpn3sct0bz/data/context_data.csv
