In [1]:
import os
import re
from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd
import pymongo
import seaborn as sb

from utils import flatten_lol,remove_multi_space

## CONNECT DATABASE

In [2]:
# Default to a local MongoDB instance, but keep MONGOLAB_URI untouched when the
# environment already provides one (setdefault never overwrites an existing value).
os.environ.setdefault("MONGOLAB_URI", 'mongodb://localhost:27017')
client = pymongo.MongoClient(os.environ.get('MONGOLAB_URI'))
database = client.vinbrain
collection = database.disease

## CHECK QUERY

In [3]:
# Sanity check: fetch every record whose "Disease" field is "AIDS" and print it.
query = collection.find({"Disease" : "AIDS"})
for item in query:
    print(item)

{'_id': ObjectId('606c0a179dadc241387f6792'), 'Disease': 'AIDS', 'Symptom': ['muscle_wasting', 'extra_marital_contacts', 'high_fever', 'patches_in_throat']}


## CONVERT CONSTRAINT TO QUERY STRING

Assume we have a semantic frame extracted from the user action. <br/>

The semantics of a user action are represented as a dictionary: <br/>
{
"intent":<>,
"inform_slots": <>,
"request_slots": <>
} <br/>
type of "intent" field is string <br/>
type of "inform_slots" field is a dict mapping each slot name to a list of strings <br/>
type of "request_slots" field is a dict mapping each slot name to a list of strings


In [4]:
# Example semantic frame: the user informs a (partial) symptom name.
user_action = {
    'intent': 'inform',
    'request_slots': {},
    'inform_slots': {'Symptom': ['muscle']},
}

In [5]:
def convert_constraint(constraints):
    """
    Convert the informed entities of each slot into a MongoDB query.

    Args:
        constraints: dict of the form {entity_slot: [entity_mess, ...]}.
            A slot whose value is not a list is ignored.

    Returns:
        dict: ``{}`` when there is nothing to constrain on, otherwise
        ``{"$and": [{"$and": [{"$or": [{entity_slot: {"$all": [regex]}}]}, ...]}]}``
        with one ``$or`` clause per informed value. Each ``regex`` is a
        compiled pattern that matches any string containing ``entity_mess``
        as a literal substring.
    """
    list_and_in = []

    for slot, values in constraints.items():
        if not isinstance(values, list):
            continue
        for value in values:
            # Escape the user-provided text: it is meant literally, and an
            # unescaped "(" or "." would either raise re.error or silently
            # match something other than what the user typed.
            literal = re.escape(str(value))
            list_and_in.append({
                "$or": [
                    {slot: {"$all": [re.compile(".*{0}.*".format(literal))]}}
                ]
            })

    if not list_and_in:
        return {}

    # The nested "$and" wrappers are kept so the query shape stays the same
    # as before for any code that inspects it.
    return {"$and": [{"$and": list_and_in}]}

In [6]:
# Turn the informed slots into a MongoDB filter and materialise every hit.
constraints = user_action['inform_slots']
query_string = convert_constraint(constraints)

query_result = collection.find(query_string)
list_record_query = list(query_result)

In [7]:
# Report how many records were returned for the current constraints.
print('Amount records match: {}'.format(len(list_record_query)))

Amount records match: 7


## LOGIC SUGGEST

There are two suggestion strategies: <br/>
    + correct the symptom's name <br/>
    + rank symptoms by how many times they appear <br/>

Only suggest a symptom that is not already in the list of symptoms informed by the user.

In [8]:
def check_pattern(dis_list,inp):
    """
    Return the entries of ``dis_list`` that contain ``inp``.

    Args:
        dis_list: iterable of strings to search (e.g. symptom names).
        inp: text typed by the user; it is matched literally, so characters
            such as "(" or "." have no special meaning.

    Returns:
        list: the matching entries, in their original order.
    """
    # A plain substring test is what the unanchored regex search was doing,
    # without treating regex metacharacters in the user's text as syntax.
    return [item for item in dis_list if inp in item]

In [9]:
# Quick manual check of check_pattern on a hand-written symptom list.
# Note that every entry of dis_list starts with a space.
inp = 'muscle'
dis_list = [' high_fever', ' muscle_pain', ' chills', ' headache', ' fatigue', ' vomiting', ' malaise', ' nausea', ' chest_pain', ' swelled_lymph_nodes', ' runny_nose', ' continuous_sneezing', ' back_pain', ' red_spots_over_body', ' loss_of_appetite', ' pain_behind_the_eyes', ' skin_rash', ' diarrhoea', ' sinus_pressure', ' redness_of_eyes', ' throat_irritation', ' loss_of_smell', ' sweating', ' joint_pain', ' cough', ' extra_marital_contacts', ' muscle_wasting', ' congestion', ' phlegm', ' patches_in_throat']
check_pattern(dis_list,inp)

[' muscle_pain', ' muscle_wasting']

In [10]:
# Display the current user action.
user_action

{'intent': 'inform',
 'request_slots': {},
 'inform_slots': {'Symptom': ['muscle']}}

In [11]:
# last_sympt_user_inform

In [21]:
# Pool the symptoms of every matching record. Duplicates are kept on purpose:
# they are what the frequency count is computed from.
list_statistic_sympt = []
for record in list_record_query:
    list_statistic_sympt += record.get('Symptom', [])

## logic correct sympt's name
last_sympt_user_inform = user_action['inform_slots']['Symptom'][-1]
# dict.fromkeys de-duplicates while keeping first-seen order. list(set(...))
# yields an order that changes between kernel restarts (string hash
# randomisation), which made the tie order of the suggestions irreproducible.
list_unique_sympt_query = list(dict.fromkeys(list_statistic_sympt))

list_correct_sympt_name = check_pattern(list_unique_sympt_query,last_sympt_user_inform)

## logic max appear

def freq_sympt_appear(list_statistic_sympt,list_unique_sympt_query):
    """
    Count how often each requested symptom occurs and rank by frequency.

    Args:
        list_statistic_sympt: list of symptoms, duplicates included.
        list_unique_sympt_query: the symptoms to report on.

    Returns:
        dict: {symptom: count}, ordered by descending count. Symptoms with
        equal counts keep the order they have in ``list_unique_sympt_query``;
        a symptom absent from ``list_statistic_sympt`` gets a count of 0.
    """
    # Tally once instead of calling list.count() for every symptom.
    counts = Counter(list_statistic_sympt)
    dict_count_sympt = {sympt: counts[sympt] for sympt in list_unique_sympt_query}

    # sorted() is stable, also with reverse=True, so ties keep their input order.
    return dict(sorted(dict_count_sympt.items(), key=lambda item: item[1], reverse=True))

In [22]:
# Frequencies restricted to the symptoms returned by check_pattern.
dict_sympt_correct_name_appear = freq_sympt_appear(list_statistic_sympt,list_correct_sympt_name)

# Frequencies of every distinct symptom collected from the matching records.
dict_all_sympt_appear = freq_sympt_appear(list_statistic_sympt,list_unique_sympt_query)

In [25]:
# Display the frequency ranking of the name-matched symptoms.
dict_sympt_correct_name_appear

{'muscle_pain': 4, 'muscle_weakness': 2, 'muscle_wasting': 1}

In [28]:
# Display the frequency ranking of all collected symptoms.
dict_all_sympt_appear

{'high_fever': 4,
 'muscle_pain': 4,
 'headache': 3,
 'chills': 3,
 'fatigue': 3,
 'nausea': 3,
 'diarrhoea': 3,
 'vomiting': 3,
 'sweating': 2,
 'muscle_weakness': 2,
 'loss_of_appetite': 2,
 'joint_pain': 2,
 'malaise': 2,
 'back_pain': 1,
 'mild_fever': 1,
 'restlessness': 1,
 'swelling_joints': 1,
 'stiff_neck': 1,
 'congestion': 1,
 'dark_urine': 1,
 'excessive_hunger': 1,
 'patches_in_throat': 1,
 'phlegm': 1,
 'extra_marital_contacts': 1,
 'yellowish_skin': 1,
 'red_spots_over_body': 1,
 'sinus_pressure': 1,
 'painful_walking': 1,
 'weight_loss': 1,
 'redness_of_eyes': 1,
 'loss_of_smell': 1,
 'abdominal_pain': 1,
 'muscle_wasting': 1,
 'continuous_sneezing': 1,
 'runny_nose': 1,
 'abnormal_menstruation': 1,
 'throat_irritation': 1,
 'skin_rash': 1,
 'cough': 1,
 'fast_heart_rate': 1,
 'mood_swings': 1,
 'swelled_lymph_nodes': 1,
 'pain_behind_the_eyes': 1,
 'yellowing_of_eyes': 1,
 'irritability': 1,
 'movement_stiffness': 1,
 'chest_pain': 1}

In [10]:
# Build the ordered list of symptoms to suggest: walk the frequency ranking
# and keep every symptom that none of the user's informed strings is part of.
# (This cell used to be commented out, which left list_sympt_suggest undefined
# for the cells that copy and pop from it.)
list_sympt_user_informed = user_action['inform_slots'].get('Symptom', [])
list_sympt_suggest = [
    sympt
    for sympt in dict_all_sympt_appear
    if not any(informed in sympt for informed in list_sympt_user_informed)
]

In [11]:
# Keep an untouched shallow copy: the agent-action cell pops from list_sympt_suggest.
list_sympt_suggest_backup = list_sympt_suggest.copy()

## GEN AGENT ACTION


The semantics of an agent action are represented as a dictionary: <br/>
{
"intent":<>,
"inform_slots": <>,
"request_slots": <>
} <br/>
type of "intent" field is string <br/>
type of "inform_slots" field is a dict mapping each slot name to a list of strings <br/>
type of "request_slots" field is a dict mapping each slot name to a list of strings

In [12]:
# The intent is fixed to 'inform' for now; the other branch is kept for
# whenever another intent is plugged in.
agent_action = {'intent': 'inform'}

if agent_action['intent'] != 'inform':
    # Nothing to inform: ask the user for a symptom instead.
    _UNK = 'unknown'
    agent_action.update(inform_slots={}, request_slots={'Symptom' : [_UNK]})
else:
    # Suggest the best-ranked remaining symptom (consumes it from the list).
    _PLACE_HOLDER = list_sympt_suggest.pop(0)
    agent_action.update(inform_slots={'Symptom' : [_PLACE_HOLDER]}, request_slots={})

In [13]:
# Display the generated agent action.
agent_action

{'intent': 'inform',
 'inform_slots': {'Symptom': ['high_fever']},
 'request_slots': {}}

## CHATBOT SCRIPT

Assume that whenever the agent asks about or suggests a slot, the user is willing to confirm/answer it. <br/>

So, update the user action from the agent action.

In [14]:
# Pick the first value of the first slot the agent talked about.
# When both slot dicts are non-empty, the requested slot takes precedence.
if agent_action['inform_slots']:
    current_slot = next(iter(agent_action['inform_slots'].values()))[0]

if agent_action['request_slots']:
    current_slot = next(iter(agent_action['request_slots'].values()))[0]
    

In [15]:
# Display the user action before the agent's slot is merged in.
user_action

{'intent': 'inform',
 'request_slots': {},
 'inform_slots': {'Symptom': ['muscle']}}

In [16]:
# Add the agent's slot value to the user's informed symptoms, unless it is
# already there (mutates user_action in place).
if current_slot not in user_action['inform_slots']['Symptom']:
    user_action['inform_slots']['Symptom'].append(current_slot)

In [17]:
# Display the user action after the merge.
user_action

{'intent': 'inform',
 'request_slots': {},
 'inform_slots': {'Symptom': ['muscle', 'high_fever']}}

In [18]:
# Query again with the updated informed slots and materialise every hit.
constraints = user_action['inform_slots']
query_string = convert_constraint(constraints)

query_result = collection.find(query_string)
list_record_query = list(query_result)

In [19]:
# Display the records returned by the updated query.
list_record_query

[{'_id': ObjectId('606c0a179dadc241387f6792'),
  'Disease': 'AIDS',
  'Symptom': ['muscle_wasting',
   'extra_marital_contacts',
   'high_fever',
   'patches_in_throat']},
 {'_id': ObjectId('606c0a179dadc241387f679b'),
  'Disease': 'Dengue',
  'Symptom': ['headache',
   'chills',
   'fatigue',
   'loss_of_appetite',
   'high_fever',
   'muscle_pain',
   'red_spots_over_body',
   'pain_behind_the_eyes',
   'malaise',
   'joint_pain',
   'back_pain',
   'vomiting',
   'skin_rash',
   'nausea']},
 {'_id': ObjectId('606c0a179dadc241387f679c'),
  'Disease': 'Common Cold',
  'Symptom': ['runny_nose',
   'continuous_sneezing',
   'fatigue',
   'chills',
   'throat_irritation',
   'chest_pain',
   'high_fever',
   'malaise',
   'sinus_pressure',
   'congestion',
   'muscle_pain',
   'redness_of_eyes',
   'swelled_lymph_nodes',
   'loss_of_smell',
   'cough',
   'headache',
   'phlegm']},
 {'_id': ObjectId('606c0a179dadc241387f67a9'),
  'Disease': 'Malaria',
  'Symptom': ['chills',
   'sweating

In [20]:
# Report how many records were returned for the updated constraints.
print('Amount records match: {}'.format(len(list_record_query)))

Amount records match: 4


In [21]:
# def check_pattern(dis_list,inp):
#     pred_list=[]
#     patt = "^" + inp + "$"
#     regexp = re.compile(inp)
#     for item in dis_list:
#         if regexp.search(item):
#             pred_list.append(item)

In [22]:
def check_available(list_sympt_query,list_user_inform):
    """
    Return the first queried symptom the user has not informed yet.

    A symptom counts as already informed when it and one of the user's
    strings contain one another (literal substring, in either direction), so
    a partial mention such as 'muscle' covers ' muscle_pain'.

    Args:
        list_sympt_query: candidate symptoms, in priority order.
        list_user_inform: strings the user has already informed.

    Returns:
        The first candidate not covered by any user string, or None when
        every candidate is covered. With an empty ``list_user_inform`` the
        first candidate is returned.
    """
    for item in list_sympt_query:
        # The candidate must be checked against ALL informs before it can be
        # called new; bailing out on the first non-matching inform returned
        # symptoms the user had already mentioned.
        already_informed = any(
            inform in item or item in inform for inform in list_user_inform
        )
        if not already_informed:
            return item
    return None
            

In [23]:
# Hand-written inputs for trying out check_available.
# Note: 'muscle' has no leading space, while ' high_fever' and every queried symptom do.
list_user_inform = ['muscle', ' high_fever']
list_sympt_query = [' high_fever', ' muscle_pain', ' chills', ' headache', ' fatigue', ' vomiting', ' malaise', ' nausea', ' chest_pain', ' swelled_lymph_nodes', ' runny_nose', ' continuous_sneezing', ' back_pain', ' red_spots_over_body', ' loss_of_appetite', ' pain_behind_the_eyes', ' skin_rash', ' diarrhoea', ' sinus_pressure', ' redness_of_eyes', ' throat_irritation', ' loss_of_smell', ' sweating', ' joint_pain', ' cough', ' extra_marital_contacts', ' muscle_wasting', ' congestion', ' phlegm', ' patches_in_throat']

In [29]:
# check_available(list_sympt_query,list_user_inform)