In [1]:
from tensorflow.keras.models import model_from_json
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

In [4]:
# Rebuild the model architecture from its JSON description. The context
# manager releases the file handle even if reading the file raises.
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
# Restore the saved weights into the freshly built architecture.
loaded_model.load_weights("model.h5")
print("Loaded model from disk")

Loaded model from disk


In [5]:
# Load the tokenizer object that was pickled to tokenizer.pkl.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load files that come from a trusted source.
with open('tokenizer.pkl', 'rb') as token_file:
    loaded_tokenizer = pickle.load(token_file)

In [6]:
# Length passed as `maxlen` to pad_sequences when preparing model input.
# presumably this must match the sequence length used at training time — TODO confirm
MAX_SEQUENCE_LENGTH = 125

In [7]:
# Load the reverse mapping dictionary from the pickle file.
# It is indexed below by predicted class index to obtain a topic label.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load files that come from a trusted source.
with open('reverse_mapping.pkl', 'rb') as file:
    loaded_reverse_mapping = pickle.load(file)

In [9]:
# Classify one free-text question and report the most probable topics.
TOP_K = 4  # how many of the highest-probability topics to print

new_complaint = ['why should i screen for diabetes']
seq = loaded_tokenizer.texts_to_sequences(new_complaint)
padded = pad_sequences(seq, maxlen=MAX_SEQUENCE_LENGTH)
# Only one text was submitted, so keep the first row of the prediction output.
pred = loaded_model.predict(padded)[0]
# argsort is ascending; reverse it so the most probable topics come first.
top_topic_indices = np.argsort(pred)[::-1][:TOP_K]
# Print the top predicted topics and their probabilities
for idx in top_topic_indices:
    topic_label = loaded_reverse_mapping[idx]
    probability = pred[idx]
    print("Topic:", topic_label, "Probability:", probability)

Topic: how_can_i_prevent_diabetes Probability: 0.35229325
Topic: why_should_i_get_screened_for_diabetes Probability: 0.07603625
Topic: what_is_involved_in_screening_for_diabetes Probability: 0.07055103
Topic: how_does_vaccination_prevent_diabetes_complications Probability: 0.06846282


In [10]:
# Named `sample_question` rather than `text` so that re-running this cell can
# never clobber the `text` function imported from sqlalchemy further down.
sample_question = "Why should I screen for diabetes?"
[sample_question]

['Why should I screen for diabetes?']

In [26]:
from sqlalchemy import create_engine, text
from dotenv import load_dotenv

from get_postgres_str import get_postgres_str

## Build the Postgres connection string via the project-local helper
## (presumably it carries username, password, and database name — TODO confirm)
postgres_str = get_postgres_str()


## Create the SQLAlchemy engine for that connection string (echo=False: no SQL logging)
engine = create_engine(postgres_str, echo=False)

In [50]:
# Insert one hand-written test row into public.message.
# The values are attached as bound parameters instead of being written into
# the SQL text, so the statement stays safe if they are later replaced by
# variables; `q` is still a TextClause and can be passed to conn.execute(q).
q = text('''
INSERT INTO public.message(
	from_number, received_text, translated_text, text_to_classify, language, intent, confidence, response, numbered_intents)
	VALUES (:from_number, :received_text, :translated_text, :text_to_classify, :language, :intent, :confidence, :response, :numbered_intents);
''').bindparams(
    from_number='7204001070',
    received_text='whatever',
    translated_text='',
    text_to_classify='whatever',
    language='en',
    intent='irrelevent',
    confidence=0.2,
    response='response',
    numbered_intents='1.abc 2. ahsjdva 3.',
)

In [51]:
# Display the statement object to confirm it was built (shows the TextClause repr).
q

<sqlalchemy.sql.elements.TextClause object at 0x11628b3d0>

In [52]:
# Re-create the engine with the same settings and open the connection that the
# following cells execute and commit on.
# NOTE(review): this connection is not closed in this cell.
engine = create_engine(postgres_str, echo=False)
conn = engine.connect()

In [53]:
# Run the statement held in `q` on the open connection.
# presumably the change is only persisted once conn.commit() is called — TODO confirm
conn.execute(q)

<sqlalchemy.engine.cursor.CursorResult at 0x1164d54e0>

In [25]:
# Commit the pending work on the connection.
# NOTE(review): this cell's execution count (In [25]) is lower than the cells
# above it, so it was run out of order; verify with Restart & Run All.
conn.commit()

In [90]:
# Look up the most recent message row (highest id) for one sender.
from_number = '17204001070'
# The phone number is passed as a bound parameter rather than interpolated
# into the SQL string, so a crafted value cannot alter the statement.
q = text('''
select language, numbered_intents from public.message where from_number = :from_number order by id desc limit 1;
''').bindparams(from_number=from_number)
engine = create_engine(postgres_str, echo=False)
conn = engine.connect()


In [91]:
# Menu option to resolve; it is converted to a string and used as the key
# into the decoded numbered_intents JSON further down.
option_num = 2

In [99]:
# Peek at the second selected column (numbered_intents) of the first returned row.
conn.execute(q).fetchall()[0][1]

'{"1": "how_do_i_make_time_for_getting_active", "2": "what_are_ways_to_be_more_active", "3": "what_are_the_benefits_of_being_active", "4": "how_can_i_be_motivated_to_stay_active"}'

In [100]:
# Run the lookup once and read both columns from the same row; running it
# twice costs an extra round trip and could observe two different rows if a
# new message arrives in between.
try:
    latest_row = conn.execute(q).first()
finally:
    # Release the connection even when the query itself fails.
    conn.close()
if latest_row is None:
    # Same exception type the original `[0]` indexing raised, with a clear message.
    raise IndexError("no message row found for the requested from_number")
language, options = latest_row[0], latest_row[1]

In [101]:
import json
# `options` holds a JSON object string; decode it and look up the chosen
# option. The key is the option number as a string, hence str(option_num).
json.loads(options)[str(option_num)]

'what_are_ways_to_be_more_active'

In [69]:
import re

def extract_text_until_next_integer(input_string, search_integer):
    """Return the text that follows `search_integer` up to the next digit.

    The integer must appear as a standalone number: it is not matched when it
    is part of a longer run of digits (looking for 2 will not match inside
    "12" or "20").

    Args:
        input_string: Text to search, e.g. '1.abc 2. ahsjdva 3.'.
        search_integer: The number (int or str) that marks where to start.

    Returns:
        The whitespace-stripped text between the first standalone occurrence
        of `search_integer` and the next digit (or the end of the string), or
        None when `search_integer` does not occur as a standalone number.
    """
    # Escape the marker so it is always treated literally, and use negative
    # lookarounds so neighbouring digits disqualify a candidate position.
    marker = re.escape(str(search_integer))
    pattern = rf'(?<!\d){marker}(?!\d)(.*?)(?=\d|$)'
    match = re.search(pattern, input_string)

    if match is None:
        return None
    return match.group(1).strip()


# Try the helper: pull out the text that follows "2" in var_a.
# NOTE(review): `var_a` is not defined in any visible cell, so this raises
# NameError on a fresh kernel — define it explicitly before this call.
result = extract_text_until_next_integer(var_a, 2)
print(result)

None


In [104]:
import pandas as pd
# Intent label used for the exact-match lookup against df['intent'] below.
intent = 'what_are_the_benefits_of_being_active'
# Read the responses workbook into a DataFrame using the openpyxl engine.
df = pd.read_excel('responses.xlsx',engine='openpyxl')

In [117]:
from fuzzywuzzy import fuzz

In [118]:
# Partial intent label that every row's intent is compared against.
target_value = 'what_are_the_benefits_of_being_'


def similarity_score(row):
    """Return fuzz.ratio between the row's 'intent' value and the module-level target_value."""
    candidate_intent = row['intent']
    return fuzz.ratio(candidate_intent, target_value)

In [121]:
# Work on a copy so the scoring column never ends up on df itself.
temp_df = df.copy()
temp_df['Similarity'] = temp_df.apply(similarity_score, axis=1)

# Keep the first row whose score equals the best score found.
best_score = temp_df['Similarity'].max()
is_best_match = temp_df['Similarity'] == best_score
filtered_row = temp_df.loc[is_best_match].iloc[0]
filtered_row

dialog                            What are the benefits of being active?
response               There are two main benefits to being more acti...
response_1             <a href="https://www.tepeyachealth.org/new-pat...
related_intent_1                        What are ways to be more active?
related_intent_2          What are some more benefits of getting active?
related_intent_3                  How do I make time for getting active?
related_intent_4                                 What is a healthy diet?
language                                                              en
intent                             what_are_the_benefits_of_being_active
dialog_es                    ¿Cuáles son los beneficios de estar activo?
response_es            Hay dos beneficios principales en ser más acti...
response_1_es          <a href="https://es.tepeyachealth.org/new-pati...
related_intent_1_es          ¿Cuáles son los beneficios de estar activo?
related_intent_2_es               ¿Cómo hacer un ha

In [115]:
# Exact-match lookup: take the first row whose intent equals `intent`.
intent_matches = df.loc[df['intent'] == intent]
filtered_row = intent_matches.iloc[0]
filtered_row

dialog                            What are the benefits of being active?
response               There are two main benefits to being more acti...
response_1             <a href="https://www.tepeyachealth.org/new-pat...
related_intent_1                        What are ways to be more active?
related_intent_2          What are some more benefits of getting active?
related_intent_3                  How do I make time for getting active?
related_intent_4                                 What is a healthy diet?
language                                                              en
intent                             what_are_the_benefits_of_being_active
dialog_es                    ¿Cuáles son los beneficios de estar activo?
response_es            Hay dos beneficios principales en ser más acti...
response_1_es          <a href="https://es.tepeyachealth.org/new-pati...
related_intent_1_es          ¿Cuáles son los beneficios de estar activo?
related_intent_2_es               ¿Cómo hacer un ha

In [107]:
# Collect the non-null related-intent dialogs from the selected row, in column order.
related_intent_columns = [
    'related_intent_1',
    'related_intent_2',
    'related_intent_3',
    'related_intent_4',
]
related_intents = [
    value
    for value in filtered_row[related_intent_columns]
    if pd.notnull(value)
]
related_intents

['What are ways to be more active?',
 'What are some more benefits of getting active?',
 'How do I make time for getting active?',
 'What is a healthy diet?']

In [113]:
# Translate each related dialog text back to the intent label of the first
# row in df whose 'dialog' matches it.
related_intent_labels = [
    df.loc[df['dialog'] == dialog_text, 'intent'].values[0]
    for dialog_text in related_intents
]
related_intent_labels

['what_are_ways_to_be_more_active',
 'what_are_some_more_benefits_of_getting_active',
 'how_do_i_make_time_for_getting_active',
 'what_is_a_healthy_diet']