In [22]:
import ast
from os import environ as env

import numpy as np
import openai
import pandas as pd
from authlib.integrations.flask_oauth2 import ResourceProtector
from dotenv import load_dotenv, find_dotenv
from flask import Flask, jsonify, request
from flask_cors import CORS, cross_origin
from flask_sqlalchemy import SQLAlchemy
from jose import jwt
from openai.embeddings_utils import get_embedding, cosine_similarity

from middleware.validator import Auth0JWTBearerTokenValidator

In [24]:
# Load Environment Variables
# find_dotenv() returns a falsy value when no .env file is found; in that case
# only the variables already present in the process environment are used.
ENV_FILE = find_dotenv()
if ENV_FILE:
    load_dotenv(ENV_FILE)
# Read the settings only after the .env file (if any) has been loaded.
# env.get returns None for a variable that is not set.
AUTH0_DOMAIN = env.get("AUTH0_DOMAIN")
API_IDENTIFIER = env.get("API_IDENTIFIER")
ALGORITHMS = ["RS256"]  # not referenced elsewhere in the visible cells
openai.api_key = env.get("OPENAI_API_KEY")

In [25]:
# Create Application
app = Flask(__name__)
# CORS is enabled for every route ("/*") and every origin ("*").
# NOTE(review): a wildcard origin is convenient for local development; confirm
# it is intended before deploying.
cors = CORS(app, resources={r"/*": {"origins": "*"}})

In [26]:
# Middleware
# `require_auth` is the decorator applied to protected routes; incoming tokens
# are checked by the project's Auth0 validator, configured from the
# environment settings read above.
validator = Auth0JWTBearerTokenValidator(AUTH0_DOMAIN, API_IDENTIFIER)
require_auth = ResourceProtector()
require_auth.register_token_validator(validator)

In [27]:
# Embedding parameters for OpenAI's second-generation embedding model (text-embedding-ada-002).
embedding_model = "text-embedding-ada-002" # passed as `engine` to get_embedding in search_symptoms
embedding_encoding = "cl100k_base" # tokenizer used by the second-generation models; not referenced in the visible cells
max_tokens = 8000 # kept below the 8191-token limit of this model/tokenizer; not referenced in the visible cells

In [7]:
# Load the embedding data
# One row per topic. The preview below shows text columns (topic, overview,
# symptoms, ...), a `combined` text column and an `embedding` column that is
# still a string at this point.
df = pd.read_csv("data/wmd_1452_embeddings.csv")

In [None]:
# Preview the first two rows as loaded from the CSV.
df.head(2)

In [10]:
# The CSV stores each embedding as the text of a Python list, so it has to be
# parsed back into a numeric array after loading.
# ast.literal_eval accepts only Python literals, so unlike eval() it cannot
# execute code embedded in the data file.
# The isinstance guard keeps the cell re-runnable: values that are already
# parsed are passed through instead of raising.
df["embedding"] = df["embedding"].apply(
    lambda value: np.array(ast.literal_eval(value))
    if isinstance(value, str)
    else np.asarray(value)
)

In [11]:
# Preview the first two rows again after converting the `embedding` column.
df.head(2)

Unnamed: 0.1,Unnamed: 0,topic,overview,symptoms,causes,tests,treatment,url,combined,n_tokens,embedding
0,0,A1AT Deficiency,Alpha-1 antitrypsin deficiency is a genetic di...,You might not know you have the disease until ...,Alpha-1 antitrypsin deficiency runs in familie...,Your doctor may ask you questions such as: You...,"Although there’s no cure for AAT deficiency, y...",https://www.webmd.com/lung/copd/alpha-1-antitr...,Topic: A1AT Deficiency Overview: Alpha-1 antit...,1123,"[-0.01765124686062336, 0.010525372810661793, 0..."
1,1,AAT,Alpha-1 antitrypsin deficiency is a genetic di...,You might not know you have the disease until ...,Alpha-1 antitrypsin deficiency runs in familie...,Your doctor may ask you questions such as: You...,"Although there’s no cure for AAT deficiency, y...",https://www.webmd.com/lung/copd/alpha-1-antitr...,Topic: AAT Overview: Alpha-1 antitrypsin defic...,1120,"[-0.0176760945469141, 0.00719116535037756, 0.0..."


In [46]:
# search through the symptoms for the most similar topic
def search_symptoms(df, symptoms, n=5, pprint=True):
    """Return the `n` topics whose embeddings are most similar to `symptoms`.

    Args:
        df: DataFrame with an `embedding` column (one vector per row) and a
            `combined` text column. It is not modified.
        symptoms: Free-text description to embed with `embedding_model`.
        n: Maximum number of rows to return.
        pprint: When true, print the first 200 characters of each result
            followed by a blank line.

    Returns:
        A Series of `combined` texts with the "Topic: " and "Symptoms: "
        labels removed, ordered from most to least similar and indexed by the
        original row labels.
    """
    symptoms_embedding = get_embedding(symptoms, engine=embedding_model)
    similarity = df.embedding.apply(lambda x: cosine_similarity(x, symptoms_embedding))
    # Rank on a copy: writing the scores into `df` itself would mutate the
    # caller's frame, which is shared by every request when this runs inside
    # the Flask app.
    ranked = df.assign(similarity=similarity).sort_values("similarity", ascending=False)
    results = (
        ranked.head(n)
        .combined.str.replace("Topic: ", "", regex=False)
        .str.replace("Symptoms: ", "", regex=False)
    )
    if pprint:
        for r in results:
            print(r[:200])
            print()
    return results

In [98]:
# Example query: a free-text description that combines several symptoms.
results = search_symptoms(df, "purple toes and cold shivers and light headedness", n=5)

CMTC Overview: The main sign is the blue or purple marbled pattern on the skin. It looks a lot like what happens to a baby’s skin when they’re cold, but it’s more defined and doesn’t go away. Most chi

Van Lohuizen Syndrome Overview: The main sign is the blue or purple marbled pattern on the skin. It looks a lot like what happens to a baby’s skin when they’re cold, but it’s more defined and doesn’t 

Chilblains Overview: Frostbite occurs when tissues freeze. This condition happens when you are exposed to temperatures below the freezing point of skin. The condition has long been recognized. A 5,000

Raynaud's Phenomenon Overview: Treatment goals will include preventing attacks or limiting them when they do happen. That usually means keeping your hands and feet warm and dry, controlling stress, an

TORCH Syndrome Overview: If you get one of the TORCH infections while you’re pregnant, and it spreads through your blood to your baby, they can get it, too. And because they're still developin

In [47]:
# Example query: a single short symptom.
results = search_symptoms(df, "back pain", n=5)

Lumbar Pain Overview: We often bring on our back problems through bad habits, such as: The spine is actually a stack of 24 bones called vertebrae. A healthy spine is S-shaped when viewed from the side

Low Back Pain Exercises Overview:   Causes:  Tests:  Treatment: 

Low Back Pain Overview: The causes of back pain can be complex. Some causes of back pain include accidents, muscle strains, and sports injuries. The main symptom of cervical radiculopathy is pain that

Exercises to Reduce Low Back Pain Overview:   Causes:  Tests:  Treatment: 

Slipped Disc Overview: Back pain can sneak up on you when you least expect it. One minute you're sitting comfortably in front of the TV, and the next you try to stand up, and -- ouch! -- a sharp pain 



In [63]:
# First 200 characters of the best-ranked result.
results.iloc[0][:200]

'Lumbar Pain Overview: We often bring on our back problems through bad habits, such as: The spine is actually a stack of 24 bones called vertebrae. A healthy spine is S-shaped when viewed from the side'

In [14]:
# Example query: another single symptom, to spot-check the ranking.
results = search_symptoms(df, "memory loss", n=5)

Monomodal Visual Amnesia Overview: The outlook for people with dissociative amnesia depends on several factors, including the person's life situation, the availability of support systems, and the indi

Agnosia, Primary Visual Overview: The outlook for people with dissociative amnesia depends on several factors, including the person's life situation, the availability of support systems, and the indiv

Agnosis, Primary Overview: The outlook for people with dissociative amnesia depends on several factors, including the person's life situation, the availability of support systems, and the individual's

Thiamine Deficiency Overview: Wernicke-Korsakoff syndrome (WKS) is one name for two conditions that often happen together -- Wernicke encephalopathy and Korsakoff syndrome. Many doctors think of them 

Gayet-Wernicke Syndrome Overview: Wernicke-Korsakoff syndrome (WKS) is one name for two conditions that often happen together -- Wernicke encephalopathy and Korsakoff syndrome. Many doctors th

In [15]:
@app.route("/")
def index():
    """Root endpoint: respond with a JSON greeting."""
    greeting = "Hello from SmartHealth!"
    return jsonify(message=greeting)

In [16]:
@app.route("/public")
def public():
    """Unauthenticated endpoint; no access token is needed to call it."""
    return jsonify(
        message="Hello from a public endpoint! You don't need to be authenticated to see this."
    )

In [17]:
@app.route("/private")
@cross_origin(headers=["Content-Type", "Authorization"])
@require_auth(None)
def private():
    """Protected endpoint; the request must pass the `require_auth` check."""
    return jsonify(
        message="Hello from a private endpoint! You need to be authenticated to see this."
    )

In [18]:
@app.route("/chat", methods=["GET", "POST"])
@cross_origin(headers=["Content-Type", "Authorization"])
@require_auth(None)
def chat():
    """Chat endpoint backed by the symptom-embedding search.

    POST expects a JSON body of the form
    ``{"messages": [{"role": "user", "content": "..."}, ...]}``. The text of
    every ``user`` message is joined, most recent first, into one query for
    ``search_symptoms``. The best match, cut to 200 characters, is returned as
    ``{"data": {"id": 1, "role": "system", "content": <text>}}``.

    A body that is not JSON with a ``messages`` list, or a conversation with
    no user text, yields a JSON error with status 400.

    GET returns a fixed sample conversation.
    """
    if request.method == "POST":
        # silent=True makes get_json return None for a missing or unparsable
        # body instead of raising, so the error can be reported as JSON.
        payload = request.get_json(silent=True)
        messages = payload.get("messages") if isinstance(payload, dict) else None
        if not isinstance(messages, list):
            return jsonify(error="Request body must be JSON with a 'messages' list."), 400

        # search_symptoms embeds a single string, so reduce the message dicts
        # to the text the user actually wrote.
        user_texts = [
            message["content"]
            for message in messages
            if isinstance(message, dict)
            and message.get("role") == "user"
            and isinstance(message.get("content"), str)
            and message["content"].strip()
        ]
        if not user_texts:
            return jsonify(error="No user message content to search for."), 400

        # Most recent message first, so the latest symptom leads the query.
        search_message = " ".join(reversed(user_texts))
        results = search_symptoms(df, search_message, n=5, pprint=False)

        # `results` is a pandas Series of texts ordered best match first.
        if len(results) > 0:
            content = results.iloc[0][:200]
        else:
            content = "Sorry, I could not find a matching topic."

        return jsonify({
            "data": {
                "id": 1,
                "role": "system",
                "content": content
            }
        })

    return jsonify({
        "data": [
            {
                "id": 1,
                "sender": "user",
                "content": "Hello, how are you?"
            },
        ]
    })

In [30]:
# Start Flask's built-in server when this cell/script is executed directly.
# host="0.0.0.0" listens on all addresses; the port is read from the PORT
# environment variable and falls back to 3010.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=env.get("PORT", 3010))

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:3010
 * Running on http://192.168.68.50:3010
Press CTRL+C to quit
192.168.68.50 - - [03/Mar/2023 08:38:24] "POST /chat HTTP/1.1" 404 -


In [32]:
# Sample chat payload: a list of {'content', 'role'} dicts, as printed by the /chat handler.
messages = [{'content': 'hello', 'role': 'user'}]

In [41]:
# Sample payload with a symptom as the single user message (replaces the previous sample).
messages = [{'content': 'back pain', 'role': 'user'}]

In [42]:
# Use the text of the first message as the search query.
search_message = messages[0]['content']

In [44]:
# Run the search on the extracted message text without printing the matches.
results = search_symptoms(df, search_message, n=5, pprint=False)

back pain


In [75]:
# Sample multi-turn conversation: user symptom, system reply (a truncated search result), second user symptom.
messages = [{'content': 'back pain', 'role': 'user'}, {'content': 'Lumbar Pain Overview: We often bring on our back problems through bad habits, such as: The spine is actually a stack of 24 bones called vertebrae. A healthy spine is S-shaped when viewed from the side', 'role': 'system'}, {'content': 'headache', 'role': 'user'}]

In [94]:
def reverse_tokens(sentence):
    """Return `sentence` with its space-separated tokens in reverse order.

    Splitting is on single spaces only, so consecutive spaces produce empty
    tokens that are preserved in the output.
    """
    return ' '.join(sentence.split(' ')[::-1])

In [97]:
# Build the search query from the user's turns only, newest first. Each turn
# is followed by a single space, so the result ends with a trailing space.
search_message = ''.join(
    turn['content'] + ' '
    for turn in reversed(messages)
    if turn['role'] == 'user'
)
print(search_message)
# search_message = reverse_tokens(search_message)
# print(search_message)

headache back pain 


In [93]:
# Experiment: search with several symptoms concatenated into one query string
# and inspect which topics rank highest.
search_message = "knee pain headache back pain"
results = search_symptoms(df, search_message, n=5, pprint=False)
print(results)

885     Neck Pain Overview: Causes of neck pain includ...
782     Lumbar Pain Overview: We often bring on our ba...
1113    Sacroiliac Joint Pain Overview: Its full name ...
777     Low Back Pain Exercises Overview:   Causes:  T...
775     Low Back Pain Overview: The causes of back pai...
Name: combined, dtype: object
