In [1]:

from urllib.parse import urlparse
import os
import pandas as pd
import openai
import numpy as np
import os
from openai.embeddings_utils import distances_from_embeddings

# Model identifiers for this notebook: one completion model, one embedding model.
COMPLETIONS_MODEL = "text-davinci-003"
EMBEDDING_MODEL = "text-embedding-ada-002"

# The key is taken from the OPEN_AI_KEY environment variable rather than being
# written into the notebook; if the variable is unset this assigns None.
openai.api_key = os.environ.get('OPEN_AI_KEY')

In [2]:
# Imported here so this cell stays runnable on its own.
from ast import literal_eval

# Load the pre-computed embeddings table; the first CSV column becomes the index.
df = pd.read_csv('processed/embeddings.csv', index_col=0)

# The 'embeddings' column comes back from the CSV as text (presumably the repr
# of a list of floats -- confirm against the cell that wrote the file).
# Parse it with ast.literal_eval instead of eval: literal_eval accepts only
# Python literals, so a modified CSV cannot execute arbitrary code on load.
df['embeddings'] = df['embeddings'].apply(literal_eval).apply(np.array)

# Preview goes last: only a cell's final expression is rendered, so a
# df.head() in the middle of the cell was silently discarded.
df.head()

In [3]:
from openai.embeddings_utils import distances_from_embeddings

In [4]:
def create_context(
    question, df, max_len=1800, size="ada"
):
    """
    Build a context string for a question from the most similar rows of ``df``.

    The question is embedded with ``text-embedding-ada-002``, every row is
    scored by cosine distance to that embedding, and row texts are collected
    closest-first until the running token count would exceed ``max_len``.

    Args:
        question: Text to embed and compare against the rows.
        df: DataFrame providing ``embeddings``, ``n_tokens`` and ``content``
            columns. It is left unmodified.
        max_len: Token budget. Every selected row costs ``n_tokens + 4``
            (the extra 4 presumably covers the separator -- confirm).
        size: Not used by this function; kept so existing calls still work.

    Returns:
        The selected ``content`` values joined by a blank-line-delimited
        ``###`` separator, or an empty string when not even the closest row
        fits in the budget (or ``df`` has no rows).
    """

    # Get the embedding for the question
    q_embeddings = openai.Embedding.create(input=question, engine='text-embedding-ada-002')['data'][0]['embedding']

    # Distance from the question to every row, in row order
    distances = distances_from_embeddings(q_embeddings, df['embeddings'].values, distance_metric='cosine')

    # assign() returns a new frame, so the caller's DataFrame does not gain a
    # 'distances' column as a hidden side effect of asking a question.
    ranked = df.assign(distances=distances).sort_values('distances', ascending=True)

    returns = []
    cur_len = 0

    # Walk rows closest-first and stop at the first one that breaks the budget
    for n_tokens, content in zip(ranked['n_tokens'], ranked['content']):
        cur_len += n_tokens + 4

        if cur_len > max_len:
            break

        returns.append(content)

    # Return the context
    return "\n\n###\n\n".join(returns)

def answer_question(
    df,
    model="text-davinci-003",
    question="Am I allowed to publish model outputs to Twitter, without a human review?",
    max_len=1800,
    size="ada",
    debug=False,
    max_tokens=150,
    stop_sequence=None
):
    """
    Answer a question using context retrieved from the dataframe texts.

    The context comes from ``create_context`` and is placed in a prompt that
    tells the model to reply "I don't know" when the context does not contain
    the answer. Returns the stripped completion text; if the completion call
    raises, the exception is printed and an empty string is returned.
    """
    context = create_context(question, df, max_len=max_len, size=size)

    # In debug mode, show the retrieved context that goes into the prompt
    if debug:
        print("Context:\n" + context)
        print("\n\n")

    try:
        # Request a completion from the question and its context
        request = dict(
            model=model,
            prompt=f"Answer the question based on the context below, and if the question can't be answered based on the context, say \"I don't know\"\n\nContext: {context}\n\n---\n\nQuestion: {question}\nAnswer:",
            max_tokens=max_tokens,
            stop=stop_sequence,
            temperature=0,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        completion = openai.Completion.create(**request)
        answer = completion["choices"][0]["text"].strip()
    except Exception as err:
        # Best effort: report the failure and fall back to an empty answer
        print(err)
        answer = ""

    return answer

In [6]:
# Definition-style question answered with the defaults of answer_question.
# max_tokens defaults to 150, which may cut a long answer short.
print(answer_question(df, question="What is Pursed-Lip Breathing"))

Pursed-lip breathing is a technique that allows people to control their oxygenation and ventilation. The technique requires a person to inspire through the nose and exhale through the mouth at a slow controlled flow. This type of exhalation gives the person a puckered or pursed appearance. By prolonging the expiratory phase of respiration, a small amount of positive end-expiratory pressure (PEEP) is created in the airways that helps to keep them open so that more air can be exhaled, thus reducing air trapping that occurs in some conditions such as COPD. Pursed-lip breathing often relieves the feeling of shortness of breath, decreases the work of breathing, and improves gas exchange. People also


In [9]:
# Short factual question; the model and token limits are the function defaults.
print(answer_question(df, question="What is COPD?"))

COPD (Chronic Obstructive Pulmonary Disease) is a long-term lung disease that makes it hard to breathe. It is caused by damage to the lungs over time, usually from smoking.


In [10]:
# Another definition-style question, again using only the default arguments.
print(answer_question(df, question="What is Infusion by Pump?"))

Infusion by pump is a type of new technology used to continuously deliver subcutaneous insulin. It works by closely mimicking the body’s normal release of insulin. Insulin doses are delivered through a flexible plastic tube called a catheter. With the aid of a small needle, the catheter is inserted through the skin into the fatty tissue and is taped in place.


In [11]:
# The question text is sent verbatim (including the ':' before '?') to both
# the embedding call and the completion prompt.
print(answer_question(df, question="what do you know about Tendons:?"))

Tendons are tissues that connect muscle to bone.


In [12]:
# Asks about a chapter by number rather than about a concept.
print(answer_question(df, question="what is chapter 14 about for?"))

Chapter 14 is about Integumentary Assessment.


In [None]:
# Asks about the topic named in the chapter-14 answer. No execution count or
# output is recorded for this cell.
print(answer_question(df, question="what is Integumentary Assessment?"))