In [27]:


import requests
from llama_index.core import Settings
from llama_index.embeddings.openai import OpenAIEmbedding

from pinecone import Pinecone
from dotenv import load_dotenv, find_dotenv
import os

import json

import requests



In [8]:
# Both credential files are dotenv files stored directly in the user's home
# directory; each one is loaded into the process environment before the key
# it is expected to provide is read back.
_home_dir = os.path.expanduser('~')

# Key later passed to the OpenAI embedding client (None if the variable is unset).
load_dotenv(os.path.join(_home_dir, '.openai_config.env'))
api_key = os.getenv('HACKATHON_API_KEY')

# Key later passed to the Pinecone client (None if the variable is unset).
load_dotenv(os.path.join(_home_dir, '.pinecone_config.env'))
pinecone_api_key = os.getenv('ANDREW_API_KEY')



In [63]:

def get_rag_context(query, top_k=5):
    """Build a reference-annotated context string for ``query``.

    The query is embedded with the module-level ``embed_model`` and the
    nearest entries are fetched from the module-level ``pinecone_index``.
    For every returned match, the ``section_summary`` stored inside the
    JSON-encoded ``_node_content`` metadata field is paired with the match's
    ``c_document_id`` metadata field.

    Args:
        query: Text to embed and search for.
        top_k: Maximum number of matches to request and to include.

    Returns:
        One string in which each match contributes
        ``"<summary>(Ref: <document id>). "``, in the order the index
        returned them. Empty string when there are no matches.

    Raises:
        KeyError: If a match lacks one of the metadata fields read here.
    """
    query_embedding = embed_model.get_text_embedding(query)
    retrieved_doc = pinecone_index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True,
    )

    # Iterate over the matches that actually came back rather than over
    # range(top_k): the index can return fewer than top_k results, and
    # indexing by position would then raise IndexError.
    fragments = []
    for match in list(retrieved_doc.matches)[:top_k]:
        node_content = json.loads(match.metadata['_node_content'])
        summary = node_content['metadata']['section_summary']
        document_id = match.metadata['c_document_id']
        fragments.append(summary + '(Ref: ' + document_id + '). ')
    return ''.join(fragments)


def format_prompt(user_message, rag_context, system_prompt):
    """Assemble the chat-template prompt string sent to the model.

    The result consists of a system turn carrying ``system_prompt``, a user
    turn carrying the retrieved context, the user's message and a fixed
    instruction about citing references, and an opened assistant turn.

    Every line of the result is indented by four spaces, the string starts
    with a newline, and it ends with a newline followed by that indent.

    Args:
        user_message: The user's question.
        rag_context: Retrieved context text to ground the answer on.
        system_prompt: Instructions placed in the system turn.

    Returns:
        The fully formatted prompt string.
    """
    pad = '    '
    system_header = '<|begin_of_text|><|start_header_id|>system<|end_header_id|>'
    user_header = '<|eot_id|><|start_header_id|>user<|end_header_id|>'
    assistant_header = '<|eot_id|><|start_header_id|>assistant<|end_header_id|>'
    citation_rule = (
        "Always make sure to provide references in your answer. "
        "You can find the references in the Context marked as '(Ref: '. "
        "After providing references, print 'End of response'"
    )

    # The leading empty segment and the bare `pad` segments reproduce the
    # initial newline and the whitespace-only lines of the template.
    segments = (
        '',
        pad + system_header,
        pad,
        pad + f'{system_prompt}' + user_header,
        pad,
        pad + f'Context: {rag_context}',
        pad + f'User message: {user_message}',
        pad + citation_rule + assistant_header,
        pad,
    )
    return '\n'.join(segments)




def query_zebra_llama(user_message, temperature=0.7, max_new_tokens=256, timeout=120):
    """Answer an EDS question with the hosted model, grounded on retrieved context.

    Retrieves context for ``user_message`` with ``get_rag_context``, wraps it
    together with a fixed system prompt using ``format_prompt``, and POSTs the
    resulting prompt as JSON to the endpoint at ``API_URL``.

    Args:
        user_message: The user's question.
        temperature: Sampling temperature sent to the endpoint.
        max_new_tokens: Limit on generated tokens sent to the endpoint.
        timeout: Seconds passed to ``requests.post`` as its timeout, so a
            stalled endpoint raises instead of blocking forever.

    Returns:
        The ``generated_text`` field of the first element of the JSON
        response when the status code is 200; otherwise ``None`` (a warning
        with the status code is emitted first).

    Raises:
        Whatever ``requests.post`` raises for connection failures or when
        ``timeout`` elapses.
    """
    import warnings

    # Written as explicit lines so the exact whitespace sent to the model
    # (leading newline, four-space indent, trailing spaces) is visible.
    system_prompt = (
        "\n"
        "    You are an expert in the rare disease Ehlers-Danlos syndrome (EDS).\n"
        "    You are supposed to answer the question asked by the user.\n"
        "    Your response should be grounded on the given Context in the user message.\n"
        "    If no context is given, try to answer as accurately as possible. \n"
        "    If you don't know the answer, admit that you don't instead of making one up.   \n"
        "    "
    )
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
    }
    rag_context = get_rag_context(user_message)
    input_prompt = format_prompt(user_message, rag_context, system_prompt)
    payload = {
        "inputs": input_prompt,
        "parameters": {
            "temperature": temperature,
            "max_new_tokens": max_new_tokens,
            # NOTE(review): the output recorded in this notebook continues
            # past 'End of response', so the endpoint may not honor this key
            # (TGI documents the parameter as `stop`) -- confirm.
            "stop_sequences": ['End of response'],
            "return_full_text": False,
            "frequency_penalty": 1,
        },
    }
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    if response.status_code != 200:
        # Keep the None return callers already handle, but say why.
        warnings.warn(
            f"Endpoint returned HTTP {response.status_code}; no text generated.",
            stacklevel=2,
        )
        return None
    return response.json()[0]['generated_text']



In [64]:

# Names of the remote resources used by this notebook: the OpenAI embedding
# model, the Pinecone index, and the hosted text-generation endpoint that
# query_zebra_llama posts to.
embedding_model = "text-embedding-ada-002"
index_name = "eds"
API_URL = "https://pl56muwe315crf88.us-east-1.aws.endpoints.huggingface.cloud"

# Embedding client used by get_rag_context; it is also assigned to
# llama_index's global Settings object.
embed_model = OpenAIEmbedding(model=embedding_model, api_key=api_key)
Settings.embed_model = embed_model

# Pinecone client and the handle on the index that get_rag_context queries.
pc = Pinecone(api_key=pinecone_api_key)
pinecone_index = pc.Index(index_name)



In [74]:
%%time

# Example question run through retrieval and generation end to end.
user_message = "What mutations cause EDS?"

# Generated answer text, or None when the endpoint did not respond with HTTP 200.
output = query_zebra_llama(user_message)




CPU times: user 72.4 ms, sys: 13.5 ms, total: 85.9 ms
Wall time: 18.4 s


In [75]:

# Show the answer as plain text so its newlines render (prints "None" if the request failed).
print(output)


 Ehlers-Danlos Syndrome (EDS) is caused by a type of point mutation in the COL3A1 gene, which is responsible for the formation of collagen type III. This mutation leads to a defect in collagen synthesis, which in turn causes the symptoms of EDS. Additionally, a new variant of a splice site mutation in the COL3A1 gene has been identified as a cause of EDS. This mutation disrupts the normal process of collagen synthesis, resulting in the characteristic features of EDS. The genetic testing methods used to confirm the presence of this mutation in a specific case are described in the passage. It highlights the importance of genetic testing in identifying the underlying cause of EDS, which can help in the diagnosis and management of the condition. 
    End of response. 
    References:
    (Ref: 10.1186/s12891-016-0936-8)
    (Ref: 10.1186/s12891-016-0936-8)
    (Ref: 10.1002/ajmg.c.31550)
    (Ref: 10.1186/s12891-016-0936-8)
    (Ref: 10.17992/lbl.2012.
