In [None]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import re

  from tqdm.autonotebook import tqdm, trange





In [None]:
# Read the patient visit records from a CSV file located in the working directory
file_path = "patient_records_50k.csv"
data = pd.read_csv(filepath_or_buffer=file_path)

# Step 1: Data Preprocessing
def preprocess_text(text):
    """Normalize a piece of free text before it is embedded.

    Every character other than ASCII letters, digits and whitespace is
    removed, then the result is lower-cased and stripped of leading and
    trailing whitespace.

    Args:
        text: The value to clean. ``None`` and float NaN (how pandas
            represents an empty CSV cell) are treated as empty text; any
            other non-string value is converted with ``str()`` first.

    Returns:
        The cleaned string, or ``""`` for missing input.
    """
    # `text != text` is only true for NaN; re.sub would raise TypeError on it.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if not isinstance(text, str):
        text = str(text)

    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    text = text.lower().strip()
    return text

# Combine relevant text columns for vectorization.
# Missing cells are replaced with empty strings first: concatenating a string
# with NaN yields NaN, so a single empty field would otherwise wipe out the
# whole row's combined text before it reaches preprocess_text.
data['combined_text'] = (
    data['medical_conditions'].fillna('') + ' ' +
    data['current_medications'].fillna('') + ' ' +
    data['problem_description'].fillna('') + ' ' +
    data['doctor_notes'].fillna('')
)
data['combined_text'] = data['combined_text'].apply(preprocess_text)

In [None]:
# Preview the first five rows, including the new combined_text column
data.head(n=5)

Unnamed: 0,patient_id,name,age,gender,medical_conditions,current_medications,visit_id,visit_date,problem_description,doctor_notes,tests_ordered,test_results,combined_text
0,P00000,Damon Moore,38,Male,Hypertension,Vitamin D; Lisinopril; Albuterol; Levothyroxine,V0001,2024-08-13,Muscle Cramps,Recommend tests: Lisinopril. | Encourage a Lev...,Blood Pressure Monitoring,Blood Pressure Monitoring: 5.1,hypertension vitamin d lisinopril albuterol le...
1,P00000,Damon Moore,38,Male,Hypertension,Vitamin D; Lisinopril; Albuterol; Levothyroxine,V0002,2024-01-04,Fatigue,Recommend tests: Albuterol. | Increase dosage ...,Blood Pressure Monitoring; Lipid Profile; Elec...,Blood Pressure Monitoring: 8.78; Lipid Profile...,hypertension vitamin d lisinopril albuterol le...
2,P00000,Damon Moore,38,Male,Hypertension,Vitamin D; Lisinopril; Albuterol; Levothyroxine,V0003,2024-05-25,Joint Pain,Schedule a follow-up in Atorvastatin weeks for...,Electrolyte Panel,Electrolyte Panel: 6.93,hypertension vitamin d lisinopril albuterol le...
3,P00000,Damon Moore,38,Male,Hypertension,Vitamin D; Lisinopril; Albuterol; Levothyroxine,V0004,2024-07-01,Fatigue,Recommend tests: Albuterol. | Encourage a Lisi...,Nerve Conduction Test; Lipid Profile; Electrol...,Nerve Conduction Test: 1.21; Lipid Profile: 8....,hypertension vitamin d lisinopril albuterol le...
4,P00000,Damon Moore,38,Male,Hypertension,Vitamin D; Lisinopril; Albuterol; Levothyroxine,V0005,2024-09-26,Numbness in Feet,Patient shows signs of Atorvastatin. Recommend...,Lipid Profile; Electrolyte Panel,Lipid Profile: 8.52; Electrolyte Panel: 5.5,hypertension vitamin d lisinopril albuterol le...


## Step 2: Vectorization Using an Embedding Model

In [None]:
# Load the sentence-embedding model, then embed the combined text of every record
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(
    data['combined_text'].to_list(),
    convert_to_tensor=False,
)

## Step 3: Building the Vector Store



In [None]:
# FAISS works on float32 matrices, so cast the embeddings before indexing
embeddings = np.asarray(embeddings, dtype='float32')

# Size the L2 index to the width of one embedding vector
vector_dimension = embeddings.shape[1]
faiss_index = faiss.IndexFlatL2(vector_dimension)

# Store every record embedding in the index
faiss_index.add(embeddings)

## Step 4: Implementing Search Capabilities



In [None]:
def search_similar_records(query, top_k=5):
    """Return the patient records whose embeddings are closest to ``query``.

    The query is cleaned with ``preprocess_text``, embedded with the
    module-level ``model`` and looked up in the module-level ``faiss_index``.
    Matching rows are taken from ``data`` by position, nearest first.

    Args:
        query: Free-text search string.
        top_k: Maximum number of records to return. Fewer rows come back
            when the index holds fewer than ``top_k`` vectors.

    Returns:
        A DataFrame slice of ``data`` with at most ``top_k`` rows; empty
        when the index is empty.

    Raises:
        ValueError: If ``top_k`` is less than 1.
    """
    if top_k < 1:
        raise ValueError("top_k must be a positive integer")

    # Never ask FAISS for more neighbours than it stores: missing results come
    # back as index -1, which iloc would silently resolve to the LAST row.
    top_k = min(top_k, faiss_index.ntotal)
    if top_k == 0:
        return data.iloc[[]]

    query = preprocess_text(query)
    query_embedding = model.encode([query], convert_to_tensor=False).astype('float32')

    # Search the FAISS index (distances are not needed by callers)
    _, indices = faiss_index.search(query_embedding, top_k)

    # Retrieve the corresponding patient records, skipping any -1 padding
    row_positions = indices[0]
    row_positions = row_positions[row_positions >= 0]
    return data.iloc[row_positions]

### Example Usage: Searching for Similar Records



In [None]:
# Retrieve the stored visits closest to a free-text question and show their notes
query = "What are test recommended dugin Frequent Urination"
similar_records = search_similar_records(query, top_k=5)
print(similar_records.loc[:, ['doctor_notes']])


                                            doctor_notes
42929  Recommend tests: Metformin. | Increase dosage ...
22682  Recommend tests: Metformin. | Encourage a Albu...
30824  Recommend tests: Atorvastatin. | Patient shows...
41511  Recommend tests: Metformin. | Patient shows si...
11576  Patient shows signs of Metformin. Recommend 5....


In [None]:
# Full, untruncated doctor note of the closest match
similar_records['doctor_notes'].iloc[0]

'Recommend tests: Metformin. | Increase dosage of Metformin to 14 units/day. | Patient shows signs of Albuterol. Recommend 10.'

## Step 5: Setting Up the Language Model (LLM) with Langchain Together



In [None]:
import os
from getpass import getpass

from langchain_community.llms import Together

# Never hardcode the API key in the notebook: it ends up in version control and
# in every shared copy. Take it from the environment, and prompt for it
# (without echoing) only when it is not already set.
# NOTE(review): a key was previously committed in this cell; it should be
# revoked and replaced.
if not os.environ.get('TOGETHER_API_KEY'):
    os.environ['TOGETHER_API_KEY'] = getpass('Together API key: ')

llm = Together(model="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo")

## Step 6: Generating Answers Using the Together LLaMA Model



In [None]:
def generate_answer(query):
    """Answer ``query`` with the LLM, using retrieved patient records as context.

    The rows returned by ``search_similar_records(query)`` provide the
    context: their ``combined_text`` values are joined with newlines and
    substituted, together with the question, into a fixed prompt that is
    sent to the module-level ``llm``.

    Side effect: the joined context is also stored in the module-level
    ``context`` variable.

    Returns:
        The text of the first generation produced for the prompt.
    """
    global context

    prompt_template = """Use the following pieces of context to answer the question at the end.
    If you don't know the answer, just say you don't know, don't try to make up an answer.
    Your answer should be helpful and informative, but not too long.
    The answer should explain the concept in a way that is easy to understand for someone who is not an expert in the field.
    The answer should explain the concept in one or two lines and if needed other aspects in bullet points.
    Use bullet point sentences maximum and keep the answer as concise as possible.

    {context}

    Question: {question}

    Helpful Answer:"""

    # Retrieval step: one line of context per matching record
    matches = search_similar_records(query)
    context = "\n".join(matches['combined_text'].tolist())

    # Generation step: fill the prompt and take the first completion
    prompt = prompt_template.format(context=context, question=query)
    result = llm.generate([prompt])
    return result.generations[0][0].text

### Example Usage: Using the RAG Pipeline to Answer Queries


In [None]:
# Run the full retrieve-then-generate pipeline on a sample question
query = "What tests are recommended for patients with numbness?"
answer = generate_answer(query=query)
print(answer)


 The recommended tests for patients with numbness in feet are not explicitly stated in the provided context. However, based on the information given, the following tests might be considered:
    • Blood tests to check for diabetes (e.g., insulin, metformin) and thyroid function (e.g., levothyroxine)
    • Tests to diagnose coronary artery disease (e.g., amlodipine, aspirin)
    • Nerve conduction studies to assess nerve damage
    • Imaging tests (e.g., X-rays, MRI) to evaluate joint health (e.g., arthritis) and potential nerve compression
    Please consult a healthcare professional for personalized advice and testing recommendations.
