In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import chromadb
from chromadb.config import Settings
from sentence_transformers import SentenceTransformer

In [2]:
model_name = "microsoft/BioGPT"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)



## Set Up Chroma DB for Retrieval

In [6]:
client = chromadb.Client(Settings())
collection = client.create_collection("medical_records")

In [13]:
embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

knowledge_base = [
        "A 52-year-old male presents with Type 2 Diabetes and is currently on Metformin 500 mg twice daily. He has a history of obesity, hypertension (treated with Lisinopril 10 mg daily), and hyperlipidemia (treated with Atorvastatin 20 mg daily). His HbA1c is 8.3%, and his fasting blood glucose levels average 160 mg/dL. The patient is experiencing mild peripheral neuropathy and has been recommended lifestyle changes, including a low-carbohydrate diet and increased physical activity.",
        "A 65-year-old female with a history of chronic obstructive pulmonary disease (COPD) and smoking (40-pack-year history). She was diagnosed with mild heart failure (NYHA Class II) and has been on Furosemide 40 mg daily. Her symptoms include dyspnea on exertion and occasional ankle swelling. Pulmonary function tests revealed an FEV1/FVC ratio of 60%. The patient was advised to undergo smoking cessation and pulmonary rehabilitation.",
        "A 45-year-old male presents with newly diagnosed atrial fibrillation. He has a history of untreated hyperthyroidism and recent onset of palpitations and dizziness. His current ECG shows an irregularly irregular rhythm with a ventricular rate of 110 bpm. The patient is started on Metoprolol 25 mg twice daily, and thyroid function tests are pending. He is advised to follow up for a possible referral for catheter ablation if symptoms persist.",
        "A 72-year-old female presents with progressive dementia and is currently in the moderate stage of Alzheimer's disease. She has been on Donepezil 10 mg daily and Memantine 10 mg twice daily for the past year. Her symptoms include increasing forgetfulness, difficulty with daily tasks, and occasional episodes of confusion. The patient's family has been counseled on long-term care options, including assisted living and home care.",
        "A 60-year-old male with a history of myocardial infarction 3 years ago presents for a follow-up. He has been on dual antiplatelet therapy (Aspirin 81 mg and Clopidogrel 75 mg daily) and Atorvastatin 40 mg. His LDL cholesterol level is currently 85 mg/dL. The patient follows a Mediterranean diet and engages in regular cardiovascular exercise. He has been advised to continue current medications and return in 6 months for a stress test.",
        "A 35-year-old female with newly diagnosed multiple sclerosis (MS). The patient presents with fatigue, blurred vision, and muscle weakness, which began 6 months ago. MRI of the brain and spinal cord reveals demyelinating lesions consistent with MS. She has been started on Glatiramer acetate injections and corticosteroids for acute exacerbation management. The patient has also been referred for physical therapy and counseling for disease management.",
        "A 50-year-old male with a 10-year history of rheumatoid arthritis, currently treated with Methotrexate 20 mg weekly and adalimumab injections every 2 weeks. He presents with increased joint pain and stiffness in the hands and knees, particularly in the morning. Lab work shows elevated C-reactive protein (CRP) and erythrocyte sedimentation rate (ESR). The patient was advised to consider biologic escalation and has been referred to a rheumatologist for further evaluation.",
        "A 28-year-old female presents with gestational diabetes mellitus (GDM) at 28 weeks of pregnancy. She has been monitoring her blood glucose levels and has started insulin therapy (NPH insulin 10 units in the morning and 8 units at night). Fasting glucose levels remain within the 90-110 mg/dL range. The patient is advised to maintain a healthy diet, exercise regularly, and continue monitoring blood sugar levels at home. A follow-up ultrasound is scheduled to assess fetal growth. ",
        "A 67-year-old male with a history of chronic kidney disease (stage 3) and hypertension presents with worsening kidney function. His current serum creatinine is 2.1 mg/dL, and his estimated glomerular filtration rate (eGFR) is 45 mL/min. The patient is on Lisinopril 20 mg daily and a low-sodium diet. He has been referred to a nephrologist for further evaluation, and his antihypertensive medication is being closely monitored to prevent further kidney damage.",
        "Clinical Guideline 1: Management of Hypertension in Adults: Hypertension management involves lifestyle changes (dietary sodium reduction, weight loss, physical activity) and pharmacologic treatment. First-line agents include ACE inhibitors, ARBs, calcium channel blockers, and thiazide diuretics. Patients with comorbidities such as diabetes or chronic kidney disease should be managed with ACE inhibitors or ARBs for renal protection. Blood pressure targets are generally below 130/80 mmHg for high-risk individuals.",
        "Clinical Guideline 2: Diabetes Mellitus Management: Type 2 Diabetes management focuses on lifestyle modifications (healthy diet, physical activity) and pharmacologic interventions. First-line therapy includes Metformin, with the addition of GLP-1 receptor agonists or SGLT2 inhibitors for patients at high cardiovascular risk. Monitoring should include regular HbA1c checks (every 3-6 months) and annual screenings for diabetic retinopathy, nephropathy, and neuropathy.",
        "A 54-year-old female presents with advanced breast cancer (Stage III, ER/PR-positive, HER2-negative). She has undergone a mastectomy and is currently receiving chemotherapy (doxorubicin and cyclophosphamide). The patient is experiencing side effects, including nausea and fatigue, and is managed with antiemetics and lifestyle changes. She will begin hormone therapy with Tamoxifen after completing chemotherapy.",
        "A 38-year-old male presents with newly diagnosed Crohn's disease, experiencing chronic diarrhea, abdominal pain, and weight loss. Colonoscopy revealed inflammation and ulcerations in the terminal ileum. He has been started on mesalamine and corticosteroids for inflammation control. The patient is also scheduled for a follow-up to assess treatment efficacy and possible escalation to biologics if necessary.",
        "A 25-year-old female with a history of polycystic ovary syndrome (PCOS) presents with irregular menstrual cycles and weight gain. The patient has been on combined oral contraceptives for cycle regulation and Metformin for insulin resistance. Lifestyle modifications, including a low-glycemic-index diet and increased physical activity, have been recommended. The patient is also being monitored for potential development of type 2 diabetes.",
        "A 40-year-old male with obesity (BMI 35) presents with obstructive sleep apnea (OSA) and loud snoring. He underwent a sleep study, which revealed moderate OSA with an apnea-hypopnea index (AHI) of 20. The patient has been started on continuous positive airway pressure (CPAP) therapy at night and has been advised to lose weight. Follow-up is scheduled to evaluate treatment effectiveness and consider further interventions if necessary.",
        "A 30-year-old female with a history of migraine presents with severe, recurring headaches that are associated with nausea, vomiting, and photophobia. The patient is currently on Sumatriptan 100 mg as needed, but she reports frequent attacks (4-5 per month). She has been advised to start on Propranolol 40 mg daily as a preventive treatment, with follow-up planned in 3 months to evaluate the reduction in attack frequency.",
        "A 45-year-old male presents with newly diagnosed non-alcoholic fatty liver disease (NAFLD). The patient is overweight (BMI 32) with mild elevation in liver enzymes (ALT 65 U/L, AST 58 U/L). He has been advised to lose weight through diet and exercise, targeting a 7-10% weight reduction. The patient was also counseled on the risks of progression to non-alcoholic steatohepatitis (NASH) and cirrhosis if lifestyle modifications are not followed.",
        "A 68-year-old male with a history of Parkinson's disease presents with worsening tremors and bradykinesia. He is currently taking Levodopa-Carbidopa 100/25 mg three times daily but reports increasing difficulty with daily activities such as dressing and eating. His neurologist recommended adjusting his medication dosage and adding a dopamine agonist (Pramipexole) to improve motor symptoms. He will follow up in 3 months to reassess motor function.",
        "A 70-year-old female presents with advanced chronic kidney disease (Stage 4, eGFR 25 mL/min). The patient is hypertensive and currently on Losartan 100 mg daily, with controlled blood pressure (130/80 mmHg). She has been advised to follow a low-sodium, low-protein diet and referred to a nephrologist for pre-dialysis counseling. Hemoglobin levels are being monitored for anemia management, and erythropoietin therapy is under consideration.",
        "A 62-year-old male with a 15-year history of type 2 diabetes and diabetic nephropathy presents with albuminuria (urine albumin-to-creatinine ratio of 300 mg/g). He is currently on Metformin 1000 mg twice daily and Lisinopril 10 mg daily for renal protection. Blood pressure is well controlled at 120/75 mmHg. The patient has been counseled on tight glycemic control, targeting an HbA1c of less than 7%, and a low-protein diet to slow kidney disease progression.",
        "A 55-year-old female with a history of breast cancer (Stage II, ER-positive) presents for a follow-up. She underwent a lumpectomy and radiation therapy 18 months ago and is currently on Tamoxifen 20 mg daily. She reports mild hot flashes and joint stiffness as side effects. A recent mammogram and ultrasound showed no evidence of disease recurrence. The patient has been advised to continue Tamoxifen and return in 6 months for routine follow-up.",
        "A 75-year-old male with a history of stroke 5 years ago presents with mild cognitive decline and worsening balance. He is on Aspirin 81 mg daily for stroke prevention and Simvastatin 40 mg for hyperlipidemia. His Mini-Mental State Examination (MMSE) score is 25/30, indicating mild cognitive impairment. The patient has been referred for physical therapy to improve balance and prevent falls. Cognitive stimulation exercises are also recommended.",
        "A 23-year-old female presents with newly diagnosed generalized anxiety disorder (GAD). She reports persistent worry, muscle tension, and difficulty concentrating, particularly over the past 6 months. The patient has been started on Sertraline 50 mg daily and referred for cognitive-behavioral therapy (CBT). She has also been advised to engage in relaxation exercises and mindfulness practices. A follow-up is planned in 4 weeks to assess symptom improvement.",
        "A 50-year-old male presents with severe osteoarthritis of the right knee, with joint pain, stiffness, and swelling, which has worsened over the last year. He has tried conservative management, including physical therapy and NSAIDs (Ibuprofen 600 mg). Due to his poor response, the patient has been referred for orthopedic evaluation and is considering total knee arthroplasty (knee replacement surgery). He is also receiving injections of corticosteroids to reduce inflammation while awaiting surgery.",
        "A 40-year-old female with a history of hypothyroidism presents for a follow-up. She is on Levothyroxine 100 mcg daily, and her most recent thyroid-stimulating hormone (TSH) level is within the normal range at 1.8 mIU/L. The patient reports stable energy levels and no significant symptoms. She has been advised to continue her current dose and return in 6 months for a routine check of TSH levels.",
        "A 65-year-old male with a history of prostate cancer, currently in remission, presents for routine monitoring. He had undergone a radical prostatectomy 3 years ago and completed adjuvant radiation therapy. His most recent prostate-specific antigen (PSA) level is undetectable, and there are no signs of recurrence. The patient remains asymptomatic and has been advised to continue PSA monitoring every 6 months.",
        "A 16-year-old male with newly diagnosed asthma presents with wheezing and shortness of breath, particularly during exercise. Spirometry shows an FEV1/FVC ratio of 70%. The patient has been started on Albuterol inhaler as needed for acute symptoms and Fluticasone inhaler twice daily for long-term control. He has been advised on asthma triggers, including allergens and exercise, and to follow up in 1 month for reassessment of lung function.",
        "Management of Chronic Obstructive Pulmonary Disease (COPD): COPD management focuses on reducing symptoms, improving quality of life, and preventing exacerbations. First-line therapy includes inhaled bronchodilators (short-acting beta-agonists, long-acting muscarinic antagonists), and inhaled corticosteroids may be added for patients with frequent exacerbations. Smoking cessation, pulmonary rehabilitation, and vaccination against influenza and pneumococcal infections are recommended for all patients with COPD.",
        "A 29-year-old female presents with recurrent urinary tract infections (UTIs) over the past 6 months. She has had three episodes requiring antibiotics (Trimethoprim-Sulfamethoxazole) and reports dysuria, frequency, and urgency during each episode. The patient has been advised on hygiene measures and has been started on prophylactic antibiotics (Nitrofurantoin 100 mg daily) to prevent future infections. A urine culture is pending to guide further treatment.",
        "A 60-year-old female with newly diagnosed osteopenia based on a bone density scan (T-score of -1.8). She has been advised to increase her calcium and vitamin D intake through diet and supplements. Weight-bearing exercises have been recommended to improve bone strength, and she has been started on Alendronate 70 mg weekly to prevent progression to osteoporosis. A follow-up bone density scan is scheduled for 2 years later to monitor her bone health."
    ]

embeddings = embedding_model.encode(knowledge_base)
for i, doc in enumerate(knowledge_base):
    collection.add(
        embeddings=[embeddings[i].tolist()],
        documents=[doc],
        ids=[f"record_{i}"]
    )


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

### Retrieving Relevant Context from Chroma DB


In [14]:
def retrieve_relevant_context(query, k=2):
    query_embedding = embedding_model.encode([query])
    results = collection.query(
        query_embeddings=query_embedding.tolist(),
        n_results=k
    )
    return results['documents']

### Combining Retrieval with Generation (RAG Pipeline)


In [15]:
def generate_synthetic_medical_record(query):
    retrieved_context = " ".join(retrieve_relevant_context(query, k=2)[0])
    prompt_with_context = query + "\nContext:\n" + retrieved_context
    input_ids = tokenizer(prompt_with_context, return_tensors="pt").input_ids
    generated_text = model.generate(input_ids, max_length=200)
    return tokenizer.decode(generated_text[0], skip_special_tokens=True)

In [17]:
query = "diabetic patient with hypertension"
synthetic_data = generate_synthetic_medical_record(query)

print("Synthetic Data: ", synthetic_data)


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Synthetic Data:  diabetic patient with hypertension Context: Clinical Guideline 1: Management of Hypertension in Adults: Hypertension management involves lifestyle changes (dietary sodium reduction, weight loss, physical activity) and pharmacologic treatment. First-line agents include ACE inhibitors, ARBs, calcium channel blockers, and thiazide diuretics. Patients with comorbidities such as diabetes or chronic kidney disease should be managed with ACE inhibitors or ARBs for renal protection. Blood pressure targets are generally below 130 / 80 mmHg for high-risk individuals. Patient Record 1: A 52-year-old male presents with Type 2 Diabetes and is currently on Metformin 500 mg twice daily. He has a history of obesity, hypertension (treated with Lisinopril 10 mg daily), and hyperlipidemia (treated with Atorvastatin 20 mg daily). His HbA1c is 8.3%, and his fasting blood glucose levels average 160 mg / dL. The patient is experiencing mild peripheral neuropathy and has been recommended life