In [27]:
import numpy as np
import pandas as pd
# Load the raw EHR extract and report its size and per-column missing counts.
df = pd.read_csv('datasets/EHR.csv')
print('Initial shape:', df.shape)
print('Missing values per column:')
print(df.isnull().sum())

# Remove exact duplicate rows before imputing so they do not skew the fill values.
df = df.drop_duplicates()

# Impute missing values: numeric columns get the column mean, every other
# column gets its most frequent value.
for col in df.columns:
    if not df[col].isnull().any():
        continue  # nothing to fill in this column
    if pd.api.types.is_numeric_dtype(df[col]):
        df[col] = df[col].fillna(df[col].mean())
    else:
        modes = df[col].mode()
        # mode() is empty when every value in the column is missing; indexing
        # it would raise, so such a column is left as-is and will still show
        # up in the missing-value report below.
        if not modes.empty:
            df[col] = df[col].fillna(modes.iloc[0])

print('Cleaned shape:', df.shape)
print('Missing values after cleaning:')
print(df.isnull().sum())

Initial shape: (1447, 29)
Missing values per column:
patientunitstayid              0
patienthealthsystemstayid      0
gender                         3
age                            1
ethnicity                     42
hospitalid                     0
wardid                         0
apacheadmissiondx            180
admissionheight               45
hospitaladmittime24            0
hospitaladmitoffset            0
hospitaladmitsource          229
hospitaldischargeyear          0
hospitaldischargetime24        0
hospitaldischargeoffset        0
hospitaldischargelocation      8
hospitaldischargestatus        7
unittype                       0
unitadmittime24                0
unitadmitsource               19
unitvisitnumber                0
unitstaytype                   0
admissionweight              134
dischargeweight              576
unitdischargetime24            0
unitdischargeoffset            0
unitdischargelocation          5
unitdischargestatus            2
uniquepid              

In [36]:
import getpass
import os

import google.generativeai as genai

# Credentials must not live in the notebook: read the key from the
# GOOGLE_API_KEY environment variable, or prompt for it without echoing.
# NOTE(review): a literal key used to be embedded on this line; treat that key
# as compromised and revoke it.
genai.configure(
    api_key=os.environ.get("GOOGLE_API_KEY") or getpass.getpass("Google API key: ")
)

def clean_text(text):
    """Return ``text`` with every ``*`` and ``#`` character removed."""
    stripped = text
    for marker in ('*', '#'):
        stripped = stripped.replace(marker, '')
    return stripped

def generate_report_from_ehr(uniquepid):
    """Print a demo clinical report for one patient, generated from the EHR table.

    Looks up the first row of the module-level ``df`` whose ``uniquepid``
    column equals ``uniquepid``, renders every column of that row as a
    ``name: value`` line, sends the result to the ``gemini-2.5-flash`` model
    and prints the reply after passing it through ``clean_text``.

    Args:
        uniquepid: Identifier compared for equality against ``df['uniquepid']``.

    Returns:
        None. The report, or a message explaining why none was produced, is
        written to stdout.
    """
    # Guard clauses first, so the main path is not nested.
    if 'uniquepid' not in df.columns:
        print('uniquepid column not found in EHR dataset.')
        return

    patient_data = df[df['uniquepid'] == uniquepid]
    if patient_data.empty:
        print(f'No data found for uniquepid: {uniquepid}')
        return

    # Only the first matching row is used, even when several rows match.
    ehr_row = patient_data.iloc[0]
    ehr_text = "\n".join(f"{col}: {val}" for col, val in ehr_row.items())
    prompt = f"Generate a clinical report and ICD-10 code for a brain tumour patient using the following EHR data with CT and MRI. Just give a random findings as it is just a demo. Just remove the asterisks and stars.\n{ehr_text}"
    model = genai.GenerativeModel('gemini-2.5-flash')
    response = model.generate_content(prompt)

    try:
        report = response.text
    except ValueError as exc:
        # Per the google-generativeai docs, the `.text` accessor raises
        # ValueError when the reply contains no text part (for example when
        # the response was blocked). Report that instead of crashing the cell.
        print(f'No report text was returned by the model: {exc}')
        return

    print(clean_text(report))

# Strip surrounding whitespace so a padded or pasted ID still matches the exact
# equality comparison performed inside generate_report_from_ehr.
uniquepid = input('Enter uniquepid (from EHR): ').strip()
generate_report_from_ehr(uniquepid)

 Clinical Report

Patient Demographics:
   Patient ID: 200026 (uniquepid: 002-10715)
   Gender: Male
   Age: 50 years
   Ethnicity: Caucasian
   Admission Height: 177.8 cm
   Admission Weight: 106.1 kg

Hospital and Unit Stay Details:
   Hospital Admitting Source: Emergency Department
   Hospital Admission Date/Time: January 2, 2014, 10:41
   Unit Type: Med-Surg ICU
   Unit Admitting Source: Operating Room
   Unit Admission Date/Time: January 2, 2014, 15:52
   Unit Discharge Date/Time: January 3, 2014, 17:40
   Hospital Discharge Date/Time: January 3, 2014, 18:17
   Discharge Location: Home
   Discharge Status: Alive

Presenting Complaint and History (Hypothetical for Brain Tumor Demo):
Patient presented to the Emergency Department with a several-week history of progressively worsening headaches, associated with nausea and occasional vomiting. Over the past few days, the patient also developed new-onset right-sided weakness and expressive aphasia. There was no prior history of neurolog