In [2]:
!pip install pandas transformers



In [7]:
import pandas as pd
from transformers import pipeline

# --- ICD-10 Coding Automation (Lookup Table) ---
icd10_codes = {
    "Pneumonia": "J18.9",
    "hypertension": "I10",
    "ablation or mapping of cardiac conduction pathway": "Z98.89",
    "endarterectomy, carotid": "I70.0",
    "infarction, acute myocardial (mi)": "I21.9",
    "cabg alone, coronary artery bypass grafting": "Z95.1",
    "emphysema/bronchitis": "J44.9",
    "alcohol withdrawal": "F10.93",
    "overdose, sedatives, hypnotics, antipsychotics, benzodiazepines": "T43.501A",
    "diabetic ketoacidosis": "E10.10",
    "overdose, alcohols (bethanol, methanol, ethylene glycol)": "T51.0X1A",
    "cva, cerebrovascular accident/stroke": "I64",
    "renal failure, acute": "N17.9",
    "pneumonia, viral": "J12.9",
    "angina, unstable (angina interferes w/quality of life or meds are tolerated poorly)": "I20.0",
    "mi admitted > 24 hrs after onset of ischemia": "I21.4",
    "aortic valve replacement (isolated)": "Z95.2",
    "cabg with other operation": "Z95.1",
    "aortic and mitral valve replacement": "Z95.3",
    "aneurysm, thoracic aortic; with dissection": "I71.0",
    "chf, congestive heart failure": "I50.9",
    "pneumonia": "J18.9",
    "sepsis, other": "A41.9",
    "rhythm disturbance (atrial, supraventricular)": "I49.9",
    "Diabetes": "E11.9",
    "Hypertension": "I10",
    "Migraine": "G43.9",
    "Asthma": "J45.9",
    "COVID-19": "U07.1",
    "Bronchitis": "J20.9",
    "Heart Failure": "I50.9",
    "Chronic Kidney Disease": "N18.9",
    "Anemia": "D64.9",
    "Stroke": "I63.9",
    "Influenza": "J10.1",
    "Depression": "F32.9",
    "Anxiety": "F41.9",
    "Osteoarthritis": "M19.9",
    "Gastroenteritis": "A09",
    "Appendicitis": "K35.9",
    "Epilepsy": "G40.9",
    "Hyperthyroidism": "E05.9",
    "Hypothyroidism": "E03.9"

}

def get_icd10_code(diagnosis):
    """
    Looks up the ICD-10 code from the predefined lookup table.
    """
    normalized_diagnosis = diagnosis.lower()
    for key, code in icd10_codes.items():
        if key in normalized_diagnosis:
            return code
    return "Unknown"

# --- Clinical Note Generation (Free Model) ---
try:
    # This will download the model the first time you run it.
    generator = pipeline("text-generation", model="distilgpt2")
except Exception as e:
    print(f"Error loading the model: {e}")
    generator = None

def generate_clinical_note_hf(age, gender, symptoms, diagnosis):
    """
    Generates a clinical note using the free Hugging Face model.
    """
    if not generator:
        return "Model not loaded. Please check your internet connection."

    prompt = f"""
    You are a clinical documentation assistant. Given the following patient data, write a clinical note in proper medical format.
    Patient: {age}-year-old {gender}
    Presenting complaints: {symptoms}
    Assessment: Likely {diagnosis} based on symptoms.
    Plan: Recommend further evaluation and treatment as appropriate for {diagnosis}.
    """

    try:
        response = generator(prompt, max_length=150, num_return_sequences=1)
        return response[0]['generated_text']
    except Exception as e:
        return f"Error generating note: {e}"

Device set to use cpu


In [8]:
def main():
    """Main function to read data, process it, and save the output."""
    input_file = "patient_data.csv"
    output_file = "final_patient_data_with_notes.csv"

    try:
        patients_df = pd.read_csv(input_file).fillna("")
        print("Patient data loaded successfully.")
    except FileNotFoundError:
        print(f"Error: The file '{input_file}' was not found. Please ensure it is in the same folder.")
        return

    required_cols = ['age', 'gender', 'symptoms', 'diagnosis']
    if not all(col in patients_df.columns for col in required_cols):
        print(f"Error: The CSV file must contain the following columns: {required_cols}")
        print("Columns found:", patients_df.columns.tolist())
        return

    patients_df['Clinical_Note'] = ""
    patients_df['ICD-10_Code'] = ""

    for index, row in patients_df.iterrows():
        Age = row['age']
        Gender = row['gender']
        Symptoms = row['symptoms']
        Diagnosis = row['diagnosis']

        # Call the free Hugging Face model function
        clinical_note = generate_clinical_note_hf(Age, Gender, Symptoms, Diagnosis)
        patients_df.loc[index, 'Clinical_Note'] = clinical_note

        # Get the ICD-10 code
        icd_code = get_icd10_code(Diagnosis)
        patients_df.loc[index, 'ICD-10_Code'] = icd_code

    patients_df.to_csv(output_file, index=False)
    print(f"✅ Process completed. Results saved to '{output_file}'")
    excel_output_file = "final_patient_data_with_notes.xlsx"
    output_df = patients_df[['gender', 'age', 'symptoms', 'diagnosis', 'ICD-10_Code']]
    output_df.to_excel(excel_output_file, index=False)
    print(f"✅ Process completed. Results saved to '{excel_output_file}'")

if __name__ == "__main__":
    main()

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Patient data loaded successfully.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Setting `pad_token_id` to `eos_tok

✅ Process completed. Results saved to 'final_patient_data_with_notes.csv'
✅ Process completed. Results saved to 'final_patient_data_with_notes.xlsx'
