<a href="https://colab.research.google.com/github/manikanta5315/Bayes_Assignments/blob/main/sdoh_extract_from_clinical_notes_assign3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
! pip install gradio



In [10]:
import pandas as pd
import re
import json
import gradio as gr

In [11]:
# Function to read the SDOH CSV/Excel file
def read_sdo_data(sdo_file):
    """Load the SDOH factor/code table from a CSV or Excel file.

    Args:
        sdo_file: Either an uploaded-file object that exposes its on-disk
            path as ``.name``, or a plain path (``str`` / ``os.PathLike``).

    Returns:
        pandas.DataFrame: The parsed contents of the file.

    Raises:
        ValueError: If no file was supplied, or if the file extension is not
            ``.csv``, ``.xlsx`` or ``.xls`` (compared case-insensitively).
    """
    import os

    if sdo_file is None:
        raise ValueError("No SDOH file provided. Please upload CSV or Excel files.")

    # Plain paths are used as-is; anything else is expected to carry its
    # path in `.name`, like the upload objects this function was written for.
    if isinstance(sdo_file, (str, os.PathLike)):
        path = os.fspath(sdo_file)
    else:
        path = sdo_file.name

    # Lower-case only for the extension test so "CODES.CSV" is accepted,
    # while the real path is still what gets opened.
    lowered = str(path).lower()

    if lowered.endswith('.csv'):
        return pd.read_csv(path)
    if lowered.endswith(('.xlsx', '.xls')):
        # Legacy .xls needs an extra pandas engine to be installed; if it is
        # missing, pandas raises and the caller sees that error.
        return pd.read_excel(path)
    raise ValueError("Unsupported file format. Please upload CSV or Excel files.")

In [12]:
# Function to read clinical notes file
def read_clinical_notes(clinical_notes_file):
    """Return the full text of a clinical-notes file.

    Args:
        clinical_notes_file: Either an uploaded-file object that exposes its
            on-disk path as ``.name``, or a plain path
            (``str`` / ``os.PathLike``).

    Returns:
        str: The file contents decoded as UTF-8. Bytes that are not valid
        UTF-8 are replaced with U+FFFD rather than aborting the read.

    Raises:
        ValueError: If no file was supplied.
        OSError: If the file cannot be opened.
    """
    import os

    if clinical_notes_file is None:
        raise ValueError("No clinical notes file provided.")

    if isinstance(clinical_notes_file, (str, os.PathLike)):
        path = os.fspath(clinical_notes_file)
    else:
        path = clinical_notes_file.name

    # An explicit encoding makes the result independent of the platform
    # default; errors="replace" keeps one stray byte from failing the whole
    # extraction.
    with open(path, "r", encoding="utf-8", errors="replace") as f:
        return f.read()

In [13]:
# Function to extract patient details from clinical notes
def extract_patient_details(clinical_notes):
    """Pull the labelled patient fields out of free-text clinical notes.

    Each field is located by a fixed label in the text (``Pt: ``,
    ``residing @ ``, ``Treating facility: ``, ``allergies to: ``, ``Dx: ``)
    and the text following the label on the same line is captured.

    Args:
        clinical_notes: The notes as a single string.

    Returns:
        dict: Keys ``"Name"``, ``"Address"``, ``"Hospital"``, ``"Allergies"``
        and ``"Major Medical Problem"``. Each value is the captured text with
        surrounding whitespace removed, or ``"N/A"`` when the label is absent
        or nothing follows it.
    """
    # The character classes use [ \t] rather than \s on purpose: \s also
    # matches newlines, which let a capture run on into the following line
    # (e.g. "John Doe\nresiding ").
    field_patterns = {
        "Name": r"Pt: ([\w \t.]+)",
        "Address": r"residing @ ([\w \t,]+)",
        "Hospital": r"Treating facility: ([\w \t,]+)",
        "Allergies": r"allergies to: ([\w \t,]+)",
        "Major Medical Problem": r"Dx: ([\w \t,]+)",
    }

    patient_details = {}
    for field, pattern in field_patterns.items():
        found = re.search(pattern, clinical_notes)
        # Strip padding; a whitespace-only capture counts as "not found".
        value = found.group(1).strip() if found else ""
        patient_details[field] = value or "N/A"

    return patient_details

In [14]:
def extract_sdo_factors(clinical_notes, sdo_df):
    """Find known SDOH factors in the notes and attach their codes.

    Args:
        clinical_notes: The notes as a single string.
        sdo_df: DataFrame with an ``'SDOH factor'`` column and a ``'Code'``
            column.

    Returns:
        list[dict]: One ``{"SDOH Factor": ..., "Code": ...}`` entry per factor
        whose keyword pattern occurs in the notes (case-insensitive), in the
        order the factors are declared below. ``"Code"`` is the first matching
        row's code as a plain Python value, or ``'N/A'`` when the table has no
        row (or only a missing value) for that factor.

    Raises:
        KeyError: If a factor is found in the notes but ``sdo_df`` lacks the
            ``'SDOH factor'`` or ``'Code'`` column.
    """
    # SDOH factors to check from clinical notes
    sdo_factors = {
        "Lack of access to clean water": r"clean water",
        "Poor housing conditions": r"condemned trailer park",
        "Inadequate nutrition": r"food",
        "Exposure to environmental pollutants": r"toxic exposure"
    }

    matched_sdoh = []

    for factor, pattern in sdo_factors.items():
        if not re.search(pattern, clinical_notes, re.IGNORECASE):
            continue

        # Compare on a trimmed, case-folded copy of the column so that stray
        # spaces or capitalisation differences in the table do not hide a row.
        table_keys = sdo_df['SDOH factor'].astype(str).str.strip().str.casefold()
        rows = sdo_df.loc[table_keys == factor.casefold()]

        code = 'N/A'  # No code found for this factor
        if not rows.empty:
            raw_code = rows['Code'].iloc[0]
            if not pd.isna(raw_code):
                # Numeric columns yield numpy scalars, which json.dumps cannot
                # serialise; .item() converts them to native Python values.
                code = raw_code.item() if hasattr(raw_code, "item") else raw_code

        matched_sdoh.append({
            "SDOH Factor": factor,
            "Code": code
        })

    return matched_sdoh


In [15]:
import gradio as gr

# Main function that integrates everything
def gradio_interface(sdo_file, clinical_notes_file):
    """Run the whole extraction pipeline and return the result as text.

    Reads the SDOH table and the clinical notes, extracts patient details and
    SDOH factors, and renders both as an indented JSON string.

    Args:
        sdo_file: Uploaded SDOH CSV/Excel file (or ``None`` if absent).
        clinical_notes_file: Uploaded clinical-notes text file (or ``None``
            if absent).

    Returns:
        str: A JSON document with ``"Patient Details"`` and ``"SDOH Factors"``
        on success; otherwise a message starting with ``"Error: "``. Failures
        are returned as text rather than raised, because the return value is
        what the interface displays.
    """
    # Check for missing inputs up front so the user gets an actionable message
    # instead of an attribute error from deeper in the pipeline.
    if sdo_file is None or clinical_notes_file is None:
        return "Error: Please upload both the SDOH file and the clinical notes file."

    try:
        # Read SDOH data
        sdo_df = read_sdo_data(sdo_file)

        # Read clinical notes
        clinical_notes = read_clinical_notes(clinical_notes_file)

        # Extract patient details
        patient_details = extract_patient_details(clinical_notes)

        # Extract SDOH factors and match with codes
        sdoh_factors = extract_sdo_factors(clinical_notes, sdo_df)

        # Prepare final result in JSON format
        result = {
            "Patient Details": patient_details,
            "SDOH Factors": sdoh_factors
        }

        # Values taken from a DataFrame may be numpy scalars, which the json
        # module rejects; fall back to .item() (native value) or str().
        return json.dumps(
            result,
            indent=4,
            default=lambda obj: obj.item() if hasattr(obj, "item") else str(obj),
        )
    except Exception as e:
        # Deliberately broad: any failure is shown to the user as text.
        return f"Error: {e}"


In [16]:
# Web UI: two file uploads go in, the pipeline's text (JSON) result comes out.
upload_inputs = [
    gr.File(label="Upload SDOH CSV/Excel file"),
    gr.File(label="Upload Clinical Notes File (Text Format)"),
]

iface = gr.Interface(
    fn=gradio_interface,
    inputs=upload_inputs,
    outputs="text",  # rendered as plain text
    live=False,
)

# share=True requests a public share link in addition to the local server.
iface.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://39a5335697a7ad0908.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


