In [None]:
!pip install gradio pandas transformers
!pip install gradio
!pip install gradio pandas matplotlib seaborn transformers torch


import gradio as gr
import pandas as pd
from transformers import pipeline




In [None]:
# Load zero-shot classification model
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Device set to use cpu


In [None]:
candidate_labels = [
    "financial insecurity",
    "transportation issues",
    "housing instability",
    "food insecurity",
    "employment challenges",
    "family support",
    "education barriers",
    "interpersonal violence",
    "mental health concerns",
    "substance abuse",
    "legal problems",
    "immigration stress",
    "childcare needs",
    "disability-related barriers",
    "healthcare access issues",
    "social isolation",
    "digital divide or technology access",
    "discrimination or stigma",
    "language barriers",
    "chronic illness burden"
]


In [None]:
# Exhaustive remedial suggestions
remedies = {
    "financial insecurity": "Connect with financial counseling services, emergency funds, or government aid programs (e.g., SNAP, TANF).",
    "transportation issues": "Refer to non-emergency medical transport services, ride-share vouchers, or community shuttle programs.",
    "housing instability": "Coordinate with housing assistance programs, shelters, or supportive housing services.",
    "food insecurity": "Suggest food banks, meal delivery services, or SNAP enrollment.",
    "employment challenges": "Refer to workforce development programs, job placement services, or vocational rehab.",
    "family support": "Offer family counseling, caregiver support groups, or parenting resources.",
    "education barriers": "Link to adult education, GED programs, or school reentry support.",
    "interpersonal violence": "Provide access to domestic violence hotlines, shelters, or legal support services.",
    "mental health concerns": "Refer to counseling, psychiatry, peer support groups, or crisis intervention.",
    "substance abuse": "Recommend addiction recovery programs, rehab centers, or harm reduction services.",
    "legal problems": "Suggest legal aid clinics or social justice advocacy groups.",
    "immigration stress": "Connect with immigration attorneys, cultural liaison programs, or mental health services for immigrants.",
    "childcare needs": "Offer daycare resources, childcare subsidies, or parenting classes.",
    "disability-related barriers": "Refer to occupational therapy, assistive devices, or disability rights support.",
    "healthcare access issues": "Arrange for low-cost clinics, telehealth options, or insurance navigation help.",
    "social isolation": "Recommend community engagement programs, peer groups, or volunteer match services.",
    "digital divide or technology access": "Suggest tech literacy classes, device loan programs, or free Wi-Fi access points.",
    "discrimination or stigma": "Refer to advocacy groups, diversity training, or mental health support.",
    "language barriers": "Offer translation services, ESL programs, or bilingual health navigation.",
    "chronic illness burden": "Provide disease-specific education, home care services, or care coordination."
}

def predict_single(note):
    result = classifier(note, candidate_labels)
    top_label = result['labels'][0]
    scores = {label: f"{score:.2f}" for label, score in zip(result['labels'], result['scores'])}
    remedy = remedies.get(top_label, "No specific remedy available.")

    return (
        f"**Top SDoH Prediction:** {top_label}\n\n"
        f"**Suggested Remedy:** {remedy}\n\n"
        f"**All Scores:**\n" + "\n".join([f"{k}: {v}" for k,v in scores.items()])
    )


In [None]:
def predict_bulk(file):
    df = pd.read_csv(file.name)
    if 'note' not in df.columns:
        return "CSV must contain a column named 'note'.", None, None, None

    results = []
    remedies_list = []

    for note in df['note']:
        result = classifier(note, candidate_labels)
        top_label = result['labels'][0]
        remedy = remedies.get(top_label, "No specific remedy available.")
        results.append(top_label)
        remedies_list.append(remedy)

    df['Predicted_SDoH'] = results
    df['Remedy'] = remedies_list

    # Frequency count
    freq = df['Predicted_SDoH'].value_counts().reset_index()
    freq.columns = ['SDoH Category', 'Count']

    # Create bar chart
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.bar(freq['SDoH Category'], freq['Count'], color='skyblue')
    plt.xticks(rotation=45, ha='right')
    plt.title('Frequency of SDoH Predictions')
    plt.xlabel('SDoH Category')
    plt.ylabel('Count')
    plt.tight_layout()

    # Save figure
    fig_path = "/tmp/sdoh_freq_plot.png"
    fig.savefig(fig_path)
    plt.close(fig)

    # Save CSV content
    output_csv = df.to_csv(index=False)

    return df, freq, fig_path, output_csv


In [None]:

import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from concurrent.futures import ThreadPoolExecutor
from transformers import pipeline

# Load classifier
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Candidate labels
candidate_labels = [
    "financial insecurity", "transportation issues", "housing instability",
    "food insecurity", "employment challenges", "family support",
    "depression", "anxiety", "bipolar disorder", "schizophrenia",
    "trauma", "self-esteem issues", "guilt", "shame", "societal isolation",
    "addiction", "eating disorders", "grief", "anger issues", "stress"
]

# Remedies
def get_remedies(label):
    remedies_dict = {
        "financial insecurity": "Financial counseling, job assistance programs, government aid.",
        "transportation issues": "Public transport access, ride-sharing programs, subsidized transportation.",
        "housing instability": "Affordable housing programs, rent assistance, homelessness shelters.",
        "food insecurity": "Food banks, meal assistance programs, nutritional counseling.",
        "employment challenges": "Career counseling, skill development programs, job training.",
        "family support": "Family therapy, parenting support, social work services.",
        "depression": "Cognitive Behavioral Therapy (CBT), Medication (SSRIs, SNRIs, TCA), Exercise.",
        "anxiety": "Cognitive Behavioral Therapy (CBT), Medication (SSRIs, SNRIs), Mindfulness.",
        "bipolar disorder": "Medication (Lithium, Antipsychotics), Psychotherapy (CBT, IPT).",
        "schizophrenia": "Antipsychotic medication, Psychosocial therapy, Rehabilitation.",
        "trauma": "Trauma-focused therapy, EMDR, PTSD treatment.",
        "self-esteem issues": "Therapy (CBT, Self-compassion), Support groups, Mindfulness.",
        "guilt": "Therapy (CBT, Self-forgiveness), Support groups, Mindfulness.",
        "shame": "Therapy (CBT, Shame resilience training), Support groups.",
        "societal isolation": "Community engagement, Therapy, Social skill training.",
        "addiction": "Detox programs, Rehabilitation, Support groups (AA, NA).",
        "eating disorders": "Nutrition therapy, Cognitive Behavioral Therapy (CBT), Support groups.",
        "grief": "Grief counseling, Support groups, Time management.",
        "anger issues": "Anger management therapy, Mindfulness, Relaxation techniques.",
        "stress": "Stress management techniques, Therapy, Exercise, Relaxation techniques."
    }
    return remedies_dict.get(label, "No specific remedy found.")

# Prediction for single input
def predict_single(note):
    prediction = classifier(note, candidate_labels)
    top_label = prediction['labels'][0]
    top_score = prediction['scores'][0]
    remedies = get_remedies(top_label)

    # Heatmap data preparation
    heatmap_df = pd.DataFrame([prediction['scores']], columns=prediction['labels'])

    # Create heatmap
    fig, ax = plt.subplots(figsize=(10, 1.5))
    sns.heatmap(heatmap_df, annot=False, cmap="YlGnBu", cbar=True, ax=ax)
    ax.set_title("Confidence Heatmap (Single Note)")
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
    heatmap_path = "/tmp/single_heatmap.png"
    fig.tight_layout()
    fig.savefig(heatmap_path)
    plt.close(fig)

    return f"**Problem:** {top_label}\n\n**Remedy:** {remedies}\n\n**Confidence:** {top_score:.2f}", heatmap_path

# Prediction for bulk CSV input
def predict_bulk(file):
    df = pd.read_csv(file.name)
    if 'note' not in df.columns:
        return "CSV must contain a column named 'note'.", None, None, None, None

    results = []
    confidence_data = []

    with ThreadPoolExecutor() as executor:
        predictions = list(executor.map(classifier, df['note'], [candidate_labels]*len(df['note'])))

    for note, prediction in zip(df['note'], predictions):
        top_label = prediction['labels'][0]
        top_score = prediction['scores'][0]
        remedies = get_remedies(top_label)
        results.append({
            'note': note,
            'Predicted_SDoH': top_label,
            'Confidence Score': round(top_score, 4),
            'Remedies': remedies
        })
        confidence_data.append(dict(zip(prediction['labels'], prediction['scores'])))

    result_df = pd.DataFrame(results)

    # Histogram
    freq = result_df['Predicted_SDoH'].value_counts().reset_index()
    freq.columns = ['SDoH Category', 'Count']
    fig1, ax1 = plt.subplots(figsize=(10, 5))
    ax1.bar(freq['SDoH Category'], freq['Count'], color='skyblue')
    plt.xticks(rotation=45, ha='right')
    plt.title('SDoH Category Frequency')
    plt.tight_layout()
    hist_path = "/tmp/bulk_histogram.png"
    fig1.savefig(hist_path)
    plt.close(fig1)

    # Heatmap
    heatmap_df = pd.DataFrame(confidence_data).fillna(0)
    fig2, ax2 = plt.subplots(figsize=(12, 6))
    sns.heatmap(heatmap_df, cmap="YlGnBu", cbar=True, ax=ax2)
    plt.title("Confidence Score Heatmap (Bulk)")
    plt.xlabel("SDoH Category")
    plt.ylabel("Note Index")
    plt.tight_layout()
    heatmap_path = "/tmp/bulk_heatmap.png"
    fig2.savefig(heatmap_path)
    plt.close(fig2)

    return result_df, freq, hist_path, heatmap_path, result_df.to_csv(index=False)

# Interface
with gr.Blocks() as demo:
    gr.Markdown("# 🏥 SDoH Extractor & Visualizer")
    gr.Markdown("Upload a clinical note or CSV file to detect Social Determinants of Health and visualize confidence.")

    with gr.Tab("Single Note"):
        input_text = gr.Textbox(lines=4, placeholder="Paste clinical note here...")
        output_text = gr.Markdown()
        output_heatmap = gr.Image()
        btn1 = gr.Button("Predict")
        btn1.click(fn=predict_single, inputs=input_text, outputs=[output_text, output_heatmap])

    with gr.Tab("Bulk (CSV Upload)"):
        input_file = gr.File(file_types=[".csv"])
        output_table = gr.Dataframe()
        output_freq = gr.Dataframe()
        output_hist = gr.Image()
        output_heat = gr.Image()
        download_btn = gr.File()

        btn2 = gr.Button("Predict CSV")
        btn2.click(fn=predict_bulk,
                   inputs=input_file,
                   outputs=[output_table, output_freq, output_hist, output_heat, download_btn])

demo.launch(share=True)


Device set to use cpu


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://77b723d0fa335c176b.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [2]:
!pip install gradio pandas matplotlib


Collecting gradio
  Downloading gradio-5.29.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.0 (from gradio)
  Downloading gradio_client-1.10.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6

In [8]:
import gradio as gr
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from transformers import pipeline
from concurrent.futures import ThreadPoolExecutor

# Initialize the classifier (assuming Hugging Face transformer model for SDoH)
classifier = pipeline('zero-shot-classification', model='facebook/bart-large-mnli')

# Candidate labels for SDoH
candidate_labels = [
    "Financial Stress", "Work Stress", "Trauma", "Social Disconnection",
    "Depression", "Anxiety", "Guilt", "Loneliness", "Unemployment",
    "Low Self-Esteem"
]

# Sample remedies based on labels (customize as per your needs)
def get_remedies(label):
    remedies = {
        "Financial Stress": "Seek financial counseling, explore budgeting tools.",
        "Work Stress": "Take breaks, speak with a supervisor, consider therapy.",
        "Trauma": "Consider trauma-informed care, seek emotional support.",
        "Social Disconnection": "Engage in social groups, seek therapy.",
        "Depression": "Consider seeking mental health support, regular exercise.",
        "Anxiety": "Breathing exercises, yoga, professional therapy.",
        "Guilt": "Work on self-forgiveness, speak to a counselor.",
        "Loneliness": "Engage in hobbies, reach out to friends/family.",
        "Unemployment": "Explore career counseling, skill development programs.",
        "Low Self-Esteem": "Consider therapy, set small achievable goals."
    }
    return remedies.get(label, "Seek professional help.")

# Function to generate and save heatmap
def plot_confidence_heatmap(confidence_data, candidate_labels):
    """
    Generates and saves a heatmap for the confidence scores of SDoH predictions.

    :param confidence_data: List of confidence scores for each prediction
    :param candidate_labels: List of all possible candidate labels
    """
    confidence_data = np.array(confidence_data)

    # Create the heatmap plot
    plt.figure(figsize=(12, 8))  # Adjust figure size for better visibility
    sns.set(font_scale=1.2)  # Adjust font scale for labels
    ax = sns.heatmap(confidence_data, annot=True, cmap="YlGnBu",
                     xticklabels=candidate_labels, yticklabels=range(1, confidence_data.shape[0]+1),
                     cbar=True, linewidths=0.5)

    ax.set_xlabel('Candidate Labels')
    ax.set_ylabel('Note Index')
    ax.set_title('Confidence Scores Heatmap')

    # Adjust layout and make it more readable
    plt.tight_layout()

    # Save the figure
    plt.savefig('/tmp/heatmap.png')  # Save it in the temp directory or any desired location
    plt.close()

# Prediction for single input
def predict_single(note):
    prediction = classifier(note, candidate_labels)
    top_label = prediction['labels'][0]
    top_score = prediction['scores'][0]
    remedies = get_remedies(top_label)

    # Create heatmap data (just the scores)
    heatmap_data = np.array([prediction['scores']])

    # Generate and save the heatmap
    plot_confidence_heatmap(heatmap_data, candidate_labels)

    return f"*Problem:* {top_label}\n\n*Remedy:* {remedies}\n\n*Confidence:* {top_score:.2f}"

# Prediction for bulk CSV input
def predict_bulk(file):
    df = pd.read_csv(file.name)
    if 'note' not in df.columns:
        return "CSV must contain a column named 'note'.", None, None, None, None

    results = []
    confidence_data = []

    with ThreadPoolExecutor() as executor:
        predictions = list(executor.map(classifier, df['note'], [candidate_labels]*len(df['note'])))

    for note, prediction in zip(df['note'], predictions):
        top_label = prediction['labels'][0]
        top_score = prediction['scores'][0]
        remedies = get_remedies(top_label)
        results.append({
            'note': note,
            'Predicted_SDoH': top_label,
            'Confidence Score': round(top_score, 4),
            'Remedies': remedies
        })
        confidence_data.append(dict(zip(prediction['labels'], prediction['scores'])))

    result_df = pd.DataFrame(results)

    # Generate and save the heatmap for the bulk predictions
    plot_confidence_heatmap(confidence_data, candidate_labels)

    return result_df, None, None, None, result_df.to_csv(index=False)

# Gradio UI Setup
def main():
    gr.Interface(
        fn=predict_single,
        inputs="text",
        outputs=["text", gr.Image("/tmp/heatmap.png")],
        live=True
    ).launch()

    gr.Interface(
        fn=predict_bulk,
        inputs=gr.File(label="Upload CSV"),
        outputs=[gr.DataFrame(), gr.Image("/tmp/heatmap.png")],
        live=True
    ).launch()

# Running the Gradio interface
if __name__ == "__main__":
    main()


Device set to use cpu


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://0bf278b51739a2e162.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://095fb5036042b23cc9.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
