In [1]:
import pandas as pd

# Load datasets
patients = pd.read_csv('patients.csv')
encounters = pd.read_csv('encounters.csv')
conditions = pd.read_csv('conditions.csv')


In [2]:
# Merge datasets to create unified patient records
# Step 1: Merge conditions with encounters
conditions_encounters = pd.merge(
    conditions, encounters, left_on="ENCOUNTER", right_on="Id", how="left"
)

# Step 2: Merge the result with patients
patient_records = pd.merge(
    conditions_encounters, patients, left_on="PATIENT_x", right_on="Id", how="left"
)

# Correct the column names and re-select key fields for summarization
summary_data = patient_records[
    [
        "PATIENT_x",
        "BIRTHDATE",
        "DESCRIPTION_x",  # Condition description
        "DESCRIPTION_y",  # Encounter description
        "START_x",  # Condition start
        "STOP_x",   # Condition end
        "CITY",
        "STATE",
        "HEALTHCARE_EXPENSES",
        "HEALTHCARE_COVERAGE",
    ]
]

# Rename columns for clarity
summary_data.columns = [
    "Patient_ID",
    "Birthdate",
    "Condition_Description",
    "Encounter_Description",
    "Condition_Start",
    "Condition_End",
    "City",
    "State",
    "Healthcare_Expenses",
    "Healthcare_Coverage",
]

# Display the cleaned and merged dataset
summary_data_cleaned = summary_data.head()
summary_data_cleaned

Unnamed: 0,Patient_ID,Birthdate,Condition_Description,Encounter_Description,Condition_Start,Condition_End,City,State,Healthcare_Expenses,Healthcare_Coverage
0,30a6452c-4297-a1ac-977a-6a23237c7b46,1994-02-06,Housing unsatisfactory (finding),General examination of patient (procedure),2012-04-01,,Braintree,Massachusetts,56904.96,18019.99
1,30a6452c-4297-a1ac-977a-6a23237c7b46,1994-02-06,Received higher education (finding),General examination of patient (procedure),2012-04-01,,Braintree,Massachusetts,56904.96,18019.99
2,30a6452c-4297-a1ac-977a-6a23237c7b46,1994-02-06,Transport problem (finding),General examination of patient (procedure),2012-04-01,,Braintree,Massachusetts,56904.96,18019.99
3,30a6452c-4297-a1ac-977a-6a23237c7b46,1994-02-06,Social isolation (finding),General examination of patient (procedure),2012-04-01,,Braintree,Massachusetts,56904.96,18019.99
4,30a6452c-4297-a1ac-977a-6a23237c7b46,1994-02-06,Stress (finding),General examination of patient (procedure),2012-04-01,2016-04-10,Braintree,Massachusetts,56904.96,18019.99


In [3]:
# Select a sample patient record for summarization
sample_record = summary_data.iloc[0]

# Create a structured summary for GPT input
record_for_prompt = f"""
Patient ID: {sample_record['Patient_ID']}
Birthdate: {sample_record['Birthdate']}
Condition: {sample_record['Condition_Description']}
Encounter: {sample_record['Encounter_Description']}
Condition Start: {sample_record['Condition_Start']}
Condition End: {sample_record['Condition_End']}
City: {sample_record['City']}
State: {sample_record['State']}
Healthcare Expenses: ${sample_record['Healthcare_Expenses']}
Healthcare Coverage: ${sample_record['Healthcare_Coverage']}
"""

# Display the structured input for the GPT prompt
record_for_prompt


'\nPatient ID: 30a6452c-4297-a1ac-977a-6a23237c7b46\nBirthdate: 1994-02-06\nCondition: Housing unsatisfactory (finding)\nEncounter: General examination of patient (procedure)\nCondition Start: 2012-04-01\nCondition End: nan\nCity: Braintree\nState: Massachusetts\nHealthcare Expenses: $56904.96\nHealthcare Coverage: $18019.99\n'

In [4]:
import openai

# Set OpenAI API key
openai.api_key = "sk-proj-.....pnnYkjnynwk8um-zGMJbadOXW7Q8NW128k24eF08n-S22IsqyumBRh4A"  


# Create a GPT prompt for summarization
def generate_summary(record):
    prompt = f"""
    You are a medical assistant. Summarize the following medical record in simple terms for a layperson.
    Provide explanations for any medical terms or conditions if necessary.
    Record: {record}
    """
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",  # Use a model available to your API key
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.7
        )
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        return f"Error generating summary: {e}"

# Test the summarization
summary = generate_summary(record_for_prompt)
print(summary)




Summary:
- Patient ID: 30a6452c-4297-a1ac-977a-6a23237c7b46
- Birthdate: February 6, 1994
- Location: Braintree, Massachusetts
- Condition: Housing unsatisfactory (finding)
- Encounter: General examination of patient
- Condition Start Date: April 1, 2012
- Healthcare Expenses: $56,904.96
- Healthcare Coverage: $18,019.99

Explanation:
- The medical record indicates that the patient's living situation is found to be unsatisfactory.
- The patient underwent a general examination during the medical encounter.
- The condition of unsatisfactory housing was noted to have started on April 1, 2012.
- The patient incurred healthcare expenses totaling $56,904.96, with $18,019.99 covered by healthcare insurance.

If you have any questions or need further clarification, feel free to ask!


In [5]:
# Summarize multiple patient records
def summarize_multiple_records(data, n_records=10):
    summaries = []
    for i in range(min(n_records, len(data))):  # Limit to n_records
        sample_record = data.iloc[i]
        record_for_prompt = f"""
        Patient ID: {sample_record['Patient_ID']}
        Birthdate: {sample_record['Birthdate']}
        Condition: {sample_record['Condition_Description']}
        Encounter: {sample_record['Encounter_Description']}
        Condition Start: {sample_record['Condition_Start']}
        Condition End: {sample_record['Condition_End']}
        City: {sample_record['City']}
        State: {sample_record['State']}
        Healthcare Expenses: ${sample_record['Healthcare_Expenses']}
        Healthcare Coverage: ${sample_record['Healthcare_Coverage']}
        """
        summary = generate_summary(record_for_prompt)
        summaries.append({
            "Patient_ID": sample_record["Patient_ID"],
            "Summary": summary
        })
    return summaries

# Generate summaries for the first 10 records
summaries = summarize_multiple_records(summary_data, n_records=10)

# Save the summaries to a CSV file
summaries_df = pd.DataFrame(summaries)
summaries_df.to_csv("patient_summaries.csv", index=False)
print("Summaries saved to patient_summaries.csv")


Summaries saved to patient_summaries.csv


In [6]:
summaries_df.head()

Unnamed: 0,Patient_ID,Summary
0,30a6452c-4297-a1ac-977a-6a23237c7b46,"Summary:\n- Patient: Born on February 6, 1994,..."
1,30a6452c-4297-a1ac-977a-6a23237c7b46,Patient ID: 30a6452c-4297-a1ac-977a-6a23237c7b...
2,30a6452c-4297-a1ac-977a-6a23237c7b46,Patient ID: 30a6452c-4297-a1ac-977a-6a23237c7b...
3,30a6452c-4297-a1ac-977a-6a23237c7b46,Patient ID: 30a6452c-4297-a1ac-977a-6a23237c7b...
4,30a6452c-4297-a1ac-977a-6a23237c7b46,"Patient: Born on February 6, 1994, in Braintre..."


In [7]:
from textstat import flesch_reading_ease

# Evaluate readability of each summary
def evaluate_readability(summaries):
    evaluations = []
    for summary in summaries:
        readability_score = flesch_reading_ease(summary["Summary"])
        evaluations.append({
            "Patient_ID": summary["Patient_ID"],
            "Summary": summary["Summary"],
            "Readability_Score": readability_score
        })
    return evaluations

# Generate evaluations
evaluations = evaluate_readability(summaries)

# Save evaluations to a file
evaluations_df = pd.DataFrame(evaluations)
evaluations_df.to_csv("summary_evaluations.csv", index=False)
print("Evaluations saved to summary_evaluations.csv")


Evaluations saved to summary_evaluations.csv


In [8]:
evaluations_df.head()

Unnamed: 0,Patient_ID,Summary,Readability_Score
0,30a6452c-4297-a1ac-977a-6a23237c7b46,"Summary:\n- Patient: Born on February 6, 1994,...",38.72
1,30a6452c-4297-a1ac-977a-6a23237c7b46,Patient ID: 30a6452c-4297-a1ac-977a-6a23237c7b...,45.01
2,30a6452c-4297-a1ac-977a-6a23237c7b46,Patient ID: 30a6452c-4297-a1ac-977a-6a23237c7b...,46.33
3,30a6452c-4297-a1ac-977a-6a23237c7b46,Patient ID: 30a6452c-4297-a1ac-977a-6a23237c7b...,29.35
4,30a6452c-4297-a1ac-977a-6a23237c7b46,"Patient: Born on February 6, 1994, in Braintre...",55.91


In [9]:
# Simplify medical notes for different patient groups
def simplify_medical_notes(record, target_group):
    prompt = f"""
    You are a medical assistant. Simplify the following medical note for a {target_group}.
    Focus on using clear, easy-to-understand language that fits the target audience.
    Medical Note: {record}
    """
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.7
        )
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        return f"Error simplifying note: {e}"

# Test with a sample record for different groups
sample_note = summary_data.iloc[0]['Condition_Description']
simplifications = {
    "Children": simplify_medical_notes(sample_note, "child"),
    "Adults": simplify_medical_notes(sample_note, "adult"),
    "Seniors": simplify_medical_notes(sample_note, "senior")
}

# Print simplified outputs
for group, text in simplifications.items():
    print(f"{group}:\n{text}\n")


Children:
Medical Note: Your home needs some improvements.

Adults:
Medical Note: Not happy with living situation.

Seniors:
Medical Note: Your living situation is not good (finding).



In [10]:
# Tree-of-Thought Reasoning for medical records
def tree_of_thought_reasoning(record):
    try:
        # Step 1: Summarize the record
        summary_prompt = f"Summarize the following medical record: {record}"
        summary = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": summary_prompt}
            ]
        )["choices"][0]["message"]["content"]

        # Step 2: Explain the condition
        condition_prompt = f"Explain the condition in this summary: {summary}"
        explanation = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": condition_prompt}
            ]
        )["choices"][0]["message"]["content"]

        # Step 3: Provide actionable recommendations
        recommendation_prompt = f"Based on the condition, provide actionable recommendations: {explanation}"
        recommendations = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": recommendation_prompt}
            ]
        )["choices"][0]["message"]["content"]

        return {
            "Summary": summary,
            "Explanation": explanation,
            "Recommendations": recommendations
        }
    except Exception as e:
        return f"Error in tree-of-thought reasoning: {e}"

# Test with a sample record
record_for_thought = summary_data.iloc[0]['Condition_Description']
tree_of_thought_result = tree_of_thought_reasoning(record_for_prompt)
print(tree_of_thought_result)


{'Summary': 'The patient with ID 30a6452c-4297-a1ac-977a-6a23237c7b46, born on February 6, 1994, has a finding of unsatisfactory housing noted during a general examination on April 1, 2012, in Braintree, Massachusetts. The patient has incurred healthcare expenses totaling $56904.96, with $18019.99 covered by healthcare coverage. The end date of the housing condition is not specified in the record.', 'Explanation': "Based on the summary provided, the patient with the specified ID was found to have unsatisfactory housing during a general examination conducted on April 1, 2012, in Braintree, Massachusetts. This indicates that the patient's living conditions were deemed inadequate or substandard in some way.\n\nThe information also includes details about the patient's healthcare expenses, which total $56,904.96, with $18,019.99 covered by healthcare coverage. This suggests that the patient may be facing financial challenges related to their healthcare needs and may be dealing with addition