In [4]:
import pandas as pd
import random

# Load the source healthcare records into a DataFrame. The path is relative,
# so it resolves against the notebook's current working directory.
file_path = '../healthcare_dataset.csv'
df_user = pd.read_csv(file_path)

# Sentence templates for each section of a generated clinic note. One template
# per section is drawn at random, so identical records can yield different notes.
intros = [
    "This {age}-year-old {gender} presented with {condition}.",
    "Patient is a {age}-year-old {gender} diagnosed with {condition}.",
    "{gender} of {age} years, known to have {condition}, was seen today."
]

symptoms = [
    "The patient reported {symptom_description}.",
    "Symptoms include {symptom_description}.",
    "The patient has been experiencing {symptom_description}."
]

treatments = [
    "The treatment plan includes {medication}.",
    "{medication} was prescribed to manage the condition.",
    "Current medication includes {medication}."
]

lab_results = [
    "Recent lab tests revealed {lab_results}.",
    "Lab results indicate {lab_results}.",
    "Tests show {lab_results}."
]

follow_up = [
    "The patient was advised on the importance of follow-up visits.",
    "Regular monitoring and follow-up were discussed.",
    "The importance of adherence to the treatment plan was emphasized."
]


def generate_dynamic_clinic_notes(row, rng=None):
    """Build a free-text clinic note from one patient record.

    One template is drawn from each of ``intros``, ``symptoms``,
    ``treatments``, ``lab_results`` and ``follow_up`` (in that order), filled
    in from ``row``, and the five sentences are joined with single spaces.

    Args:
        row: Mapping-like record indexable by the keys ``'Age'``,
            ``'Gender'``, ``'Medical Condition'``, ``'Medication'`` and
            ``'Test Results'`` (for example the Series passed by
            ``DataFrame.apply(..., axis=1)``).
        rng: Optional object exposing ``choice(seq)``, such as a seeded
            ``random.Random``, for reproducible output. Defaults to the
            module-level ``random`` functions.

    Returns:
        The assembled clinic note as a single string.

    Raises:
        KeyError: If ``row`` lacks one of the keys listed above.
    """
    chooser = random if rng is None else rng
    condition = row['Medical Condition']

    # A missing condition typically arrives from pandas as NaN (a float); a
    # bare ``in`` test on it raises TypeError and would abort the whole apply.
    # Non-string values are therefore treated as "not COPD". The value itself
    # is still formatted into the intro unchanged.
    has_copd = isinstance(condition, str) and 'COPD' in condition
    if has_copd:
        symptom_description = "worsening symptoms over the past few days"
    else:
        symptom_description = "stable but with concerns about long-term management"

    # Draw in a fixed order so a seeded generator always yields the same note.
    intro = chooser.choice(intros).format(
        age=row['Age'], gender=row['Gender'], condition=condition
    )
    symptom = chooser.choice(symptoms).format(symptom_description=symptom_description)
    treatment = chooser.choice(treatments).format(medication=row['Medication'])
    lab_result = chooser.choice(lab_results).format(lab_results=row['Test Results'])
    follow_up_note = chooser.choice(follow_up)

    # Combine the fragments into a complete clinic note
    return f"{intro} {symptom} {treatment} {lab_result} {follow_up_note}"

# Generate one note per record (row-wise apply), then attach the result as a
# new 'Clinic Notes' column on the loaded frame.
clinic_notes = df_user.apply(generate_dynamic_clinic_notes, axis=1)
df_user['Clinic Notes'] = clinic_notes

# Write the augmented dataset to a new CSV, leaving out the DataFrame index.
output_file_path = 'new_healthcare_dataset_with_dynamic_clinic_notes.csv'
df_user.to_csv(output_file_path, index=False)

# Preview the first few generated notes next to the patient names.
df_user.loc[:, ['Name', 'Clinic Notes']].head()


Unnamed: 0,Name,Clinic Notes
0,Bobby JacksOn,"Male of 30 years, known to have Cancer, was se..."
1,LesLie TErRy,"Male of 62 years, known to have Obesity, was s..."
2,DaNnY sMitH,"Female of 76 years, known to have Obesity, was..."
3,andrEw waTtS,"Female of 28 years, known to have Diabetes, wa..."
4,adrIENNE bEll,This 43-year-old Female presented with Cancer....
