In [4]:
# 1. Import libraries
import pandas as pd
import random
from faker import Faker
from datetime import datetime, timedelta
import os

# 2. Setup Faker and random seed
# Both generators are seeded: `random` drives the numeric and categorical
# fields, while Faker draws names and dates from its own RNG. Seeding only
# `random` would leave the name/visit_date columns different on every run.
SEED = 42
fake = Faker()
Faker.seed(SEED)
random.seed(SEED)

# 3. Configuration
num_records = 50
diagnoses = ['Hypertension', 'Diabetes', 'Asthma', 'Healthy', 'Obesity', 'Heart Disease']
genders = ['Male', 'Female']
smoking_statuses = ['Smoker', 'Non-smoker']
activity_levels = ['Sedentary', 'Moderate', 'Active']
cholesterol_levels = ['Low', 'Normal', 'High']
adherence_status = ['Adherent', 'Non-adherent']

# 4. Generate unique 5-digit patient IDs
# range() excludes its stop value, so the stop must be 100000 for 99999 to be
# a possible ID. random.sample draws without replacement, which guarantees
# uniqueness.
patient_ids = random.sample(range(10000, 100000), num_records)

# 5. Generate health records
# Every field is drawn independently of the others, so the values in a record
# are not correlated (e.g. diagnosis does not depend on BMI).
records = []
for patient_id in patient_ids:
    age = random.randint(18, 90)
    weight_kg = random.uniform(50, 120)
    height_m = random.uniform(1.5, 1.9)
    # BMI = weight (kg) / height (m) squared, rounded to one decimal place.
    bmi = round(weight_kg / (height_m ** 2), 1)
    # NOTE: systolic and diastolic are drawn independently, so a reading such
    # as 90/100 (diastolic above systolic) is possible.
    systolic = random.randint(90, 160)
    diastolic = random.randint(60, 100)
    bp = f"{systolic}/{diastolic}"
    # The window is relative to the day the script runs ('-2y' .. 'today').
    visit_date = fake.date_between(start_date='-2y', end_date='today')
    # Weighted draw: 'Yes' with weight 0.2, 'No' with weight 0.8.
    readmitted = random.choices(['Yes', 'No'], weights=[0.2, 0.8])[0]

    record = {
        'patient_id': patient_id,
        'name': fake.name(),
        'age': age,
        'gender': random.choice(genders),
        'diagnosis': random.choice(diagnoses),
        'blood_pressure': bp,
        'bmi': bmi,
        'smoking_status': random.choice(smoking_statuses),
        'physical_activity': random.choice(activity_levels),
        'cholesterol_level': random.choice(cholesterol_levels),
        'medication_adherence': random.choice(adherence_status),
        'hospital_stay_days': random.randint(0, 14),
        'readmitted': readmitted,
        'visit_date': visit_date,
    }
    records.append(record)

# 6. Create DataFrame
df = pd.DataFrame(records)

# 7. Save DataFrame to CSV
output_dir = 'health_records'
# exist_ok=True makes directory creation idempotent and avoids the race
# between a separate existence check and the makedirs call.
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, 'health_records.csv')
# index=False keeps the DataFrame's row index out of the file.
df.to_csv(output_file, index=False)

# 8. Preview all data
df


Unnamed: 0,patient_id,name,age,gender,diagnosis,blood_pressure,bmi,smoking_status,physical_activity,cholesterol_level,medication_adherence,hospital_stay_days,readmitted,visit_date
0,93810,Michael Hill,66,Male,Hypertension,136/96,21.2,Smoker,Moderate,Low,Adherent,13,Yes,2024-10-08
1,24592,Lauren Salazar,30,Male,Heart Disease,136/70,27.1,Non-smoker,Active,High,Adherent,9,No,2023-09-01
2,13278,Shelby Harper,39,Male,Heart Disease,149/84,34.2,Non-smoker,Sedentary,Low,Adherent,12,No,2023-11-17
3,46048,Joshua Mejia,58,Female,Heart Disease,130/73,33.5,Non-smoker,Sedentary,Normal,Adherent,3,No,2025-05-02
4,42098,Cindy Taylor,89,Male,Diabetes,144/97,27.1,Non-smoker,Sedentary,Low,Adherent,2,No,2024-02-26
5,39256,Elizabeth James,38,Female,Obesity,98/84,37.9,Non-smoker,Active,Low,Adherent,10,No,2023-12-15
6,28289,Melissa Hopkins DDS,86,Male,Healthy,133/67,31.4,Smoker,Active,High,Non-adherent,8,No,2024-07-15
7,23434,Taylor Daniel,40,Male,Diabetes,128/100,35.9,Non-smoker,Sedentary,High,Adherent,9,No,2024-04-23
8,98696,Brian Lopez,59,Male,Obesity,136/79,35.3,Smoker,Sedentary,High,Non-adherent,13,No,2025-04-16
9,81482,Joshua Martin,26,Male,Asthma,106/68,40.4,Non-smoker,Sedentary,High,Adherent,11,No,2024-07-21
