Synthetic data generation for heart attack risk prediction.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Seed NumPy's global random number generator so repeated runs draw the
# same samples.
np.random.seed(42)

# Total number of synthetic records, divided between the two risk classes.
num_records = 20000
high_risk_count = num_records // 2
# Assign the remainder to the low-risk class so the two counts always add up
# to num_records, even if num_records is later changed to an odd value
# (using `// 2` for both would silently drop one record).
low_risk_count = num_records - high_risk_count

In [3]:
# Sample every feature from a normal distribution, once per risk group.
# Arguments are (mean, standard deviation, number of samples).
# The statements draw from NumPy's global generator in sequence, so their
# order determines which random values each array receives.

# Heart rate: high-risk group centred on 95, low-risk group on 70.
heart_rate_high = np.random.normal(95, 10, high_risk_count)
heart_rate_low = np.random.normal(70, 10, low_risk_count)

# Oxygen level: high-risk group centred on 88, low-risk group on 98.
# NOTE: samples are not clipped, so individual values can exceed 100.
oxygen_levels_high = np.random.normal(88, 5, high_risk_count)
oxygen_levels_low = np.random.normal(98, 2, low_risk_count)

# Body temperature: high-risk group centred on 100.5, low-risk group on 98.6.
body_temp_high = np.random.normal(100.5, 1, high_risk_count)
body_temp_low = np.random.normal(98.6, 0.5, low_risk_count)

In [4]:
# Stack each feature with the high-risk samples first and the low-risk
# samples second, so row i of every array describes the same record.
heart_rate = np.concatenate((heart_rate_high, heart_rate_low))
oxygen_levels = np.concatenate((oxygen_levels_high, oxygen_levels_low))
body_temp = np.concatenate((body_temp_high, body_temp_low))

# Build the label column in the same order: the first high_risk_count rows
# are 'High Risk', the remaining low_risk_count rows are 'Low Risk'.
risk_category = np.array([
    'High Risk' if row < high_risk_count else 'Low Risk'
    for row in range(high_risk_count + low_risk_count)
])

In [5]:
# Assemble the feature and label arrays into one table, then shuffle the rows
# (frac=1 samples every row exactly once) and renumber the index from zero so
# the two risk groups are no longer stored as contiguous halves.
data = (
    pd.DataFrame(
        {
            'Heart Rate': heart_rate,
            'Oxygen Level': oxygen_levels,
            'Body Temperature': body_temp,
            'Heart Attack Risk': risk_category,
        }
    )
    .sample(frac=1)
    .reset_index(drop=True)
)

# Write the shuffled table to disk without the index column.
data.to_csv('heart_attack_prediction_dataset.csv', index=False)

In [6]:
# Show the first five rows of the shuffled dataset as a quick sanity check.
print(data.head(5))

   Heart Rate  Oxygen Level  Body Temperature Heart Attack Risk
0  101.865749     96.476418         98.234058          Low Risk
1   67.382193     98.229872         99.267701          Low Risk
2   62.290010    100.313704         98.145910          Low Risk
3   76.288138     95.066348         98.305198          Low Risk
4   74.003683     97.772355         98.136160          Low Risk
