In [1]:
import pandas as pd
import numpy as np

def generate_health_data(num_records=1000, seed=None):
    """Generate a synthetic health/thyroid dataset as a pandas DataFrame.

    Each record holds randomly drawn demographic, anthropometric and thyroid
    lab values, plus a 'Thyroid' label ('Hypothyroid' or 'Normal') derived
    from those values by the rule-based loop at the end of this function.

    Parameters
    ----------
    num_records : int, default 1000
        Number of rows to generate. ``0`` yields an empty frame that still
        has all 14 columns.
    seed : int or None, default None
        When given, all draws come from a private ``np.random.RandomState``
        seeded with this value, so the same seed always yields the same
        frame and the global NumPy random state is left untouched.
        When ``None``, draws come from the global ``np.random`` state
        (so a prior ``np.random.seed(...)`` call is still honoured).

    Returns
    -------
    pandas.DataFrame
        One row per record with the columns: Age, Sex, Height, Weight, BMI,
        Body fat, Hand grip dominant hand, Hand grip non-dominant hand,
        Wrist circumference dominant, Wrist circumference non-dominant,
        Serum T3, Serum T4, TSH, Thyroid.
    """
    # ``np.random`` (module) and ``RandomState`` expose the same sampling
    # functions used below, so either can serve as the source of randomness.
    rng = np.random if seed is None else np.random.RandomState(seed)

    def _normal_non_negative(loc, scale, decimals):
        """Draw ``num_records`` normal samples, floor them at 0, then round."""
        # A normal distribution has unbounded tails; without the floor a
        # small share of measurements would come out negative.
        samples = rng.normal(loc=loc, scale=scale, size=num_records)
        return np.round(np.maximum(samples, 0.0), decimals=decimals)

    # --- Demographics and anthropometrics -------------------------------
    age = rng.randint(18, 100, size=num_records)  # upper bound is exclusive: 18..99
    sex = rng.choice(['male', 'female'], size=num_records)
    height = np.round(rng.uniform(low=120, high=200, size=num_records), decimals=2)  # cm
    weight = np.round(rng.uniform(low=40, high=100, size=num_records), decimals=1)
    # Height is converted from cm to m before squaring.
    bmi = np.round(weight / ((height / 100) ** 2), decimals=2)
    body_fat = _normal_non_negative(25, 10, 1)
    hand_grip_dominant = rng.randint(20, 60, size=num_records)  # integers 20..59
    hand_grip_non_dominant = _normal_non_negative(35, 10, 1)
    wrist_circumference_dominant = _normal_non_negative(17, 2, 1)
    wrist_circumference_non_dominant = _normal_non_negative(17, 2, 1)

    # --- Thyroid labs ----------------------------------------------------
    serum_t3 = _normal_non_negative(1.0, 0.5, 2)
    serum_t4 = _normal_non_negative(6.0, 2.0, 2)

    # TSH mean rises linearly with age, with a sex-specific baseline, slope
    # and spread. Values are rounded first and then floored at 0.1.
    is_female = sex == 'female'
    tsh_loc = np.where(is_female, 2.5 + age * 0.02, 2.0 + age * 0.015)
    tsh_scale = np.where(is_female, 1.0, 0.8)
    tsh = np.maximum(np.round(rng.normal(loc=tsh_loc, scale=tsh_scale), decimals=2), 0.1)

    # --- Thyroid label ---------------------------------------------------
    # The label depends on the running share of 'Hypothyroid' rows so far,
    # so this part has to stay a sequential loop.
    thyroid = []
    hypothyroid_count = 0
    records = zip(tsh, serum_t3, serum_t4, age, body_fat)
    for i, (tsh_i, t3_i, t4_i, age_i, fat_i) in enumerate(records):
        # NOTE(review): the grouping below spells out how Python parses the
        # original un-parenthesised `a and b or c and d and e` (``and`` binds
        # tighter than ``or``). Confirm this is the intended clinical rule.
        lab_pattern = tsh_i > 3.0 and t3_i < 0.8
        older_with_low_t4 = t4_i < 4.5 and 60 <= age_i <= 100
        at_risk = lab_pattern or older_with_low_t4

        # At-risk rows are labelled Hypothyroid when body fat is high or
        # while fewer than 40% of the rows so far are Hypothyroid. Every
        # other row has a 5% chance; the random draw only happens when the
        # rule-based part is false (short-circuit evaluation).
        rule_based = at_risk and (fat_i > 35 or hypothyroid_count / (i + 1) < 0.4)
        if rule_based or rng.random_sample() < 0.05:
            thyroid.append('Hypothyroid')
            hypothyroid_count += 1
        else:
            thyroid.append('Normal')

    return pd.DataFrame({
        'Age': age,
        'Sex': sex,
        'Height': height,
        'Weight': weight,
        'BMI': bmi,
        'Body fat': body_fat,
        'Hand grip dominant hand': hand_grip_dominant,
        'Hand grip non-dominant hand': hand_grip_non_dominant,
        'Wrist circumference dominant': wrist_circumference_dominant,
        'Wrist circumference non-dominant': wrist_circumference_non_dominant,
        'Serum T3': serum_t3,
        'Serum T4': serum_t4,
        'TSH': tsh,
        'Thyroid': thyroid
    })

In [2]:
# Build the synthetic dataset: one DataFrame row per generated record (1000 rows).
health_data = generate_health_data(num_records=1000)

In [3]:
# Export the generated records to CSV; index=False keeps the pandas row index
# out of the file. The path is defined once so the confirmation message always
# names the file that was actually written.
output_path = 'generated_health_data_new.csv'
health_data.to_csv(output_path, index=False)
print(f"Data has been exported to '{output_path}'")

Data has been exported to 'generated_health_data.csv'


In [4]:
# Read the exported CSV back in to confirm the round trip.
df = pd.read_csv(filepath_or_buffer='generated_health_data_new.csv')
# Report (rows, columns) as plain text, then preview the first five rows
# through the notebook's rich display.
print(df.shape)
df.head(n=5)

(1000, 14)


Unnamed: 0,Age,Sex,Height,Weight,BMI,Body fat,Hand grip dominant hand,Hand grip non-dominant hand,Wrist circumference dominant,Wrist circumference non-dominant,Serum T3,Serum T4,TSH,Thyroid
0,39,male,169.68,63.9,22.19,20.7,38,31.3,21.7,15.9,0.92,8.47,1.75,Normal
1,65,female,185.69,72.7,21.08,23.5,34,21.8,18.7,17.1,0.78,4.66,4.46,Hypothyroid
2,85,male,148.6,82.2,37.22,42.8,53,37.3,15.3,14.8,0.28,6.64,1.97,Normal
3,46,female,152.04,76.0,32.88,32.0,53,24.8,16.5,16.6,0.87,7.91,4.61,Normal
4,42,male,169.27,55.7,19.44,21.5,37,36.6,16.5,13.1,2.03,7.92,2.17,Normal
