In [4]:
import pandas as pd
import numpy as np

# Seed NumPy's global generator so the same row count always yields the
# same dataset.
np.random.seed(42)

# Dataset size is read interactively from the user.
rows = int(input("Enter number of rows you want in the dataset: "))

# Build the feature columns one statement at a time. The order of these
# statements fixes the order of draws from the seeded generator, so it must
# stay as-is to keep the output reproducible.
data = {}

# Original features
data["duration_hours"] = np.random.uniform(0, 8, rows).round(2)            # float, 2 decimals
data["volume_percent"] = np.random.randint(0, 101, rows)                   # int, 0-100 inclusive
data["env_noise_dB"] = np.random.uniform(30, 100, rows).round(2)           # float, 2 decimals
data["age"] = np.random.randint(10, 71, rows)                              # int, 10-70 inclusive
data["tone_test_avg"] = np.random.uniform(0, 50, rows).round(2)            # float, 2 decimals

# New features
data["headphone_type"] = np.random.randint(0, 2, rows)                     # int: 0=in-ear, 1=over-ear
data["daily_sessions"] = np.random.randint(1, 6, rows)                     # int: 1-5 sessions/day
data["occupational_noise_dB"] = np.random.uniform(30, 100, rows).round(2)  # float, 2 decimals

# One DataFrame column per feature, in insertion order.
df = pd.DataFrame(data)

# Calculate risk_level using effective sound dose
def calc_risk(row):
    """Classify one record's risk level from its estimated daily sound dose.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row or a dict)
        Must provide 'headphone_type', 'volume_percent', 'env_noise_dB',
        'occupational_noise_dB', 'duration_hours' and 'daily_sessions'.

    Returns
    -------
    str
        "Low" when the dose is below 50 %, "Medium" when it is at least 50 %
        but below 80 %, and "High" otherwise.
    """
    # headphone_type encoding: 0 = in-ear, 1 = over-ear. Over-ear is treated
    # as slightly safer, so it receives the reduced (0.9) volume weighting.
    headphone_factor = 0.9 if row['headphone_type'] == 1 else 1.0

    # Weighted blend of listening volume, environmental and occupational noise
    effective_dB = (row['volume_percent'] * headphone_factor +
                    0.5 * row['env_noise_dB'] +
                    0.3 * row['occupational_noise_dB'])

    # No effective sound level means no dose; returning early also avoids a
    # division by zero in the safe-hours formula below.
    if effective_dB <= 0:
        return "Low"

    # Linear model: 8 safe hours at an effective level of 85, scaled inversely
    safe_hours = 8 * (85 / effective_dB)
    total_duration = row['duration_hours'] * row['daily_sessions']  # cumulative exposure
    dose_percent = (total_duration / safe_hours) * 100

    if dose_percent < 50:
        return "Low"
    elif dose_percent < 80:
        return "Medium"
    else:
        return "High"

# Label every record by running calc_risk across the rows of the frame.
risk_labels = df.apply(calc_risk, axis=1)
df['risk_level'] = risk_labels

# Write the labelled dataset to disk, leaving the row index out of the file.
output_csv = "tinnisense_dataset.csv"
df.to_csv(output_csv, index=False)

# Report completion and show a preview of the first rows.
summary_message = f"Dataset with {rows} rows created successfully with enhanced features!"
print(summary_message)
preview = df.head()
print(preview)


Enter number of rows you want in the dataset:  4000


Dataset with 4000 rows created successfully with enhanced features!
   duration_hours  volume_percent  env_noise_dB  age  tone_test_avg  \
0            3.00              20         81.48   21          13.92   
1            7.61              63         35.60   36           4.78   
2            5.86              36         55.16   62           8.24   
3            4.79              26         90.09   39           8.70   
4            1.25              54         46.28   30          42.13   

   headphone_type  daily_sessions  occupational_noise_dB risk_level  
0               1               2                  84.24     Medium  
1               0               3                  89.81       High  
2               0               5                  59.43       High  
3               0               3                  76.75       High  
4               1               4                  34.12     Medium  
