# In [3]:
import pandas as pd
import numpy as np

# Number of rows to generate for each individual stress level
num_samples_per_class = 1000  # six stress levels (0-5) x 1000 rows each = 6000 rows in total

# Define feature ranges and relationships with stress levels
# Per stress level: (gpa range, nightly sleep-hours range, weekly study-hours range).
# Adjacent levels overlap on purpose so the classes are not trivially separable.
_STRESS_FEATURE_RANGES = {
    0: ((8.5, 10), (7.5, 9), (5, 15)),
    1: ((7.5, 9), (6.5, 8.5), (10, 25)),
    2: ((6.5, 8.5), (5.5, 7.5), (15, 30)),
    3: ((5.5, 7.5), (4.5, 6.5), (25, 40)),
    4: ((4.5, 6.5), (3.5, 5.5), (35, 50)),
    5: ((3.5, 5.5), (2, 4.5), (45, 60)),
}


def generate_synthetic_data(num_samples, stress_level):
    """Generate synthetic survey rows for a single stress level.

    GPA, sleep hours and study hours are drawn from level-specific uniform
    ranges, shifted by one shared Gaussian noise term per row, and truncated
    to integers. All remaining answers are drawn uniformly at random and do
    not depend on the stress level.

    Each call uses its own random generator seeded with ``42 + stress_level``.
    The result is therefore reproducible, different stress levels no longer
    replay the exact same random stream, and NumPy's global random state is
    left untouched.

    Args:
        num_samples: Number of rows to generate.
        stress_level: Target stress level, an integer from 0 to 5. It is
            stored unchanged in the stress-level column of every row.

    Returns:
        A ``pandas.DataFrame`` with one row per sample and one column per
        survey question, plus a ``timestamp`` column.

    Raises:
        ValueError: If ``stress_level`` is not one of 0, 1, 2, 3, 4, 5.
    """
    try:
        gpa_range, sleep_range, study_range = _STRESS_FEATURE_RANGES[stress_level]
    except (KeyError, TypeError):
        raise ValueError(
            f"stress_level must be an integer from 0 to 5, got {stress_level!r}"
        ) from None

    # A private generator instead of np.random.seed(42): re-seeding the global
    # generator with the same constant on every call made every stress level
    # reuse identical draws. Seed 42 for level 0 reproduces the stream that
    # np.random.seed(42) would have produced.
    rng = np.random.RandomState(42 + int(stress_level))

    # The timestamp does not vary within a call, so format it only once.
    timestamp = pd.to_datetime('today').strftime('%d-%m-%Y %H:%M')

    data = []
    for _ in range(num_samples):
        # One Gaussian noise value per row, shared by the continuous features
        noise = rng.normal(0, 0.1)

        gpa = rng.uniform(*gpa_range) + noise
        sleep_hours = rng.uniform(*sleep_range) + noise
        study_hours = rng.uniform(*study_range) + noise
        # Class hours share the study-hours range but are not part of the
        # output; the draw is kept so the random stream stays aligned with
        # the column order used so far.
        rng.uniform(*study_range)

        assignments = rng.randint(0, 15)
        academic_pressure = rng.randint(1, 6)
        social_support = rng.randint(1, 6)
        physical_activity = rng.choice(['Daily', 'Frequently', 'Sometimes', 'Rarely', 'Never'])
        diet_quality = rng.randint(1, 6)
        anxiety_levels = rng.randint(1, 6)
        depression_symptoms = rng.randint(1, 6)

        sample = {
            'timestamp': timestamp,
            'what_is_your_age?': rng.randint(18, 31),
            'what_is_your_gender?': rng.choice(['Male', 'Female']),
            'what_is_your_current_year_of_study?': rng.randint(1, 6),
            'what_is_your_major_or_field_of_study?': rng.choice(['Civil Engineering', 'BBA General', 'Law', 'Electronics and Communication', 'Robotics and Automation', 'Core Engineering']),
            # int() truncates toward zero, so the stored values are whole numbers
            'what_is_your_current_gpa_or_average_grade?': int(gpa),
            'how_many_hours_do_you_study_per_week?': int(study_hours),
            'how_many_assignments_or_deadlines_do_you_have_each_week?': assignments,
            'how_often_do_you_attend_classes?': rng.choice(['Never', 'Rarely', 'Sometimes', 'Frequently', 'Always']),
            'how_much_academic_pressure_do_you_feel?': academic_pressure,
            'how_would_you_rate_your_level_of_social_support?': social_support,
            'what_is_your_current_living_situation?': rng.choice(['On-Campus', 'With Family', 'Off-Campus']),
            'do_you_have_a_part_time_job?': rng.choice(['Yes', 'No']),
            'do_you_participate_in_extracurricular_activities?': rng.choice(['Yes', 'No']),
            'how_would_you_rate_your_sleep_quality?': rng.randint(1, 6),
            'how_many_hours_of_sleep_do_you_get_per_night?': int(sleep_hours),
            'how_often_do_you_engage_in_physical_activity?': physical_activity,
            'how_would_you_rate_the_quality_of_your_diet?': diet_quality,
            'how_many_hours_do_you_spend_on_screens_phone_computer_tv_per_day?': int(rng.uniform(0, 10)),
            'how_would_you_rate_your_anxiety_levels?': anxiety_levels,
            'how_would_you_rate_your_depression_symptoms?': depression_symptoms,
            'what_coping_mechanisms_do_you_use?': rng.choice(['Sleeping', 'Playing Games', 'Exercise', 'Aggression - punching bag', 'Listening to Music']),
            'how_would_you_rate_your_current_stress_level?': stress_level
        }
        data.append(sample)

    return pd.DataFrame(data)

# Build one frame per stress level (0 through 5) and stack them into a single table
synthetic_data = pd.concat(
    [
        generate_synthetic_data(num_samples_per_class, current_level)
        for current_level in range(6)
    ],
    ignore_index=True,
)

# Permute the rows with a fixed seed, then renumber the index from zero
synthetic_data = synthetic_data.sample(frac=1, random_state=42)
synthetic_data = synthetic_data.reset_index(drop=True)

# Write the shuffled table to disk without the index column
synthetic_data.to_csv('stress_level.csv', index=False)

print("Shuffled dataset with stress levels from 0 to 5 created successfully!")


# Output: Shuffled dataset with stress levels from 0 to 5 created successfully!
