# 🧹 Student Mental Health Data Cleaning
This notebook loads the raw survey data, performs cleaning operations, and exports a version suitable for SQL analysis.

In [10]:
import pandas as pd

In [11]:
df = pd.read_csv('../data/data.csv')
df.head()

Unnamed: 0,1. Age,2. Gender,3. University,4. Department,5. Academic Year,6. Current CGPA,7. Did you receive a waiver or scholarship at your university?,"1. In a semester, how often you felt nervous, anxious or on edge due to academic pressure?","2. In a semester, how often have you been unable to stop worrying about your academic affairs?","3. In a semester, how often have you had trouble relaxing due to academic pressure?",...,"2. In a semester, how often have you been feeling down, depressed or hopeless?","3. In a semester, how often have you had trouble falling or staying asleep, or sleeping too much?","4. In a semester, how often have you been feeling tired or having little energy?","5. In a semester, how often have you had poor appetite or overeating?","6. In a semester, how often have you been feeling bad about yourself - or that you are a failure or have let yourself or your family down?","7. In a semester, how often have you been having trouble concentrating on things, such as reading the books or watching television?","8. In a semester, how often have you moved or spoke too slowly for other people to notice? Or you've been moving a lot more than usual because you've been restless?","9. In a semester, how often have you had thoughts that you would be better off dead, or of hurting yourself?",Depression Value,Depression Label
0,18-22,Female,"Independent University, Bangladesh (IUB)",Engineering - CS / CSE / CSC / Similar to CS,Fourth Year or Equivalent,2.50 - 2.99,No,1,1,1,...,2,1,1,2,1,1,1,1,11,Moderate Depression
1,18-22,Male,"Independent University, Bangladesh (IUB)",Engineering - CS / CSE / CSC / Similar to CS,First Year or Equivalent,3.80 - 4.00,No,2,2,1,...,1,1,1,1,1,1,1,1,9,Mild Depression
2,18-22,Male,"Independent University, Bangladesh (IUB)",Engineering - CS / CSE / CSC / Similar to CS,First Year or Equivalent,3.00 - 3.39,No,2,1,1,...,0,2,3,2,2,2,2,1,16,Moderately Severe Depression
3,18-22,Male,"Independent University, Bangladesh (IUB)",Engineering - CS / CSE / CSC / Similar to CS,First Year or Equivalent,3.40 - 3.79,No,2,1,1,...,1,1,1,1,1,1,1,1,9,Mild Depression
4,18-22,Male,"Independent University, Bangladesh (IUB)",Engineering - CS / CSE / CSC / Similar to CS,First Year or Equivalent,3.40 - 3.79,No,1,1,1,...,1,1,1,1,1,1,1,1,9,Mild Depression


In [12]:
# Clean column names for SQL compatibility
df.columns = [col.strip().lower().replace('.', '').replace(' ', '_') for col in df.columns]
df.columns

Index(['1_age', '2_gender', '3_university', '4_department', '5_academic_year',
       '6_current_cgpa',
       '7_did_you_receive_a_waiver_or_scholarship_at_your_university?',
       '1_in_a_semester,_how_often_you_felt_nervous,_anxious_or_on_edge_due_to_academic_pressure?',
       '2_in_a_semester,_how_often_have_you_been_unable_to_stop_worrying_about_your_academic_affairs?',
       '3_in_a_semester,_how_often_have_you_had_trouble_relaxing_due_to_academic_pressure?',
       '4_in_a_semester,_how_often_have_you_been_easily_annoyed_or_irritated_because_of_academic_pressure?',
       '5_in_a_semester,_how_often_have_you_worried_too_much_about_academic_affairs?',
       '6_in_a_semester,_how_often_have_you_been_so_restless_due_to_academic_pressure_that_it_is_hard_to_sit_still?',
       '7_in_a_semester,_how_often_have_you_felt_afraid,_as_if_something_awful_might_happen?',
       'anxiety_value', 'anxiety_label',
       '1_in_a_semester,_how_often_have_you_felt_upset_due_to_something_that_

In [13]:
# Calculate composite scores for stress and anxiety
stress_cols = [col for col in df.columns if 'nervous' in col or 'worrying' in col or 'pressure' in col]
anxiety_cols = [col for col in df.columns if 'anxious' in col or 'relaxing' in col]

df['stress_score'] = df[stress_cols].apply(pd.to_numeric, errors='coerce').sum(axis=1)
df['anxiety_score'] = df[anxiety_cols].apply(pd.to_numeric, errors='coerce').sum(axis=1)

In [14]:
print(df.columns.tolist())

['1_age', '2_gender', '3_university', '4_department', '5_academic_year', '6_current_cgpa', '7_did_you_receive_a_waiver_or_scholarship_at_your_university?', '1_in_a_semester,_how_often_you_felt_nervous,_anxious_or_on_edge_due_to_academic_pressure?', '2_in_a_semester,_how_often_have_you_been_unable_to_stop_worrying_about_your_academic_affairs?', '3_in_a_semester,_how_often_have_you_had_trouble_relaxing_due_to_academic_pressure?', '4_in_a_semester,_how_often_have_you_been_easily_annoyed_or_irritated_because_of_academic_pressure?', '5_in_a_semester,_how_often_have_you_worried_too_much_about_academic_affairs?', '6_in_a_semester,_how_often_have_you_been_so_restless_due_to_academic_pressure_that_it_is_hard_to_sit_still?', '7_in_a_semester,_how_often_have_you_felt_afraid,_as_if_something_awful_might_happen?', 'anxiety_value', 'anxiety_label', '1_in_a_semester,_how_often_have_you_felt_upset_due_to_something_that_happened_in_your_academic_affairs?', '2_in_a_semester,_how_often_you_felt_as_if_you

In [15]:
export_cols = [
    '1_age',
    '2_gender',
    '3_university',
    '4_department',
    '5_academic_year',
    '6_current_cgpa',
    'stress_score',
    'anxiety_score',
    'depression_value',
    'depression_label'
]

df_cleaned = df[export_cols].copy()

df_cleaned.columns = [
    'age',
    'gender',
    'university',
    'department',
    'academic_year',
    'cgpa_range',
    'stress_score',
    'anxiety_score',
    'depression_score',
    'depression_label'
]

df_cleaned.head()



Unnamed: 0,age,gender,university,department,academic_year,cgpa_range,stress_score,anxiety_score,depression_score,depression_label
0,18-22,Female,"Independent University, Bangladesh (IUB)",Engineering - CS / CSE / CSC / Similar to CS,Fourth Year or Equivalent,2.50 - 2.99,10,2,11,Moderate Depression
1,18-22,Male,"Independent University, Bangladesh (IUB)",Engineering - CS / CSE / CSC / Similar to CS,First Year or Equivalent,3.80 - 4.00,9,3,9,Mild Depression
2,18-22,Male,"Independent University, Bangladesh (IUB)",Engineering - CS / CSE / CSC / Similar to CS,First Year or Equivalent,3.00 - 3.39,8,3,16,Moderately Severe Depression
3,18-22,Male,"Independent University, Bangladesh (IUB)",Engineering - CS / CSE / CSC / Similar to CS,First Year or Equivalent,3.40 - 3.79,8,3,9,Mild Depression
4,18-22,Male,"Independent University, Bangladesh (IUB)",Engineering - CS / CSE / CSC / Similar to CS,First Year or Equivalent,3.40 - 3.79,7,2,9,Mild Depression


In [16]:
# Export cleaned data to CSV
df_cleaned.to_csv('../data/cleaned_data.csv', index=False)
print('✅ Cleaned data exported successfully!')

✅ Cleaned data exported successfully!
