## IMPORT : RAW.DATA

1.  **Create a DataFrame:** Load the raw CSV data into a pandas DataFrame.

In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler

In [2]:
# Load the raw dataset into `df`.
RAW_DATA_PATH = '../data/raw_data.csv'
try:
    df = pd.read_csv(RAW_DATA_PATH)
    print("CSV file loaded successfully!")
except FileNotFoundError:
    # Report the path that was actually tried (the previous message named a
    # different file), then re-raise so the cell fails with a normal traceback
    # instead of calling exit() from inside a notebook.
    print(f"Error: '{RAW_DATA_PATH}' not found. Please make sure the file exists at that path relative to the notebook's working directory, or provide the correct path.")
    raise

CSV file loaded successfully!


In [3]:
# Structural overview of the loaded frame: dtypes / non-null counts,
# summary statistics, and the column labels.
print("\nInfo about the data types:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
df.info()
print("\nDescribe:")
print(df.describe())
print("\nColumns:")
print(df.columns)


Info about the data types:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1100 entries, 0 to 1099
Data columns (total 21 columns):
 #   Column                        Non-Null Count  Dtype
---  ------                        --------------  -----
 0   anxiety_level                 1100 non-null   int64
 1   self_esteem                   1100 non-null   int64
 2   mental_health_history         1100 non-null   int64
 3   depression                    1100 non-null   int64
 4   headache                      1100 non-null   int64
 5   blood_pressure                1100 non-null   int64
 6   sleep_quality                 1100 non-null   int64
 7   breathing_problem             1100 non-null   int64
 8   noise_level                   1100 non-null   int64
 9   living_conditions             1100 non-null   int64
 10  safety                        1100 non-null   int64
 11  basic_needs                   1100 non-null   int64
 12  academic_performance          1100 non-null   int64
 13  study

### Identifying Missing Values

In [7]:
# Count the null entries in every column and print them under a heading.
null_counts = df.isna().sum()
print("Missing Values per Column:", null_counts, sep="\n")

Missing Values per Column:
anxiety_level                   0
self_esteem                     0
mental_health_history           0
depression                      0
headache                        0
blood_pressure                  0
sleep_quality                   0
breathing_problem               0
noise_level                     0
living_conditions               0
safety                          0
basic_needs                     0
academic_performance            0
study_load                      0
teacher_student_relationship    0
future_career_concerns          0
social_support                  0
peer_pressure                   0
extracurricular_activities      0
bullying                        0
stress_level                    0
dtype: int64


### There are no missing values in the data

In [10]:
# Plain-text preview of the first five rows of the frame.
first_rows = df.head(n=5)
print(first_rows)

   anxiety_level  self_esteem  mental_health_history  depression  headache  \
0             14           20                      0          11         2   
1             15            8                      1          15         5   
2             12           18                      1          14         2   
3             16           12                      1          15         4   
4             16           28                      0           7         2   

   blood_pressure  sleep_quality  breathing_problem  noise_level  \
0               1              2                  4            2   
1               3              1                  4            3   
2               1              2                  2            2   
3               3              1                  3            4   
4               3              5                  1            3   

   living_conditions  ...  basic_needs  academic_performance  study_load  \
0                  3  ...            2        

In [None]:
# Write the current frame to the processed-data CSV, omitting the index column.
try:
    df.to_csv('../data/processed_data.csv', index=False)
    print("CSV file saved successfully as 'processed_data.csv'!")
except Exception as e:
    # The cell stays non-fatal, but the cause is now reported: previously the
    # caught exception was bound to `e` and never shown.
    print(f"Error: CSV file unsuccessfully saved ({type(e).__name__}: {e})")

In [22]:
def _scale_frame(frame, scaler):
    """Fit ``scaler`` on ``frame`` and return the scaled values as a DataFrame.

    Parameters
    ----------
    frame : pandas.DataFrame
        Data to scale; every column is passed to the scaler.
    scaler : object
        Any object exposing ``fit_transform(frame)`` (here the scikit-learn
        scalers imported at the top of the notebook). It is fitted in place.

    Returns
    -------
    pandas.DataFrame
        The transformed values, carrying ``frame``'s column labels and index.
    """
    scaled_values = scaler.fit_transform(frame)
    return pd.DataFrame(scaled_values, columns=frame.columns, index=frame.index)


# Build one scaled copy of `df` per scaler; `df` itself is left unchanged.
df_minmax = _scale_frame(df, MinMaxScaler())
df_standard = _scale_frame(df, StandardScaler())

# `scaler` stays bound to the fitted RobustScaler, matching the state this
# cell has always left behind.
scaler = RobustScaler()
df_robust = _scale_frame(df, scaler)

# sep="" drops the space print() would otherwise insert between the heading
# and the frame text, which shifted the first header row out of alignment.
print("\nMin-Max Scaled Data (First 5 rows):\n", df_minmax.head(), sep="")

print("\nStandardized Data (First 5 rows):\n", df_standard.head(), sep="")

print("\nRobust Scaled Data (First 5 rows):\n", df_robust.head(), sep="")


Min-Max Scaled Data (First 5 rows):
    anxiety_level  self_esteem  mental_health_history  depression  headache  \
0       0.666667     0.666667                    0.0    0.407407       0.4   
1       0.714286     0.266667                    1.0    0.555556       1.0   
2       0.571429     0.600000                    1.0    0.518519       0.4   
3       0.761905     0.400000                    1.0    0.555556       0.8   
4       0.761905     0.933333                    0.0    0.259259       0.4   

   blood_pressure  sleep_quality  breathing_problem  noise_level  \
0             0.0            0.4                0.8          0.4   
1             1.0            0.2                0.8          0.6   
2             0.0            0.4                0.4          0.4   
3             1.0            0.2                0.6          0.8   
4             1.0            1.0                0.2          0.6   

   living_conditions  ...  basic_needs  academic_performance  study_load  \
0       