In [318]:
from IPython.display import display, Markdown
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, SimpleImputer, KNNImputer
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import StandardScaler , OneHotEncoder
from sklearn.tree import DecisionTreeRegressor
import math
import matplotlib.pyplot as plt
import numpy as np 
import seaborn as sns
import pandas as pd 
import scipy.stats as ss
import seaborn as sns
import os
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# Decide between local or kaggle cloud storage         
KAGGLE_ENV = 'kaggle' in os.listdir('/')
data_path = '/kaggle/input' if KAGGLE_ENV else '../kaggle/input'

# This is a good idea to work only locally. But If you wanna ran your NB also at kaggle... this is not working.
# # Pull the dataset from kaggle, it is concat dataset train + original dataset
# dataset_name = 'dantheshark/s4-e11-train-concat'
# if KAGGLE_ENV:
#     kaggle.api.dataset_download_files(dataset_name, path="../kaggle/input/", unzip=True)


for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
    
for dirname, _, filenames in os.walk(data_path):
    for filename in filenames:
        print(os.path.join(dirname, filename)) 

../kaggle/input/submission.csv
../kaggle/input/s4-e11-train-concat-final.csv
../kaggle/input/depression-surveydataset-for-analysis/final_depression_dataset_1.csv
../kaggle/input/playground-series-s4e11/sample_submission.csv
../kaggle/input/playground-series-s4e11/test.csv
../kaggle/input/playground-series-s4e11/train.csv
../kaggle/input/s4-e11-test-concat/s4-e11-test-concat.csv
../kaggle/input/s4-e11-train-concat-final/s4-e11-train-concat-final.csv
../kaggle/input/s4-e11-train-concat-final/dataset-metadata.json
../kaggle/input/s4-e11-train-concat/s4-e11-train-concat.csv
../kaggle/input/s4-e11-test-concat-final/s4-e11-test-concat-final.csv


# Load Data

In [319]:
# Load the data
train_original = pd.read_csv(data_path + '/playground-series-s4e11/train.csv')
test_original = pd.read_csv(data_path + '/playground-series-s4e11/test.csv')
sample_submission = pd.read_csv(data_path + '/playground-series-s4e11/sample_submission.csv')
original_data = pd.read_csv(data_path + '/depression-surveydataset-for-analysis/final_depression_dataset_1.csv')

train_concat_data = pd.read_csv(data_path + '/s4-e11-train-concat/s4-e11-train-concat.csv')
test_concat_data = pd.read_csv(data_path + '/s4-e11-test-concat/s4-e11-test-concat.csv')

In [320]:
train_concat_data.head()

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
0,Working Professional,5.0,,2.0,Other,No,1.0,2.0,0
1,Working Professional,4.0,,3.0,Other,Yes,7.0,3.0,1
2,Student,,2.0,,B.Pharm,Yes,3.0,1.0,1
3,Working Professional,5.0,,1.0,BBA,Yes,10.0,1.0,1
4,Working Professional,1.0,,1.0,BBA,Yes,9.0,4.0,0


In [321]:
test_concat_data.head()

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress
0,Working Professional,2.0,,5.0,LLB,No,9.0,3.0
1,Working Professional,2.0,,4.0,B.Ed,No,6.0,4.0
2,Working Professional,4.0,,1.0,B.Arch,Yes,12.0,4.0
3,Student,,1.0,,BSc,Yes,10.0,4.0
4,Working Professional,5.0,,5.0,BCA,Yes,3.0,4.0


# Preprocessing

## Convertion from Features

In [322]:

# Convert Float to Int, not needed float
columns_to_convert = ['Work Pressure', 'Job Satisfaction', 'Study Satisfaction', 'Work/Study Hours', 'Financial Stress']

for col in columns_to_convert:
    train_concat_data[col] = pd.to_numeric(train_concat_data[col], errors='coerce').astype('Int64')
    test_concat_data[col] = pd.to_numeric(test_concat_data[col], errors='coerce').astype('Int64')  
    

In [323]:

def get_categorical_numerical_features(df):
    # Get Numeric & Categorical Features
    numeric_features = df.select_dtypes(include=[np.number]).columns.tolist()
    categorical_features = df.select_dtypes(include=['object', 'category']).columns.tolist()
    return numeric_features, categorical_features
numeric_features, categorical_features = get_categorical_numerical_features(train_concat_data)

## Outliers

In [324]:
# plt.figure(figsize=(12, 6))
# sns.boxplot(data=train_concat_data[numeric_features])
# plt.xticks(rotation=90)
# plt.show()

## Show general stats 

In [325]:
def show_general_stats(df):
    display(Markdown('### General Stats'))
    display(df.describe())
    display(Markdown('### Data Types'))
    display(df.dtypes)
    display(Markdown('### Missing Values'))
    display(df.isnull().sum())
    display(Markdown('### Shape'))
    display(df.shape)
    display(Markdown('### Head'))
    display(df.head(100))
    display(Markdown('### Tail'))
    display(df.tail(100))
    display(Markdown('### Sample'))
    display(df.sample(100))
    display(Markdown('### '))

In [326]:
show_general_stats(train_concat_data)
show_general_stats(test_concat_data)

### General Stats

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Work/Study Hours,Financial Stress,Depression
count,114836.0,28399.0,114844.0,143256.0,143252.0,143256.0
mean,2.999408,2.947252,2.975131,6.248597,2.988621,0.181647
std,1.405975,1.360518,1.416124,3.852275,1.413664,0.385555
min,1.0,1.0,1.0,0.0,1.0,0.0
25%,2.0,2.0,2.0,3.0,2.0,0.0
50%,3.0,3.0,3.0,6.0,3.0,0.0
75%,4.0,4.0,4.0,10.0,4.0,0.0
max,5.0,5.0,5.0,12.0,5.0,1.0


### Data Types

Working Professional or Student          object
Work Pressure                             Int64
Study Satisfaction                        Int64
Job Satisfaction                          Int64
Degree                                   object
Have you ever had suicidal thoughts ?    object
Work/Study Hours                          Int64
Financial Stress                          Int64
Depression                                int64
dtype: object

### Missing Values

Working Professional or Student               0
Work Pressure                             28420
Study Satisfaction                       114857
Job Satisfaction                          28412
Degree                                        0
Have you ever had suicidal thoughts ?         0
Work/Study Hours                              0
Financial Stress                              4
Depression                                    0
dtype: int64

### Shape

(143256, 9)

### Head

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
0,Working Professional,5,,2,Other,No,1,2,0
1,Working Professional,4,,3,Other,Yes,7,3,1
2,Student,,2,,B.Pharm,Yes,3,1,1
3,Working Professional,5,,1,BBA,Yes,10,1,1
4,Working Professional,1,,1,BBA,Yes,9,4,0
...,...,...,...,...,...,...,...,...,...
95,Working Professional,2,,5,B.Ed,Yes,10,4,0
96,Working Professional,4,,3,B.Ed,Yes,4,2,0
97,Working Professional,1,,3,M.Tech,No,1,2,0
98,Working Professional,2,,2,B.Ed,No,9,2,0


### Tail

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
143156,Working Professional,3,,3,BSc,No,10,5,0
143157,Working Professional,1,,5,BSc,No,5,4,0
143158,Working Professional,2,,4,M.Pharm,Yes,1,4,0
143159,Working Professional,4,,1,Other,Yes,2,5,1
143160,Student,,3,,Class 12,Yes,6,5,1
...,...,...,...,...,...,...,...,...,...
143251,Working Professional,1,,5,BBA,Yes,12,3,0
143252,Working Professional,3,,1,Other,Yes,8,3,1
143253,Working Professional,1,,4,Other,Yes,4,4,0
143254,Working Professional,2,,3,BBA,No,4,5,0


### Sample

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
123191,Working Professional,2,,3,Other,Yes,6,1,0
77038,Student,,3,,Other,Yes,0,3,1
61270,Working Professional,1,,2,B.Com,No,11,4,0
56916,Working Professional,4,,2,B.Pharm,Yes,12,5,1
73729,Student,,4,,B.Com,Yes,7,4,1
...,...,...,...,...,...,...,...,...,...
119096,Working Professional,1,,1,B.Tech,No,2,2,0
81934,Working Professional,5,,5,Other,No,5,5,0
7082,Student,,4,,BSc,No,12,5,0
52103,Working Professional,2,,2,M.Ed,Yes,4,1,0


### 

### General Stats

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Work/Study Hours,Financial Stress
count,75022.0,18767.0,75026.0,93800.0,93800.0
mean,3.011797,2.939522,2.96092,6.247335,2.978763
std,1.403563,1.374242,1.41071,3.858191,1.414604
min,1.0,1.0,1.0,0.0,1.0
25%,2.0,2.0,2.0,3.0,2.0
50%,3.0,3.0,3.0,6.0,3.0
75%,4.0,4.0,4.0,10.0,4.0
max,5.0,5.0,5.0,12.0,5.0


### Data Types

Working Professional or Student          object
Work Pressure                             Int64
Study Satisfaction                        Int64
Job Satisfaction                          Int64
Degree                                   object
Have you ever had suicidal thoughts ?    object
Work/Study Hours                          Int64
Financial Stress                          Int64
dtype: object

### Missing Values

Working Professional or Student              0
Work Pressure                            18778
Study Satisfaction                       75033
Job Satisfaction                         18774
Degree                                      86
Have you ever had suicidal thoughts ?        0
Work/Study Hours                             0
Financial Stress                             0
dtype: int64

### Shape

(93800, 8)

### Head

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress
0,Working Professional,2,,5,LLB,No,9,3
1,Working Professional,2,,4,B.Ed,No,6,4
2,Working Professional,4,,1,B.Arch,Yes,12,4
3,Student,,1,,BSc,Yes,10,4
4,Working Professional,5,,5,BCA,Yes,3,4
...,...,...,...,...,...,...,...,...
95,Working Professional,5,,5,MD,No,10,1
96,Working Professional,4,,2,B.Pharm,No,8,4
97,Student,,1,,PhD,Yes,8,5
98,Student,,4,,MBBS,Yes,0,5


### Tail

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress
93700,Working Professional,1,,5,BHM,No,3,4
93701,Working Professional,1,,3,ME,No,3,3
93702,Working Professional,2,,1,MSc,No,6,1
93703,Student,,5,,Class 12,Yes,6,3
93704,Working Professional,5,,5,B.Ed,Yes,0,1
...,...,...,...,...,...,...,...,...
93795,Working Professional,3,,5,BSc,Yes,2,2
93796,Working Professional,5,,1,BE,Yes,11,3
93797,Student,,4,,B.Tech,No,7,1
93798,Working Professional,4,,2,BA,Yes,7,5


### Sample

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress
57087,Working Professional,1,,3,MBA,Yes,11,1
66255,Working Professional,3,,3,BA,No,1,2
40657,Working Professional,4,,5,B.Arch,No,12,5
15603,Working Professional,5,,4,MA,No,7,1
87336,Working Professional,2,,3,B.Tech,No,0,1
...,...,...,...,...,...,...,...,...
47679,Working Professional,5,,2,B.Ed,No,5,2
64531,Student,,1,,BA,Yes,11,1
22986,Working Professional,1,,5,BSc,Yes,11,3
92846,Working Professional,2,,1,B.Pharm,No,1,4


### 

## MICE (Multiple Imputation by Chained Equations)
**We don't use MICE**, at the train data there is not enough correlation between the features (none of them > +/- 0.7!)

In [327]:
# def cramers_v(x, y):
#     confusion_matrix = pd.crosstab(x, y)
#     chi2 = ss.chi2_contingency(confusion_matrix)[0]
#     n = confusion_matrix.sum().sum()
#     phi2 = chi2 / n
#     r, k = confusion_matrix.shape
#     phi2corr = max(0, phi2 - ((k-1)*(r-1))/(n-1))
#     rcorr = r - ((r-1)**2)/(n-1)
#     kcorr = k - ((k-1)**2)/(n-1)
#     return np.sqrt(phi2corr / min((kcorr-1), (rcorr-1)))

# cramers_v(train_concat_data['FEATURE'], train_concat_data['Depression'])

## KNN Imputer (k-Nearest Neighbors)
**Takes way too long for this dataset**

In [328]:
# imputer = KNNImputer(n_neighbors=3)  # k=5 nearest neighbors
# print("Start KNN-Imputation...")
# train_original[numeric_features] = imputer.fit_transform(train_original[numeric_features])# only for numerical data
# print(train_original)

## Missing Data Numerical (int), IterativeImputer (estimator=RandomForestClassifier) 
Tyred also ExtraTreesRegressor, but does not converged.

# Training Dataset

In [329]:
# List of columns (only int) with missing values
columns_to_convert = ['Work Pressure', 'Job Satisfaction',  'Work/Study Hours', 'Financial Stress']#, 'Study Satisfaction']# Had problems here, messed up the data 

train_preprocessed = train_concat_data.copy()

imputer = IterativeImputer(
    estimator=RandomForestClassifier(n_estimators=50, max_depth=10, n_jobs=-1, random_state=42),
    max_iter=200,
    random_state=42,
    min_value=0,
    imputation_order="ascending" 
)
df_subset = train_preprocessed[columns_to_convert]
df_imputed_values = imputer.fit_transform(df_subset)
train_preprocessed[columns_to_convert] = np.round(df_imputed_values).astype(int)

print(imputer.n_iter_)
show_general_stats(train_preprocessed)

4


### General Stats

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Work/Study Hours,Financial Stress,Depression
count,143256.0,28399.0,143256.0,143256.0,143256.0,143256.0
mean,3.031049,2.947252,2.850561,6.248597,2.988657,0.181647
std,1.420301,1.360518,1.422842,3.852275,1.413677,0.385555
min,1.0,1.0,1.0,0.0,1.0,0.0
25%,2.0,2.0,2.0,3.0,2.0,0.0
50%,3.0,3.0,3.0,6.0,3.0,0.0
75%,4.0,4.0,4.0,10.0,4.0,0.0
max,5.0,5.0,5.0,12.0,5.0,1.0


### Data Types

Working Professional or Student          object
Work Pressure                             int64
Study Satisfaction                        Int64
Job Satisfaction                          int64
Degree                                   object
Have you ever had suicidal thoughts ?    object
Work/Study Hours                          int64
Financial Stress                          int64
Depression                                int64
dtype: object

### Missing Values

Working Professional or Student               0
Work Pressure                                 0
Study Satisfaction                       114857
Job Satisfaction                              0
Degree                                        0
Have you ever had suicidal thoughts ?         0
Work/Study Hours                              0
Financial Stress                              0
Depression                                    0
dtype: int64

### Shape

(143256, 9)

### Head

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
0,Working Professional,5,,2,Other,No,1,2,0
1,Working Professional,4,,3,Other,Yes,7,3,1
2,Student,2,2,5,B.Pharm,Yes,3,1,1
3,Working Professional,5,,1,BBA,Yes,10,1,1
4,Working Professional,1,,1,BBA,Yes,9,4,0
...,...,...,...,...,...,...,...,...,...
95,Working Professional,2,,5,B.Ed,Yes,10,4,0
96,Working Professional,4,,3,B.Ed,Yes,4,2,0
97,Working Professional,1,,3,M.Tech,No,1,2,0
98,Working Professional,2,,2,B.Ed,No,9,2,0


### Tail

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
143156,Working Professional,3,,3,BSc,No,10,5,0
143157,Working Professional,1,,5,BSc,No,5,4,0
143158,Working Professional,2,,4,M.Pharm,Yes,1,4,0
143159,Working Professional,4,,1,Other,Yes,2,5,1
143160,Student,2,3,5,Class 12,Yes,6,5,1
...,...,...,...,...,...,...,...,...,...
143251,Working Professional,1,,5,BBA,Yes,12,3,0
143252,Working Professional,3,,1,Other,Yes,8,3,1
143253,Working Professional,1,,4,Other,Yes,4,4,0
143254,Working Professional,2,,3,BBA,No,4,5,0


### Sample

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
75944,Working Professional,5,,5,Other,Yes,1,1,0
90494,Working Professional,1,,5,B.Arch,No,10,2,0
55558,Student,2,2,5,B.Com,Yes,12,1,1
129613,Working Professional,5,,5,MSc,No,11,1,0
52864,Working Professional,4,,2,Class 12,Yes,7,1,1
...,...,...,...,...,...,...,...,...,...
92193,Working Professional,5,,3,Other,No,4,2,0
142549,Student,5,4,1,BCA,No,10,5,0
44123,Working Professional,4,,2,BCA,Yes,6,2,1
123142,Working Professional,1,,4,B.Com,No,10,1,0


### 

In [330]:
columns_to_convert = ['Study Satisfaction'] # a lot of missing values! over 114k! better we do the this calculation seperated

df_train = train_preprocessed.copy()

imputer = IterativeImputer(
    estimator=RandomForestClassifier(n_estimators=50, max_depth=10, n_jobs=-1, random_state=42),
    max_iter=200,
    random_state=42,
    min_value=0,
    imputation_order="ascending" 
)

df_subset = df_train[columns_to_convert]
df_imputed_values = imputer.fit_transform(df_subset)
df_train[columns_to_convert] = np.round(df_imputed_values).astype(int)

print(imputer.n_iter_)
show_general_stats(df_train)

0


### General Stats

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Work/Study Hours,Financial Stress,Depression
count,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0
mean,3.031049,2.989543,2.850561,6.248597,2.988657,0.181647
std,1.420301,0.606115,1.422842,3.852275,1.413677,0.385555
min,1.0,1.0,1.0,0.0,1.0,0.0
25%,2.0,3.0,2.0,3.0,2.0,0.0
50%,3.0,3.0,3.0,6.0,3.0,0.0
75%,4.0,3.0,4.0,10.0,4.0,0.0
max,5.0,5.0,5.0,12.0,5.0,1.0


### Data Types

Working Professional or Student          object
Work Pressure                             int64
Study Satisfaction                        int64
Job Satisfaction                          int64
Degree                                   object
Have you ever had suicidal thoughts ?    object
Work/Study Hours                          int64
Financial Stress                          int64
Depression                                int64
dtype: object

### Missing Values

Working Professional or Student          0
Work Pressure                            0
Study Satisfaction                       0
Job Satisfaction                         0
Degree                                   0
Have you ever had suicidal thoughts ?    0
Work/Study Hours                         0
Financial Stress                         0
Depression                               0
dtype: int64

### Shape

(143256, 9)

### Head

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
0,Working Professional,5,3,2,Other,No,1,2,0
1,Working Professional,4,3,3,Other,Yes,7,3,1
2,Student,2,2,5,B.Pharm,Yes,3,1,1
3,Working Professional,5,3,1,BBA,Yes,10,1,1
4,Working Professional,1,3,1,BBA,Yes,9,4,0
...,...,...,...,...,...,...,...,...,...
95,Working Professional,2,3,5,B.Ed,Yes,10,4,0
96,Working Professional,4,3,3,B.Ed,Yes,4,2,0
97,Working Professional,1,3,3,M.Tech,No,1,2,0
98,Working Professional,2,3,2,B.Ed,No,9,2,0


### Tail

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
143156,Working Professional,3,3,3,BSc,No,10,5,0
143157,Working Professional,1,3,5,BSc,No,5,4,0
143158,Working Professional,2,3,4,M.Pharm,Yes,1,4,0
143159,Working Professional,4,3,1,Other,Yes,2,5,1
143160,Student,2,3,5,Class 12,Yes,6,5,1
...,...,...,...,...,...,...,...,...,...
143251,Working Professional,1,3,5,BBA,Yes,12,3,0
143252,Working Professional,3,3,1,Other,Yes,8,3,1
143253,Working Professional,1,3,4,Other,Yes,4,4,0
143254,Working Professional,2,3,3,BBA,No,4,5,0


### Sample

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
96892,Working Professional,3,3,2,B.Com,No,9,4,0
22785,Working Professional,4,3,3,M.Ed,Yes,8,5,1
105027,Working Professional,3,3,3,Other,Yes,6,1,0
124969,Student,2,3,2,Class 12,No,7,2,0
32248,Student,2,1,2,M.Tech,No,7,2,0
...,...,...,...,...,...,...,...,...,...
44281,Working Professional,5,3,2,BSc,No,9,5,0
44786,Working Professional,3,3,3,B.Pharm,Yes,7,3,0
78348,Working Professional,4,3,4,Class 12,No,11,2,0
76853,Student,2,3,2,BSc,No,0,1,0


### 

# Test Data set

In [331]:
# List of columns (only int) with missing values
columns_to_convert = ['Work Pressure', 'Job Satisfaction',  'Work/Study Hours', 'Financial Stress']#, 'Study Satisfaction']# Had problems here, messed up the data 

test_preprocessed = test_concat_data.copy()

imputer = IterativeImputer(
    estimator=RandomForestClassifier(n_estimators=50, max_depth=10, n_jobs=-1, random_state=42),
    max_iter=200,
    random_state=42,
    min_value=0,
    imputation_order="ascending" 
)
df_subset = test_preprocessed[columns_to_convert]
df_imputed_values = imputer.fit_transform(df_subset)
test_preprocessed[columns_to_convert] = np.round(df_imputed_values).astype(int)

print(imputer.n_iter_)
show_general_stats(test_preprocessed)

4


### General Stats

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Work/Study Hours,Financial Stress
count,93800.0,18767.0,93800.0,93800.0,93800.0
mean,2.957495,2.939522,2.854606,6.247335,2.978763
std,1.3859,1.374242,1.36526,3.858191,1.414604
min,1.0,1.0,1.0,0.0,1.0
25%,2.0,2.0,2.0,3.0,2.0
50%,3.0,3.0,3.0,6.0,3.0
75%,4.0,4.0,4.0,10.0,4.0
max,5.0,5.0,5.0,12.0,5.0


### Data Types

Working Professional or Student          object
Work Pressure                             int64
Study Satisfaction                        Int64
Job Satisfaction                          int64
Degree                                   object
Have you ever had suicidal thoughts ?    object
Work/Study Hours                          int64
Financial Stress                          int64
dtype: object

### Missing Values

Working Professional or Student              0
Work Pressure                                0
Study Satisfaction                       75033
Job Satisfaction                             0
Degree                                      86
Have you ever had suicidal thoughts ?        0
Work/Study Hours                             0
Financial Stress                             0
dtype: int64

### Shape

(93800, 8)

### Head

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress
0,Working Professional,2,,5,LLB,No,9,3
1,Working Professional,2,,4,B.Ed,No,6,4
2,Working Professional,4,,1,B.Arch,Yes,12,4
3,Student,1,1,4,BSc,Yes,10,4
4,Working Professional,5,,5,BCA,Yes,3,4
...,...,...,...,...,...,...,...,...
95,Working Professional,5,,5,MD,No,10,1
96,Working Professional,4,,2,B.Pharm,No,8,4
97,Student,1,1,5,PhD,Yes,8,5
98,Student,5,4,2,MBBS,Yes,0,5


### Tail

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress
93700,Working Professional,1,,5,BHM,No,3,4
93701,Working Professional,1,,3,ME,No,3,3
93702,Working Professional,2,,1,MSc,No,6,1
93703,Student,3,5,3,Class 12,Yes,6,3
93704,Working Professional,5,,5,B.Ed,Yes,0,1
...,...,...,...,...,...,...,...,...
93795,Working Professional,3,,5,BSc,Yes,2,2
93796,Working Professional,5,,1,BE,Yes,11,3
93797,Student,2,4,2,B.Tech,No,7,1
93798,Working Professional,4,,2,BA,Yes,7,5


### Sample

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress
82130,Working Professional,5,,1,Class 12,Yes,6,3
26117,Working Professional,4,,1,B.Ed,Yes,9,1
39873,Student,2,4,2,B.Com,No,10,2
51373,Working Professional,5,,1,M.Ed,No,10,4
60654,Working Professional,1,,3,M.Ed,Yes,11,1
...,...,...,...,...,...,...,...,...
2961,Student,2,2,5,Class 12,No,12,5
53000,Working Professional,2,,4,BHM,Yes,7,4
26932,Working Professional,5,,1,B.Arch,No,0,2
91316,Working Professional,5,,2,MBBS,No,11,5


### 

In [332]:
columns_to_convert = ['Study Satisfaction'] # a lot of missing values! over 114k! better we do the this calculation seperated

df_test = test_preprocessed.copy()

imputer = IterativeImputer(
    estimator=RandomForestClassifier(n_estimators=50, max_depth=10, n_jobs=-1, random_state=42),
    max_iter=200,
    random_state=42,
    min_value=0,
    imputation_order="ascending" 
)

df_subset = df_test[columns_to_convert]
df_imputed_values = imputer.fit_transform(df_subset)
df_test[columns_to_convert] = np.round(df_imputed_values).astype(int)

print(imputer.n_iter_)
show_general_stats(df_test)

0


### General Stats

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Work/Study Hours,Financial Stress
count,93800.0,93800.0,93800.0,93800.0,93800.0
mean,2.957495,2.9879,2.854606,6.247335,2.978763
std,1.3859,0.615157,1.36526,3.858191,1.414604
min,1.0,1.0,1.0,0.0,1.0
25%,2.0,3.0,2.0,3.0,2.0
50%,3.0,3.0,3.0,6.0,3.0
75%,4.0,3.0,4.0,10.0,4.0
max,5.0,5.0,5.0,12.0,5.0


### Data Types

Working Professional or Student          object
Work Pressure                             int64
Study Satisfaction                        int64
Job Satisfaction                          int64
Degree                                   object
Have you ever had suicidal thoughts ?    object
Work/Study Hours                          int64
Financial Stress                          int64
dtype: object

### Missing Values

Working Professional or Student           0
Work Pressure                             0
Study Satisfaction                        0
Job Satisfaction                          0
Degree                                   86
Have you ever had suicidal thoughts ?     0
Work/Study Hours                          0
Financial Stress                          0
dtype: int64

### Shape

(93800, 8)

### Head

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress
0,Working Professional,2,3,5,LLB,No,9,3
1,Working Professional,2,3,4,B.Ed,No,6,4
2,Working Professional,4,3,1,B.Arch,Yes,12,4
3,Student,1,1,4,BSc,Yes,10,4
4,Working Professional,5,3,5,BCA,Yes,3,4
...,...,...,...,...,...,...,...,...
95,Working Professional,5,3,5,MD,No,10,1
96,Working Professional,4,3,2,B.Pharm,No,8,4
97,Student,1,1,5,PhD,Yes,8,5
98,Student,5,4,2,MBBS,Yes,0,5


### Tail

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress
93700,Working Professional,1,3,5,BHM,No,3,4
93701,Working Professional,1,3,3,ME,No,3,3
93702,Working Professional,2,3,1,MSc,No,6,1
93703,Student,3,5,3,Class 12,Yes,6,3
93704,Working Professional,5,3,5,B.Ed,Yes,0,1
...,...,...,...,...,...,...,...,...
93795,Working Professional,3,3,5,BSc,Yes,2,2
93796,Working Professional,5,3,1,BE,Yes,11,3
93797,Student,2,4,2,B.Tech,No,7,1
93798,Working Professional,4,3,2,BA,Yes,7,5


### Sample

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress
80520,Working Professional,4,3,5,BBA,Yes,7,3
36105,Student,2,3,2,BCA,No,5,2
89390,Working Professional,5,3,3,BCA,No,0,4
2214,Working Professional,5,3,3,M.Ed,Yes,7,3
28323,Working Professional,3,3,1,Class 12,Yes,8,4
...,...,...,...,...,...,...,...,...
24101,Working Professional,5,3,2,BHM,Yes,10,4
85864,Working Professional,5,3,4,B.Arch,No,10,1
25146,Student,5,2,1,BCA,Yes,10,5
38614,Working Professional,2,3,1,B.Arch,Yes,10,5


### 

# Scale Numerical features
not needed, there all between 1-5 or 1-10

# Save preprocessed dataframe

In [333]:
# if KAGGLE_ENV:
#     train.to_csv('/kaggle/working/s4-e11-train-concat-imputed.csv', index=False)
# else:
#     train.to_csv( '../kaggle/working/' + '/s4-e11-train-concat-imputed.csv', index=False)

## Fix categorical missing data
There are not any missing data for categorical features.

# Scale Categorical Features
There are different categories at this dataframe.
Working Professional or Student, it is just 1 or 0, so we use OneHotEncoder
Have you ever had suicidal thoughts?, yes/no, already binar
Degree, should use a OrdinalEncoder

In [334]:
df_train.head(100)

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
0,Working Professional,5,3,2,Other,No,1,2,0
1,Working Professional,4,3,3,Other,Yes,7,3,1
2,Student,2,2,5,B.Pharm,Yes,3,1,1
3,Working Professional,5,3,1,BBA,Yes,10,1,1
4,Working Professional,1,3,1,BBA,Yes,9,4,0
...,...,...,...,...,...,...,...,...,...
95,Working Professional,2,3,5,B.Ed,Yes,10,4,0
96,Working Professional,4,3,3,B.Ed,Yes,4,2,0
97,Working Professional,1,3,3,M.Tech,No,1,2,0
98,Working Professional,2,3,2,B.Ed,No,9,2,0


In [335]:
df_test.head(100)

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress
0,Working Professional,2,3,5,LLB,No,9,3
1,Working Professional,2,3,4,B.Ed,No,6,4
2,Working Professional,4,3,1,B.Arch,Yes,12,4
3,Student,1,1,4,BSc,Yes,10,4
4,Working Professional,5,3,5,BCA,Yes,3,4
...,...,...,...,...,...,...,...,...
95,Working Professional,5,3,5,MD,No,10,1
96,Working Professional,4,3,2,B.Pharm,No,8,4
97,Student,1,1,5,PhD,Yes,8,5
98,Student,5,4,2,MBBS,Yes,0,5


In [336]:
# Check the unique values in the column 'Degree'
num_unique_degrees = df_train["Degree"].nunique()
unique_degrees = df_train["Degree"].unique()

print(f"Number of unique categories in the Degree column': {num_unique_degrees}")
print(f"Unique categories in the 'Degree' column': {unique_degrees}")

Number of unique categories in the Degree column': 16
Unique categories in the 'Degree' column': ['Other' 'B.Pharm' 'BBA' 'MCA' 'BSc' 'B.Arch' 'BCA' 'B.Ed' 'B.Com'
 'Class 12' 'M.Tech' 'M.Ed' 'MSc' 'B.Tech' 'LLM' 'M.Pharm']


In [337]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

df = df_train.copy()

# Binary Encoding for "Have you ever had suicidal thoughts ?"
df['Have you ever had suicidal thoughts ?'] = df['Have you ever had suicidal thoughts ?'].map({'Yes': 1, 'No': 0})

# One-Hot - Encoding for "Working Professional or Student"
encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
encoded_feature = encoder.fit_transform(df[['Working Professional or Student']])
df_encoded = pd.DataFrame(encoded_feature, columns=encoder.get_feature_names_out(['Working Professional or Student']))

df = df.drop(columns=['Working Professional or Student'])
df = pd.concat([df, df_encoded], axis=1)

# One-Hot-Encoding for "Degree"
encoder_degree = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
degree_encoded = encoder_degree.fit_transform(df2[["Degree"]])
degree_columns = encoder_degree.get_feature_names_out(["Degree"])
df_degree = pd.DataFrame(degree_encoded, columns=degree_columns, index=df2.index)

# Final DataFrame
train_final = df.drop(columns=["Degree"], errors='ignore').join(df_degree)

# Show stats
print("OneHotEncoded Degree Data")
show_general_stats(train_final)

OneHotEncoded Degree Data


### General Stats

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression,Working Professional or Student_Student,Working Professional or Student_Working Professional,Degree_B.Arch,...,Degree_BCA,Degree_BSc,Degree_Class 12,Degree_LLM,Degree_M.Ed,Degree_M.Pharm,Degree_M.Tech,Degree_MCA,Degree_MSc,Degree_Other
count,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0,...,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0
mean,3.031049,2.989543,2.850561,0.494297,6.248597,2.988657,0.181647,0.198267,0.801733,0.061645,...,0.04078,0.035747,0.104736,0.033025,0.040131,0.032264,0.031831,0.03729,0.034721,0.299373
std,1.420301,0.606115,1.422842,0.499969,3.852275,1.413677,0.385555,0.398696,0.398696,0.24051,...,0.197781,0.18566,0.306213,0.178702,0.196267,0.176701,0.175551,0.189472,0.183073,0.457985
min,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,3.0,2.0,0.0,3.0,2.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,3.0,3.0,3.0,0.0,6.0,3.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,4.0,3.0,4.0,1.0,10.0,4.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
max,5.0,5.0,5.0,1.0,12.0,5.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


### Data Types

Work Pressure                                             int64
Study Satisfaction                                        int64
Job Satisfaction                                          int64
Have you ever had suicidal thoughts ?                     int64
Work/Study Hours                                          int64
Financial Stress                                          int64
Depression                                                int64
Working Professional or Student_Student                 float64
Working Professional or Student_Working Professional    float64
Degree_B.Arch                                           float64
Degree_B.Com                                            float64
Degree_B.Ed                                             float64
Degree_B.Pharm                                          float64
Degree_B.Tech                                           float64
Degree_BBA                                              float64
Degree_BCA                              

### Missing Values

Work Pressure                                           0
Study Satisfaction                                      0
Job Satisfaction                                        0
Have you ever had suicidal thoughts ?                   0
Work/Study Hours                                        0
Financial Stress                                        0
Depression                                              0
Working Professional or Student_Student                 0
Working Professional or Student_Working Professional    0
Degree_B.Arch                                           0
Degree_B.Com                                            0
Degree_B.Ed                                             0
Degree_B.Pharm                                          0
Degree_B.Tech                                           0
Degree_BBA                                              0
Degree_BCA                                              0
Degree_BSc                                              0
Degree_Class 1

### Shape

(143256, 25)

### Head

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression,Working Professional or Student_Student,Working Professional or Student_Working Professional,Degree_B.Arch,...,Degree_BCA,Degree_BSc,Degree_Class 12,Degree_LLM,Degree_M.Ed,Degree_M.Pharm,Degree_M.Tech,Degree_MCA,Degree_MSc,Degree_Other
0,5,3,2,0,1,2,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,4,3,3,1,7,3,1,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,2,2,5,1,3,1,1,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,5,3,1,1,10,1,1,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1,3,1,1,9,4,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2,3,5,1,10,4,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
96,4,3,3,1,4,2,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
97,1,3,3,0,1,2,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
98,2,3,2,0,9,2,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Tail

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression,Working Professional or Student_Student,Working Professional or Student_Working Professional,Degree_B.Arch,...,Degree_BCA,Degree_BSc,Degree_Class 12,Degree_LLM,Degree_M.Ed,Degree_M.Pharm,Degree_M.Tech,Degree_MCA,Degree_MSc,Degree_Other
143156,3,3,3,0,10,5,0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
143157,1,3,5,0,5,4,0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
143158,2,3,4,1,1,4,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
143159,4,3,1,1,2,5,1,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
143160,2,3,5,1,6,5,1,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143251,1,3,5,1,12,3,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
143252,3,3,1,1,8,3,1,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
143253,1,3,4,1,4,4,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
143254,2,3,3,0,4,5,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Sample

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression,Working Professional or Student_Student,Working Professional or Student_Working Professional,Degree_B.Arch,...,Degree_BCA,Degree_BSc,Degree_Class 12,Degree_LLM,Degree_M.Ed,Degree_M.Pharm,Degree_M.Tech,Degree_MCA,Degree_MSc,Degree_Other
103616,5,2,1,1,11,2,1,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
97743,5,3,2,1,9,5,1,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
43184,2,5,3,1,1,3,0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6129,2,3,5,0,0,2,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13617,4,3,1,1,5,4,0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4023,3,3,5,0,3,1,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
125755,5,3,1,0,10,2,0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
111226,4,3,1,1,12,4,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
5118,1,3,5,0,7,3,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


### 

In [338]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

df = df_test.copy()

# Binary Encoding for "Have you ever had suicidal thoughts ?"
df['Have you ever had suicidal thoughts ?'] = df['Have you ever had suicidal thoughts ?'].map({'Yes': 1, 'No': 0})

# One-Hot - Encoding for "Working Professional or Student"
encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
encoded_feature = encoder.fit_transform(df[['Working Professional or Student']])
df_encoded = pd.DataFrame(encoded_feature, columns=encoder.get_feature_names_out(['Working Professional or Student']))

df = df.drop(columns=['Working Professional or Student'])
df = pd.concat([df, df_encoded], axis=1)

# One-Hot-Encoding for "Degree"
encoder_degree = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
degree_encoded = encoder_degree.fit_transform(df2[["Degree"]])
degree_columns = encoder_degree.get_feature_names_out(["Degree"])
df_degree = pd.DataFrame(degree_encoded, columns=degree_columns, index=df2.index)

# Final DataFrame
test_final = df.drop(columns=["Degree"], errors='ignore').join(df_degree)

# Show stats
print("OneHotEncoded Degree Data")
show_general_stats(test_final)

OneHotEncoded Degree Data


### General Stats

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Working Professional or Student_Student,Working Professional or Student_Working Professional,Degree_B.Arch,Degree_B.Com,...,Degree_BCA,Degree_BSc,Degree_Class 12,Degree_LLM,Degree_M.Ed,Degree_M.Pharm,Degree_M.Tech,Degree_MCA,Degree_MSc,Degree_Other
count,93800.0,93800.0,93800.0,93800.0,93800.0,93800.0,93800.0,93800.0,93800.0,93800.0,...,93800.0,93800.0,93800.0,93800.0,93800.0,93800.0,93800.0,93800.0,93800.0,93800.0
mean,2.957495,2.9879,2.854606,0.494936,6.247335,2.978763,0.200128,0.799872,0.062367,0.058188,...,0.040789,0.035906,0.104552,0.032793,0.040597,0.0321,0.03193,0.036866,0.034414,0.296908
std,1.3859,0.615157,1.36526,0.499977,3.858191,1.414604,0.400098,0.400098,0.241822,0.234099,...,0.197802,0.186057,0.305977,0.178096,0.197356,0.176267,0.175814,0.188433,0.18229,0.456898
min,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,3.0,2.0,0.0,3.0,2.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,3.0,3.0,3.0,0.0,6.0,3.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,4.0,3.0,4.0,1.0,10.0,4.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
max,5.0,5.0,5.0,1.0,12.0,5.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


### Data Types

Work Pressure                                             int64
Study Satisfaction                                        int64
Job Satisfaction                                          int64
Have you ever had suicidal thoughts ?                     int64
Work/Study Hours                                          int64
Financial Stress                                          int64
Working Professional or Student_Student                 float64
Working Professional or Student_Working Professional    float64
Degree_B.Arch                                           float64
Degree_B.Com                                            float64
Degree_B.Ed                                             float64
Degree_B.Pharm                                          float64
Degree_B.Tech                                           float64
Degree_BBA                                              float64
Degree_BCA                                              float64
Degree_BSc                              

### Missing Values

Work Pressure                                           0
Study Satisfaction                                      0
Job Satisfaction                                        0
Have you ever had suicidal thoughts ?                   0
Work/Study Hours                                        0
Financial Stress                                        0
Working Professional or Student_Student                 0
Working Professional or Student_Working Professional    0
Degree_B.Arch                                           0
Degree_B.Com                                            0
Degree_B.Ed                                             0
Degree_B.Pharm                                          0
Degree_B.Tech                                           0
Degree_BBA                                              0
Degree_BCA                                              0
Degree_BSc                                              0
Degree_Class 12                                         0
Degree_LLM    

### Shape

(93800, 24)

### Head

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Working Professional or Student_Student,Working Professional or Student_Working Professional,Degree_B.Arch,Degree_B.Com,...,Degree_BCA,Degree_BSc,Degree_Class 12,Degree_LLM,Degree_M.Ed,Degree_M.Pharm,Degree_M.Tech,Degree_MCA,Degree_MSc,Degree_Other
0,2,3,5,0,9,3,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,2,3,4,0,6,4,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,4,3,1,1,12,4,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1,1,4,1,10,4,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,3,5,1,3,4,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,5,3,5,0,10,1,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
96,4,3,2,0,8,4,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
97,1,1,5,1,8,5,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
98,5,4,2,1,0,5,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Tail

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Working Professional or Student_Student,Working Professional or Student_Working Professional,Degree_B.Arch,Degree_B.Com,...,Degree_BCA,Degree_BSc,Degree_Class 12,Degree_LLM,Degree_M.Ed,Degree_M.Pharm,Degree_M.Tech,Degree_MCA,Degree_MSc,Degree_Other
93700,1,3,5,0,3,4,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
93701,1,3,3,0,3,3,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
93702,2,3,1,0,6,1,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
93703,3,5,3,1,6,3,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
93704,5,3,5,1,0,1,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93795,3,3,5,1,2,2,0.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
93796,5,3,1,1,11,3,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
93797,2,4,2,0,7,1,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
93798,4,3,2,1,7,5,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


### Sample

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Working Professional or Student_Student,Working Professional or Student_Working Professional,Degree_B.Arch,Degree_B.Com,...,Degree_BCA,Degree_BSc,Degree_Class 12,Degree_LLM,Degree_M.Ed,Degree_M.Pharm,Degree_M.Tech,Degree_MCA,Degree_MSc,Degree_Other
51725,3,3,2,0,5,5,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
62929,4,3,3,1,10,5,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
8695,3,3,2,0,4,4,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1094,1,2,5,0,0,1,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
91737,4,3,3,1,12,5,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78536,1,3,1,1,1,1,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
68928,4,3,5,0,9,2,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
6634,5,3,5,1,3,4,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
51911,2,2,2,1,10,2,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


### 

# Save preprocessed final dataframe

In [339]:
train_final.head(100)

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression,Working Professional or Student_Student,Working Professional or Student_Working Professional,Degree_B.Arch,...,Degree_BCA,Degree_BSc,Degree_Class 12,Degree_LLM,Degree_M.Ed,Degree_M.Pharm,Degree_M.Tech,Degree_MCA,Degree_MSc,Degree_Other
0,5,3,2,0,1,2,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,4,3,3,1,7,3,1,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,2,2,5,1,3,1,1,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,5,3,1,1,10,1,1,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1,3,1,1,9,4,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2,3,5,1,10,4,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
96,4,3,3,1,4,2,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
97,1,3,3,0,1,2,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
98,2,3,2,0,9,2,0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [340]:
test_final.head(100)

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Working Professional or Student_Student,Working Professional or Student_Working Professional,Degree_B.Arch,Degree_B.Com,...,Degree_BCA,Degree_BSc,Degree_Class 12,Degree_LLM,Degree_M.Ed,Degree_M.Pharm,Degree_M.Tech,Degree_MCA,Degree_MSc,Degree_Other
0,2,3,5,0,9,3,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,2,3,4,0,6,4,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,4,3,1,1,12,4,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1,1,4,1,10,4,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,3,5,1,3,4,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,5,3,5,0,10,1,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
96,4,3,2,0,8,4,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
97,1,1,5,1,8,5,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
98,5,4,2,1,0,5,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [341]:
if KAGGLE_ENV:
    train_final.to_csv(data_path + '/s4-e11-train-concat-final/s4-e11-train-concat-final.csv', index=False)
else:
    train_final.to_csv(data_path +  '/s4-e11-train-concat-final/s4-e11-train-concat-final.csv', index=False)

In [342]:
if KAGGLE_ENV:
    test_final.to_csv(data_path +'/s4-e11-test-concat-final/s4-e11-test-concat-final.csv', index=False)
else:
    test_final.to_csv(data_path +'/s4-e11-test-concat-final/s4-e11-test-concat-final.csv', index=False)

# Feature Selection (optional)
Delete attributes there aren't providing any information (correlation matrix).

# Create new Features (optional)
- Discretizing Continuous Features
- Decomposing Features and Date/Time Variables
- Aggregate attributes to better new attributes