In [131]:
from IPython.display import display, Markdown
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, SimpleImputer, KNNImputer
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import StandardScaler , OneHotEncoder
from sklearn.tree import DecisionTreeRegressor
import math
import matplotlib.pyplot as plt
import numpy as np 
import seaborn as sns
import pandas as pd 
import scipy.stats as ss
import seaborn as sns
import os
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# Decide between local or kaggle cloud storage         
KAGGLE_ENV = 'kaggle' in os.listdir('/')
data_path = '/kaggle/input' if KAGGLE_ENV else '../kaggle/input'

# This is a good idea to work only locally. But If you wanna ran your NB also at kaggle... this is not working.
# # Pull the dataset from kaggle, it is concat dataset train + original dataset
# dataset_name = 'dantheshark/s4-e11-train-concat'
# if KAGGLE_ENV:
#     kaggle.api.dataset_download_files(dataset_name, path="../kaggle/input/", unzip=True)


for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
    
for dirname, _, filenames in os.walk(data_path):
    for filename in filenames:
        print(os.path.join(dirname, filename)) 

../kaggle/input/submission.csv
../kaggle/input/depression-surveydataset-for-analysis/final_depression_dataset_1.csv
../kaggle/input/playground-series-s4e11/sample_submission.csv
../kaggle/input/playground-series-s4e11/test.csv
../kaggle/input/playground-series-s4e11/train.csv
../kaggle/input/s4-e11-train-concat/s4-e11-train-concat.csv


# Load Data

In [132]:
# Load the data
train_original = pd.read_csv(data_path + '/playground-series-s4e11/train.csv')
train_original = pd.read_csv(data_path + '/playground-series-s4e11/test.csv')
sample_submission = pd.read_csv(data_path + '/playground-series-s4e11/sample_submission.csv')
original_data = pd.read_csv(data_path + '/depression-surveydataset-for-analysis/final_depression_dataset_1.csv')
train_concat_data = pd.read_csv(data_path + '/../working/s4-e11-train-concat1.csv')

# Preprocessing

## Convertion from Features

In [133]:
# Convert Float to Int, not needed float
columns_to_convert = ['Work Pressure', 'Job Satisfaction', 'Study Satisfaction', 'Work/Study Hours', 'Financial Stress']

for col in columns_to_convert:
    train_concat_data[col] = pd.to_numeric(train_concat_data[col], errors='coerce').astype('Int64')

In [134]:
def get_categorical_numerical_features(df):
    # Get Numeric & Categorical Features
    numeric_features = df.select_dtypes(include=[np.number]).columns.tolist()
    categorical_features = df.select_dtypes(include=['object', 'category']).columns.tolist()
    return numeric_features, categorical_features
numeric_features, categorical_features = get_categorical_numerical_features(train_concat_data)

## Outliers

In [135]:
# plt.figure(figsize=(12, 6))
# sns.boxplot(data=train_concat_data[numeric_features])
# plt.xticks(rotation=90)
# plt.show()

## Show general stats 

In [136]:
def show_general_stats(df):
    display(Markdown('### General Stats'))
    display(df.describe())
    display(Markdown('### Data Types'))
    display(df.dtypes)
    display(Markdown('### Missing Values'))
    display(df.isnull().sum())
    display(Markdown('### Shape'))
    display(df.shape)
    display(Markdown('### Head'))
    display(df.head(100))
    display(Markdown('### Tail'))
    display(df.tail(100))
    display(Markdown('### Sample'))
    display(df.sample(100))
    display(Markdown('### '))

In [137]:
show_general_stats(train_concat_data)

### General Stats

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Work/Study Hours,Financial Stress,Depression
count,114836.0,28399.0,114844.0,143256.0,143252.0,143256.0
mean,2.999408,2.947252,2.975131,6.248597,2.988621,0.181647
std,1.405975,1.360518,1.416124,3.852275,1.413664,0.385555
min,1.0,1.0,1.0,0.0,1.0,0.0
25%,2.0,2.0,2.0,3.0,2.0,0.0
50%,3.0,3.0,3.0,6.0,3.0,0.0
75%,4.0,4.0,4.0,10.0,4.0,0.0
max,5.0,5.0,5.0,12.0,5.0,1.0


### Data Types

Working Professional or Student          object
Work Pressure                             Int64
Study Satisfaction                        Int64
Job Satisfaction                          Int64
Degree                                   object
Have you ever had suicidal thoughts ?    object
Work/Study Hours                          Int64
Financial Stress                          Int64
Depression                                int64
dtype: object

### Missing Values

Working Professional or Student               0
Work Pressure                             28420
Study Satisfaction                       114857
Job Satisfaction                          28412
Degree                                        0
Have you ever had suicidal thoughts ?         0
Work/Study Hours                              0
Financial Stress                              4
Depression                                    0
dtype: int64

### Shape

(143256, 9)

### Head

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
0,Working Professional,5,,2,Other,No,1,2,0
1,Working Professional,4,,3,Other,Yes,7,3,1
2,Student,,2,,B.Pharm,Yes,3,1,1
3,Working Professional,5,,1,BBA,Yes,10,1,1
4,Working Professional,1,,1,BBA,Yes,9,4,0
...,...,...,...,...,...,...,...,...,...
95,Working Professional,2,,5,B.Ed,Yes,10,4,0
96,Working Professional,4,,3,B.Ed,Yes,4,2,0
97,Working Professional,1,,3,M.Tech,No,1,2,0
98,Working Professional,2,,2,B.Ed,No,9,2,0


### Tail

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
143156,Working Professional,3,,3,BSc,No,10,5,0
143157,Working Professional,1,,5,BSc,No,5,4,0
143158,Working Professional,2,,4,M.Pharm,Yes,1,4,0
143159,Working Professional,4,,1,Other,Yes,2,5,1
143160,Student,,3,,Class 12,Yes,6,5,1
...,...,...,...,...,...,...,...,...,...
143251,Working Professional,1,,5,BBA,Yes,12,3,0
143252,Working Professional,3,,1,Other,Yes,8,3,1
143253,Working Professional,1,,4,Other,Yes,4,4,0
143254,Working Professional,2,,3,BBA,No,4,5,0


### Sample

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
64806,Working Professional,4,,1,Other,No,5,1,0
78920,Working Professional,5,,5,B.Arch,No,10,3,0
34562,Working Professional,5,,4,M.Pharm,Yes,0,2,0
117452,Working Professional,4,,4,Other,Yes,12,5,1
3516,Working Professional,4,,5,B.Arch,Yes,3,5,0
...,...,...,...,...,...,...,...,...,...
20016,Working Professional,4,,4,Class 12,Yes,2,2,0
82959,Working Professional,3,,4,B.Com,No,1,1,0
8685,Working Professional,5,,3,M.Ed,No,12,4,0
68409,Working Professional,2,,1,B.Ed,No,2,2,0


### 

## MICE (Multiple Imputation by Chained Equations)
**We don't use MICE**, at the train data there is not enough correlation between the features (none of them > +/- 0.7!)

In [138]:
# def cramers_v(x, y):
#     confusion_matrix = pd.crosstab(x, y)
#     chi2 = ss.chi2_contingency(confusion_matrix)[0]
#     n = confusion_matrix.sum().sum()
#     phi2 = chi2 / n
#     r, k = confusion_matrix.shape
#     phi2corr = max(0, phi2 - ((k-1)*(r-1))/(n-1))
#     rcorr = r - ((r-1)**2)/(n-1)
#     kcorr = k - ((k-1)**2)/(n-1)
#     return np.sqrt(phi2corr / min((kcorr-1), (rcorr-1)))

# cramers_v(train_concat_data['FEATURE'], train_concat_data['Depression'])

## KNN Imputer (k-Nearest Neighbors)
**Takes way too long for this dataset**

In [139]:
# imputer = KNNImputer(n_neighbors=3)  # k=5 nearest neighbors
# print("Start KNN-Imputation...")
# train_original[numeric_features] = imputer.fit_transform(train_original[numeric_features])# only for numerical data
# print(train_original)

## Missing Data Numerical (int), IterativeImputer (estimator=RandomForestClassifier) 
Tyred also ExtraTreesRegressor, but does not converged.

In [140]:
# List of columns (only int) with missing values
columns_to_convert = ['Work Pressure', 'Job Satisfaction',  'Work/Study Hours', 'Financial Stress']#, 'Study Satisfaction']# Had problems here, messed up the data 

train_preprocessed = train_concat_data.copy()

imputer = IterativeImputer(
    estimator=RandomForestClassifier(n_estimators=50, max_depth=10, n_jobs=-1, random_state=42),
    max_iter=200,
    random_state=42,
    min_value=0,
    imputation_order="ascending" 
)
df_subset = train_preprocessed[columns_to_convert]
df_imputed_values = imputer.fit_transform(df_subset)
train_preprocessed[columns_to_convert] = np.round(df_imputed_values).astype(int)

print(imputer.n_iter_)
show_general_stats(train_preprocessed)

4


### General Stats

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Work/Study Hours,Financial Stress,Depression
count,143256.0,28399.0,143256.0,143256.0,143256.0,143256.0
mean,3.031049,2.947252,2.850561,6.248597,2.988657,0.181647
std,1.420301,1.360518,1.422842,3.852275,1.413677,0.385555
min,1.0,1.0,1.0,0.0,1.0,0.0
25%,2.0,2.0,2.0,3.0,2.0,0.0
50%,3.0,3.0,3.0,6.0,3.0,0.0
75%,4.0,4.0,4.0,10.0,4.0,0.0
max,5.0,5.0,5.0,12.0,5.0,1.0


### Data Types

Working Professional or Student          object
Work Pressure                             int64
Study Satisfaction                        Int64
Job Satisfaction                          int64
Degree                                   object
Have you ever had suicidal thoughts ?    object
Work/Study Hours                          int64
Financial Stress                          int64
Depression                                int64
dtype: object

### Missing Values

Working Professional or Student               0
Work Pressure                                 0
Study Satisfaction                       114857
Job Satisfaction                              0
Degree                                        0
Have you ever had suicidal thoughts ?         0
Work/Study Hours                              0
Financial Stress                              0
Depression                                    0
dtype: int64

### Shape

(143256, 9)

### Head

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
0,Working Professional,5,,2,Other,No,1,2,0
1,Working Professional,4,,3,Other,Yes,7,3,1
2,Student,2,2,5,B.Pharm,Yes,3,1,1
3,Working Professional,5,,1,BBA,Yes,10,1,1
4,Working Professional,1,,1,BBA,Yes,9,4,0
...,...,...,...,...,...,...,...,...,...
95,Working Professional,2,,5,B.Ed,Yes,10,4,0
96,Working Professional,4,,3,B.Ed,Yes,4,2,0
97,Working Professional,1,,3,M.Tech,No,1,2,0
98,Working Professional,2,,2,B.Ed,No,9,2,0


### Tail

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
143156,Working Professional,3,,3,BSc,No,10,5,0
143157,Working Professional,1,,5,BSc,No,5,4,0
143158,Working Professional,2,,4,M.Pharm,Yes,1,4,0
143159,Working Professional,4,,1,Other,Yes,2,5,1
143160,Student,2,3,5,Class 12,Yes,6,5,1
...,...,...,...,...,...,...,...,...,...
143251,Working Professional,1,,5,BBA,Yes,12,3,0
143252,Working Professional,3,,1,Other,Yes,8,3,1
143253,Working Professional,1,,4,Other,Yes,4,4,0
143254,Working Professional,2,,3,BBA,No,4,5,0


### Sample

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
60930,Working Professional,3,,2,B.Com,No,4,2,0
13517,Working Professional,1,,3,Other,Yes,1,2,0
112457,Working Professional,1,,5,M.Ed,No,5,1,0
25226,Student,3,2,3,Other,Yes,0,3,0
14681,Working Professional,3,,4,Other,No,9,1,0
...,...,...,...,...,...,...,...,...,...
77347,Student,5,2,1,B.Arch,Yes,10,5,1
139073,Working Professional,2,,4,Other,No,2,4,0
98821,Working Professional,1,,5,B.Tech,No,1,3,0
114522,Working Professional,4,,4,B.Com,Yes,0,5,1


### 

In [141]:
columns_to_convert = ['Study Satisfaction'] # a lot of missing values! over 114k! better we do the this calculation seperated

df2 = train_preprocessed.copy()

imputer = IterativeImputer(
    estimator=RandomForestClassifier(n_estimators=50, max_depth=10, n_jobs=-1, random_state=42),
    max_iter=200,
    random_state=42,
    min_value=0,
    imputation_order="ascending" 
)

df_subset = df2[columns_to_convert]
df_imputed_values = imputer.fit_transform(df_subset)
df2[columns_to_convert] = np.round(df_imputed_values).astype(int)

print(imputer.n_iter_)
show_general_stats(df2)

0


### General Stats

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Work/Study Hours,Financial Stress,Depression
count,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0
mean,3.031049,2.989543,2.850561,6.248597,2.988657,0.181647
std,1.420301,0.606115,1.422842,3.852275,1.413677,0.385555
min,1.0,1.0,1.0,0.0,1.0,0.0
25%,2.0,3.0,2.0,3.0,2.0,0.0
50%,3.0,3.0,3.0,6.0,3.0,0.0
75%,4.0,3.0,4.0,10.0,4.0,0.0
max,5.0,5.0,5.0,12.0,5.0,1.0


### Data Types

Working Professional or Student          object
Work Pressure                             int64
Study Satisfaction                        int64
Job Satisfaction                          int64
Degree                                   object
Have you ever had suicidal thoughts ?    object
Work/Study Hours                          int64
Financial Stress                          int64
Depression                                int64
dtype: object

### Missing Values

Working Professional or Student          0
Work Pressure                            0
Study Satisfaction                       0
Job Satisfaction                         0
Degree                                   0
Have you ever had suicidal thoughts ?    0
Work/Study Hours                         0
Financial Stress                         0
Depression                               0
dtype: int64

### Shape

(143256, 9)

### Head

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
0,Working Professional,5,3,2,Other,No,1,2,0
1,Working Professional,4,3,3,Other,Yes,7,3,1
2,Student,2,2,5,B.Pharm,Yes,3,1,1
3,Working Professional,5,3,1,BBA,Yes,10,1,1
4,Working Professional,1,3,1,BBA,Yes,9,4,0
...,...,...,...,...,...,...,...,...,...
95,Working Professional,2,3,5,B.Ed,Yes,10,4,0
96,Working Professional,4,3,3,B.Ed,Yes,4,2,0
97,Working Professional,1,3,3,M.Tech,No,1,2,0
98,Working Professional,2,3,2,B.Ed,No,9,2,0


### Tail

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
143156,Working Professional,3,3,3,BSc,No,10,5,0
143157,Working Professional,1,3,5,BSc,No,5,4,0
143158,Working Professional,2,3,4,M.Pharm,Yes,1,4,0
143159,Working Professional,4,3,1,Other,Yes,2,5,1
143160,Student,2,3,5,Class 12,Yes,6,5,1
...,...,...,...,...,...,...,...,...,...
143251,Working Professional,1,3,5,BBA,Yes,12,3,0
143252,Working Professional,3,3,1,Other,Yes,8,3,1
143253,Working Professional,1,3,4,Other,Yes,4,4,0
143254,Working Professional,2,3,3,BBA,No,4,5,0


### Sample

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
121733,Working Professional,5,3,1,Other,Yes,4,3,0
23196,Student,3,1,3,M.Ed,No,8,2,0
121965,Working Professional,3,3,4,BBA,Yes,11,2,0
42039,Student,5,3,1,Other,Yes,11,4,1
120972,Student,1,4,2,M.Ed,No,9,2,0
...,...,...,...,...,...,...,...,...,...
109346,Working Professional,3,3,3,LLM,Yes,1,2,0
13593,Working Professional,5,3,1,B.Tech,Yes,4,3,0
46569,Working Professional,4,3,1,Other,No,8,4,0
68596,Working Professional,2,3,2,B.Com,No,9,2,0


### 

# Scale Numerical features
not needed, there all between 1-5 or 1-10

# Save preprocessed dataframe

In [142]:
# if KAGGLE_ENV:
#     train.to_csv('/kaggle/working/s4-e11-train-concat-imputed.csv', index=False)
# else:
#     train.to_csv( '../kaggle/working/' + '/s4-e11-train-concat-imputed.csv', index=False)

## Fix categorical missing data
There are not any missing data for categorical features.

# Scale Categorical Features
There are different categories at this dataframe.
Working Professional or Student, it is just 1 or 0, so we use OneHotEncoder
Have you ever had suicidal thoughts?, yes/no, already binar
Degree, should use a OrdinalEncoder

In [143]:
df2.head(100)

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression
0,Working Professional,5,3,2,Other,No,1,2,0
1,Working Professional,4,3,3,Other,Yes,7,3,1
2,Student,2,2,5,B.Pharm,Yes,3,1,1
3,Working Professional,5,3,1,BBA,Yes,10,1,1
4,Working Professional,1,3,1,BBA,Yes,9,4,0
...,...,...,...,...,...,...,...,...,...
95,Working Professional,2,3,5,B.Ed,Yes,10,4,0
96,Working Professional,4,3,3,B.Ed,Yes,4,2,0
97,Working Professional,1,3,3,M.Tech,No,1,2,0
98,Working Professional,2,3,2,B.Ed,No,9,2,0


In [144]:
# Check the unique values in the column 'Degree'
num_unique_degrees = df2["Degree"].nunique()
unique_degrees = df2["Degree"].unique()

print(f"Number of unique categories in the Degree column': {num_unique_degrees}")
print(f"Unique categories in the 'Degree' column': {unique_degrees}")

Number of unique categories in the Degree column': 16
Unique categories in the 'Degree' column': ['Other' 'B.Pharm' 'BBA' 'MCA' 'BSc' 'B.Arch' 'BCA' 'B.Ed' 'B.Com'
 'Class 12' 'M.Tech' 'M.Ed' 'MSc' 'B.Tech' 'LLM' 'M.Pharm']


In [145]:
df = df2.copy()

encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
degree_encoded = encoder.fit_transform(df2[["Degree"]])
degree_columns = encoder.get_feature_names_out(["Degree"])


df_degree = pd.DataFrame(degree_encoded, columns=degree_columns, index=df2.index)
train_final = df2.drop(columns=["Degree"]).join(df_degree)

# Show stats
print("OneHotEncoded Degree Data")
show_general_stats(train_final)

OneHotEncoded Degree Data


### General Stats

Unnamed: 0,Work Pressure,Study Satisfaction,Job Satisfaction,Work/Study Hours,Financial Stress,Depression,Degree_B.Arch,Degree_B.Com,Degree_B.Ed,Degree_B.Pharm,...,Degree_BCA,Degree_BSc,Degree_Class 12,Degree_LLM,Degree_M.Ed,Degree_M.Pharm,Degree_M.Tech,Degree_MCA,Degree_MSc,Degree_Other
count,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0,...,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0,143256.0
mean,3.031049,2.989543,2.850561,6.248597,2.988657,0.181647,0.061645,0.057436,0.082391,0.041492,...,0.04078,0.035747,0.104736,0.033025,0.040131,0.032264,0.031831,0.03729,0.034721,0.299373
std,1.420301,0.606115,1.422842,3.852275,1.413677,0.385555,0.24051,0.232674,0.27496,0.199426,...,0.197781,0.18566,0.306213,0.178702,0.196267,0.176701,0.175551,0.189472,0.183073,0.457985
min,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,3.0,2.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,3.0,3.0,3.0,6.0,3.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,4.0,3.0,4.0,10.0,4.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
max,5.0,5.0,5.0,12.0,5.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


### Data Types

Working Professional or Student           object
Work Pressure                              int64
Study Satisfaction                         int64
Job Satisfaction                           int64
Have you ever had suicidal thoughts ?     object
Work/Study Hours                           int64
Financial Stress                           int64
Depression                                 int64
Degree_B.Arch                            float64
Degree_B.Com                             float64
Degree_B.Ed                              float64
Degree_B.Pharm                           float64
Degree_B.Tech                            float64
Degree_BBA                               float64
Degree_BCA                               float64
Degree_BSc                               float64
Degree_Class 12                          float64
Degree_LLM                               float64
Degree_M.Ed                              float64
Degree_M.Pharm                           float64
Degree_M.Tech       

### Missing Values

Working Professional or Student          0
Work Pressure                            0
Study Satisfaction                       0
Job Satisfaction                         0
Have you ever had suicidal thoughts ?    0
Work/Study Hours                         0
Financial Stress                         0
Depression                               0
Degree_B.Arch                            0
Degree_B.Com                             0
Degree_B.Ed                              0
Degree_B.Pharm                           0
Degree_B.Tech                            0
Degree_BBA                               0
Degree_BCA                               0
Degree_BSc                               0
Degree_Class 12                          0
Degree_LLM                               0
Degree_M.Ed                              0
Degree_M.Pharm                           0
Degree_M.Tech                            0
Degree_MCA                               0
Degree_MSc                               0
Degree_Othe

### Shape

(143256, 24)

### Head

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression,Degree_B.Arch,Degree_B.Com,...,Degree_BCA,Degree_BSc,Degree_Class 12,Degree_LLM,Degree_M.Ed,Degree_M.Pharm,Degree_M.Tech,Degree_MCA,Degree_MSc,Degree_Other
0,Working Professional,5,3,2,No,1,2,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,Working Professional,4,3,3,Yes,7,3,1,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,Student,2,2,5,Yes,3,1,1,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Working Professional,5,3,1,Yes,10,1,1,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Working Professional,1,3,1,Yes,9,4,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Working Professional,2,3,5,Yes,10,4,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
96,Working Professional,4,3,3,Yes,4,2,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
97,Working Professional,1,3,3,No,1,2,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
98,Working Professional,2,3,2,No,9,2,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Tail

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression,Degree_B.Arch,Degree_B.Com,...,Degree_BCA,Degree_BSc,Degree_Class 12,Degree_LLM,Degree_M.Ed,Degree_M.Pharm,Degree_M.Tech,Degree_MCA,Degree_MSc,Degree_Other
143156,Working Professional,3,3,3,No,10,5,0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
143157,Working Professional,1,3,5,No,5,4,0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
143158,Working Professional,2,3,4,Yes,1,4,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
143159,Working Professional,4,3,1,Yes,2,5,1,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
143160,Student,2,3,5,Yes,6,5,1,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143251,Working Professional,1,3,5,Yes,12,3,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
143252,Working Professional,3,3,1,Yes,8,3,1,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
143253,Working Professional,1,3,4,Yes,4,4,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
143254,Working Professional,2,3,3,No,4,5,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Sample

Unnamed: 0,Working Professional or Student,Work Pressure,Study Satisfaction,Job Satisfaction,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Depression,Degree_B.Arch,Degree_B.Com,...,Degree_BCA,Degree_BSc,Degree_Class 12,Degree_LLM,Degree_M.Ed,Degree_M.Pharm,Degree_M.Tech,Degree_MCA,Degree_MSc,Degree_Other
76359,Working Professional,5,3,5,No,12,3,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
35003,Working Professional,4,3,2,No,12,4,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
9381,Working Professional,1,3,1,Yes,0,3,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
39507,Working Professional,2,3,1,Yes,2,1,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
47918,Working Professional,1,3,4,No,11,3,0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86021,Working Professional,1,3,2,Yes,6,4,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
108038,Working Professional,5,3,4,Yes,1,2,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
84188,Working Professional,4,3,4,No,1,4,0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
70011,Working Professional,4,3,4,No,7,1,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 

# Save preprocessed final dataframe

In [None]:
# if KAGGLE_ENV:
#     train_final.to_csv('/kaggle/working/s4-e11-train-concat-final.csv', index=False)
# else:
#     train_final.to_csv( '../kaggle/working/' + '/s4-e11-train-concat-final.csv', index=False)

# Feature Selection (optional)
Delete attributes there aren't providing any information (correlation matrix).

# Create new Features (optional)
- Discretizing Continuous Features
- Decomposing Features and Date/Time Variables
- Aggregate attributes to better new attributes