In [None]:
<h1><font color='green'>Libraries</font></h1>

In [None]:
import time
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from mlxtend.preprocessing import TransactionEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelBinarizer
from sklearn.impute import SimpleImputer

In [None]:
<h1><font color='green'>Adding Style to Plots</font></h1>
# Select the 'fivethirtyeight' matplotlib style sheet for the figures drawn below.
plt.style.use('fivethirtyeight')

In [None]:
<h1><font color='green'>Loading Data...</font></h1>
# Read the cleaned survey CSV, then discard the 'Unnamed: 0' column
# (presumably an index column written by an earlier export — TODO confirm).
data = pd.read_csv('03.Cleaned Mental Health.csv')
data = data.drop(columns=['Unnamed: 0'])
data

In [None]:
<h3><font color='red'>Function to find unique object values in columns</font></h3>

In [None]:
def unique_object_values_in_columns(df):
    """Print the distinct values of every object-dtype column of ``df``.

    Columns are visited in ``df.columns`` order. For each column whose dtype
    compares equal to ``'object'``, one line of the form
    ``<column> : <array of unique values>`` is printed; other columns are
    skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are inspected.

    Returns
    -------
    None
        The function only prints.
    """
    for column_name in df.columns:
        series = df[column_name]
        # Guard clause: anything that is not object dtype is ignored.
        if series.dtype != 'object':
            continue
        print(column_name, ':', series.unique())

In [None]:
<h4>Check data shape and preview the first rows</h4>
# A notebook cell only auto-displays its final expression, so a bare
# `data.shape` placed before `data.head(5)` is evaluated and thrown away.
# Print it explicitly so both the shape and the preview are shown.
print(data.shape)
data.head(5)

In [None]:
<h3><font color='orange'>Analyzing Work Position</font></h3>

In [None]:
<h4>Visualizing Work Position</h4>
# Count each work_position value once and reuse the result for both axes.
work_position_counts = data['work_position'].value_counts()

plt.figure(figsize=(15, 8))
plt.xticks(rotation=90)
# seaborn >= 0.12 accepts x/y only as keyword arguments; passing them
# positionally raises a TypeError there (and warns on 0.11).
sns.barplot(x=work_position_counts.index, y=work_position_counts.values)

In [None]:
<h4>Creating a list of unique professions</h4>
# Distinct work_position values, in the order value_counts() reports them.
work_position_list = (
    data['work_position']
    .value_counts()
    .index
    .tolist()
)

In [None]:
<h4>Transforming Work Position Data</h4>
# Split every pipe-delimited work_position string into a list of its parts;
# the result has one inner list per row of `data`.
positions = [entry.split('|') for entry in data['work_position']]

# Preview only the first few rows instead of dumping the whole nested list
# into the cell output.
positions[:5]

In [None]:
<h1><font color='green'>Encoding Work Positions</font></h1>

In [None]:
<h4>Creating TransactionEncoder Object</h4>
# Encoder that is fitted on `positions` in the following cell; its `columns_`
# attribute supplies the column names of the encoded frame.
te = TransactionEncoder()

In [None]:
<h4>Transforming Data</h4>
# Fit the encoder on the per-row position lists, then encode those same lists.
te.fit(positions)
encoded_positions = te.transform(positions)

# Wrap the encoded matrix in a frame labelled with the encoder's column names.
encoded_df = pd.DataFrame(data=encoded_positions, columns=te.columns_)
encoded_df.head()

In [None]:
<h4>Removing Original Column and Merging Encoded Data</h4>
# Replace the raw work_position column with its encoded columns
# (join aligns the two frames on their index).
data = data.drop('work_position', axis=1).join(encoded_df)
data.head()

In [None]:
<h1><font color='green'>One Hot Encoding on country_live</font></h1>

In [None]:
<h4>Creating OneHotEncoder Object</h4>
# Ask for a dense array rather than a sparse matrix. scikit-learn 1.2 renamed
# the `sparse` argument to `sparse_output` and 1.4 removed the old name, so
# try the new spelling first and fall back for older installations.
try:
    ohe = OneHotEncoder(sparse_output=False)
except TypeError:
    ohe = OneHotEncoder(sparse=False)

In [None]:
<h4>Transforming Data</h4>
# One-hot encode country_live (passed as a one-column frame).
country_live_encoded = ohe.fit_transform(data[['country_live']])

# Label each output column with its category, prefixed by 'live_in_'.
country_live_df = pd.DataFrame(
    country_live_encoded,
    columns=ohe.categories_[0],
).add_prefix('live_in_')
country_live_df.head()

In [None]:
<h4>Dropping Original Column and Merging Encoded Data</h4>
# Replace the raw country_live column with its one-hot columns
# (join aligns the two frames on their index).
data = data.drop('country_live', axis=1).join(country_live_df)
data.head()

In [None]:
<h1><font color='green'>Label Binarizer on country_work</font></h1>

In [None]:
<h4>Creating LabelBinarizer Object</h4>
# Binarizer that is fitted on the country_work column in the following cell;
# its `classes_` attribute is used there to name the output columns.
lb = LabelBinarizer()

In [None]:
<h4>Transforming Data</h4>
# Binarize country_work into indicator columns.
country_work_encoded = lb.fit_transform(data['country_work'])

# Label each output column with its class, prefixed by 'work_at_'.
country_work_df = pd.DataFrame(
    country_work_encoded,
    columns=lb.classes_,
).add_prefix('work_at_')
country_work_df.head()

In [None]:
<h4>Dropping Original Column and Merging Encoded Data</h4>
# Replace the raw country_work column with its indicator columns
# (join aligns the two frames on their index).
data = data.drop('country_work', axis=1).join(country_work_df)
data.head()

In [None]:
<h1><font color='green'>Handling Missing Values with Simple Imputer</font></h1>

In [None]:
<h4>Checking for Missing Values</h4>
# Number of missing entries in each column.
data.isna().sum()

In [None]:
<h4>Visualizing Missing Values</h4>
# Heatmap of the boolean missing-value mask of `data`.
plt.figure(figsize=(12, 8))
sns.heatmap(data=data.isna())

In [None]:
<h4>Creating SimpleImputer Object and Transforming Data</h4>
# Fill every missing value with the most frequent value of its column.
si = SimpleImputer(strategy='most_frequent')

# The imputer hands back a plain array, so the frame has to be rebuilt.
# Reattach the original column labels AND index, then let pandas re-infer
# column dtypes: when the frame mixes strings with numeric/boolean columns,
# the rebuilt frame would otherwise hold every column as generic `object`.
data = pd.DataFrame(
    si.fit_transform(data),
    columns=data.columns,
    index=data.index,
).infer_objects()

In [None]:
<h4>Checking for Missing Values Again</h4>
# Redraw the missing-value heatmap after imputation ...
plt.figure(figsize=(12, 8))
sns.heatmap(data=data.isna())

# ... and list the per-column counts of missing entries.
data.isna().sum()