In [39]:
# Import dependencies
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sqlalchemy import create_engine
from sklearn.compose import ColumnTransformer

In [40]:
# Load cleaned_data.csv (path is relative to the working directory) into a DataFrame
mental_df = pd.read_csv(filepath_or_buffer='cleaned_data.csv')

In [41]:
# Display the loaded frame; as the last expression in the cell it is rendered
# as the cell's output (pandas elides the middle rows/columns with "...").
mental_df

Unnamed: 0,Are you self-employed?,How many employees does your company or organization have?,Is your employer primarily a tech company/organization?,Is your primary role within your company related to tech/IT?,Does your employer provide mental health benefits as part of healthcare coverage?,Do you know the options for mental health care available under your employer-provided coverage?,"Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?",Does your employer offer resources to learn more about mental health concerns and options for seeking help?,Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources provided by your employer?,"If a mental health issue prompted you to request a medical leave from work, asking for that leave would be:",...,"If you have a mental health issue, do you feel that it interferes with your work when being treated effectively?","If you have a mental health issue, do you feel that it interferes with your work when NOT being treated effectively?",What is your age?,What is your gender?,What country do you live in?,What US state or territory do you live in?,What country do you work in?,What US state or territory do you work in?,Which of the following best describes your work position?,Do you work remotely?
0,0,26-100,1.0,0.0,not eligible for coverage / n/a,0,-1,-1,0,very easy,...,not applicable to me,not applicable to me,39,male,united kingdom,0,united kingdom,0,back-end developer,sometimes
1,0,25-jun,1.0,0.0,-1,1,1,1,1,somewhat easy,...,rarely,sometimes,29,male,united states of america,illinois,united states of america,illinois,back-end developer|front-end developer,never
2,0,25-jun,1.0,0.0,-1,0,-1,-1,0,neither easy nor difficult,...,not applicable to me,not applicable to me,38,Other,united kingdom,0,united kingdom,0,back-end developer,always
3,1,0,0.0,0.0,0,0,0,0,0,0,...,sometimes,sometimes,43,male,united kingdom,0,united kingdom,0,supervisor/team lead,sometimes
4,0,25-jun,0.0,1.0,1,1,-1,-1,-1,neither easy nor difficult,...,sometimes,sometimes,43,female,united states of america,illinois,united states of america,illinois,executive leadership|supervisor/team lead|dev ...,sometimes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1428,1,0,0.0,0.0,0,0,0,0,0,0,...,not applicable to me,not applicable to me,34,Other,united states of america,new york,united states of america,new york,other,sometimes
1429,1,0,0.0,0.0,0,0,0,0,0,0,...,sometimes,often,56,male,united states of america,california,Other,0,support,sometimes
1430,0,100-500,1.0,0.0,1,1,1,1,0,somewhat difficult,...,rarely,sometimes,52,male,united states of america,georgia,united states of america,georgia,back-end developer,sometimes
1431,0,100-500,0.0,1.0,0,i am not sure,-1,1,0,somewhat difficult,...,sometimes,often,30,female,united states of america,nebraska,united states of america,nebraska,devops/sysadmin,sometimes


In [42]:
# Build a dataframe of the questions: one row per column of mental_df, indexed
# by the column's position (0, 1, 2, ...) with the original header text in a
# single 'Question' column.
#
# The frame is built directly from the column labels instead of filling a
# variable named `dict` in a counter loop: binding that name shadows the
# built-in `dict` type for every later cell in the kernel session.
questions_df = pd.DataFrame({'Question': list(mental_df.columns)})
questions_df

Unnamed: 0,Question
0,Are you self-employed?
1,How many employees does your company or organi...
2,Is your employer primarily a tech company/orga...
3,Is your primary role within your company relat...
4,Does your employer provide mental health benef...
...,...
58,What US state or territory do you live in?
59,What country do you work in?
60,What US state or territory do you work in?
61,Which of the following best describes your wor...


In [43]:
# Rename the columns in the dataframe to numbers: each header is mapped to its
# position (0, 1, 2, ...). enumerate() supplies the position, replacing a
# manually incremented counter.
column_rename = {
    column: position for position, column in enumerate(mental_df.columns)
}

# rename() returns a new frame, so mental_df keeps its original headers.
renamed_df = mental_df.rename(columns=column_rename)
renamed_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,53,54,55,56,57,58,59,60,61,62
0,0,26-100,1.0,0.0,not eligible for coverage / n/a,0,-1,-1,0,very easy,...,not applicable to me,not applicable to me,39,male,united kingdom,0,united kingdom,0,back-end developer,sometimes
1,0,25-jun,1.0,0.0,-1,1,1,1,1,somewhat easy,...,rarely,sometimes,29,male,united states of america,illinois,united states of america,illinois,back-end developer|front-end developer,never
2,0,25-jun,1.0,0.0,-1,0,-1,-1,0,neither easy nor difficult,...,not applicable to me,not applicable to me,38,Other,united kingdom,0,united kingdom,0,back-end developer,always
3,1,0,0.0,0.0,0,0,0,0,0,0,...,sometimes,sometimes,43,male,united kingdom,0,united kingdom,0,supervisor/team lead,sometimes
4,0,25-jun,0.0,1.0,1,1,-1,-1,-1,neither easy nor difficult,...,sometimes,sometimes,43,female,united states of america,illinois,united states of america,illinois,executive leadership|supervisor/team lead|dev ...,sometimes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1428,1,0,0.0,0.0,0,0,0,0,0,0,...,not applicable to me,not applicable to me,34,Other,united states of america,new york,united states of america,new york,other,sometimes
1429,1,0,0.0,0.0,0,0,0,0,0,0,...,sometimes,often,56,male,united states of america,california,Other,0,support,sometimes
1430,0,100-500,1.0,0.0,1,1,1,1,0,somewhat difficult,...,rarely,sometimes,52,male,united states of america,georgia,united states of america,georgia,back-end developer,sometimes
1431,0,100-500,0.0,1.0,0,i am not sure,-1,1,0,somewhat difficult,...,sometimes,often,30,female,united states of america,nebraska,united states of america,nebraska,devops/sysadmin,sometimes


In [44]:
# Collect, in column order, the name of every column whose dtype is "object";
# this list is used as the set of categorical variables.
mental_cat = [name for name, kind in mental_df.dtypes.items() if kind == "object"]



In [45]:
# Count the distinct values found in each categorical column
mental_df.loc[:, mental_cat].nunique()

How many employees does your company or organization have?                                                                                                                             7
Does your employer provide mental health benefits as part of healthcare coverage?                                                                                                      4
Do you know the options for mental health care available under your employer-provided coverage?                                                                                        4
If a mental health issue prompted you to request a medical leave from work, asking for that leave would be:                                                                            6
Do you think that discussing a mental health disorder with your employer would have negative consequences?                                                                             4
Do you think that discussing a physical health issue with your employer wou

In [46]:
# NOTE(review): this cell recomputes mental_cat and the per-column unique
# counts exactly as the two preceding cells do; consider removing one copy.

# Names of the object-dtype columns, treated as the categorical variables
mental_cat = list(mental_df.dtypes.loc[lambda kinds: kinds == "object"].index)

# Number of distinct values in each of those columns
mental_df.loc[:, mental_cat].nunique()

How many employees does your company or organization have?                                                                                                                             7
Does your employer provide mental health benefits as part of healthcare coverage?                                                                                                      4
Do you know the options for mental health care available under your employer-provided coverage?                                                                                        4
If a mental health issue prompted you to request a medical leave from work, asking for that leave would be:                                                                            6
Do you think that discussing a mental health disorder with your employer would have negative consequences?                                                                             4
Do you think that discussing a physical health issue with your employer wou

In [47]:
# Create a OneHotEncoder instance.
# The encoder is left at its default (sparse) output and the result is
# densified below with .toarray(). The former `sparse=False` keyword was
# renamed to `sparse_output` in scikit-learn 1.2 and removed in 1.4, so either
# spelling ties the notebook to a version range; .toarray() works on all of them.
enc = OneHotEncoder()

# Fit and transform the OneHotEncoder using the categorical variable list.
# - columns: the encoder's generated names, "<original column>_<category>".
# - index: reuse mental_df's index so the later index-based merge pairs each
#   encoded row with the row it was derived from.
# The positional argument is evaluated first, so the encoder is already fitted
# when get_feature_names_out() is called.
encode_df = pd.DataFrame(
    enc.fit_transform(mental_df[mental_cat]).toarray(),
    columns=enc.get_feature_names_out(mental_cat),
    index=mental_df.index,
)
encode_df.head()



Unnamed: 0,How many employees does your company or organization have?_0,How many employees does your company or organization have?_100-500,How many employees does your company or organization have?_25-jun,How many employees does your company or organization have?_26-100,How many employees does your company or organization have?_5-jan,How many employees does your company or organization have?_500-1000,How many employees does your company or organization have?_more than 1000,Does your employer provide mental health benefits as part of healthcare coverage?_-1,Does your employer provide mental health benefits as part of healthcare coverage?_0,Does your employer provide mental health benefits as part of healthcare coverage?_1,...,Which of the following best describes your work position?_support|devops/sysadmin,Which of the following best describes your work position?_support|front-end developer|back-end developer,Which of the following best describes your work position?_support|front-end developer|designer,Which of the following best describes your work position?_support|hr|supervisor/team lead|executive leadership,Which of the following best describes your work position?_support|other,Which of the following best describes your work position?_support|sales|back-end developer|front-end developer|designer|one-person shop,Which of the following best describes your work position?_support|sales|designer,Do you work remotely?_always,Do you work remotely?_never,Do you work remotely?_sometimes
0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [48]:
# Merge one-hot encoded features (matched on the row index) and drop the
# original categorical columns they were derived from.
mental_df = mental_df.merge(encode_df, left_index=True, right_index=True)

# Use the `columns=` keyword rather than the positional axis argument
# `drop(mental_cat, 1)`: pandas deprecated the positional form (it emits a
# FutureWarning) and rejects it from 2.0 onwards.
mental_df = mental_df.drop(columns=mental_cat)
mental_df.head()

  mental_df = mental_df.drop(mental_cat,1)


Unnamed: 0,Are you self-employed?,Is your employer primarily a tech company/organization?,Is your primary role within your company related to tech/IT?,"Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?",Does your employer offer resources to learn more about mental health concerns and options for seeking help?,Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources provided by your employer?,Do you feel that your employer takes mental health as seriously as physical health?,Have you heard of or observed negative consequences for co-workers who have been open about mental health issues in your workplace?,Do you have medical coverage (private insurance or state-provided) which includes treatment of mental health issues?,Do you have previous employers?,...,Which of the following best describes your work position?_support|devops/sysadmin,Which of the following best describes your work position?_support|front-end developer|back-end developer,Which of the following best describes your work position?_support|front-end developer|designer,Which of the following best describes your work position?_support|hr|supervisor/team lead|executive leadership,Which of the following best describes your work position?_support|other,Which of the following best describes your work position?_support|sales|back-end developer|front-end developer|designer|one-person shop,Which of the following best describes your work position?_support|sales|designer,Do you work remotely?_always,Do you work remotely?_never,Do you work remotely?_sometimes
0,0,1.0,0.0,-1,-1,0,0,-1,0.0,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0,1.0,0.0,1,1,1,1,-1,0.0,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0,1.0,0.0,-1,-1,0,0,-1,0.0,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,1,0.0,0.0,0,0,0,0,0,1.0,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0,0.0,1.0,-1,-1,-1,-1,-1,0.0,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [49]:
# Write mental_df to an Excel workbook, omitting the row index
mental_df.to_excel(excel_writer='final_after_encode.xlsx', index=False)


In [50]:
# Write mental_df to a CSV file, omitting the row index
mental_df.to_csv(path_or_buf='final_after_encode.csv', index=False)
