In [47]:
# Import dependencies
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sqlalchemy import create_engine
from sklearn.compose import ColumnTransformer

In [48]:
# Read the data into a Pandas dataframe
mental_df = pd.read_csv('cleaned_data.csv')

In [49]:
mental_df

Unnamed: 0,Do you have previous employers?,Have your previous employers provided mental health benefits?,Were you aware of the options for mental health care provided by your previous employers?,Did your previous employers ever formally discuss mental health (as part of a wellness campaign or other official communication)?,Was your anonymity protected if you chose to take advantage of mental health or substance abuse treatment resources with previous employers?,Do you think that discussing a mental health disorder with previous employers would have negative consequences?,Would you have been willing to discuss a mental health issue with your direct supervisor(s)?,Did you feel that your previous employers took mental health as seriously as physical health?,Did you hear of or observe negative consequences for co-workers with mental health issues in your previous workplaces?,Would you bring up a mental health issue with a potential employer in an interview?,...,Do you have a family history of mental illness?,Have you had a mental health disorder in the past?,Do you currently have a mental health disorder?,Have you been diagnosed with a mental health condition by a medical professional?,Have you ever sought treatment for a mental health issue from a mental health professional?,"If you have a mental health issue, do you feel that it interferes with your work when being treated effectively?","If you have a mental health issue, do you feel that it interferes with your work when NOT being treated effectively?",What is your gender?,What country do you live in?,What country do you work in?
0,1,"No, none did",N/A (not currently aware),0,0,Some of them,Some of my previous employers,0,None of them,0,...,-1,1,-1,1,0,Not applicable to me,Not applicable to me,Male,United Kingdom,United Kingdom
1,1,"Yes, they all did",I was aware of some,None did,"Yes, always",None of them,Some of my previous employers,Some did,None of them,-1,...,1,1,1,1,1,Rarely,Sometimes,Male,United States of America,United States of America
2,1,"No, none did",N/A (not currently aware),None did,0,0,0,0,Some of them,1,...,-1,0,-1,-1,1,Not applicable to me,Not applicable to me,Other,United Kingdom,United Kingdom
3,1,Some did,N/A (not currently aware),None did,0,Some of them,Some of my previous employers,0,Some of them,0,...,-1,1,1,1,1,Sometimes,Sometimes,Male,United Kingdom,United Kingdom
4,1,0,N/A (not currently aware),Some did,0,Some of them,Some of my previous employers,Some did,Some of them,-1,...,1,1,1,1,1,Sometimes,Sometimes,Female,United States of America,United States of America
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1194,1,0,N/A (not currently aware),0,0,Some of them,Some of my previous employers,Some did,None of them,0,...,0,1,1,1,1,Rarely,Often,Female,Canada,Canada
1195,1,Some did,I was aware of some,None did,0,Some of them,"No, at none of my previous employers",None did,"Yes, all of them",-1,...,1,1,1,1,1,Rarely,Often,Female,United States of America,United States of America
1196,1,"Yes, they all did",I was aware of some,Some did,0,0,Some of my previous employers,Some did,None of them,-1,...,1,-1,-1,-1,1,Not applicable to me,Not applicable to me,Other,United States of America,United States of America
1197,1,Some did,I was aware of some,None did,Sometimes,"Yes, all of them",Some of my previous employers,None did,Some of them,-1,...,1,1,0,1,1,Rarely,Sometimes,Male,United States of America,United States of America


In [50]:
# build a dataframe of the questions
dict = {}
count = 0
for column in mental_df.columns:
    
    dict[count] = [column]
    count = count + 1
    
    
dict

questions_df = pd.DataFrame.from_dict(dict, orient='index', columns=['Question'])
questions_df

Unnamed: 0,Question
0,Do you have previous employers?
1,Have your previous employers provided mental h...
2,Were you aware of the options for mental healt...
3,Did your previous employers ever formally disc...
4,Was your anonymity protected if you chose to t...
5,Do you think that discussing a mental health d...
6,Would you have been willing to discuss a menta...
7,Did you feel that your previous employers took...
8,Did you hear of or observe negative consequenc...
9,Would you bring up a mental health issue with ...


In [51]:
# rename the columns in the dataframe to numbers
column_rename = {}
count = 0
for column in mental_df.columns:
    
    column_rename[column] = count
    count = count + 1
    
    
column_rename

renamed_df = mental_df.rename(columns=column_rename)
renamed_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,1,"No, none did",N/A (not currently aware),0,0,Some of them,Some of my previous employers,0,None of them,0,...,-1,1,-1,1,0,Not applicable to me,Not applicable to me,Male,United Kingdom,United Kingdom
1,1,"Yes, they all did",I was aware of some,None did,"Yes, always",None of them,Some of my previous employers,Some did,None of them,-1,...,1,1,1,1,1,Rarely,Sometimes,Male,United States of America,United States of America
2,1,"No, none did",N/A (not currently aware),None did,0,0,0,0,Some of them,1,...,-1,0,-1,-1,1,Not applicable to me,Not applicable to me,Other,United Kingdom,United Kingdom
3,1,Some did,N/A (not currently aware),None did,0,Some of them,Some of my previous employers,0,Some of them,0,...,-1,1,1,1,1,Sometimes,Sometimes,Male,United Kingdom,United Kingdom
4,1,0,N/A (not currently aware),Some did,0,Some of them,Some of my previous employers,Some did,Some of them,-1,...,1,1,1,1,1,Sometimes,Sometimes,Female,United States of America,United States of America
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1194,1,0,N/A (not currently aware),0,0,Some of them,Some of my previous employers,Some did,None of them,0,...,0,1,1,1,1,Rarely,Often,Female,Canada,Canada
1195,1,Some did,I was aware of some,None did,0,Some of them,"No, at none of my previous employers",None did,"Yes, all of them",-1,...,1,1,1,1,1,Rarely,Often,Female,United States of America,United States of America
1196,1,"Yes, they all did",I was aware of some,Some did,0,0,Some of my previous employers,Some did,None of them,-1,...,1,-1,-1,-1,1,Not applicable to me,Not applicable to me,Other,United States of America,United States of America
1197,1,Some did,I was aware of some,None did,Sometimes,"Yes, all of them",Some of my previous employers,None did,Some of them,-1,...,1,1,0,1,1,Rarely,Sometimes,Male,United States of America,United States of America


In [52]:
# Generate our categorical variable list
mental_cat = mental_df.dtypes[mental_df.dtypes == "object"].index.tolist()



In [53]:
# Check the number of unique values in each column
mental_df[mental_cat].nunique()

Have your previous employers provided mental health benefits?                                                                                   4
Were you aware of the options for mental health care provided by your previous employers?                                                       4
Did your previous employers ever formally discuss mental health (as part of a wellness campaign or other official communication)?               4
Was your anonymity protected if you chose to take advantage of mental health or substance abuse treatment resources with previous employers?    4
Do you think that discussing a mental health disorder with previous employers would have negative consequences?                                 4
Would you have been willing to discuss a mental health issue with your direct supervisor(s)?                                                    4
Did you feel that your previous employers took mental health as seriously as physical health?                               

In [54]:
# Generate our categorical variable list
mental_cat = mental_df.dtypes[mental_df.dtypes == "object"].index.tolist()

# Check the number of unique values in each column
mental_df[mental_cat].nunique()

Have your previous employers provided mental health benefits?                                                                                   4
Were you aware of the options for mental health care provided by your previous employers?                                                       4
Did your previous employers ever formally discuss mental health (as part of a wellness campaign or other official communication)?               4
Was your anonymity protected if you chose to take advantage of mental health or substance abuse treatment resources with previous employers?    4
Do you think that discussing a mental health disorder with previous employers would have negative consequences?                                 4
Would you have been willing to discuss a mental health issue with your direct supervisor(s)?                                                    4
Did you feel that your previous employers took mental health as seriously as physical health?                               

In [57]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(mental_df[mental_cat]))

# Add the encoded variable names to the DataFrame
encode_df.columns = enc.get_feature_names_out(mental_cat)
encode_df.head()



Unnamed: 0,Have your previous employers provided mental health benefits?_0,"Have your previous employers provided mental health benefits?_No, none did",Have your previous employers provided mental health benefits?_Some did,"Have your previous employers provided mental health benefits?_Yes, they all did",Were you aware of the options for mental health care provided by your previous employers?_I was aware of some,Were you aware of the options for mental health care provided by your previous employers?_N/A (not currently aware),"Were you aware of the options for mental health care provided by your previous employers?_No, I only became aware later","Were you aware of the options for mental health care provided by your previous employers?_Yes, I was aware of all of them",Did your previous employers ever formally discuss mental health (as part of a wellness campaign or other official communication)?_0,Did your previous employers ever formally discuss mental health (as part of a wellness campaign or other official communication)?_None did,...,What country do you live in?_United Kingdom,What country do you live in?_United States of America,What country do you work in?_Australia,What country do you work in?_Canada,What country do you work in?_Germany,What country do you work in?_Netherlands,What country do you work in?_Other,What country do you work in?_Sweden,What country do you work in?_United Kingdom,What country do you work in?_United States of America
0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [58]:
# Merge one-hot encoded features and drop the originals
mental_df = mental_df.merge(encode_df,left_index=True, right_index=True)
mental_df = mental_df.drop(mental_cat,1)
mental_df.head()

  mental_df = mental_df.drop(mental_cat,1)


Unnamed: 0,Do you have previous employers?,Would you bring up a mental health issue with a potential employer in an interview?,Do you have a family history of mental illness?,Have you had a mental health disorder in the past?,Do you currently have a mental health disorder?,Have you been diagnosed with a mental health condition by a medical professional?,Have you ever sought treatment for a mental health issue from a mental health professional?,Have your previous employers provided mental health benefits?_0,"Have your previous employers provided mental health benefits?_No, none did",Have your previous employers provided mental health benefits?_Some did,...,What country do you live in?_United Kingdom,What country do you live in?_United States of America,What country do you work in?_Australia,What country do you work in?_Canada,What country do you work in?_Germany,What country do you work in?_Netherlands,What country do you work in?_Other,What country do you work in?_Sweden,What country do you work in?_United Kingdom,What country do you work in?_United States of America
0,1,0,-1,1,-1,1,0,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,1,-1,1,1,1,1,1,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,1,1,-1,0,-1,-1,1,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,1,0,-1,1,1,1,1,0.0,0.0,1.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,1,-1,1,1,1,1,1,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [60]:
mental_df.to_excel('detection.xlsx', index=False)


In [61]:
mental_df.to_csv('detection.csv', index=False)
