In [1]:
# import packages
import pandas as pd
pd.set_option("display.max_columns", None)
import numpy as np

import warnings
warnings.filterwarnings("ignore")

In [2]:
# import data
data_2017 = pd.read_csv("Datasets/2017_survey.csv")
data_2018 = pd.read_csv("Datasets/2018_survey.csv")

data = pd.concat([data_2017,data_2018],sort=False,ignore_index=True)

In [3]:
# cleaning column titles
columns_to_clean = data.columns[data.columns.str.contains("<strong>")]

In [4]:
list_1 = (columns_to_clean[:7],columns_to_clean[9:11])
list_2 = (columns_to_clean[-5],columns_to_clean[-3])
list_3 = (columns_to_clean[-4],columns_to_clean[-2])

for item in list_1:
    for question in range(len(item)):
        data.rename(columns = {f"{item[question]}": f"{item[question][8:-9]}"},inplace=True)

for question in list_2:
    data.rename(columns = {f"{question}": f"{question[:20]+question[28:32]+question[-4:]}"},inplace=True)

for question in list_3:
    data.rename(columns = {f"{question}": f"{question[:34]+question[42:46]+question[-4:]}"},inplace=True)

data.rename(
    columns = {"If you have a mental health disorder, how often do you feel that it interferes with your work <strong>when being treated effectively?</strong>":\
               "If you have a mental health disorder, how often do you feel that it interferes with your work when being treated effectively?"},inplace=True)

In [5]:
# insert unique id
data.insert(0,"id",(data.index+1))

In [6]:
data.drop(columns = "#",inplace=True)

In [7]:
# combining and cleaning disorders
start_num = 50

while start_num < 64:
    data.iloc[:,start_num].fillna(data.iloc[:,(start_num+13)],inplace=True)
    data.iloc[:,start_num].fillna(data.iloc[:,(start_num+26)],inplace=True)
    data.iloc[:,start_num].fillna(0,inplace=True)
    data.iloc[:,start_num].where(data.iloc[:,start_num]==0,1,inplace=True)
    start_num += 1

In [8]:
# checking number of entries in each column
others_dummy = pd.concat([pd.get_dummies(data["Other.1"]),pd.get_dummies(data["Other.2"])],axis=1)
others_dummy.sum()

ADHD                                            1
Asperger                                        1
Asperger's                                      1
Autism                                          1
Suicidal                                        1
ADD                                             1
Adjustment disorder                             1
Aspergers Syndrome                              1
Attention Deficit Disorder (Non-Hyperactive)    1
Autism                                          1
Autism Spectrum Disorder                        1
Bipolar                                         1
Codependence                                    1
Cyclothymia                                     1
Depression                                      2
Depression                                      1
Depression, Anxiety, Developmental Trauma       1
Gender Dysphoria                                2
Multiple Sclerosis & Mental Health              1
Panic Disorder                                  1


In [9]:
def combine_columns(first_num,num_list,df_name):
    '''
    This function combine duplicate columns.
    '''
    for num in num_list:
        df_name.iloc[:,first_num] = df_name.iloc[:,first_num] + df_name.iloc[:,num]

In [10]:
# combining columns
ADHD_list = (5,8)
ASD_list = (1,2,3,7,9)
Depression_list = (15,16)

combine_columns(0,ADHD_list,others_dummy)
combine_columns(10,ASD_list,others_dummy)
combine_columns(14,Depression_list,others_dummy)

others_dummy.iloc[:,19] = others_dummy.iloc[:,19] + others_dummy.iloc[:,20]

In [11]:
# dropping duplicate columns in others_dummy
drop_list = list(ADHD_list + ASD_list + Depression_list)
drop_list.append(20)
column_names = []

for num in drop_list:
    column_names.append(others_dummy.columns[num])

others_dummy.drop(columns = column_names,inplace=True)

In [12]:
# checking work
others_dummy.sum()

ADHD                                  3
Suicidal                              1
Adjustment disorder                   1
Autism Spectrum Disorder              6
Bipolar                               1
Codependence                          1
Cyclothymia                           1
Depression                            4
Gender Dysphoria                      2
Multiple Sclerosis & Mental Health    1
Panic Disorder                        2
Tourette's                            1
dtype: int64

In [13]:
# dropping duplicate columns in data
data.drop(columns=data.columns[62:89],inplace=True)

In [14]:
# Mood disorder
data.iloc[:,51] = data.iloc[:,51] + others_dummy.iloc[:,-5] + others_dummy.iloc[:,4]

# ADHD
data.iloc[:,54] += others_dummy.iloc[:,0]

# How would team members react to your MH dx
data.iloc[:,76].fillna(data.iloc[:,-2],inplace=True)
data.drop(columns=data.columns[-2],inplace=True)

# Current employer provide MH coverage
data.iloc[:,5].fillna(data.iloc[:,-3],inplace=True)
data.drop(columns=data.columns[-3],inplace=True)

# drop last column in data
data.drop(columns=data.columns[-1],inplace=True)

# drop columns in others_dummy
drop_list2 = (0,4,7)
column_names2 = []

for num in drop_list2:
    column_names2.append(others_dummy.columns[num])

others_dummy.drop(columns = column_names2,inplace=True)

In [15]:
# interfering with work when MH not tx properly
data.iloc[:,66].fillna(data.iloc[:,-1],inplace=True)
data.drop(columns=data.columns[-1],inplace=True)

In [16]:
data.rename(
    columns = {"If you have a mental health disorder, how often do you feel that it interferes with your work <strong>when <em>NOT</em> being treated effectively (i.e., when you are experiencing symptoms)?</strong>":\
               "If you have a mental health disorder, how often do you feel that it interferes with your work when NOT being treated effectively (i.e., when you are experiencing symptoms)?"},inplace=True)

In [17]:
# combining df
data = pd.concat([data.iloc[:,:62], others_dummy, data.iloc[:,62:]],axis = 1)

In [18]:
# making a copy of the dataset
df = data.copy()

In [19]:
# take a peek at the columns that have over 50% NaN value
df.isna().sum()[df.isna().sum() > 587]

Describe the conversation you had with your employer about your mental health, including their reactions and what actions were taken to address your mental health issue/questions.     905
Describe the conversation with coworkers you had about your mental health including their reactions.                                                                                    841
Describe the conversation your coworker had with you about their mental health (please do not use names).                                                                               819
Do you have medical coverage (private insurance or state-provided) that includes treatment of mental health disorders?                                                                 1004
Do you know local or online resources to seek help for a mental health issue?                                                                                                          1004
If you have been diagnosed or treated for a mental health di

In [20]:
# fill in some NaN to keep those columns
df.iloc[:,49].fillna("Did not answer",inplace=True)
df.iloc[:,-6].fillna("Did not answer",inplace=True)

In [21]:
# drop columns with over 50% NaN values
delete_list = df.isna().sum()[df.isna().sum() > 587]

for num in range(len(delete_list)):
    df.drop(columns = delete_list.index[num],inplace=True)

In [22]:
# take a peek at the columns that have over 25% NaN value
df.isna().sum()[df.isna().sum() > 300]

Briefly describe what you think the industry as a whole and/or employers could do to improve mental health support for employees.                                                                                                          384
Would you be willing to talk to one of us more extensively about your experiences with mental health issues in the tech industry? (Note that all interview responses would be used <em>anonymously</em> and only with your permission.)    419
What US state or territory do you live in?                                                                                                                                                                                                 365
What is your race?                                                                                                                                                                                                                         389
What US state or territory do you work in?  

In [23]:
# drop columns with over 25% NaN values that are deemed not essential
df.drop(columns=df.columns[-12],inplace=True)

In [24]:
# fillna for describing things to improve
df.iloc[:,-12].fillna("Did not answer",inplace=True)

In [25]:
# check my work
df.isna().sum()[df.isna().sum() > 300]

What US state or territory do you live in?    365
What is your race?                            389
What US state or territory do you work in?    356
dtype: int64

In [26]:
# cleaning race column
df["What is your race?"].fillna(df["Other.3"],inplace=True)

In [27]:
df["What is your race?"].groupby(df["What is your race?"]).count()

What is your race?
American Indian or Alaska Native                          1
Ashkenazi                                                 1
Asian                                                    22
Black or African American                                11
Did not answer                                          365
European American                                         1
Hispanic                                                  3
Hispanic or Latino                                        1
Hispanic, White                                           1
I am of the race of Adam, the first human.                1
I prefer not to answer                                   23
Indian                                                    1
Indo-Caribbean                                            1
Jewish                                                    1
Latina                                                    2
Latino                                                    2
Latinx               

In [28]:
# cleaning up the responses
hispanics = ["Hispanic","Hispanic or Latino","Latina","Latino","Latinx","mexican american "]
no_answer = ["Did not answer","I prefer not to answer","I am of the race of Adam, the first human."]
mixed = ["Mixed","More than one of the above","Hispanic, White","Mestizo"]
jewish = ["Jewish","Ashkenazi"]
caucasian = ["Caucasian","White","European American","My race is white, but my ethnicity is Latin American"]
caribbean = ["Caribbean","Indo-Caribbean","West Indian"]
asian = ["Asian","South Asian"]
aa = ["Afrcian American","Black or African American"]

race_list = [hispanics,no_answer,mixed,jewish,caucasian,caribbean,asian,aa]

def combine_info(my_list,column_name = "What is your race?"):
    for num,info in enumerate(my_list):
        if num > 0:
            df[column_name][df[column_name]==info] = my_list[0]

for race in race_list:
    combine_info(race,column_name = "What is your race?")

In [29]:
# checking my work
df["What is your race?"].groupby(df["What is your race?"]).count()

What is your race?
Afrcian American                     11
American Indian or Alaska Native      1
Asian                                24
Caribbean                             2
Caucasian                           702
Did not answer                      389
Hispanic                             10
Indian                                1
Jewish                                2
Middle Eastern                        1
Mixed                                29
Persian                               1
Name: What is your race?, dtype: int64

In [30]:
# dropping duplicate column
df.drop(columns="Other.3",inplace=True)

In [31]:
# Cleaning gender
df["What is your gender?"].isna().sum()

16

In [32]:
df["What is your gender?"].fillna("Did not answer",inplace=True)

In [33]:
male = ["Male","Cis Male","Cis male","Cis-male","Cisgender male","M","MALE","cis hetero male","cis male",
        "cis male ","cis-male","dude","m","male","male (hey this is the tech industry you're talking about)",
        "male, born with xy chromosoms","male/androgynous","man","God King of the Valajar","Mail","Male ",
        "Male (cis)","Male, cis","SWM","Malel","Man","Ostensibly Male"]

female = ["Female","*shrug emoji* (F)","Cis female ","Cis woman","Cis-Female","Cisgendered woman","F",
          "F, cisgender","Female ","Female (cis) ","Female (cisgender)","I identify as female","Woman",
          "Woman-identified","cis female","cis-Female","cisgender female","f","femail","female",
          "female (cis)","female (cisgender)","femalw","woman","My sex is female."]

genderqueer = ["Genderqueer","Agender","Agender/genderfluid","Contextual","Female-ish","Demiguy",
               "Female/gender non-binary.","Genderfluid","Genderqueer demigirl","Genderqueer/non-binary",
               "Male (or female, or both)","Male-ish","NB","Non binary","Non-binary","Nonbinary",
               "Nonbinary/femme","She/her/they/them","gender non-conforming woman","genderfluid",
               "non binary","non-binary","nonbinary","uhhhhhhhhh fem genderqueer?","male/androgynous "]

transgender = ["Transgender","Trans female","Trans man","Trans woman","Transfeminine",
               "trans woman","transgender"]

other = ["Other","None","\-","none","sometimes"]

gender_list = [male,female,genderqueer,transgender,other]

for gender in gender_list:
    combine_info(gender,column_name = "What is your gender?")

In [34]:
df["What is your gender?"].groupby(df["What is your gender?"]).count()

What is your gender?
Did not answer     16
Female            344
Genderqueer        31
Male              771
Other               5
Transgender         6
Name: What is your gender?, dtype: int64

In [35]:
# Clean up # of employees
df["How many employees does your company or organization have?"].groupby(df["How many employees does your company or organization have?"]).count()

How many employees does your company or organization have?
1-5                25
100-500           284
26-100            198
500-1000           79
6-25              155
More than 1000    263
Name: How many employees does your company or organization have?, dtype: int64

In [36]:
df["How many employees does your company or organization have?"].fillna(0,inplace=True)

In [37]:
# checking my work
df["How many employees does your company or organization have?"].groupby(df["How many employees does your company or organization have?"]).count()

How many employees does your company or organization have?
0                 169
1-5                25
100-500           284
26-100            198
500-1000           79
6-25              155
More than 1000    263
Name: How many employees does your company or organization have?, dtype: int64

In [38]:
df.isna().sum()

id                                                                                                                                                                                 0
Are you self-employed?                                                                                                                                                             0
How many employees does your company or organization have?                                                                                                                         0
Is your employer primarily a tech company/organization?                                                                                                                          169
Is your primary role within your company related to tech/IT?                                                                                                                     169
Does your employer provide mental health benefits as part of healthcare coverage?              

In [39]:
# grabbing a list of participants who are self-employed
self_employed = df[df["How many employees does your company or organization have?"]==0].index.values

In [40]:
# check and see if # NaN = 169 are all from self-employed participants
my_list = df.isna().sum().index[df.isna().sum()==169]
index_list = list(my_list.values)

for i,j in enumerate(index_list):
    b = df[index_list[i]][df[index_list[i]].isna()==True].index.values
    print(np.bincount(self_employed == b))

# looks like all these are from the self-employed group

[  0 169]
[  0 169]
[  0 169]
[  0 169]
[  0 169]
[  0 169]
[  0 169]
[  0 169]
[  0 169]
[  0 169]
[  0 169]
[  0 169]
[  0 169]


In [41]:
# changing NaN values in columns with text data to NA
my_list = df.isna().sum().index[5:13]
column_list = list(my_list.values)
column_list.append(df.isna().sum().index[14])

for question in column_list:
    df.loc[(self_employed),column_list]="Not Applicable"

In [42]:
df.isna().sum()

id                                                                                                                                                                                 0
Are you self-employed?                                                                                                                                                             0
How many employees does your company or organization have?                                                                                                                         0
Is your employer primarily a tech company/organization?                                                                                                                          169
Is your primary role within your company related to tech/IT?                                                                                                                     169
Does your employer provide mental health benefits as part of healthcare coverage?              

In [43]:
def fillna_with_median(question = "What is your age?"):
    
    median = np.median(df[question][df[question].isna()==False])
    df[question].fillna(median,inplace=True)

In [44]:
fillna_with_median(question = "What is your age?")
fillna_with_median(question = "Overall, how well do you think the tech industry supports employees with mental health issues?")

In [45]:
rating_list = df.isna().sum().index[df.isna().sum().index.str.contains("Overall")].values

In [46]:
df[rating_list[3]].isna().sum()

143

In [47]:
df[df["What country do you live in?"].isna()==True]

Unnamed: 0,id,Are you self-employed?,How many employees does your company or organization have?,Is your employer primarily a tech company/organization?,Is your primary role within your company related to tech/IT?,Does your employer provide mental health benefits as part of healthcare coverage?,Do you know the options for mental health care available under your employer-provided health coverage?,"Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?",Does your employer offer resources to learn more about mental health disorders and options for seeking help?,Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources provided by your employer?,"If a mental health issue prompted you to request a medical leave from work, how easy or difficult would it be to ask for that leave?",Would you feel more comfortable talking to your coworkers about your physical health or your mental health?,Would you feel comfortable discussing a mental health issue with your direct supervisor(s)?,Have you ever discussed your mental health with your employer?,Would you feel comfortable discussing a mental health issue with your coworkers?,Have you ever discussed your mental health with coworkers?,Have you ever had a coworker discuss their or another coworker's mental health with you?,"Overall, how much importance does your employer place on physical health?","Overall, how much importance does your employer place on mental health?",Do you have previous employers?,Was your employer primarily a tech company/organization?,Have your previous employers provided mental health benefits?,Were you aware of the options for mental health care provided by your previous employers?,Did your previous employers ever formally discuss mental health (as part of a wellness campaign or other official communication)?,Did your previous employers provide resources to learn more about mental health disorders and how to seek help?,Was your anonymity protected if you chose to take advantage of mental health or substance abuse treatment resources with previous employers?,Would you have felt more comfortable talking to your previous employer about your physical health or your mental health?,Would you have been willing to discuss your mental health with your direct supervisor(s)?,Did you ever discuss your mental health with your previous employer?,Would you have been willing to discuss your mental health with your coworkers at previous employers?,Did you ever discuss your mental health with a previous coworker(s)?,Did you ever have a previous coworker discuss their or another coworker's mental health with you?,"Overall, how much importance did your previous employer place on physical health?","Overall, how much importance did your previous employer place on mental health?",Do you currently have a mental health disorder?,Have you ever been diagnosed with a mental health disorder?,"Anxiety Disorder (Generalized, Social, Phobia, etc)","Mood Disorder (Depression, Bipolar Disorder, etc)","Psychotic Disorder (Schizophrenia, Schizoaffective, etc)","Eating Disorder (Anorexia, Bulimia, etc)",Attention Deficit Hyperactivity Disorder,"Personality Disorder (Borderline, Antisocial, Paranoid, etc)",Obsessive-Compulsive Disorder,Post-Traumatic Stress Disorder,Stress Response Syndromes,Dissociative Disorder,Substance Use Disorder,Addictive Disorder,Suicidal,Adjustment disorder,Autism Spectrum Disorder,Codependence,Cyclothymia,Gender Dysphoria,Multiple Sclerosis & Mental Health,Panic Disorder,Tourette's,Have you had a mental health disorder in the past?,Have you ever sought treatment for a mental health disorder from a mental health professional?,Do you have a family history of mental illness?,"If you have a mental health disorder, how often do you feel that it interferes with your work when being treated effectively?","If you have a mental health disorder, how often do you feel that it interferes with your work when NOT being treated effectively (i.e., when you are experiencing symptoms)?",Have your observations of how another individual who discussed a mental health issue made you less likely to reveal a mental health issue yourself in your current workplace?,How willing would you be to share with friends and family that you have a mental illness?,Would you be willing to bring up a physical health issue with a potential employer in an interview?,Why or why not?,Would you bring up your mental health with a potential employer in an interview?,Why or why not?.1,Are you openly identified at work as a person with a mental health issue?,"If they knew you suffered from a mental health disorder, how do you think that team members/co-workers would react?",Have you observed or experienced an unsupportive or badly handled response to a mental health issue in your current or previous workplace?,Have you observed or experienced supportive or well handled response to a mental health issue in your current or previous workplace?,"Overall, how well do you think the tech industry supports employees with mental health issues?",Briefly describe what you think the industry as a whole and/or employers could do to improve mental health support for employees.,What is your age?,What is your gender?,What country do you live in?,What US state or territory do you live in?,What is your race?,What country do you work in?,What US state or territory do you work in?,Start Date (UTC),Submit Date (UTC),Network ID
753,754,1,0,,,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,,Not Applicable,,,,,1,0.0,I don't know,N/A (was not aware),None did,None did,No,Same level of comfort for each,"No, none of my previous supervisors",0.0,"No, at none of my previous employers",0.0,0.0,1.0,1.0,Yes,Yes,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Yes,1,Yes,Rarely,Sometimes,Maybe,1,Maybe,ww,No,ww,,,,,3.0,Did not answer,34.0,Did not answer,,,Did not answer,,,2017-08-31 18:05:07,2017-08-31 18:06:56,bae691937c
755,756,1,0,,,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,,Not Applicable,,,,,1,1.0,"Yes, they all did",I was aware of some,Some did,None did,"Yes, always",Physical health,Some of my previous supervisors,1.0,Some of my previous employers,1.0,1.0,4.0,6.0,Yes,Yes,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Yes,1,Yes,Sometimes,Often,No,8,No,Lala,No,Off!,,,,,3.0,Did not answer,34.0,Did not answer,,,Did not answer,,,2017-08-31 13:40:57,2017-08-31 13:45:48,ebd922c723


In [48]:
df.loc[755,"What country do you live in?"]="Did not answer"

In [49]:
df.iloc[753,-8]="United States of America"
df.iloc[753,-7]="Indiana"
df.iloc[753,-5]="United States of America"
df.iloc[753,-4]="Indiana"

In [50]:
df["What US state or territory do you live in?"].fillna("NA",inplace=True)

In [51]:
df.drop(columns = df.columns[-5:-3],inplace=True)

In [52]:
df.head()

Unnamed: 0,id,Are you self-employed?,How many employees does your company or organization have?,Is your employer primarily a tech company/organization?,Is your primary role within your company related to tech/IT?,Does your employer provide mental health benefits as part of healthcare coverage?,Do you know the options for mental health care available under your employer-provided health coverage?,"Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?",Does your employer offer resources to learn more about mental health disorders and options for seeking help?,Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources provided by your employer?,"If a mental health issue prompted you to request a medical leave from work, how easy or difficult would it be to ask for that leave?",Would you feel more comfortable talking to your coworkers about your physical health or your mental health?,Would you feel comfortable discussing a mental health issue with your direct supervisor(s)?,Have you ever discussed your mental health with your employer?,Would you feel comfortable discussing a mental health issue with your coworkers?,Have you ever discussed your mental health with coworkers?,Have you ever had a coworker discuss their or another coworker's mental health with you?,"Overall, how much importance does your employer place on physical health?","Overall, how much importance does your employer place on mental health?",Do you have previous employers?,Was your employer primarily a tech company/organization?,Have your previous employers provided mental health benefits?,Were you aware of the options for mental health care provided by your previous employers?,Did your previous employers ever formally discuss mental health (as part of a wellness campaign or other official communication)?,Did your previous employers provide resources to learn more about mental health disorders and how to seek help?,Was your anonymity protected if you chose to take advantage of mental health or substance abuse treatment resources with previous employers?,Would you have felt more comfortable talking to your previous employer about your physical health or your mental health?,Would you have been willing to discuss your mental health with your direct supervisor(s)?,Did you ever discuss your mental health with your previous employer?,Would you have been willing to discuss your mental health with your coworkers at previous employers?,Did you ever discuss your mental health with a previous coworker(s)?,Did you ever have a previous coworker discuss their or another coworker's mental health with you?,"Overall, how much importance did your previous employer place on physical health?","Overall, how much importance did your previous employer place on mental health?",Do you currently have a mental health disorder?,Have you ever been diagnosed with a mental health disorder?,"Anxiety Disorder (Generalized, Social, Phobia, etc)","Mood Disorder (Depression, Bipolar Disorder, etc)","Psychotic Disorder (Schizophrenia, Schizoaffective, etc)","Eating Disorder (Anorexia, Bulimia, etc)",Attention Deficit Hyperactivity Disorder,"Personality Disorder (Borderline, Antisocial, Paranoid, etc)",Obsessive-Compulsive Disorder,Post-Traumatic Stress Disorder,Stress Response Syndromes,Dissociative Disorder,Substance Use Disorder,Addictive Disorder,Suicidal,Adjustment disorder,Autism Spectrum Disorder,Codependence,Cyclothymia,Gender Dysphoria,Multiple Sclerosis & Mental Health,Panic Disorder,Tourette's,Have you had a mental health disorder in the past?,Have you ever sought treatment for a mental health disorder from a mental health professional?,Do you have a family history of mental illness?,"If you have a mental health disorder, how often do you feel that it interferes with your work when being treated effectively?","If you have a mental health disorder, how often do you feel that it interferes with your work when NOT being treated effectively (i.e., when you are experiencing symptoms)?",Have your observations of how another individual who discussed a mental health issue made you less likely to reveal a mental health issue yourself in your current workplace?,How willing would you be to share with friends and family that you have a mental illness?,Would you be willing to bring up a physical health issue with a potential employer in an interview?,Why or why not?,Would you bring up your mental health with a potential employer in an interview?,Why or why not?.1,Are you openly identified at work as a person with a mental health issue?,"If they knew you suffered from a mental health disorder, how do you think that team members/co-workers would react?",Have you observed or experienced an unsupportive or badly handled response to a mental health issue in your current or previous workplace?,Have you observed or experienced supportive or well handled response to a mental health issue in your current or previous workplace?,"Overall, how well do you think the tech industry supports employees with mental health issues?",Briefly describe what you think the industry as a whole and/or employers could do to improve mental health support for employees.,What is your age?,What is your gender?,What country do you live in?,What US state or territory do you live in?,What is your race?,Start Date (UTC),Submit Date (UTC),Network ID
0,1,0,100-500,1.0,1.0,No,Yes,No,I don't know,I don't know,I don't know,Same level of comfort for each,Yes,0.0,Yes,1.0,1.0,6.0,0.0,1,0.0,I don't know,N/A (was not aware),Some did,Some did,"Yes, always",Physical health,"Yes, all of my previous supervisors",0.0,"No, at none of my previous employers",0.0,0.0,3.0,3.0,Possibly,Did not answer,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Possibly,1,No,Sometimes,Sometimes,No,5,Yes,,No,I'd be worried they wouldn't hire me,0.0,10.0,"Yes, I experienced","Yes, I experienced",1.0,They don't take it seriously,27.0,Female,United Kingdom,,Did not answer,2018-05-16 12:32:04,2018-05-16 12:42:40,464b7a12f1
1,2,0,100-500,1.0,1.0,Yes,Yes,No,No,I don't know,I don't know,Same level of comfort for each,Maybe,0.0,Yes,1.0,1.0,7.0,2.0,1,1.0,Some did,I was aware of some,None did,None did,I don't know,Physical health,"No, none of my previous supervisors",0.0,At some of my previous employers,1.0,0.0,5.0,2.0,Possibly,Did not answer,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Possibly,0,No,Not applicable to me,Sometimes,No,4,Yes,it may require specific measures to accomodate...,No,mental health issues are stigmatised and misun...,0.0,6.0,"Yes, I observed",Maybe/Not sure,2.0,"raise awareness, talk about it to lessen the s...",31.0,Male,United Kingdom,,Did not answer,2018-05-16 12:31:13,2018-05-16 12:40:40,464b7a12f1
2,3,0,6-25,1.0,1.0,I don't know,No,I don't know,No,Yes,Difficult,Same level of comfort for each,Yes,1.0,Maybe,1.0,0.0,0.0,1.0,1,1.0,Some did,N/A (was not aware),None did,None did,I don't know,Physical health,"No, none of my previous supervisors",0.0,At some of my previous employers,1.0,0.0,8.0,0.0,Yes,Yes,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,Yes,1,Yes,Sometimes,Sometimes,Yes,5,Maybe,I will sometimes bring up my psoriasis just as...,No,stigma,1.0,5.0,"Yes, I experienced","Yes, I experienced",1.0,"Education and awareness, statistics, add suppo...",36.0,Male,United States of America,Missouri,Caucasian,2018-05-09 05:34:05,2018-05-09 05:46:04,1eb7e0cb94
3,4,0,More than 1000,1.0,1.0,Yes,Yes,I don't know,I don't know,Yes,Difficult,Same level of comfort for each,Yes,1.0,Yes,1.0,0.0,7.0,5.0,0,,,,,,,,,,,,,,,Yes,Yes,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,No,1,I don't know,Sometimes,Often,No,10,No,Anything that may hurt my chances to be hired ...,No,Might hurt my chances,0.0,5.0,Maybe/Not sure,Maybe/Not sure,2.0,"More support, less burnout and death marches",22.0,Male,United States of America,Washington,Caucasian,2018-05-04 23:19:14,2018-05-04 23:23:23,63852edbc4
4,5,1,0,,,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,,Not Applicable,,,,,1,0.0,"No, none did",N/A (none offered),None did,None did,"Yes, always",Same level of comfort for each,"No, none of my previous supervisors",0.0,"No, at none of my previous employers",1.0,1.0,8.0,8.0,No,Did not answer,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Yes,1,Yes,Often,Sometimes,No,10,Maybe,It depends. it's not something you start with ...,No,Don't think it's connected to the job. You do ...,0.0,4.0,No,"Yes, I observed",1.0,I think tech is more internal and they don't r...,52.0,Female,United States of America,Illinois,Mixed,2018-05-03 00:40:24,2018-05-03 00:53:20,43237889f1


In [53]:
my_list = df.isna().sum()[df.isna().sum() > 1].index
positions = [0,1,3,4,5,8,-3,-4,-7,-8,-11,-12,-14]

for i in positions:
    df.loc[:,my_list[i]].fillna(-1,inplace=True)

In [54]:
df.isna().sum()[df.isna().sum() > 1]

Do you know the options for mental health care available under your employer-provided health coverage?                                          104
Overall, how much importance does your employer place on physical health?                                                                       169
Overall, how much importance does your employer place on mental health?                                                                         169
Have your previous employers provided mental health benefits?                                                                                   143
Were you aware of the options for mental health care provided by your previous employers?                                                       143
Did your previous employers ever formally discuss mental health (as part of a wellness campaign or other official communication)?               143
Did your previous employers provide resources to learn more about mental health disorders and how to seek help? 

In [55]:
df[rating_list[3]].isna().sum()

143

In [56]:
original = {}
final = {}
train = {}
test = {}

for num in range(len(rating_list)-1):
    original[num] = df.loc[:,(rating_list[num],"What is your gender?",
                       "What country do you live in?","What is your race?")]
    # maybe add age back in later too...?
    
    dummies = pd.get_dummies(original[num].iloc[:,1:])
    final[num] = pd.concat([original[num].iloc[:,0],dummies],axis=1)

    train[num] = final[num][final[num].iloc[:,0].isna()==False]
    test[num] = final[num][final[num].iloc[:,0].isna()==True]

#overall industry ratings need to be dealt with separately

In [57]:
# from sklearn import pipeline
# from sklearn.model_selection import GridSearchCV
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.tree import DecisionTreeClassifier
# from xgboost import XGBClassifier

# for num in range(len(rating_list)-1):
#     x = train[num].iloc[:,1:]
#     y = train[num].iloc[:,0]
#     x_test = test[num].iloc[:,1:]

#     estimators = [('model', DecisionTreeClassifier())]

#     pipe = pipeline.Pipeline(estimators)

#     param_grid = [{'model': [DecisionTreeClassifier()]},
#                   {'model': [RandomForestClassifier()]},
#                   {'model': [XGBClassifier()]}]

#     grid = GridSearchCV(pipe, param_grid, cv=5)
#     grid_search = grid.fit(x, y)
#     print(num,grid_search.best_estimator_)

In [58]:
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
for num in range(len(rating_list)-1):
    x = train[num].iloc[:,1:]
    y = train[num].iloc[:,0]
    x_test = test[num].iloc[:,1:]

    if num == 0:
        dt = DecisionTreeClassifier()
        # 5-fold cross-validated to be the best one out of the box

        dt.fit(x,y)
        results = dt.predict(x_test)
    
    else:
        xgb = XGBClassifier()
        # 5-fold cross-validated to be the best one out of the box

        xgb.fit(x,y)
        results = xgb.predict(x_test)
    
    values = df.loc[:,rating_list[num]][df[rating_list[num]].isna()==True].index.values

    for position, value in enumerate(values):
        df.loc[value,rating_list[num]] = results[position]

In [59]:
column_list = df.isna().sum()[df.isna().sum() > 1].index

In [60]:
for column in column_list:
    df.loc[:,column].fillna("Did not answer",inplace=True)

In [61]:
df.isna().sum()

id                                                                                                                                                                               0
Are you self-employed?                                                                                                                                                           0
How many employees does your company or organization have?                                                                                                                       0
Is your employer primarily a tech company/organization?                                                                                                                          0
Is your primary role within your company related to tech/IT?                                                                                                                     0
Does your employer provide mental health benefits as part of healthcare coverage?                        

In [62]:
df.iloc[:,36:57].columns

Index(['Anxiety Disorder (Generalized, Social, Phobia, etc)',
       'Mood Disorder (Depression, Bipolar Disorder, etc)',
       'Psychotic Disorder (Schizophrenia, Schizoaffective, etc)',
       'Eating Disorder (Anorexia, Bulimia, etc)',
       'Attention Deficit Hyperactivity Disorder',
       'Personality Disorder (Borderline, Antisocial, Paranoid, etc)',
       'Obsessive-Compulsive Disorder', 'Post-Traumatic Stress Disorder',
       'Stress Response Syndromes', 'Dissociative Disorder',
       'Substance Use Disorder', 'Addictive Disorder', 'Suicidal',
       'Adjustment disorder', 'Autism Spectrum Disorder', 'Codependence',
       'Cyclothymia', 'Gender Dysphoria', 'Multiple Sclerosis & Mental Health',
       'Panic Disorder', 'Tourette's'],
      dtype='object')

In [63]:
neuro = ["Attention Deficit Hyperactivity Disorder","Autism Spectrum Disorder","Tourette's"]
#Neurodevelopmental Disorders

adjust = ["Adjustment disorder","Stress Response Syndromes"]

substance = ["Substance Use Disorder","Addictive Disorder"]
#Substance-Related and Addictive Disorders

anxiety = ["Anxiety Disorder (Generalized, Social, Phobia, etc)","Panic Disorder"]

mood = ["Mood Disorder (Depression, Bipolar Disorder, etc)","Cyclothymia"]

other = ['Suicidal','Codependence','Gender Dysphoria', 'Multiple Sclerosis & Mental Health']
#Other

column_list = [neuro,adjust,substance,anxiety,mood,other]

for var in column_list:
    for num,column in enumerate(var):
        if num > 0:
            df.loc[:,var[0]] += df.loc[:,var[num]]
            df.drop(columns = var[num],inplace=True)

In [64]:
df.rename(columns = {"Attention Deficit Hyperactivity Disorder" : "Neurodevelopmental Disorders"},inplace=True)
df.rename(columns = {"Substance Use Disorder" : "Substance-Related and Addictive Disorders"},inplace=True)
df.rename(columns = {'Suicidal' : "Other"},inplace=True)

df.rename(columns = {"Anxiety Disorder (Generalized, Social, Phobia, etc)" : "Anxiety Disorder"},inplace=True)
df.rename(columns = {"Mood Disorder (Depression, Bipolar Disorder, etc)" : "Mood Disorder"},inplace=True)
df.rename(columns = {'Psychotic Disorder (Schizophrenia, Schizoaffective, etc)' : "Psychotic Disorder"},inplace=True)

df.rename(columns = {"Eating Disorder (Anorexia, Bulimia, etc)" : "Eating Disorder"},inplace=True)
df.rename(columns = {"Mood Disorder (Depression, Bipolar Disorder, etc)" : "Mood Disorder"},inplace=True)
df.rename(columns = {'Personality Disorder (Borderline, Antisocial, Paranoid, etc)' : "Personality Disorder"},inplace=True)

In [65]:
for num in range(36,48):
    df.iloc[:,num].replace(2,1,inplace=True)

In [66]:
df.loc[:,"Would you have been willing to discuss your mental health with your coworkers at previous employers?"]\
[df.loc[:,"Would you have been willing to discuss your mental health with your coworkers at previous employers?"]==\
 "At some of my previous employers"]="Some of my previous employers"

In [67]:
df.to_csv('saved_csv/df.csv')

In [68]:
df.head()

Unnamed: 0,id,Are you self-employed?,How many employees does your company or organization have?,Is your employer primarily a tech company/organization?,Is your primary role within your company related to tech/IT?,Does your employer provide mental health benefits as part of healthcare coverage?,Do you know the options for mental health care available under your employer-provided health coverage?,"Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?",Does your employer offer resources to learn more about mental health disorders and options for seeking help?,Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources provided by your employer?,"If a mental health issue prompted you to request a medical leave from work, how easy or difficult would it be to ask for that leave?",Would you feel more comfortable talking to your coworkers about your physical health or your mental health?,Would you feel comfortable discussing a mental health issue with your direct supervisor(s)?,Have you ever discussed your mental health with your employer?,Would you feel comfortable discussing a mental health issue with your coworkers?,Have you ever discussed your mental health with coworkers?,Have you ever had a coworker discuss their or another coworker's mental health with you?,"Overall, how much importance does your employer place on physical health?","Overall, how much importance does your employer place on mental health?",Do you have previous employers?,Was your employer primarily a tech company/organization?,Have your previous employers provided mental health benefits?,Were you aware of the options for mental health care provided by your previous employers?,Did your previous employers ever formally discuss mental health (as part of a wellness campaign or other official communication)?,Did your previous employers provide resources to learn more about mental health disorders and how to seek help?,Was your anonymity protected if you chose to take advantage of mental health or substance abuse treatment resources with previous employers?,Would you have felt more comfortable talking to your previous employer about your physical health or your mental health?,Would you have been willing to discuss your mental health with your direct supervisor(s)?,Did you ever discuss your mental health with your previous employer?,Would you have been willing to discuss your mental health with your coworkers at previous employers?,Did you ever discuss your mental health with a previous coworker(s)?,Did you ever have a previous coworker discuss their or another coworker's mental health with you?,"Overall, how much importance did your previous employer place on physical health?","Overall, how much importance did your previous employer place on mental health?",Do you currently have a mental health disorder?,Have you ever been diagnosed with a mental health disorder?,Anxiety Disorder,Mood Disorder,Psychotic Disorder,Eating Disorder,Neurodevelopmental Disorders,Personality Disorder,Obsessive-Compulsive Disorder,Post-Traumatic Stress Disorder,Dissociative Disorder,Substance-Related and Addictive Disorders,Other,Adjustment disorder,Have you had a mental health disorder in the past?,Have you ever sought treatment for a mental health disorder from a mental health professional?,Do you have a family history of mental illness?,"If you have a mental health disorder, how often do you feel that it interferes with your work when being treated effectively?","If you have a mental health disorder, how often do you feel that it interferes with your work when NOT being treated effectively (i.e., when you are experiencing symptoms)?",Have your observations of how another individual who discussed a mental health issue made you less likely to reveal a mental health issue yourself in your current workplace?,How willing would you be to share with friends and family that you have a mental illness?,Would you be willing to bring up a physical health issue with a potential employer in an interview?,Why or why not?,Would you bring up your mental health with a potential employer in an interview?,Why or why not?.1,Are you openly identified at work as a person with a mental health issue?,"If they knew you suffered from a mental health disorder, how do you think that team members/co-workers would react?",Have you observed or experienced an unsupportive or badly handled response to a mental health issue in your current or previous workplace?,Have you observed or experienced supportive or well handled response to a mental health issue in your current or previous workplace?,"Overall, how well do you think the tech industry supports employees with mental health issues?",Briefly describe what you think the industry as a whole and/or employers could do to improve mental health support for employees.,What is your age?,What is your gender?,What country do you live in?,What US state or territory do you live in?,What is your race?,Start Date (UTC),Submit Date (UTC),Network ID
0,1,0,100-500,1.0,1.0,No,Yes,No,I don't know,I don't know,I don't know,Same level of comfort for each,Yes,0.0,Yes,1.0,1.0,6.0,0.0,1,0.0,I don't know,N/A (was not aware),Some did,Some did,"Yes, always",Physical health,"Yes, all of my previous supervisors",0.0,"No, at none of my previous employers",0.0,0.0,3.0,3.0,Possibly,Did not answer,1,0,0,0,0,0,0,0,0,0,0,0,Possibly,1,No,Sometimes,Sometimes,No,5,Yes,Did not answer,No,I'd be worried they wouldn't hire me,0.0,10.0,"Yes, I experienced","Yes, I experienced",1.0,They don't take it seriously,27.0,Female,United Kingdom,,Did not answer,2018-05-16 12:32:04,2018-05-16 12:42:40,464b7a12f1
1,2,0,100-500,1.0,1.0,Yes,Yes,No,No,I don't know,I don't know,Same level of comfort for each,Maybe,0.0,Yes,1.0,1.0,7.0,2.0,1,1.0,Some did,I was aware of some,None did,None did,I don't know,Physical health,"No, none of my previous supervisors",0.0,Some of my previous employers,1.0,0.0,5.0,2.0,Possibly,Did not answer,0,1,0,0,0,0,0,0,0,0,0,0,Possibly,0,No,Not applicable to me,Sometimes,No,4,Yes,it may require specific measures to accomodate...,No,mental health issues are stigmatised and misun...,0.0,6.0,"Yes, I observed",Maybe/Not sure,2.0,"raise awareness, talk about it to lessen the s...",31.0,Male,United Kingdom,,Did not answer,2018-05-16 12:31:13,2018-05-16 12:40:40,464b7a12f1
2,3,0,6-25,1.0,1.0,I don't know,No,I don't know,No,Yes,Difficult,Same level of comfort for each,Yes,1.0,Maybe,1.0,0.0,0.0,1.0,1,1.0,Some did,N/A (was not aware),None did,None did,I don't know,Physical health,"No, none of my previous supervisors",0.0,Some of my previous employers,1.0,0.0,8.0,0.0,Yes,Yes,1,1,0,0,0,0,0,0,0,1,0,0,Yes,1,Yes,Sometimes,Sometimes,Yes,5,Maybe,I will sometimes bring up my psoriasis just as...,No,stigma,1.0,5.0,"Yes, I experienced","Yes, I experienced",1.0,"Education and awareness, statistics, add suppo...",36.0,Male,United States of America,Missouri,Caucasian,2018-05-09 05:34:05,2018-05-09 05:46:04,1eb7e0cb94
3,4,0,More than 1000,1.0,1.0,Yes,Yes,I don't know,I don't know,Yes,Difficult,Same level of comfort for each,Yes,1.0,Yes,1.0,0.0,7.0,5.0,0,-1.0,Did not answer,Did not answer,Did not answer,Did not answer,Did not answer,Did not answer,Did not answer,-1.0,Did not answer,-1.0,-1.0,5.0,5.0,Yes,Yes,0,0,0,0,1,0,0,0,0,0,0,0,No,1,I don't know,Sometimes,Often,No,10,No,Anything that may hurt my chances to be hired ...,No,Might hurt my chances,0.0,5.0,Maybe/Not sure,Maybe/Not sure,2.0,"More support, less burnout and death marches",22.0,Male,United States of America,Washington,Caucasian,2018-05-04 23:19:14,2018-05-04 23:23:23,63852edbc4
4,5,1,0,-1.0,-1.0,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,Not Applicable,-1.0,Not Applicable,-1.0,-1.0,5.0,5.0,1,0.0,"No, none did",N/A (none offered),None did,None did,"Yes, always",Same level of comfort for each,"No, none of my previous supervisors",0.0,"No, at none of my previous employers",1.0,1.0,8.0,8.0,No,Did not answer,0,0,0,0,0,0,0,0,0,0,0,0,Yes,1,Yes,Often,Sometimes,No,10,Maybe,It depends. it's not something you start with ...,No,Don't think it's connected to the job. You do ...,0.0,4.0,No,"Yes, I observed",1.0,I think tech is more internal and they don't r...,52.0,Female,United States of America,Illinois,Mixed,2018-05-03 00:40:24,2018-05-03 00:53:20,43237889f1
