## About the Survey
This survey was conducted in technology workplaces to measure attitudes towards mental illness and to examine how frequently mental health disorders occur in the workplace.

In [None]:
library(ggplot2) # Data visualization
library(readr) # CSV file I/O, e.g. the read_csv function
library(tidyverse)
library(rpart)
library(wordcloud2)
library(tidytext)
library(gridExtra)




In [None]:
# Load the raw survey responses from the notebook's input directory and
# preview the first four rows.
survey <- read.csv(file = "../input/survey.csv")
head(survey, n = 4)

In [None]:
# Strongly penalise scientific notation when numbers are printed.
options(scipen = 999)

# Shared ggplot2 theme for the charts in this notebook. It is built inside
# local() so the helper values below do not leak into the global environment.
mytheme <- local({
  backdrop <- element_rect(fill = "#EDEFF7")
  legend_font <- "Arial"

  theme(
    # Text
    plot.title = element_text(
      size = 16, vjust = 2, family = "Georgia", face = "bold",
      margin = margin(b = 20)
    ),
    axis.text.x = element_text(angle = 90, size = 8, vjust = 0.4),
    axis.title.x = element_text(
      size = 12, vjust = -0.35, margin = margin(t = 20)
    ),
    axis.title.y = element_text(margin = margin(r = 20)),
    legend.title = element_text(size = 10, family = legend_font, face = "bold"),
    legend.text = element_text(size = 8, family = legend_font),
    # Backgrounds all share one fill colour
    plot.background = backdrop,
    panel.background = backdrop,
    legend.background = backdrop,
    # Grid lines, ticks and outer margin
    panel.grid.major = element_line(
      size = 0.4, linetype = "solid", color = "#cccccc"
    ),
    panel.grid.minor = element_line(size = 0),
    axis.ticks = element_blank(),
    plot.margin = unit(c(0.5, 1, 1, 1), "cm")
  )
})

# Fill palette passed to scale_fill_manual() by the charts below (14 colours).
colors <- c(
  "#2E0142", "#D33E4F", "#F46D43", "#FDAE61", "#FEE08B", "#e7fe8b", "#bcfe8b",
  "#8bfeb1", "#8bc4fe", "#8b96fe", "#ad8bfe", "#d98bfe", "#fe8bd1", "#fe8b96"
)

In [None]:

#survey$Timestamp<-ymd_hms(survey$Timestamp)
#summary(survey$Timestamp)


## Demographics
Get details about the respondents: country, age, gender, etc.

In [None]:
options(repr.plot.width = 5, repr.plot.height = 5)

# percent(): share of respondents for each value of one survey column.
#
# Args:
#   col: Name of the column to tabulate, given as a single string
#        (e.g. "Country").
#   tab: Data frame to summarise; defaults to the global `survey`.
#
# Returns:
#   A data frame with one row per distinct value of `col` (missing and blank
#   values excluded), containing the column itself, `tot` (number of rows) and
#   `percent` (share of the retained rows, rounded to a whole number), sorted
#   by `tot` in descending order.
percent <- function(col, tab = survey) {
  stopifnot(is.character(col), length(col) == 1)

  tab %>%
    # The earlier `filter_(!col == "")` compared the column *name* with "",
    # which is always TRUE, so blank answers were never removed. Look the
    # column up through the `.data` pronoun so its values are tested instead.
    # NA is excluded explicitly; filter() would drop those rows anyway because
    # `NA != ""` evaluates to NA.
    filter(!is.na(.data[[col]]), .data[[col]] != "") %>%
    group_by(.data[[col]]) %>%
    summarise(tot = n()) %>%
    mutate(percent = round(tot / sum(tot) * 100)) %>%
    arrange(desc(tot))
}
# Horizontal bar chart of respondents per country. Countries whose rounded
# share is 0 % are left out, and bars are ordered by share.
percent(col = "Country") %>%
  filter(percent > 0) %>%
  ggplot(aes(x = reorder(Country, percent), y = percent, fill = Country)) +
  geom_col() +
  theme(
    axis.title = element_blank(),
    panel.grid = element_blank(),
    legend.position = "none"
  ) +
  geom_text(
    aes(x = Country, y = percent, label = paste0(percent, "%")),
    color = "white",
    fontface = "bold",
    vjust = 0.7
  ) +
  scale_fill_manual(values = colors) +
  labs(title = "Countrywise Respondents") +
  coord_flip()

In [None]:
# Summary statistics of the raw Age column, before any clean-up.
summary(survey[["Age"]])

The **Age** variable contains incorrect values: the minimum is negative and the maximum is implausibly high. Fix those values.

In [None]:
# Overwrite impossible ages with fixed stand-in values: negative ages become
# 20 and ages above 100 become 60. which() keeps NA entries untouched.
survey$Age <- replace(survey$Age, which(survey$Age < 0), 20)
survey$Age <- replace(survey$Age, which(survey$Age > 100), 60)
summary(survey$Age)

# Density-scaled histogram of the cleaned ages with a density curve on top.
ggplot(survey, aes(x = Age)) +
  geom_histogram(aes(y = ..density..), fill = "#62AB61") +
  geom_density(col = "#3438BD", size = 1) +
  mytheme +
  labs(x = "Age", title = "Distribution of Age")

## Employer 
Let's find out what respondents say about their employers and the facilities and options offered to employees.

In [None]:
# Bar chart of the number of respondents per company-size bracket.
survey %>%
  ggplot(aes(x = no_employees, fill = no_employees)) +
  geom_bar(stat = "count") +
  mytheme +
  labs(title = "Companies Employee Count") +
  scale_fill_manual(values = colors) +
  # "none" is the documented value for hiding the legend; the empty string
  # used before is not a valid legend position.
  theme(legend.position = "none")

In [None]:
# Dodged bar chart: respondents per company-size bracket, split by the
# `treatment` answer. The count column is only used as the bar height; the
# axis label is set explicitly to "count".
survey %>%
  select(no_employees, treatment) %>%
  group_by(no_employees, treatment) %>%
  summarise(n_resp = n()) %>%
  ggplot(aes(x = no_employees, y = n_resp, fill = treatment)) +
  geom_col(position = "dodge") +
  mytheme +
  labs(title = "Employee Counts vs Treatment", y = "count") +
  scale_fill_manual(values = colors)

In [None]:
options(repr.plot.width = 5, repr.plot.height = 5)

# Pie chart of the share of respondents per `tech_company` answer.
percent(col = "tech_company") %>%
  ggplot(aes(x = "", y = percent, fill = tech_company)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar(theta = "y") +
  theme_void() +
  theme(axis.text.x = element_blank(), legend.position = "bottom") +
  scale_fill_manual(values = colors) +
  # Centre each label inside its own slice. The previous hard-coded positions
  # c(50, 0.02) only worked for exactly two rows and put the second label on
  # the slice boundary.
  geom_text(
    aes(label = paste(tech_company, ": ", percent, " %")),
    position = position_stack(vjust = 0.5),
    col = "white"
  ) +
  labs(title = "Employed by Tech Company")

In [None]:
options(repr.plot.width = 5, repr.plot.height = 5)
library(packcircles)

# percent_bubbles(): packed-circle chart of the answer shares for one column.
#
# The same pipeline used to be copy-pasted once per question; it now lives in
# one place. Unlike the copy-pasted version, this does not leave `data`,
# `packing` or `dat.gg` behind in the global environment.
#
# Args:
#   col:   Name of the survey column to tabulate (string), passed to percent().
#   title: Plot title.
#
# Returns:
#   A ggplot object with one circle per answer, whose area is proportional to
#   the answer's percentage and which is labelled with the answer and its
#   percentage.
percent_bubbles <- function(col, title) {
  shares <- percent(col = col)

  bubbles <- data.frame(
    grp = paste(shares[[col]], "\n ", shares$percent, " %"),
    value = shares$percent
  )

  # Circle centres and radii, then the polygon outlines used to draw them.
  packing <- circleProgressiveLayout(bubbles$value, sizetype = "area")
  bubbles <- cbind(bubbles, packing)
  outlines <- circleLayoutVertices(packing, npoints = 100)

  ggplot() +
    geom_polygon(
      data = outlines,
      aes(x, y, group = id, fill = as.factor(id))
    ) +
    scale_fill_manual(values = colors) +
    geom_text(
      data = bubbles,
      aes(x, y, size = value, label = grp),
      col = "white"
    ) +
    scale_size_continuous(range = c(3, 8)) +
    theme_void() +
    theme(legend.position = "none") +
    labs(title = title) +
    coord_equal()
}

percent_bubbles(
  col = "benefits",
  title = "Does your employer provide mental health benefits?"
)

percent_bubbles(
  col = "wellness_program",
  title = "Has your employer ever discussed mental health \n as part of an employee wellness program?"
)

percent_bubbles(
  col = "care_options",
  title = "Do you know the options for mental health \n care your employer provides?"
)

In [None]:
options(repr.plot.width = 10, repr.plot.height = 5)

# Pie chart: share of respondents per `self_employed` answer (NA excluded).
f1 <- survey %>%
  select(self_employed) %>%
  filter(!is.na(self_employed)) %>%
  group_by(self_employed) %>%
  summarise(se_tot = n()) %>%
  mutate(se_percent = round(se_tot / sum(se_tot) * 100, 2)) %>%
  ggplot(aes(x = "", y = se_percent, fill = self_employed)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar(theta = "y") +
  theme_void() +
  theme(axis.text.x = element_blank(), legend.position = "bottom") +
  scale_fill_manual(values = colors) +
  # Centre each label in its slice instead of relying on hand-tuned
  # positions (previously c(40, 9)), which only fit one specific data set
  # with exactly two answers.
  geom_text(
    aes(label = paste(self_employed, ": ", se_percent, " %")),
    position = position_stack(vjust = 0.5),
    col = "white"
  ) +
  labs(title = "Are you Self Employed?")

# Pie chart: share of respondents per `family_history` answer (NA excluded).
f2 <- survey %>%
  select(family_history) %>%
  filter(!is.na(family_history)) %>%
  group_by(family_history) %>%
  summarise(se_tot = n()) %>%
  mutate(se_percent = round(se_tot / sum(se_tot) * 100, 2)) %>%
  ggplot(aes(x = "", y = se_percent, fill = family_history)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar(theta = "y") +
  theme_void() +
  theme(axis.text.x = element_blank(), legend.position = "bottom") +
  scale_fill_manual(values = colors) +
  # Same label placement as f1 (previously hard-coded as c(50, 10)).
  geom_text(
    aes(label = paste(family_history, ": ", se_percent, " %")),
    position = position_stack(vjust = 0.5),
    col = "white"
  ) +
  labs(title = "Do you have a family \n history of mental illness?")

# Show both pies side by side.
grid.arrange(f1, f2, nrow = 1, ncol = 2)

In [None]:
options(repr.plot.width = 5, repr.plot.height = 5)

# Bar chart of the number of respondents per `treatment` answer.
survey %>%
  ggplot(aes(x = treatment, fill = treatment)) +
  geom_bar(stat = "count") +
  mytheme +
  labs(title = "Employees Underwent Treatment?") +
  scale_fill_manual(values = colors) +
  # "none" is the documented value for hiding the legend; the empty string
  # used before is not a valid legend position.
  theme(legend.position = "none")

### Would discussing a mental or physical health issue have negative consequences?

In [None]:
options(repr.plot.width = 10, repr.plot.height = 4)

# Bar chart of answer shares for `mental_health_consequence`.
p1 <- percent(col = "mental_health_consequence") %>%
  ggplot(aes(
    x = reorder(mental_health_consequence, percent),
    y = percent,
    fill = mental_health_consequence
  )) +
  geom_bar(stat = "identity") +
  theme(
    axis.title = element_blank(),
    panel.grid = element_blank(),
    legend.position = "none"
  ) +
  # Labels read e.g. "38%". The earlier paste(..., sep = " - ") rendered them
  # as "38 - %", unlike the country chart.
  geom_text(
    aes(x = mental_health_consequence, y = percent, label = paste0(percent, "%")),
    color = "white",
    fontface = "bold",
    vjust = 0.9
  ) +
  scale_fill_manual(values = colors) +
  labs(title = "Do you think that discussing a mental health issue with \n your employer  would have negative consequences?")

# Bar chart of answer shares for `phys_health_consequence`.
p2 <- percent(col = "phys_health_consequence") %>%
  ggplot(aes(
    x = reorder(phys_health_consequence, percent),
    y = percent,
    fill = phys_health_consequence
  )) +
  geom_bar(stat = "identity") +
  theme(
    axis.title = element_blank(),
    panel.grid = element_blank(),
    legend.position = "none"
  ) +
  geom_text(
    aes(x = phys_health_consequence, y = percent, label = paste0(percent, "%")),
    color = "white",
    fontface = "bold",
    vjust = 0.9
  ) +
  scale_fill_manual(values = colors) +
  labs(title = "Do you think that discussing a physical health issue with \n your employer  would have negative consequences?")

# Show both charts side by side.
grid.arrange(p1, p2, nrow = 1, ncol = 2)

In [None]:
options(repr.plot.width = 5, repr.plot.height = 4)

# Bar chart of answer shares for `seek_help`, ordered by share.
percent(col = "seek_help") %>%
  ggplot(aes(x = reorder(seek_help, percent), y = percent, fill = seek_help)) +
  geom_bar(stat = "identity") +
  theme(
    axis.title = element_blank(),
    panel.grid = element_blank(),
    legend.position = "none"
  ) +
  # Labels read e.g. "38%". The earlier paste(..., sep = " - ") rendered them
  # as "38 - %", unlike the country chart.
  geom_text(
    aes(x = seek_help, y = percent, label = paste0(percent, "%")),
    color = "white",
    fontface = "bold",
    vjust = 0.9
  ) +
  scale_fill_manual(values = colors) +
  labs(title = "Does your employer provide resources to learn more about \n mental health issues  and how to seek help?")

In [None]:
# Bar chart of answer shares for `coworkers`, ordered by share.
percent(col = "coworkers") %>%
  ggplot(aes(x = reorder(coworkers, percent), y = percent, fill = coworkers)) +
  geom_bar(stat = "identity") +
  theme(
    axis.title = element_blank(),
    panel.grid = element_blank(),
    legend.position = "none"
  ) +
  # Labels read e.g. "38%". The earlier paste(..., sep = " - ") rendered them
  # as "38 - %", unlike the country chart.
  geom_text(
    aes(x = coworkers, y = percent, label = paste0(percent, "%")),
    color = "white",
    fontface = "bold",
    vjust = 0.9
  ) +
  scale_fill_manual(values = colors) +
  labs(title = "Would you be willing to discuss a mental health issue with your coworkers?")

### Wordcloud on Comments given by respondents

In [None]:
# The tokenizer needs plain character input, so convert the comments column.
survey$comments <- as.character(survey$comments)

# One row per word of every comment, with stop words removed.
tidy_comments <- anti_join(
  unnest_tokens(survey, word, comments),
  stop_words,
  by = "word"
)

# Word frequencies, most frequent first, without the NA "word".
wrd <- filter(count(tidy_comments, word, sort = TRUE), !is.na(word))

wordcloud2(wrd)