<a href="https://colab.research.google.com/github/osisamkay/employee_wellbeing/blob/main/Capstone_project_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ***Project Title: "Impact of Remote Work on Mental Health and Well-Being"***




---


# **Objective**

To analyze the effects of remote work on employees' mental health, including stress levels, work-life balance, and job satisfaction. The goal is to provide insights for employers on the benefits and challenges of remote work and to identify factors that may mitigate mental health risks.

---







# **1. Data Collection and Loading**

---



In [None]:
import pandas as pd
import numpy as np
from bokeh.io import output_notebook, show
from bokeh.plotting import figure,gridplot
from bokeh.models import ColumnDataSource,HoverTool,FactorRange
from bokeh.transform import factor_cmap,dodge
from bokeh.palettes import Viridis256,Viridis,Category10,Category20
from ipywidgets import widgets, interact

# Configure Bokeh so that show() renders plots inline in the notebook.
output_notebook()

# Read the survey CSV; the path is resolved relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='Impact_of_Remote_Work_on_Mental_Health.csv')

# Preview the first five rows.
df.head(n=5)


Unnamed: 0,Employee_ID,Age,Gender,Job_Role,Industry,Years_of_Experience,Work_Location,Hours_Worked_Per_Week,Number_of_Virtual_Meetings,Work_Life_Balance_Rating,Stress_Level,Mental_Health_Condition,Access_to_Mental_Health_Resources,Productivity_Change,Social_Isolation_Rating,Satisfaction_with_Remote_Work,Company_Support_for_Remote_Work,Physical_Activity,Sleep_Quality,Region
0,EMP0001,32,Non-binary,HR,Healthcare,13,Hybrid,47,7,2,Medium,Depression,No,Decrease,1,Unsatisfied,1,Weekly,Good,Europe
1,EMP0002,40,Female,Data Scientist,IT,3,Remote,52,4,1,Medium,Anxiety,No,Increase,3,Satisfied,2,Weekly,Good,Asia
2,EMP0003,59,Non-binary,Software Engineer,Education,22,Hybrid,46,11,5,Medium,Anxiety,No,No Change,4,Unsatisfied,5,,Poor,North America
3,EMP0004,27,Male,Software Engineer,Finance,20,Onsite,32,8,4,High,Depression,Yes,Increase,3,Unsatisfied,3,,Poor,Europe
4,EMP0005,49,Male,Sales,Consulting,32,Onsite,35,12,2,High,,Yes,Decrease,3,Unsatisfied,3,Weekly,Average,North America


In [None]:
# Summarize each column's dtype and non-null count to spot columns with missing data.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 20 columns):
 #   Column                             Non-Null Count  Dtype 
---  ------                             --------------  ----- 
 0   Employee_ID                        5000 non-null   object
 1   Age                                5000 non-null   int64 
 2   Gender                             5000 non-null   object
 3   Job_Role                           5000 non-null   object
 4   Industry                           5000 non-null   object
 5   Years_of_Experience                5000 non-null   int64 
 6   Work_Location                      5000 non-null   object
 7   Hours_Worked_Per_Week              5000 non-null   int64 
 8   Number_of_Virtual_Meetings         5000 non-null   int64 
 9   Work_Life_Balance_Rating           5000 non-null   int64 
 10  Stress_Level                       5000 non-null   object
 11  Mental_Health_Condition            3804 non-null   object
 12  Access



---

# 2. Data Cleaning and Preprocessing

---





In [None]:
# Count the missing (null) values in each column
df.isnull().sum()

Unnamed: 0,0
Employee_ID,0
Age,0
Gender,0
Job_Role,0
Industry,0
Years_of_Experience,0
Work_Location,0
Hours_Worked_Per_Week,0
Number_of_Virtual_Meetings,0
Work_Life_Balance_Rating,0


In [None]:

# Encode the ordinal text ratings as integers so they can be compared numerically.
stress_mapping = {'Low': 1, 'Medium': 2, 'High': 3}
sleep_mapping = {'Poor': 1, 'Average': 2, 'Good': 3}

df['Stress_Level_Numeric'] = df['Stress_Level'].map(stress_mapping)
df['Sleep_Quality_Numeric'] = df['Sleep_Quality'].map(sleep_mapping)

# Drop every row that has a missing value in ANY column. The explicit .copy() makes
# df_cleaned an independent frame, so later cells can add or overwrite columns on it
# without triggering pandas' SettingWithCopyWarning.
# NOTE(review): this also discards rows whose Mental_Health_Condition or
# Physical_Activity is blank (Mental_Health_Condition alone has only 3804 non-null
# values out of 5000). If a blank means "none" rather than "unknown", consider
# df.dropna(subset=['Hours_Worked_Per_Week', 'Stress_Level_Numeric',
# 'Work_Life_Balance_Rating']) instead - TODO confirm with the data dictionary.
df_cleaned = df.dropna().copy()

# Sanity check: every column should now report zero nulls.
df_cleaned.isnull().sum()


Unnamed: 0,0
Employee_ID,0
Age,0
Gender,0
Job_Role,0
Industry,0
Years_of_Experience,0
Work_Location,0
Hours_Worked_Per_Week,0
Number_of_Virtual_Meetings,0
Work_Life_Balance_Rating,0


In [None]:
df_cleaned.duplicated().sum() # number of rows that exactly repeat an earlier row

0



---

# **3. Exploratory Data Analysis**






## **Stress Level Count vs. Hours Worked per Week**

In [None]:
# Bin 'Hours_Worked_Per_Week' into intervals. assign() builds a new frame that is
# rebound to df_cleaned, so the column is added without writing into the result of
# dropna(), which would trigger pandas' SettingWithCopyWarning.
df_cleaned = df_cleaned.assign(
    Hours_Worked_Per_Week_Binned=pd.cut(
        df_cleaned['Hours_Worked_Per_Week'], bins=[0, 20, 30, 40, 50, 60],
        labels=['0-20', '21-30', '31-40', '41-50', '51-60']
    )
)

# Count occurrences of each stress level in each binned hours group.
# observed=False keeps hour bins that contain no rows, so every bin appears on the x-axis.
stress_counts = df_cleaned.groupby(['Hours_Worked_Per_Week_Binned', 'Stress_Level'], observed=False).size().reset_index(name='Count')

# Define unique categories for x-axis and stress levels for grouping
hours_worked_factors = stress_counts['Hours_Worked_Per_Week_Binned'].astype(str).unique().tolist()
stress_levels = ['Low', 'Medium', 'High']
colors = Viridis[3]

# Set up the Bokeh plot with a categorical x-axis (one slot per hours bin)
stress_count_vs_hour_worked_plt = figure(x_range=hours_worked_factors, height=400, width=900,
             title="Stress Level Counts by Hours Worked Per Week",
             x_axis_label="Hours Worked Per Week",
             y_axis_label="Count",
            )

# Add separate bars for each stress level; dodge shifts each level sideways
# (-0.25, 0, +0.25) so the three bars sit next to each other inside one bin.
for idx, level in enumerate(stress_levels):
    level_data = stress_counts[stress_counts['Stress_Level'] == level]
    level_source = ColumnDataSource(level_data)

    stress_count_vs_hour_worked_plt.vbar(
        x=dodge('Hours_Worked_Per_Week_Binned', -0.25 + idx * 0.25, range=stress_count_vs_hour_worked_plt.x_range),
        top='Count', width=0.2, source=level_source,
        color=colors[idx], legend_label=level
    )

# Add HoverTool for interactive tooltips
hover = HoverTool(tooltips=[("Hours Worked Range", "@Hours_Worked_Per_Week_Binned"),
                            ("Stress Level", "@Stress_Level"),
                            ("Count", "@Count")])
stress_count_vs_hour_worked_plt.add_tools(hover)

# Customize legend and axis. The legend location is set once; the legend is then
# moved into the panel to the right of the plot so it does not cover any bars.
stress_count_vs_hour_worked_plt.legend.title = "Stress Level"
stress_count_vs_hour_worked_plt.legend.orientation = "vertical"
stress_count_vs_hour_worked_plt.legend.location = "top_right"
stress_count_vs_hour_worked_plt.xgrid.grid_line_color = None
stress_count_vs_hour_worked_plt.add_layout(stress_count_vs_hour_worked_plt.legend[0], 'right')

# Show the plot
show(stress_count_vs_hour_worked_plt)





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned['Hours_Worked_Per_Week_Binned'] = pd.cut(


This bar chart shows the relationship between hours worked per week and stress levels (low, medium, and high).

### Simple Analysis

- **Fewer Hours, Lower Stress**: People who work fewer hours (0-20 per week) mostly have low stress, with very few experiencing medium or high stress. This suggests that shorter work hours are associated with lower stress levels.
- **21-30 Hours: Mostly Medium Stress**: In the 21-30 hours range, the majority experience medium stress, with high stress close behind. This suggests that as work hours increase a bit, stress levels also start to rise.
- **31-60 Hours: Consistent High Stress**: For those working between 31-60 hours per week, high stress is common across all these ranges. In fact, high stress (yellow) is almost as frequent as low and medium stress, especially in the 41-60 hour range, where stress levels become more evenly distributed.
- **51-60 Hours: Increase in High Stress**: In the highest hours category (51-60), high stress becomes particularly common, slightly exceeding medium and low stress levels.

### Key Takeaways

1. **Longer Hours, Higher Stress**: As people work more hours, stress levels generally increase, with high stress being especially prevalent from 31 hours per week onward.
2. **Moderate Hours Still Have High Stress**: Even working a moderate number of hours (21-30) doesn’t eliminate high stress, though medium stress is most common here.
3. **Low Hours, Low Stress**: Working fewer hours per week (0-20) is most associated with low stress.

In summary, the chart shows that working more hours is closely linked with higher stress levels.

## Stress Level by Work-Life Balance Rating

In [None]:
# Make sure Stress_Level is plain text so it compares cleanly with the labels below.
# assign() returns a new frame that is rebound to df_cleaned; this avoids writing a
# column into the result of dropna(), which would trigger SettingWithCopyWarning.
df_cleaned = df_cleaned.assign(Stress_Level=df_cleaned['Stress_Level'].astype(str))

# Group data by Work-Life Balance Rating and Stress Level to get counts
balance_vs_stress_counts = df_cleaned.groupby(['Work_Life_Balance_Rating', 'Stress_Level']).size().reset_index(name='Count')

# Sort Work_Life_Balance_Rating in ascending order for the x-axis
balance_vs_stress_counts = balance_vs_stress_counts.sort_values(by='Work_Life_Balance_Rating')
balance_ratings = sorted(df_cleaned['Work_Life_Balance_Rating'].unique())

# Define the color palette for each stress level
palette = Category20[3]

# Create the Bokeh plot with a numeric x-axis, padded by 0.5 on each side so the
# outermost bar groups are not clipped.
balance_vs_stress_plt = figure(x_range=(min(balance_ratings)-0.5, max(balance_ratings)+0.5),
              height=400, width=800, title="Stress Level Counts by Work-Life Balance Rating")

# Plot each Stress_Level as separate bars
for idx, level in enumerate(['Low', 'Medium', 'High']):
    # Filter the data for the current stress level
    level_data = balance_vs_stress_counts[balance_vs_stress_counts['Stress_Level'] == level]
    source = ColumnDataSource(level_data)

    # Use dodge to offset each level (-0.25, 0, +0.25) around its rating value
    balance_vs_stress_plt.vbar(
        x=dodge('Work_Life_Balance_Rating', -0.25 + idx * 0.25, range=balance_vs_stress_plt.x_range),
        top='Count', width=0.2, source=source,
        color=palette[idx], legend_label=level
    )

# Customize plot appearance; the legend is moved to the right of the plot area
balance_vs_stress_plt.xaxis.axis_label = "Work-Life Balance Rating"
balance_vs_stress_plt.yaxis.axis_label = "Count"
balance_vs_stress_plt.xgrid.grid_line_color = None
balance_vs_stress_plt.legend.title = "Stress Level"
balance_vs_stress_plt.legend.location = "top_right"
balance_vs_stress_plt.add_layout(balance_vs_stress_plt.legend[0], 'right')

# Add interactive hover tool
hover = HoverTool(tooltips=[
    ("Work-Life Balance Rating", "@Work_Life_Balance_Rating"),
    ("Stress Level", "@Stress_Level"),
    ("Count", "@Count")
])
balance_vs_stress_plt.add_tools(hover)

# Show the plot
show(balance_vs_stress_plt)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned['Stress_Level'] = df_cleaned['Stress_Level'].astype(str)


This bar chart shows how different levels of stress (low, medium, and high) relate to people's work-life balance ratings (from 1 to 5).

### Summary in Simple Terms

- **Higher Balance, Lower Stress**: Generally, people with a higher work-life balance rating (4 and 5) experience less stress.
- **Moderate Balance, High Stress**: Interestingly, people with a middle rating of 3 seem to have the highest counts of high stress, suggesting that a "moderate" balance might be stressful for some.
- **High Stress Exists at All Levels**: High stress shows up at every balance level, which means that even a good work-life balance doesn’t guarantee low stress.

### Key Takeaways

1. **Better balance helps** lower stress for many people, but it’s not a perfect solution.
2. **A middle level of balance** (rating of 3) might actually be the most stressful for some reason.
3. **Everyone experiences stress**, no matter their balance level, though it tends to be less for people with higher balance ratings.

In short, the chart shows that improving work-life balance can reduce stress, but stress affects everyone to some extent, regardless of balance.

## **Productivity based on work location**

In [None]:
# Prepare the data by counting occurrences
# Tally employees for every (work location, productivity change) combination.
count_data = (
    df.groupby(['Work_Location', 'Productivity_Change'])
    .size()
    .reset_index(name='Count')
)

# Nested categorical x-axis: each work location paired with each productivity outcome,
# in order of first appearance in the data.
work_location_factors = df['Work_Location'].unique().tolist()
productivity_change_factors = df['Productivity_Change'].unique().tolist()
factors = [
    (location, change)
    for location in work_location_factors
    for change in productivity_change_factors
]

# Data source: one (location, change) tuple per bar plus its count and legend label.
source = ColumnDataSource(data={
    'x': list(zip(count_data['Work_Location'], count_data['Productivity_Change'])),
    'counts': count_data['Count'],
    'Productivity_Change': count_data['Productivity_Change'],
})

# Color bars by the second level of the x factor (start=1, end=2 selects the
# productivity-change part of each tuple).
palette = ['green', 'red', 'blue']
color_map = factor_cmap('x', palette=palette, factors=productivity_change_factors, start=1, end=2)

# Figure with the nested factor range as its x-axis.
location_vs_productivity_plot = figure(
    x_range=FactorRange(*factors),
    height=400,
    width=900,
    title="Work Location vs. Productivity Change",
)

# One bar per (location, change) pair; the legend is grouped by productivity change.
location_vs_productivity_plot.vbar(
    x='x',
    top='counts',
    width=0.8,
    source=source,
    fill_color=color_map,
    legend_field="Productivity_Change",
)

# Tooltips showing the outcome and its count.
hover = HoverTool(tooltips=[ ("Productivity Change", "@Productivity_Change"), ("Count", "@counts")])
location_vs_productivity_plot.add_tools(hover)

# Axis styling.
location_vs_productivity_plot.xgrid.grid_line_color = None
location_vs_productivity_plot.xaxis.axis_label = "Work Location"
location_vs_productivity_plot.yaxis.axis_label = "Count"

# Legend styling, then move the legend into the panel to the right of the plot.
location_vs_productivity_plot.legend.title = "Productivity Change"
location_vs_productivity_plot.legend.orientation = "vertical"
location_vs_productivity_plot.legend.location = "bottom"
location_vs_productivity_plot.add_layout(location_vs_productivity_plot.legend[0], 'right')

# Render the chart.
show(location_vs_productivity_plot)


This bar chart compares productivity changes (Decrease, Increase, No Change) across different work locations (Hybrid, Remote, and Onsite).

### Simple Analysis

1. **Hybrid Work**:
   - **Decrease in Productivity**: The majority of employees working in a hybrid setup experience a decrease in productivity (green).
   - **Increase and No Change**: The numbers for productivity increase (red) and no change (blue) are fairly balanced but both are lower than the decrease category.
   - This suggests that hybrid work might not be as productive for most people in this sample.

2. **Remote Work**:
   - **Decrease in Productivity**: Similar to hybrid, a high count of remote workers report a decrease in productivity.
   - **Increase and No Change**: Both are present but fewer than those reporting a decrease, with no change (blue) being the least.
   - This implies that remote work is also associated with lower productivity for many in this group.

3. **Onsite Work**:
   - **Decrease in Productivity**: There’s also a high count for decreased productivity (green) for onsite workers, similar to hybrid and remote.
   - **Balanced Increase and No Change**: Both increase (red) and no change (blue) counts are similar, but lower than the decrease.
   - This suggests that even onsite work has a high rate of productivity decrease among workers.

### Key Takeaways

1. **Productivity Decrease Across All Locations**: Most people report decreased productivity across all three work setups (Hybrid, Remote, Onsite).
2. **Limited Productivity Increase**: Very few report an increase in productivity, especially in remote and onsite settings.
3. **Similar Patterns Across Locations**: The pattern of productivity change is fairly consistent regardless of work location.

In summary, across all work locations, decreased productivity is the most common outcome, suggesting that productivity challenges are present regardless of whether employees work in hybrid, remote, or onsite setups.

# **Analysis on Mental health**

In [None]:
from bokeh.io import show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool
import pandas as pd
import numpy as np

from itertools import cycle, islice

# Calculate counts for each Mental Health Condition
# (value_counts skips missing values, so only reported conditions are charted)
mental_health_counts = df['Mental_Health_Condition'].value_counts().reset_index()
mental_health_counts.columns = ['Mental_Health_Condition', 'Count']

# Calculate the angle (in radians) of each wedge: its share of the full circle
mental_health_counts['Angle'] = mental_health_counts['Count'] / mental_health_counts['Count'].sum() * 2 * np.pi

# Each wedge starts where the previous one ended
mental_health_counts['start_angle'] = np.cumsum(mental_health_counts['Angle']) - mental_health_counts['Angle']
mental_health_counts['end_angle'] = np.cumsum(mental_health_counts['Angle'])

# Build a palette with exactly one color per condition: the base colors are
# repeated when there are more conditions than colors and truncated when there
# are fewer, so the column assignment below can never fail on a length mismatch.
base_palette = ["#e41a1c", "#377eb8", "#4daf4a"]
palette = list(islice(cycle(base_palette), len(mental_health_counts)))

# Add color column to the data
mental_health_counts['Color'] = palette

# Create the ColumnDataSource once, after every column the glyph needs exists
source = ColumnDataSource(data=mental_health_counts.to_dict(orient='list'))

# Create a Bokeh figure; the fixed (-1, 1) ranges keep the pie centered and round
mental_health_pie_chart = figure(height=400, width=400, title="Mental Health Condition of Employees",
                                 tooltips="@Mental_Health_Condition: @Count",
                                 x_range=(-1, 1), y_range=(-1, 1))

# Add wedges to the pie chart
mental_health_pie_chart.wedge(x=0, y=0, radius=0.8,
                              start_angle='start_angle', # Reference column name
                              end_angle='end_angle',     # Reference column name
                              line_color="white", fill_color='Color', legend_field='Mental_Health_Condition',
                              source=source)

# Customize plot: axes and grid carry no meaning for a pie chart, so hide them
mental_health_pie_chart.axis.axis_label = None
mental_health_pie_chart.axis.visible = False
mental_health_pie_chart.grid.visible = False

# Show plot
show(mental_health_pie_chart)

This pie chart illustrates the distribution of mental health conditions among employees, categorized into **Burnout**, **Anxiety**, and **Depression**.

### Simple Analysis

- **Burnout (Red)**: This segment takes up a significant portion of the chart, suggesting that burnout is a major mental health issue among employees. It appears slightly smaller than anxiety but still prominent.
- **Anxiety (Blue)**: Anxiety is the largest portion of the chart, indicating that it is the most common mental health condition among employees in this group.
- **Depression (Green)**: Depression is also prevalent, taking up a large section of the chart, close to the size of burnout, and indicating it's a significant issue.

### Key Takeaways

1. **Anxiety is the Most Common Condition**: Among employees, anxiety has the largest share, implying it's the leading mental health challenge.
2. **Burnout and Depression Are Also High**: Both burnout and depression represent substantial portions, suggesting that many employees experience one of these conditions.
3. **Mental Health Challenges Are Widespread**: The chart shows all three conditions are significant, which implies that a large portion of employees are affected by mental health issues.

In summary, anxiety is the most common mental health issue among employees, followed closely by burnout and depression. This highlights a strong need for mental health support in the workplace.

# **Access to Mental Health Resources**

In [None]:

from bokeh.transform import cumsum
from math import pi

# Step 1: Calculate counts and percentages for each category
access_counts = df['Access_to_Mental_Health_Resources'].value_counts()
access_data = pd.DataFrame({'Access': access_counts.index, 'Count': access_counts.values})
access_data['Percentage'] = (access_data['Count'] / access_data['Count'].sum()) * 100

# Step 2: Set up angles (in radians) for the wedges
access_data['Angle'] = access_data['Count'] / access_data['Count'].sum() * 2 * pi

# Step 3: Assign colors by category label rather than by frequency rank, so that
# "No" is always red and "Yes" is always green regardless of which is more common.
# Any unexpected label falls back to gray instead of raising a length-mismatch error.
access_colors = {'No': 'red', 'Yes': 'green'}
access_data['Color'] = [access_colors.get(label, 'gray') for label in access_data['Access']]

# Step 4: Create a ColumnDataSource with the color information
source = ColumnDataSource(access_data)

# Step 5: Create a Bokeh figure for the pie chart
Access_to_Mental_Health_plot = figure(height=400,
           width=400,
           title="Access to Mental Health Resources",
           x_range=(-1, 1),
           y_range=(-1, 1))

# Step 6: Add wedges; cumsum turns the per-category angles into running
# start/end angles, and the fill color is read from the Color column
Access_to_Mental_Health_plot.wedge(x=0, y=0, radius=0.8,
        start_angle=cumsum('Angle', include_zero=True), end_angle=cumsum('Angle'),
        line_color="white", fill_color='Color', legend_field='Access', source=source)

# Step 7: Add tooltips to show category and percentage on hover
hover = HoverTool(tooltips=[("Access", "@Access"), ("Percentage", "@Percentage{0.1f}%")])
Access_to_Mental_Health_plot.add_tools(hover)

# Step 8: Customize plot appearance (axes and grid are meaningless for a pie chart)
Access_to_Mental_Health_plot.axis.axis_label = None
Access_to_Mental_Health_plot.axis.visible = False
Access_to_Mental_Health_plot.grid.visible = False

# Show plot
show(Access_to_Mental_Health_plot)


This pie chart shows the proportion of employees with and without access to mental health resources, represented by **Yes** (green) and **No** (red).

### Simple Analysis

- **No Access (Red)**: The red portion of the chart represents employees who do not have access to mental health resources. It covers slightly more than half of the chart, indicating that a majority of employees lack access.
- **Yes Access (Green)**: The green section represents employees who do have access to mental health resources. This portion is slightly smaller than the red, indicating that less than half of employees have access.

### Key Takeaways

1. **Limited Access to Mental Health Resources**: More than half of employees do not have access to mental health resources, which may contribute to the high levels of mental health issues (burnout, anxiety, depression) seen in previous charts.
2. **Slightly Less Than Half Have Access**: A significant portion of employees do have access, but it's not enough to cover the majority.

In summary, the chart highlights a gap in mental health resource availability, with a slight majority of employees lacking access to support. This suggests an area for improvement in workplace support for mental health.

## **Access to Mental Health Resources by Job Role**

In [None]:
# Build a wide table: one row per job role, one column per access answer ('No', 'Yes').
access_counts = (
    df_cleaned
    .groupby(['Job_Role', 'Access_to_Mental_Health_Resources'])
    .size()
    .unstack(fill_value=0)
    .reset_index()
)

# Expose only the columns the chart needs to Bokeh.
source = ColumnDataSource(data={
    'Job_Role': access_counts['Job_Role'],
    'Yes': access_counts['Yes'],
    'No': access_counts['No'],
})

# Categorical x-axis with one slot per job role.
mental_access_plt = figure(
    x_range=FactorRange(*access_counts['Job_Role']),
    height=400,
    width=900,
    title="Count of Access to Mental Health Resources by Job Role",
)

# Colors for 'Yes' and 'No' access levels
colors = ["green", "red"]

# Draw the two answers as side-by-side (dodged) bars within each job role.
# The glyph name doubles as the column name so the hover tool can resolve "@$name".
for answer, offset, bar_color in (("Yes", -0.15, colors[0]), ("No", 0.15, colors[1])):
    mental_access_plt.vbar(
        x=dodge('Job_Role', offset, range=mental_access_plt.x_range),
        top=answer,
        width=0.25,
        source=source,
        color=bar_color,
        legend_label=answer,
        name=answer,
    )

# Axis styling.
mental_access_plt.xgrid.grid_line_color = None
mental_access_plt.xaxis.axis_label = "Job Role"
mental_access_plt.yaxis.axis_label = "Count of Mental Health Resource Access"

# Legend styling, then move the legend into the panel to the right of the plot.
mental_access_plt.legend.title = "Access to Mental Health Resources"
mental_access_plt.legend.orientation = "vertical"
mental_access_plt.legend.location = "top_right"
mental_access_plt.add_layout(mental_access_plt.legend[0], 'right')

# Hover shows the role, which bar is under the cursor, and that bar's exact count.
hover = HoverTool(tooltips=[("Job Role", "@Job_Role"), ("Access", "$name"), ("Count", "@$name")])
mental_access_plt.add_tools(hover)

# Render the chart.
show(mental_access_plt)

This bar chart shows the count of employees with and without access to mental health resources across different job roles.

### Simple Analysis

- **Data Scientist, Designer, HR, Marketing**: These roles have a fairly balanced distribution between those with access (green) and those without access (red). This suggests that access to mental health resources is similar for employees in these roles, with no significant difference.
  
- **Project Manager**: This role has a higher count of employees without access to mental health resources (red) compared to those with access (green). Project managers seem to be particularly underserved in terms of access to mental health support.

- **Sales and Software Engineer**: These roles also show a higher count of employees without access (red) compared to those with access (green), similar to project managers. However, the difference is less pronounced than for project managers.

### Key Takeaways

1. **Uneven Access Across Roles**: Some roles, like Project Managers, Sales, and Software Engineers, have noticeably more employees without access to mental health resources, indicating potential gaps in support.
2. **Better Balance in Certain Roles**: Data Scientists, Designers, HR, and Marketing employees have a more balanced access, suggesting that these roles might have better support systems or access.

In summary, access to mental health resources varies by job role, with Project Managers, Sales, and Software Engineers facing more limited access. Addressing these gaps could improve overall support for employee mental health across the organization.

## **Regional Differences in Stress and Satisfaction**

In [None]:
# Build a wide table: one row per region, one column per remote-work satisfaction answer.
satisfaction_counts = (
    df_cleaned
    .groupby(['Region', 'Satisfaction_with_Remote_Work'])
    .size()
    .unstack(fill_value=0)
    .reset_index()
)
source = ColumnDataSource(satisfaction_counts)

# One color per satisfaction answer.
colors_stress = Category20[3]

# Categorical x-axis with one slot per region.
region_satisfaction_plt = figure(
    x_range=FactorRange(*satisfaction_counts['Region']),
    height=400,
    width=900,
    title="Satisfaction with remote work Counts by Region",
)

# Draw each satisfaction answer (alphabetical order, 'Region' column skipped) as a
# side-by-side bar within every region, offset by -0.25, 0 and +0.25.
satisfaction_columns = sorted(satisfaction_counts.columns[1:])
for position, satisfaction in enumerate(satisfaction_columns):
    column_name = f'{satisfaction}'
    region_satisfaction_plt.vbar(
        x=dodge('Region', -0.25 + position * 0.25, range=region_satisfaction_plt.x_range),
        top=column_name,
        width=0.2,
        source=source,
        color=colors_stress[position],
        legend_label=column_name,
    )

# Axis styling.
region_satisfaction_plt.xgrid.grid_line_color = None
region_satisfaction_plt.xaxis.axis_label = "Region"
region_satisfaction_plt.yaxis.axis_label = "Count of Employees"

# Legend styling, then move the legend into the panel to the right of the plot.
region_satisfaction_plt.legend.orientation = "vertical"
region_satisfaction_plt.legend.location = "top_right"
region_satisfaction_plt.add_layout(region_satisfaction_plt.legend[0], 'right')

# Hover lists the counts of all three answers for the region under the cursor.
hover = HoverTool(tooltips=[
    ("Region", "@Region"),
    ("Neutral", "@Neutral"),
    ("Satisfied", "@Satisfied"),
    ("Unsatisfied", "@Unsatisfied")
])
region_satisfaction_plt.add_tools(hover)

# Render the chart.
show(region_satisfaction_plt)


This bar chart illustrates employee satisfaction with remote work across different regions (Africa, Asia, Europe, North America, Oceania, South America), categorized into **Neutral**, **Satisfied**, and **Unsatisfied**.

### Simple Analysis

1. **Africa**:
   - **High Dissatisfaction**: Africa has a notable portion of employees who are unsatisfied with remote work (orange), higher than those who are satisfied (light blue).
   - **Neutral**: A strong presence of neutral responses (dark blue) suggests mixed feelings in this region.

2. **Asia**:
   - **Even Split**: Asia shows a balanced distribution across neutral, satisfied, and unsatisfied responses. No single category dominates, indicating varied opinions about remote work.

3. **Europe**:
   - **Similar to Asia**: Europe also has a balanced distribution of satisfaction levels, with a slightly higher neutral count. Opinions on remote work satisfaction are fairly divided.

4. **North America**:
   - **Higher Neutral**: In North America, the neutral responses are the highest, followed closely by satisfied responses. Unsatisfied responses are the lowest, suggesting that employees are generally okay with remote work, if not overwhelmingly positive.

5. **Oceania**:
   - **High Satisfaction**: Oceania has the highest count of satisfied employees (light blue) compared to other regions. Unsatisfied responses are present but much lower, showing a stronger preference or acceptance for remote work in this region.

6. **South America**:
   - **Mixed Feelings with More Unsatisfied**: South America has a more balanced distribution but leans slightly toward dissatisfaction, similar to Africa.

### Key Takeaways

1. **Varied Satisfaction by Region**: Different regions show distinct patterns of satisfaction with remote work.
   - **High Satisfaction**: Oceania stands out with the most satisfied employees.
   - **Higher Dissatisfaction**: Africa and South America have more unsatisfied employees, indicating possible regional challenges with remote work.

2. **General Neutrality in Some Regions**: North America and Europe show a significant portion of neutral responses, suggesting that remote work may be seen as acceptable but not particularly satisfying or dissatisfying.

In summary, employee satisfaction with remote work varies across regions, with Oceania showing the most positive response, while Africa and South America lean toward dissatisfaction. North America and Europe have more neutral responses, reflecting an overall acceptance but not strong enthusiasm for remote work in these regions.

## **Stress level count by region**

In [None]:
# Build a wide table: one row per region, one column per stress level.
stress_counts = (
    df_cleaned
    .groupby(['Region', 'Stress_Level'])
    .size()
    .unstack(fill_value=0)
    .reset_index()
)
source = ColumnDataSource(stress_counts)

# One color per stress level.
colors_stress = Category10[3]

# Categorical x-axis with one slot per region.
region_stress_plt = figure(
    x_range=FactorRange(*stress_counts['Region']),
    height=400,
    width=900,
    title="Stress Level Counts by Region",
)

# Draw each stress level (alphabetical order, 'Region' column skipped) as a
# side-by-side bar within every region, offset by -0.25, 0 and +0.25.
stress_columns = sorted(stress_counts.columns[1:])
for position, level in enumerate(stress_columns):
    region_stress_plt.vbar(
        x=dodge('Region', -0.25 + position * 0.25, range=region_stress_plt.x_range),
        top=f'{level}',
        width=0.2,
        source=source,
        color=colors_stress[position],
        legend_label=f'Stress Level {level}',
    )

# Axis styling.
region_stress_plt.xgrid.grid_line_color = None
region_stress_plt.xaxis.axis_label = "Region"
region_stress_plt.yaxis.axis_label = "Count of Employees"

# Legend styling, then move the legend into the panel to the right of the plot.
region_stress_plt.legend.orientation = "vertical"
region_stress_plt.legend.location = "top_right"
region_stress_plt.add_layout(region_stress_plt.legend[0], 'right')

# Hover lists the counts of all three stress levels for the region under the cursor.
hover = HoverTool(tooltips=[
    ("Region", "@Region"),
    ("High Stress Level Count", "@High"),
    ("Low Stress Level Count", "@Low"),
    ("Medium Stress Level Count", "@Medium")
])
region_stress_plt.add_tools(hover)

# Render the chart.
show(region_stress_plt)


This bar chart shows the distribution of stress levels (High, Medium, Low) across different regions (Africa, Asia, Europe, North America, Oceania, South America).

### Simple Analysis

1. **Africa**:
   - **High Stress**: Africa has the highest count of employees experiencing high stress (blue) compared to low and medium stress levels.
   - **Low and Medium Stress**: Both low (orange) and medium (green) stress levels are present but are lower than high stress.

2. **Asia**:
   - **Balanced Stress Levels**: Asia shows a more balanced distribution across high, medium, and low stress levels, with medium stress (green) being slightly higher than the others.

3. **Europe**:
   - **Even Distribution**: Europe has a fairly even distribution across all stress levels, with medium stress (green) slightly leading. This suggests a more balanced stress experience among employees in this region.

4. **North America**:
   - **Moderate Stress Levels**: North America shows a similar pattern to Europe, with a balanced distribution and a slight lean towards medium stress, followed closely by high stress.

5. **Oceania**:
   - **High and Medium Stress**: Oceania has high and medium stress levels almost equally distributed, with low stress levels slightly lower. This suggests that employees in Oceania experience both high and medium stress levels frequently.

6. **South America**:
   - **Lower Stress Overall**: South America has relatively lower stress counts across all categories, with medium stress being slightly more common.

### Key Takeaways

1. **High Stress in Africa**: Africa stands out for its high levels of stress among employees, with fewer low-stress responses.
2. **Balanced Stress Levels in Asia, Europe, and North America**: These regions show a more balanced distribution of stress levels, indicating a more even experience of stress.
3. **Mixed Stress Levels in Oceania**: Oceania has similar levels of high and medium stress, suggesting frequent moderate to high stress.
4. **Lower Stress in South America**: South America shows relatively lower stress levels, with medium stress being the most common.

In summary, stress levels vary by region, with Africa experiencing the highest stress and South America showing the lowest. Asia, Europe, and North America present a more balanced mix of stress levels. Oceania has both high and medium stress levels, indicating a tendency toward moderate to high stress among employees.

In [None]:
from bokeh.models import TabPanel, Tabs, Div
from bokeh.layouts import column
from bokeh.io import show

# Dashboard content, one triple per tab: (tab title, plot object, summary text).
# The plot objects are referenced by name here and are presumably built in
# earlier cells of the notebook. Each summary is written as adjacent string
# literals, which Python joins into a single string.
plots = [
    ("Stress Count vs. Hours Worked", stress_count_vs_hour_worked_plt, (
        "As work hours increase, stress levels generally rise. Those working 31-60 hours experience high stress frequently, "
        "while people working 0-20 hours report mostly low stress, suggesting shorter work hours are associated with lower stress."
    )),
    ("Balance vs. Stress", balance_vs_stress_plt, (
        "Higher work-life balance ratings generally correlate with lower stress. However, high stress is still observed at all balance levels, "
        "especially around a balance rating of 3, indicating moderate balance can be stressful for some."
    )),
    ("Location vs. Productivity", location_vs_productivity_plot, (
        "Across all work locations (Hybrid, Remote, Onsite), most employees report a decrease in productivity. This pattern suggests that "
        "productivity challenges persist regardless of work location."
    )),
    ("Mental Health Condition Distribution", mental_health_pie_chart, (
        "Anxiety is the most common mental health condition among employees, followed closely by burnout and depression, "
        "highlighting a strong need for mental health support in the workplace."
    )),
    ("Access to Mental Health Resources", Access_to_Mental_Health_plot, (
        "Slightly more than half of employees lack access to mental health resources, indicating an area for improvement "
        "in workplace mental health support."
    )),
    ("Mental Health Access by Job Role", mental_access_plt, (
        "Access to mental health resources varies by job role, with Project Managers, Sales, and Software Engineers having more limited access. "
        "Addressing these gaps could enhance mental health support across different roles."
    )),
    ("Region Satisfaction", region_satisfaction_plt, (
        "Employee satisfaction with remote work varies by region. Oceania shows the most satisfaction, while Africa and South America "
        "have higher levels of dissatisfaction. North America and Europe show more neutral responses, indicating general acceptance."
    )),
    ("Region Stress Levels", region_stress_plt, (
        "Stress levels vary by region, with Africa experiencing the highest stress and South America showing the lowest. Asia, Europe, "
        "and North America have a more balanced mix of stress levels, while Oceania shows moderate to high stress levels."
    )),
]

# Build one TabPanel per (title, plot, summary) entry in `plots`:
# an HTML caption (bold centered title, centered summary) stacked above the plot.
tabs = []
for title, plot, summary in plots:
    heading_html = f"<div style='text-align:center; font-weight:bold;'><h3>{title}</h3></div>"
    summary_html = f"<p style='text-align:center;'>{summary}</p>"
    caption = Div(text=heading_html + summary_html)

    # NOTE(review): an explicit width is passed together with 'stretch_width';
    # confirm against the Bokeh layout docs which of the two takes effect.
    layout = column(caption, plot, width=900, sizing_mode='stretch_width')

    # The tab label reuses the same title shown in the caption.
    tabs.append(TabPanel(child=layout, title=title))

# Wrap every panel in a single Tabs widget and render it in the notebook.
tabs_layout = Tabs(tabs=tabs)
show(tabs_layout)
