# MSc Dissertation – Data Science, by Robert Solomon
### Analysis of Remote Work Impact on Employee Well-Being (Cleaned Primary Dataset)

In [46]:
# Importing necessary libraries here below:

import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy.stats import f_oneway
import statsmodels.api as sm
from statsmodels.formula.api import ols

### 1. Loading in the primary survey data

In [34]:
# Read the cleaned primary-research survey CSV into a DataFrame.
# The path is relative, so it resolves against the notebook's working directory.
wfh_cleanedMentalHealth_surveyData = pd.read_csv(
    filepath_or_buffer='../Primary_Research/PR_Dataset/cleaned_WFH-Mental_Health_(Survey).csv',
)

In [36]:
# Preview the top five records to sanity-check the loaded columns and values
wfh_cleanedMentalHealth_surveyData.head(n=5)

Unnamed: 0,Timestamp,Age_Group,Gender,Work_Location,Industry,Work_Life_Balance,Weekly_Hours_Worked,Stress_Level,Stress_Factors,Social_Isolation_Frequency,Lack_Of_Team_Connection,Employer_Mental_Health_Support,Mental_Health_Recommendations,stress_score
0,2025/01/27 12:02:17 PM GMT,25–34,0,,IT/Technology,4,35,4,Isolation from colleagues,1.0,1.0,2,Stop remote work and have people come into the...,3.0
1,2025/01/27 12:02:39 PM GMT,25–34,0,1.0,IT/Technology,5,40,5,Lack of clarity about whether I will be forced...,2.0,0.0,1,No Response,3.0
2,2025/01/27 12:11:36 PM GMT,35–44,0,1.0,IT/Technology,5,39,5,Other (please specify);Less commute / Less tim...,3.0,0.0,3,Regional hot desk options at some of the many ...,2.0
3,2025/01/27 12:14:54 PM GMT,45–54,0,1.0,IT/Technology,5,39,5,On call over weekends,3.0,0.0,3,Occasional lunchtime meetups if enough people ...,2.0
4,2025/01/27 12:25:24 PM GMT,35–44,1,1.0,IT/Technology,2,9,2,Increased workload,4.0,0.0,3,Add remote workers team lead to identify possi...,-2.0


### 2. Grouping stress levels by work location

In [41]:
# Collect the Stress_Level responses for each Work_Location code in one pass.
# NOTE(review): codes 1.0 / 2.0 / 3.0 are taken to mean remote / hybrid / onsite,
# as the variable names suggest — confirm against the survey codebook.
# Rows whose Work_Location is missing match none of the codes and fall out of all groups.
remote_stress, hybrid_stress, onsite_stress = (
    wfh_cleanedMentalHealth_surveyData.loc[
        wfh_cleanedMentalHealth_surveyData['Work_Location'] == location_code,
        'Stress_Level',
    ]
    for location_code in (1.0, 2.0, 3.0)
)

### 2.1 Performing one-way ANOVA (for Work Location and Stress Levels) for Primary Data

In [48]:
# One-way ANOVA comparing Stress_Level across the remote, hybrid and onsite groups
anova_result = f_oneway(remote_stress, hybrid_stress, onsite_stress)

In [50]:
# Report the one-way ANOVA outcome and interpret it against a fixed significance level.
SIGNIFICANCE_LEVEL = 0.05  # conventional 5% threshold used for the decision below

# Printing the results
print("One-Way ANOVA Results for Work Location and Stress Levels")
print(f"F-statistic: {anova_result.statistic:.4f}, p-value: {anova_result.pvalue:.4f}")

# Interpreting the results
if np.isnan(anova_result.pvalue):
    # NaN compares False against any threshold, so without this branch an
    # uncomputable test (e.g. an empty group or missing values in the input)
    # would be reported as "no significant difference".
    print("Result: ANOVA could not be computed (p-value is NaN); check group sizes and missing values.")
elif anova_result.pvalue < SIGNIFICANCE_LEVEL:
    print("Result: Significant difference found in stress levels across work locations.")
else:
    print("Result: No significant difference in stress levels across work locations.")

One-Way ANOVA Results for Work Location and Stress Levels
F-statistic: 1.6206, p-value: 0.2105
Result: No significant difference in stress levels across work locations.


### 2.1.1 Interpretation of the above Output
If p-value < 0.05, we reject the null hypothesis, meaning mean stress levels differ significantly across remote, hybrid, and onsite workers.
If p-value ≥ 0.05, we fail to reject the null hypothesis, meaning there is no statistically significant evidence that stress levels differ between work types. That is the case here (F = 1.6206, p = 0.2105).