In [23]:
import pandas as pd

In [47]:
from tabulate import tabulate 

In [49]:
from scipy.stats import chi2_contingency

In [51]:
df = pd.read_csv('Modified chart.csv')

In [53]:
df_copy = df.copy(deep=True) 

In [55]:
df_copy['Increased_Work_Hours'] = pd.to_numeric(df_copy['Increased_Work_Hours'], errors='coerce') 

In [57]:
columns_to_analyze = ['Increased_Work_Hours', 'Work_From_Home', 'Hours_Worked_Per_Day', 'Meetings_Per_Day', 'Productivity_Change',
                     'Stress_Level', 'Health_Issue', 'Job_Security', 'Childcare_Responsibilities', 'Commuting_Changes',
                     'Technology_Adaptation', 'Salary_Changes', 'Team_Collaboration_Challenges', 'Sector', 'Affected_by_Covid']

In [59]:
numerical_columns = ['Increased_Work_Hours', 'Work_From_Home', 'Hours_Worked_Per_Day', 'Meetings_Per_Day', 'Productivity_Change',
                     'Stress_Level', 'Health_Issue', 'Job_Security', 'Childcare_Responsibilities', 'Commuting_Changes',
                     'Technology_Adaptation', 'Salary_Changes', 'Team_Collaboration_Challenges', 'Sector', 'Affected_by_Covid']

In [61]:
for column in columns_to_analyze:
    column_counts = df_copy[column].value_counts()
    
    if column == 'Increased_Work_Hours':
        column_counts.index = column_counts.index.map({0: 'Decreased', 1: 'Increased'}) 
    
    freq_table = pd.DataFrame({column: column_counts.index, 'Frequency': column_counts})
    print(f"\nFrequency Table for {column}:\n")
    print(tabulate(freq_table, headers='keys', tablefmt='grid')) 

for column in numerical_columns:
    print(f"\nSummary Statistics for {column}:\n")
    print(df_copy[column].describe())


Frequency Table for Increased_Work_Hours:

+------------------------+------------------------+-------------+
| Increased_Work_Hours   | Increased_Work_Hours   | Frequency   |
+------------------------+------------------------+-------------+

Frequency Table for Work_From_Home:

+------------------+------------------+-------------+
| Work_From_Home   | Work_From_Home   |   Frequency |
| Yes              | Yes              |         813 |
+------------------+------------------+-------------+
| No               | No               |         187 |
+------------------+------------------+-------------+

Frequency Table for Hours_Worked_Per_Day:

+------------------------+------------------------+-------------+
|   Hours_Worked_Per_Day |   Hours_Worked_Per_Day |   Frequency |
|                      8 |                      8 |         197 |
+------------------------+------------------------+-------------+
|                      7 |                      7 |         167 |
+---------------------

In [63]:
cross_tab = pd.crosstab(index=df_copy['Sector'], columns=df_copy['Work_From_Home'])
cross_tab['Total'] = cross_tab.sum(axis = 1)
cross_tab.loc['Total'] = cross_tab.sum(axis = 0)
cross_tab.columns = ['Work_From_Home (No)', 'Work_From_Home (Yes)', 'Total']
print(cross_tab.to_markdown(numalign="left", stralign="left")) 

print("\n\n")

cross_tab = pd.crosstab(index=df_copy['Sector'], columns=df_copy['Stress_Level'])
cross_tab['Total'] = cross_tab.sum(axis = 1)
cross_tab.loc['Total'] = cross_tab.sum(axis = 0)
cross_tab.columns = ['Stress_Level (Low)', 'Stress_Level (Medium)', 'Stress_Level (High)', 'Total']
print(cross_tab.to_markdown(numalign="left", stralign="left"))

print("\n\n")

cross_tab = pd.crosstab(index=df_copy['Sector'], columns=df_copy['Team_Collaboration_Challenges'])
cross_tab['Total'] = cross_tab.sum(axis = 1)
cross_tab.loc['Total'] = cross_tab.sum(axis = 0)
cross_tab.columns = ['Team_Collaboration_Challenges (No)', 'Team_Collaboration_Challenges (Yes)', 'Total']
print(cross_tab.to_markdown(numalign="left", stralign="left"))

print("\n\n")

cross_tab = pd.crosstab(index=df_copy['Work_From_Home'], columns=df_copy['Stress_Level'])
cross_tab['Total'] = cross_tab.sum(axis = 1)
cross_tab.loc['Total'] = cross_tab.sum(axis = 0)
cross_tab.columns = ['Stress_Level (Low)', 'Stress_Level (Medium)', 'Stress_Level (High)', 'Total']
print(cross_tab.to_markdown(numalign="left", stralign="left"))

print("\n\n")

df['Increased_Work_Hours (Increased)'] = (df['Increased_Work_Hours'] == 'Increased').astype(int)
df['Increased_Work_Hours (Decreased)'] = (df['Increased_Work_Hours'] == 'Decreased').astype(int)
cross_tab_data = {
    'Increased_Work_Hours (Increased)': [
        (df['Increased_Work_Hours (Increased)'] & (df['Productivity_Change'] == 'Improved Productivity')).sum(),
        (df['Increased_Work_Hours (Increased)'] & (df['Productivity_Change'] == 'No Productivity')).sum()
    ],
    'Increased_Work_Hours (Decreased)': [
        (df['Increased_Work_Hours (Decreased)'] & (df['Productivity_Change'] == 'Improved Productivity')).sum(),
        (df['Increased_Work_Hours (Decreased)'] & (df['Productivity_Change'] == 'No Productivity')).sum()
    ]
}
cross_tab = pd.DataFrame(cross_tab_data, 
                        index=['Productivity_Change (Improved Productivity)', 'Productivity_Change (No Productivity)'])
cross_tab['Total'] = cross_tab.sum(axis=1)
cross_tab.loc['Total'] = cross_tab.sum(axis=0)
print(cross_tab.to_markdown(numalign="left", stralign="left")) 

| Sector     | Work_From_Home (No)   | Work_From_Home (Yes)   | Total   |
|:-----------|:----------------------|:-----------------------|:--------|
| Education  | 46                    | 215                    | 261     |
| Healthcare | 43                    | 205                    | 248     |
| IT         | 56                    | 205                    | 261     |
| Retail     | 42                    | 188                    | 230     |
| Total      | 187                   | 813                    | 1000    |



| Sector     | Stress_Level (Low)   | Stress_Level (Medium)   | Stress_Level (High)   | Total   |
|:-----------|:---------------------|:------------------------|:----------------------|:--------|
| Education  | 83                   | 56                      | 122                   | 261     |
| Healthcare | 77                   | 47                      | 124                   | 248     |
| IT         | 79                   | 61                      | 121                   |

In [65]:
cross_tab = pd.crosstab(index=df['Sector'], columns=df['Work_From_Home'])

In [67]:
chi2_stat, p_value, dof, expected = chi2_contingency(cross_tab)

In [69]:
print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)

Chi-square statistic: 1.8339400692378605
p-value: 0.607577194060132
Degrees of Freedom: 3
Expected Frequencies:
 [[ 48.807 212.193]
 [ 46.376 201.624]
 [ 48.807 212.193]
 [ 43.01  186.99 ]]


In [115]:
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis. There is a statistically significant association between Sector and Work_From_Home.")
else:
    print("Fail to reject the null hypothesis. There is no statistically significant association between Sector and Work_From_Home.")

Fail to reject the null hypothesis. There is no statistically significant association between Sector and Work_From_Home.


In [77]:
cross_tab = pd.crosstab(index=df['Sector'], columns=df['Stress_Level'])

In [79]:
chi2_stat, p_value, dof, expected = chi2_contingency(cross_tab)

In [81]:
print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)

Chi-square statistic: 4.261200600848883
p-value: 0.6413753651033982
Degrees of Freedom: 6
Expected Frequencies:
 [[ 83.781  53.244 123.975]
 [ 79.608  50.592 117.8  ]
 [ 83.781  53.244 123.975]
 [ 73.83   46.92  109.25 ]]


In [119]:
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis. There is a statistically significant association between Sector and Stress_Level.")
else:
    print("Fail to reject the null hypothesis. There is no statistically significant association between Sector and Stress_Level.")

Fail to reject the null hypothesis. There is no statistically significant association between Sector and Stress_Level.


In [87]:
cross_tab = pd.crosstab(index=df['Sector'], columns=df['Team_Collaboration_Challenges'])

In [89]:
chi2_stat, p_value, dof, expected = chi2_contingency(cross_tab)

In [91]:
print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)

Chi-square statistic: 4.580971018521904
p-value: 0.2051805743511553
Degrees of Freedom: 3
Expected Frequencies:
 [[ 77.517 183.483]
 [ 73.656 174.344]
 [ 77.517 183.483]
 [ 68.31  161.69 ]]


In [123]:
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis. There is a statistically significant association between Sector and Team_Collaboration_Challenges.")
else:
    print("Fail to reject the null hypothesis. There is no statistically significant association between Sector and Team_Collaboration_Challenges.")

Fail to reject the null hypothesis. There is no statistically significant association between Sector and Team_Collaboration_Challenges.


In [95]:
cross_tab = pd.crosstab(index=df['Work_From_Home'], columns=df['Stress_Level'])

In [133]:
chi2_stat, p_value, dof, expected = chi2_contingency(cross_tab)

In [135]:
print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)

Chi-square statistic: 0.4746115241145468
p-value: 0.4908732666304465
Degrees of Freedom: 1
Expected Frequencies:
 [[169.66 170.34]
 [329.34 330.66]]


In [127]:
alpha = 0.10
if p_value < alpha:
    print("Reject the null hypothesis. There is a statistically significant association between Work_From_Home and Stress_Level.")
else:
    print("Fail to reject the null hypothesis. There is no statistically significant association between Work_From_Home and Stress_Level.")

Fail to reject the null hypothesis. There is no statistically significant association between Work_From_Home and Stress_Level.


In [105]:
cross_tab = pd.crosstab(index=df['Increased_Work_Hours'], columns=df['Productivity_Change'])

In [107]:
chi2_stat, p_value, dof, expected = chi2_contingency(cross_tab)

In [109]:
print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)

Chi-square statistic: 0.4746115241145468
p-value: 0.4908732666304465
Degrees of Freedom: 1
Expected Frequencies:
 [[169.66 170.34]
 [329.34 330.66]]


In [131]:
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis. There is a statistically significant association between Increased_Work_Hours and Productivity_Change.")
else:
    print("Fail to reject the null hypothesis. There is no statistically significant association between Increased_Work_Hours and Productivity_Change.")

Fail to reject the null hypothesis. There is no statistically significant association between Increased_Work_Hours and Productivity_Change.


In [137]:
cross_tab = pd.crosstab(index=df['Salary_Changes'], columns=df['Sector'])

In [139]:
chi2_stat, p_value, dof, expected = chi2_contingency(cross_tab)

In [141]:
print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)

Chi-square statistic: 6.57004488006167
p-value: 0.08694066272421012
Degrees of Freedom: 3
Expected Frequencies:
 [[ 53.505  50.84   53.505  47.15 ]
 [207.495 197.16  207.495 182.85 ]]


In [147]:
alpha = 0.10
if p_value < alpha:
    print("Reject the null hypothesis. There is a statistically significant association between Salary_Changes and Sector.")
else:
    print("Fail to reject the null hypothesis. There is no statistically significant association between Salary_Changes and Sector.")

Reject the null hypothesis. There is a statistically significant association between Salary_Changes and Sector.


In [149]:
cross_tab = pd.crosstab(index=df['Salary_Changes'], columns=df['Work_From_Home'])

In [151]:
chi2_stat, p_value, dof, expected = chi2_contingency(cross_tab)

In [153]:
print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)

Chi-square statistic: 0.700127069881332
p-value: 0.402741000221729
Degrees of Freedom: 1
Expected Frequencies:
 [[ 38.335 166.665]
 [148.665 646.335]]


In [175]:
alpha = 0.10
if p_value < alpha:
    print("Reject the null hypothesis. There is a statistically significant association between Salary_Changes and Work_From_Home.")
else:
    print("Fail to reject the null hypothesis. There is no statistically significant association between Salary_Changes and Work_From_Home.")

Fail to reject the null hypothesis. There is no statistically significant association between Salary_Changes and Work_From_Home.


In [163]:
cross_tab = pd.crosstab(index=df['Salary_Changes'], columns=df['Stress_Level'])

In [165]:
chi2_stat, p_value, dof, expected = chi2_contingency(cross_tab)

In [167]:
print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)

Chi-square statistic: 0.8040985050428592
p-value: 0.6689477975132307
Degrees of Freedom: 2
Expected Frequencies:
 [[ 65.805  41.82   97.375]
 [255.195 162.18  377.625]]


In [181]:
alpha = 0.10
if p_value < alpha:
    print("Reject the null hypothesis. There is a statistically significant association between Salary_Changes and Stress_Level.")
else:
    print("Fail to reject the null hypothesis. There is no statistically significant association between Salary_Changes and Stress_Level.")

Fail to reject the null hypothesis. There is no statistically significant association between Salary_Changes and Stress_Level.


In [183]:
cross_tab = pd.crosstab(index=df['Affected_by_Covid'], columns=df['Stress_Level'])

In [185]:
chi2_stat, p_value, dof, expected = chi2_contingency(cross_tab)

In [187]:
print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)

Chi-square statistic: 0.0
p-value: 1.0
Degrees of Freedom: 0
Expected Frequencies:
 [[321. 204. 475.]]


In [195]:
alpha = 0.10
if p_value < alpha:
    print("Reject the null hypothesis. There is a statistically significant association between Affected_by_Covid and Stress_Level.")
else:
    print("Fail to reject the null hypothesis. There is no statistically significant association between Affected_by_Covid and Stress_Level.")

Fail to reject the null hypothesis. There is no statistically significant association between Affected_by_Covid and Stress_Level.


In [197]:
cross_tab = pd.crosstab(index=df['Affected_by_Covid'], columns=df['Work_From_Home'])

In [199]:
chi2_stat, p_value, dof, expected = chi2_contingency(cross_tab)

In [201]:
print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)

Chi-square statistic: 0.0
p-value: 1.0
Degrees of Freedom: 0
Expected Frequencies:
 [[187. 813.]]


In [209]:
alpha = 0.10
if p_value < alpha:
    print("Reject the null hypothesis. There is a statistically significant association between Affected_by_Covid and Work_From_Home.")
else:
    print("Fail to reject the null hypothesis. There is no statistically significant association between Affected_by_Covid and Work_From_Home.")

Fail to reject the null hypothesis. There is no statistically significant association between Affected_by_Covid and Work_From_Home.


In [211]:
cross_tab = pd.crosstab(index=df['Affected_by_Covid'], columns=df['Team_Collaboration_Challenges'])

In [213]:
chi2_stat, p_value, dof, expected = chi2_contingency(cross_tab)

In [215]:
print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)

Chi-square statistic: 0.0
p-value: 1.0
Degrees of Freedom: 0
Expected Frequencies:
 [[297. 703.]]


In [223]:
alpha = 0.10
if p_value < alpha:
    print("Reject the null hypothesis. There is a statistically significant association between Affected_by_Covid and Team_Collaboration_Challenges.")
else:
    print("Fail to reject the null hypothesis. There is no statistically significant association between Affected_by_Covid and Team_Collaboration_Challenges.")

Fail to reject the null hypothesis. There is no statistically significant association between Affected_by_Covid and Team_Collaboration_Challenges.


In [225]:
cross_tab = pd.crosstab(index=df['Childcare_Responsibilities'], columns=df['Stress_Level'])

In [227]:
chi2_stat, p_value, dof, expected = chi2_contingency(cross_tab)

In [229]:
print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)

Chi-square statistic: 3.321494756444695
p-value: 0.18999692745871138
Degrees of Freedom: 2
Expected Frequencies:
 [[187.464 119.136 277.4  ]
 [133.536  84.864 197.6  ]]


In [245]:
alpha = 0.10
if p_value < alpha:
    print("Reject the null hypothesis. There is a statistically significant association between Childcare_Responsibilities and Stress_Level.")
else:
    print("Fail to reject the null hypothesis. There is no statistically significant association between Childcare_Responsibilities and Stress_Level.")

Fail to reject the null hypothesis. There is no statistically significant association between Childcare_Responsibilities and Stress_Level.


In [247]:
cross_tab = pd.crosstab(index=df['Childcare_Responsibilities'], columns=df['Work_From_Home'])

In [249]:
chi2_stat, p_value, dof, expected = chi2_contingency(cross_tab)

In [251]:
print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)

Chi-square statistic: 0.07898368697599206
p-value: 0.7786794398611161
Degrees of Freedom: 1
Expected Frequencies:
 [[109.208 474.792]
 [ 77.792 338.208]]


In [257]:
alpha = 0.10
if p_value < alpha:
    print("Reject the null hypothesis. There is a statistically significant association between Childcare_Responsibilities and Work_From_Home.")
else:
    print("Fail to reject the null hypothesis. There is no statistically significant association between Childcare_Responsibilities and Work_From_Home.")

Fail to reject the null hypothesis. There is no statistically significant association between Childcare_Responsibilities and Work_From_Home.


In [261]:
cross_tab = pd.crosstab(index=df['Commuting_Changes'], columns=df['Stress_Level'])

In [263]:
chi2_stat, p_value, dof, expected = chi2_contingency(cross_tab)

In [265]:
print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)

Chi-square statistic: 1.332669950210456
p-value: 0.5135874434044903
Degrees of Freedom: 2
Expected Frequencies:
 [[164.673 104.652 243.675]
 [156.327  99.348 231.325]]


In [281]:
alpha = 0.10
if p_value < alpha:
    print("Reject the null hypothesis. There is a statistically significant association between Commuting_Changes and Stress_Level.")
else:
    print("Fail to reject the null hypothesis. There is no statistically significant association between Commuting_Changes and Stress_Level.")

Fail to reject the null hypothesis. There is no statistically significant association between Commuting_Changes and Stress_Level.


In [285]:
cross_tab = pd.crosstab(index=df['Technology_Adaptation'], columns=df['Stress_Level'])

In [287]:
chi2_stat, p_value, dof, expected = chi2_contingency(cross_tab)

In [289]:
print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)

Chi-square statistic: 1.3295465360844574
p-value: 0.5143901431701257
Degrees of Freedom: 2
Expected Frequencies:
 [[126.153  80.172 186.675]
 [194.847 123.828 288.325]]


In [297]:
alpha = 0.10
if p_value < alpha:
    print("Reject the null hypothesis. There is a statistically significant association between Technology_Adaptation and Stress_Level.")
else:
    print("Fail to reject the null hypothesis. There is no statistically significant association between Technology_Adaptation and Stress_Level.")

Fail to reject the null hypothesis. There is no statistically significant association between Technology_Adaptation and Stress_Level.


In [299]:
cross_tab = pd.crosstab(index=df['Technology_Adaptation'], columns=df['Work_From_Home'])

In [301]:
chi2_stat, p_value, dof, expected = chi2_contingency(cross_tab)

In [303]:
print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)

Chi-square statistic: 0.028071714538951662
p-value: 0.8669403331698733
Degrees of Freedom: 1
Expected Frequencies:
 [[ 73.491 319.509]
 [113.509 493.491]]


In [311]:
alpha = 0.10
if p_value < alpha:
    print("Reject the null hypothesis. There is a statistically significant association between Technology_Adaptation and Work_From_Home.")
else:
    print("Fail to reject the null hypothesis. There is no statistically significant association between Technology_Adaptation and Work_From_Home.")

Fail to reject the null hypothesis. There is no statistically significant association between Technology_Adaptation and Work_From_Home.
