In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import scipy

In [None]:
# Relative path to the dataset CSV
df1 = 'Datasets/mental_health_and_technology_usage_2024.csv'

# Read the CSV into the DataFrame that the cells below operate on
data = pd.read_csv(filepath_or_buffer=df1)

In [None]:
# Display basic info from the dataset.
# `head` has to be *called*: printing the bare attribute `data.head` shows the
# bound-method repr instead of the first five rows.
print(data.head())

# Column labels, to confirm the names used by the analysis cells
print(data.columns)

In [None]:
# Define the age bins and corresponding labels.
# Edges 18/35/50/65 combined with right-closed intervals give
# [18, 35], (35, 50], (50, 65] -- exactly what the labels say.
age_bins = [18, 35, 50, 65]
age_labels = ['18-35', '36-50', '51-65']  # One label per interval

# Apply pd.cut() to create the 'Age_Group' column in the dataset.
# right=True closes each interval on its upper edge, so 35 -> '18-35',
# 50 -> '36-50' and 65 -> '51-65'. With right=False those ages would be pushed
# into the next group and 65 would fall outside every bin (NaN).
# include_lowest=True keeps age 18 inside the first interval.
data['Age_Group'] = pd.cut(
    data['Age'],
    bins=age_bins,
    labels=age_labels,
    right=True,
    include_lowest=True,
)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Grid of panels: one row per age group, activity on the left, sleep on the right
fig, axes = plt.subplots(3, 2, figsize=(14, 18))
fig.suptitle("Social Media Usage vs Physical Activity and Sleep Hours by Age Group", fontsize=16)

# (y column, axis/title label) for the left and right panel of every row
panel_specs = [
    ('Physical_Activity_Hours', "Physical Activity Hours"),
    ('Sleep_Hours', "Sleep Hours"),
]

for i, age_group in enumerate(age_labels):
    # Keep only the rows that belong to the current age group
    subset = data[data['Age_Group'] == age_group]

    # Draw social media usage against each y column in this row's two panels
    for col_idx, (y_column, y_label) in enumerate(panel_specs):
        ax = axes[i, col_idx]
        sns.scatterplot(ax=ax, data=subset, x='Social_Media_Usage_Hours', y=y_column, alpha=0.5)
        ax.set_title(f"{age_group}: Social Media Usage vs {y_label}")
        ax.set_xlabel("Social Media Usage Hours")
        ax.set_ylabel(y_label)

# Reserve room at the top of the figure for the suptitle
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

In [None]:
# Correlation matrices keyed by age-group label
correlations = {}

# The three columns whose pairwise correlations are computed
corr_columns = ['Social_Media_Usage_Hours', 'Physical_Activity_Hours', 'Sleep_Hours']

for age_group in age_labels:
    # Restrict to the rows of the current age group, then correlate the three columns
    subset = data.loc[data['Age_Group'] == age_group]
    correlations[age_group] = subset[corr_columns].corr()

# Report every matrix, with a ruler between groups for readability
separator = "\n" + "="*50 + "\n"
for age_group, corr_matrix in correlations.items():
    print(f"Correlation matrix for age group {age_group}:\n")
    print(corr_matrix)
    print(separator)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Correlation of the three variables across the whole dataset (all age groups together)
heatmap_columns = ['Social_Media_Usage_Hours', 'Physical_Activity_Hours', 'Sleep_Hours']
correlation_matrix = data[heatmap_columns].corr()

# Draw the annotated heatmap on an explicit Axes; the colour scale is pinned to [-1, 1]
fig_heat, ax_heat = plt.subplots(figsize=(6, 4))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap="coolwarm",
    vmin=-1,
    vmax=1,
    cbar=True,
    square=True,
    ax=ax_heat,
)
ax_heat.set_title("Correlation Heatmap: Social Media Usage, Physical Activity, and Sleep Hours")
plt.show()



In [None]:
# Ensure Pandas and Numpy are imported
import pandas as pd
import numpy as np

# Expects 'data' to carry 'Age_Group', 'Stress_Level', 'Mental_Health_Status',
# 'Social_Media_Usage_Hours', 'Physical_Activity_Hours', and 'Sleep_Hours'

# Ordinal encodings for the two qualitative columns
stress_mapping = {'Low': 1, 'Medium': 2, 'High': 3}
mental_health_mapping = {'Excellent': 4, 'Good': 3, 'Fair': 2, 'Poor': 1}

# Add one numeric column per qualitative column (values missing from a mapping become NaN)
for source_col, target_col, mapping in (
    ('Stress_Level', 'Stress_Level_Numeric', stress_mapping),
    ('Mental_Health_Status', 'Mental_Health_Numeric', mental_health_mapping),
):
    data[target_col] = data[source_col].map(mapping)

# Metrics summarised per age group
metric_columns = [
    'Stress_Level_Numeric',
    'Mental_Health_Numeric',
    'Social_Media_Usage_Hours',
    'Physical_Activity_Hours',
    'Sleep_Hours',
]

# Mean and standard deviation of every metric per age group; columns are a
# two-level (metric, statistic) index and 'Age_Group' is restored as a column
summary = (
    data.groupby('Age_Group', observed=False)[metric_columns]
    .agg(['mean', 'std'])
    .reset_index()
)

# Split the two statistics into their own flat DataFrames for easy plotting
summary_mean = summary.xs('mean', level=1, axis=1).copy()
summary_std = summary.xs('std', level=1, axis=1).copy()

for stat_frame in (summary_mean, summary_std):
    # Plain metric names as column labels, then Age_Group appended as the last column
    stat_frame.columns = list(metric_columns)
    stat_frame['Age_Group'] = summary['Age_Group']

In [None]:
# Sanity check: show the first rows of both summary tables, each under its own heading
print(
    "summary_mean:",
    summary_mean.head(),
    "\nsummary_std:",
    summary_std.head(),
    sep="\n",
)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Set up data for plotting
age_groups = summary_mean['Age_Group']
stress_means = summary_mean['Stress_Level_Numeric']
stress_std = summary_std['Stress_Level_Numeric']
mental_health_means = summary_mean['Mental_Health_Numeric']
mental_health_std = summary_std['Mental_Health_Numeric']
social_media_means = summary_mean['Social_Media_Usage_Hours']
social_media_std = summary_std['Social_Media_Usage_Hours']
physical_activity_means = summary_mean['Physical_Activity_Hours']
physical_activity_std = summary_std['Physical_Activity_Hours']
sleep_means = summary_mean['Sleep_Hours']
sleep_std = summary_std['Sleep_Hours']


def plot_metric_with_lifestyle_context(headline_means, headline_std, headline_name, headline_ylabel, bar_color):
    """Draw one three-panel figure: a headline metric next to usage/lifestyle metrics.

    The left panel is a bar chart of the headline metric per age group. The middle
    and right panels are the same for every figure (social media usage; physical
    activity and sleep) and read the per-age-group series defined above in this cell.
    Every panel uses the standard deviation as its error bar.

    Parameters:
        headline_means: per-age-group means of the headline metric.
        headline_std: per-age-group standard deviations of the headline metric.
        headline_name: metric name inserted into the figure and panel titles.
        headline_ylabel: y-axis label of the bar chart.
        bar_color: matplotlib colour of the bars.

    Returns:
        The (figure, axes) pair that was drawn and shown.
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    fig.suptitle(
        f"Average {headline_name} and Related Metrics by Age Group with Standard Deviation Error Bars",
        fontsize=16,
    )

    # Bar plot for the headline metric by Age Group with error bars
    axes[0].bar(age_groups, headline_means, yerr=headline_std, capsize=5, color=bar_color)
    axes[0].set_title(f"Average {headline_name} by Age Group")
    axes[0].set_ylabel(headline_ylabel)

    # Line plot for Social Media Usage by Age Group with error bars
    axes[1].errorbar(age_groups, social_media_means, yerr=social_media_std, fmt='-o', capsize=5)
    axes[1].set_title("Social Media Usage Hours by Age Group")
    axes[1].set_ylabel("Average Social Media Usage Hours")

    # Line plot for Physical Activity and Sleep Hours by Age Group with error bars
    axes[2].errorbar(age_groups, physical_activity_means, yerr=physical_activity_std, fmt='-o', label="Physical Activity Hours", capsize=5)
    axes[2].errorbar(age_groups, sleep_means, yerr=sleep_std, fmt='-o', label="Sleep Hours", capsize=5)
    axes[2].set_title("Physical Activity and Sleep Hours by Age Group")
    axes[2].set_ylabel("Average Hours")
    axes[2].legend()

    # Label the shared x dimension so each panel can be read on its own
    for ax in axes:
        ax.set_xlabel("Age Group")

    # Reserve room at the top of the figure for the suptitle
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()
    return fig, axes


# Plotting Stress Level Analysis with error bars
fig, axes = plot_metric_with_lifestyle_context(
    stress_means,
    stress_std,
    "Stress Level",
    "Average Stress Level (1=Low, 3=High)",
    'skyblue',
)

# Plotting Mental Health Analysis with error bars
fig, axes = plot_metric_with_lifestyle_context(
    mental_health_means,
    mental_health_std,
    "Mental Health Status",
    "Average Mental Health Status (1=Poor, 4=Excellent)",
    'salmon',
)
