**Phase 1 - Data Acquisition and Cleaning**

In [None]:
# Load libraries
library(dplyr)
library(readr)

# Load the raw dataset (one row per participant per recorded day).
data <- read_csv("health_fitness_dataset.csv")

# Rename the columns used downstream. The "avg_" prefix anticipates the
# per-participant means computed below; at this point the values are still
# the individual daily observations.
data <- data %>%
  rename(
    user_id = participant_id,
    avg_daily_steps = daily_steps,
    avg_sleep_duration = sleep_hours
  )

# Clean data: drop exact duplicate rows, then any row missing a key field.
data_clean <- data %>%
  distinct() %>%
  filter(
    !is.na(user_id),
    !is.na(avg_daily_steps),
    !is.na(avg_sleep_duration)
  )

# Summarise per participant.
summary_data <- data_clean %>%
  group_by(user_id) %>%
  summarise(
    avg_daily_steps = mean(avg_daily_steps, na.rm = TRUE),
    avg_sleep_duration = mean(avg_sleep_duration, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    # Classify on the exact mean BEFORE rounding. Rounding first would push a
    # mean such as 4999.6 up to 5000 and move the participant across the
    # category threshold. mutate() evaluates its arguments in order, so the
    # rounding below does not affect this classification.
    lifestyle_category = case_when(
      avg_daily_steps < 5000 ~ "Sedentary",
      avg_daily_steps < 10000 ~ "Moderately Active",
      TRUE ~ "Active"
    ),
    # Round only for presentation / export.
    avg_daily_steps = round(avg_daily_steps, 0),
    avg_sleep_duration = round(avg_sleep_duration, 2)
  )

# Count number of participants per lifestyle category
category_counts <- summary_data %>%
  count(lifestyle_category, name = "number_of_participants")

# View the category counts
print(category_counts)

cat("\n")

# View the per-participant summary
print(summary_data)

# Save the per-participant summary for the later analysis cells
write_csv(summary_data, "cleaned_fitness_data.csv")

Rows: 275602 Columns: 23
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (6): date, day_of_week, gender, activity_type, intensity, smoking_status
dbl (17): participant_id, age, height_cm, weight_kg, bmi, duration_minutes, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.


# A tibble: 3 × 2
  lifestyle_category number_of_participants
  <chr>                               <int>
1 Active                                639
2 Moderately Active                    1635
3 Sedentary                             726

[90m# A tibble: 3,000 × 4[39m
   user_id avg_daily_steps avg_sleep_duration lifestyle_category
     [3m[90m<dbl>[39m[23m           [3m[90m<dbl>[39m[23m              [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m             
[90m 1[39m       1            [4m7[24m518               7.02 Moderately Active 
[90m 2[39m       2            [4m7[24m507               6.95 Moderately Active 
[90m 3[39m       3           [4m1[24m[4m2[24m554               7.05 Active            
[90m 4[39m       4            [4m7[24m415               7.22 Moderately Active 
[90m 5[39m       5            [4m7[24m388               7.04 Moderately Active 
[90m 6

**Phase 2 - Statistical Analysis**

**Task A: Large-Sample Hypothesis Test**

In [None]:
# Task A: one-sample, one-tailed z-test of H0: mu = 10,000 vs H1: mu > 10,000
# on the per-participant average daily steps of the "Active" group.

# Step 1: Filter for Active users
active_group <- summary_data %>% filter(lifestyle_category == "Active")

# View result
print(active_group)
cat("\n")

# Save to CSV
write_csv(active_group, "taskA_active_group.csv")

# Step 2: Compute sample statistics
n <- nrow(active_group)
if (n < 2) {
  # sd() is NA for fewer than two observations, which would silently turn the
  # z-score and p-value into NA and break the if() in Step 7.
  stop("Task A needs at least two Active users to compute a z-score.",
       call. = FALSE)
}
x_bar <- mean(active_group$avg_daily_steps)
s <- sd(active_group$avg_daily_steps)
mu_0 <- 10000

# Step 3: Compute z-score
z_score <- (x_bar - mu_0) / (s / sqrt(n))

# Step 4: Compute p-value (one-tailed test: mean > 10,000).
# Ask pnorm() for the upper tail directly: `1 - pnorm(z)` loses all precision
# for large z and collapses to exactly 0.
p_value <- pnorm(z_score, lower.tail = FALSE)

# Step 5: Compile results into a clean table
test_results <- data.frame(
  Statistic = c("Sample Mean (x̄)", "Sample Std Dev (s)", "Sample Size (n)", "Z-Score", "P-Value"),
  Value = c(round(x_bar, 2), round(s, 2), n, round(z_score, 4), round(p_value, 4))
)

# Step 6: Print results
cat("=== One-Sample Z-Test Results (All Active Users) ===\n")
print(test_results)

# Step 7: Conclusion
# NOTE(review): the Active group is selected by the same 10,000-step threshold
# that is being tested, so rejection of H0 is expected by construction —
# confirm this is the intended study design.
alpha <- 0.05
if (p_value < alpha) {
  cat("Conclusion: Reject H0 — The average daily steps for Active users is significantly greater than 10,000.\n")
  cat("Reason: p-value < alpha:", p_value, "<", alpha, "\n")
} else {
  cat("Conclusion: Fail to reject H0 — Not enough evidence to say the average exceeds 10,000 steps.\n")
  cat("Reason: p-value >= alpha:", p_value, ">=", alpha, "\n")
}

[90m# A tibble: 639 × 4[39m
   user_id avg_daily_steps avg_sleep_duration lifestyle_category
     [3m[90m<dbl>[39m[23m           [3m[90m<dbl>[39m[23m              [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m             
[90m 1[39m       3           [4m1[24m[4m2[24m554               7.05 Active            
[90m 2[39m       6           [4m1[24m[4m2[24m472               7.12 Active            
[90m 3[39m      15           [4m1[24m[4m2[24m443               6.98 Active            
[90m 4[39m      22           [4m1[24m[4m2[24m187               6.88 Active            
[90m 5[39m      26           [4m1[24m[4m2[24m422               7.06 Active            
[90m 6[39m      42           [4m1[24m[4m2[24m509               6.93 Active            
[90m 7[39m      43           [4m1[24m[4m2[24m449               6.89 Active            
[90m 8[39m      50           [4m1[24m[4m2[24m213               6.93 Active            
[90m 9[39m      53

**Task B: Small-Sample Paired t-Test**

In [None]:
# Install and load required packages
library(dplyr)
library(readr)
library(tidyr)

# Task B: paired t-test comparing each selected participant's average weekday
# steps against their average weekend steps.

# Load the dataset
data <- read.csv("health_fitness_dataset.csv")

# Convert date column
data$date <- as.Date(data$date)

# Classify each day as Weekday or Weekend.
# POSIXlt$wday is a number (0 = Sunday ... 6 = Saturday), so the result does
# not depend on the session locale the way the day names from weekdays() do.
# Rows whose date is NA fall through to "Weekday", as %in% never returns NA.
data$day_type <- ifelse(
  as.POSIXlt(data$date)$wday %in% c(0L, 6L),
  "Weekend",
  "Weekday"
)

# Keep users with at least 7 days of valid step & sleep data
user_summary <- data %>%
  group_by(participant_id) %>%
  summarise(
    days_with_steps = sum(!is.na(daily_steps)),
    days_with_sleep = sum(!is.na(sleep_hours)),
    .groups = "drop"
  ) %>%
  filter(days_with_steps >= 7 & days_with_sleep >= 7)

# Select the first 10 qualifying users
selected_users <- head(user_summary$participant_id, 10)

# Restrict the main dataset to the selected users
filtered_data <- data %>%
  filter(participant_id %in% selected_users)

# Average steps per user and day type, one row per user with a Weekday and a
# Weekend column. Computed once and reused for both the display table and the
# test input; .groups = "drop" avoids the "grouped output" message.
steps_by_day_type <- filtered_data %>%
  group_by(participant_id, day_type) %>%
  summarise(avg_steps = mean(daily_steps, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = day_type, values_from = avg_steps)

# Display table with descriptive column names
summary_table <- steps_by_day_type %>%
  rename(Weekday_Steps = Weekday, Weekend_Steps = Weekend)

print(summary_table)

# Test input: drop users where either the weekday or weekend mean is missing
step_summary <- steps_by_day_type %>%
  drop_na()

if (nrow(step_summary) < 2) {
  stop("Task B needs at least two users with both weekday and weekend data.",
       call. = FALSE)
}

# Perform paired t-test (two-sided, H0: mean difference = 0)
t_result <- t.test(step_summary$Weekday, step_summary$Weekend, paired = TRUE)

# Neatly print t-test result
cat("\n===== Paired t-test Results =====\n")
cat("t =", round(t_result$statistic, 4),
    "| df =", t_result$parameter,
    "| p-value =", round(t_result$p.value, 5), "\n")
cat("95% Confidence Interval: [",
    round(t_result$conf.int[1], 4), ",",
    round(t_result$conf.int[2], 4), "]\n")
cat("Mean Difference:", round(t_result$estimate, 2), "\n")
cat("H0: μ_weekday - μ_weekend = 0 (no difference)\n")
cat("H1: μ_weekday - μ_weekend ≠ 0 (significant difference)\n")

# Conclusion
if (t_result$p.value < 0.05) {
  cat("\nConclusion: p-value =", round(t_result$p.value, 5), "< 0.05\n")
  cat("Reject the null hypothesis. There is a statistically significant difference.\n")
} else {
  cat("\nConclusion: p-value =", round(t_result$p.value, 5), ">= 0.05\n")
  cat("Fail to reject the null hypothesis. There is no statistically significant difference.\n")
}

# Save CSV outputs
write.csv(step_summary, "user_step_summary.csv", row.names = FALSE)

result_df <- data.frame(
  t_statistic = t_result$statistic,
  df = t_result$parameter,
  p_value = t_result$p.value,
  conf_low = t_result$conf.int[1],
  conf_high = t_result$conf.int[2],
  mean_diff = t_result$estimate
)

write.csv(result_df, "paired_t_test_results.csv", row.names = FALSE)

[1m[22m`summarise()` has grouped output by 'participant_id'. You can override using
the `.groups` argument.


# A tibble: 10 × 3
# Groups:   participant_id [10]
   participant_id Weekday_Steps Weekend_Steps
            <int>         <dbl>         <dbl>
 1              1         7575.         7383.
 2              2         7626.         7271.
 3              3        12590.        12447.
 4              4         7444.         7330 
 5              5         7485.         7108.
 6              6        12531.        12315.
 7              7         7783          7570.
 8              8         3450.         3523.
 9              9         3453.         3283.
10             10         3310.         3892.


[1m[22m`summarise()` has grouped output by 'participant_id'. You can override using
the `.groups` argument.



===== Paired t-test Results =====
t = 1.2971 | df = 9 | p-value = 0.22687 
95% Confidence Interval: [ -83.7481 , 308.8579 ]
Mean Difference: 112.55 
H0: μ_weekday - μ_weekend = 0 (no difference)
H1: μ_weekday - μ_weekend ≠ 0 (significant difference)

Conclusion: p-value = 0.22687 >= 0.05
Fail to reject the null hypothesis. There is no statistically significant difference.


**Task C: Independent Two-Sample t-Test**

In [None]:
# Load necessary libraries
library(dplyr)
library(readr)

# Task C: compare average sleep duration between "Active" and "Sedentary"
# participants. An F-test on the variances chooses between the pooled
# (equal-variance) t-test and Welch's t-test.

# Load your cleaned data
data <- read_csv("cleaned_fitness_data.csv")

# Filter only 'Sedentary' and 'Active' lifestyle categories
filtered_data <- data %>%
  filter(lifestyle_category %in% c("Sedentary", "Active")) %>%
  select(user_id, lifestyle_category, avg_sleep_duration)

# Output to a file (read back by the Task C visualization cell)
write_csv(filtered_data, "task c_sample.csv")

# Display sample data
cat("=== Sample Data ===\n")
print(filtered_data)

# Separate sleep durations
sedentary_sleep <- filtered_data %>%
  filter(lifestyle_category == "Sedentary") %>%
  pull(avg_sleep_duration)

active_sleep <- filtered_data %>%
  filter(lifestyle_category == "Active") %>%
  pull(avg_sleep_duration)

# Step 1: Variance Test (F-test)
variance_test <- var.test(active_sleep, sedentary_sleep)
f_stat <- round(variance_test$statistic, 3)
f_p_value <- round(variance_test$p.value, 4)
# Decide on the exact p-value; the rounded copy above is for display only.
# Rounding first can flip the decision near the cut-off (0.04996 -> 0.05).
equal_variance <- variance_test$p.value >= 0.05

# Step 2: T-test based on variance test result
if (equal_variance) {
  t_test <- t.test(active_sleep, sedentary_sleep, var.equal = TRUE)
  test_used <- "Independent Two-Sample t-Test (Equal Variances)"
} else {
  t_test <- t.test(active_sleep, sedentary_sleep, var.equal = FALSE)
  test_used <- "Welch’s t-Test (Unequal Variances)"
}

# Step 3: Summary values (rounded for display)
mean_active <- round(mean(active_sleep), 3)
mean_sedentary <- round(mean(sedentary_sleep), 3)
t_stat <- round(t_test$statistic, 3)
df <- round(t_test$parameter, 3)
t_p_value <- round(t_test$p.value, 4)

# Step 4: Summary Table
# c() coerces every entry to character because `test_used` is a string; the
# table is for display only.
summary_table <- data.frame(
  Statistic = c(
    "Mean Sleep (Active)",
    "Mean Sleep (Sedentary)",
    "Variance Test (F)",
    "Variance Test P-Value",
    "T-Test Used",
    "T-Statistic",
    "Degrees of Freedom",
    "T-Test P-Value"
  ),
  Value = c(
    mean_active,
    mean_sedentary,
    f_stat,
    f_p_value,
    test_used,
    t_stat,
    df,
    t_p_value
  )
)

cat("\n=== Test Summary ===\n")
print(summary_table)

# Step 5: Conclusion
# The decision uses the exact p-value. The message reports it to 4 significant
# digits so the printed number always agrees with the stated inequality.
reported_p <- signif(t_test$p.value, 4)
cat("\nConclusion: ")
if (t_test$p.value < 0.05) {
  cat("Reject H₀ — Significant difference in average sleep duration between Active and Sedentary users.\n")
  cat(paste("Reason: p-value =", reported_p, "< 0.05\n"))
} else {
  cat("Fail to reject H₀ — No significant difference in average sleep duration between Active and Sedentary users.\n")
  cat(paste("Reason: p-value =", reported_p, "≥ 0.05\n"))
}









[1mRows: [22m[34m3000[39m [1mColumns: [22m[34m4[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (1): lifestyle_category
[32mdbl[39m (3): user_id, avg_daily_steps, avg_sleep_duration

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


=== Sample Data ===
[90m# A tibble: 1,365 × 3[39m
   user_id lifestyle_category avg_sleep_duration
     [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m                           [3m[90m<dbl>[39m[23m
[90m 1[39m       3 Active                           7.05
[90m 2[39m       6 Active                           7.12
[90m 3[39m       8 Sedentary                        7.1 
[90m 4[39m       9 Sedentary                        7.12
[90m 5[39m      10 Sedentary                        7.04
[90m 6[39m      13 Sedentary                        7.08
[90m 7[39m      15 Active                           6.98
[90m 8[39m      16 Sedentary                        6.92
[90m 9[39m      18 Sedentary                        7.03
[90m10[39m      22 Active                           6.88
[90m# ℹ 1,355 more rows[39m

=== Test Summary ===
               Statistic                                           Value
1    Mean Sleep (Active)                                           7.044
2 Mea

**Task D: ANOVA (One-Way)**

In [None]:
# Task D: one-way ANOVA of average sleep (used as a sleep-quality proxy)
# across the three lifestyle categories.

# Load required libraries. Install `car` only when it is missing so the cell
# does not re-download the package and its dependencies on every run.
if (!requireNamespace("car", quietly = TRUE)) {
  install.packages("car")
}
library(dplyr)
library(readr)
library(car)

# Load the dataset
data <- read.csv("health_fitness_dataset.csv")

# Keep rows with both measurements, then summarise per participant
summary_data <- data %>%
  filter(!is.na(daily_steps) & !is.na(sleep_hours)) %>%
  group_by(participant_id) %>%
  summarise(
    avg_daily_steps = mean(daily_steps, na.rm = TRUE),
    avg_sleep_quality = mean(sleep_hours, na.rm = TRUE),  # use sleep_hours as quality proxy
    .groups = "drop"
  ) %>%
  mutate(
    lifestyle_category = case_when(
      avg_daily_steps < 5000 ~ "Sedentary",
      avg_daily_steps < 10000 ~ "Moderately Active",
      TRUE ~ "Active"
    )
  )

# aov() and leveneTest() expect the grouping variable as a factor
summary_data$lifestyle_category <- as.factor(summary_data$lifestyle_category)

# View summary
print(head(summary_data))

cat("\n===== TASK D: PHASE 2 (One-Way ANOVA) =====\n")

# Levene's Test (reported for reference; the ANOVA below runs regardless)
levene_result <- leveneTest(avg_sleep_quality ~ lifestyle_category, data = summary_data)
print("Levene's Test for Homogeneity of Variances:")
print(levene_result)

# One-Way ANOVA
anova_result <- aov(avg_sleep_quality ~ lifestyle_category, data = summary_data)
anova_summary <- summary(anova_result)
print("One-Way ANOVA Results:")
print(anova_summary)

# p-value of the lifestyle_category term (first row of the ANOVA table)
anova_p <- anova_summary[[1]][["Pr(>F)"]][1]

# Post-hoc if significant
if (anova_p < 0.05) {
  tukey_result <- TukeyHSD(anova_result)
  print("Tukey HSD Post-hoc Test Results:")
  print(tukey_result)
} else {
  cat("ANOVA is not significant (p =", round(anova_p, 4), "). No post-hoc test needed.\n")
}

# Save summary & result
write.csv(summary_data, "taskD_summary.csv", row.names = FALSE)

# The ANOVA table keeps its term labels in the row names, which
# row.names = FALSE would drop. Copy them into an explicit `term` column so
# the saved rows stay identifiable.
anova_table <- as.data.frame(anova_summary[[1]])
anova_df <- data.frame(
  term = trimws(rownames(anova_table)),
  anova_table,
  check.names = FALSE,
  row.names = NULL
)
write.csv(anova_df, "taskD_anova_results.csv", row.names = FALSE)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘colorspace’, ‘fracdiff’, ‘lmtest’, ‘timeDate’, ‘urca’, ‘zoo’, ‘RcppArmadillo’, ‘cowplot’, ‘Deriv’, ‘forecast’, ‘microbenchmark’, ‘rbibutils’, ‘numDeriv’, ‘doBy’, ‘SparseM’, ‘MatrixModels’, ‘minqa’, ‘nloptr’, ‘reformulas’, ‘Rdpack’, ‘RcppEigen’, ‘carData’, ‘abind’, ‘Formula’, ‘pbkrtest’, ‘quantreg’, ‘lme4’




**Phase 3 - R Programming Output and Reporting**

**Phase 1 - Data Acquisition and Cleaning**

In [None]:
# Load required libraries
library(ggplot2)
library(dplyr)
library(readr)

# Load ggplot2
library(ggplot2)

# Bar chart: one bar per lifestyle category, labelled with its participant
# count. The legend is hidden because the x axis already names each category.
category_bar_chart <- ggplot(
  category_counts,
  aes(
    x = lifestyle_category,
    y = number_of_participants,
    fill = lifestyle_category
  )
) +
  geom_col(color = "black") +
  geom_text(aes(label = number_of_participants), vjust = -0.5, size = 5) +
  ggtitle("Number of Participants per Lifestyle Category") +
  xlab("Lifestyle Category") +
  ylab("Number of Participants") +
  theme_minimal() +
  theme(legend.position = "none")

print(category_bar_chart)

**Task A: Large-Sample Hypothesis Test**

In [None]:
# Load required libraries
library(ggplot2)
library(dplyr)
library(readr)

# Task A visuals: boxplot, histogram and normal Q-Q plot of average daily
# steps for the Active group, each marked against the H0 mean.

# Reload the per-participant table and keep the Active group
summary_data <- read_csv("cleaned_fitness_data.csv")
active_group <- filter(summary_data, lifestyle_category == "Active")

# Reference value (H0 mean) drawn as a dashed red line on the ggplot figures
mu_0 <- 10000

# Theme layers shared by the boxplot and the histogram
active_plot_theme <- list(
  theme_minimal(base_size = 14),
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.text = element_text(color = "black"),
    panel.grid.minor = element_blank()
  )
)

# Boxplot
steps_boxplot <- ggplot(active_group, aes(y = avg_daily_steps)) +
  geom_boxplot(fill = "#00BCD4", color = "black", outlier.color = "red") +
  geom_hline(yintercept = mu_0, linetype = "dashed", color = "red", linewidth = 1) +
  labs(
    title = "Boxplot of Average Daily Steps (Active)",
    y = "Average Daily Step Count"
  ) +
  active_plot_theme
print(steps_boxplot)

cat("\n")

# Histogram: 500-step bins, filled by which half of the observed range the
# value falls in (cut() with two equal-width intervals)
steps_histogram <- ggplot(active_group, aes(x = avg_daily_steps)) +
  geom_histogram(
    mapping = aes(fill = cut(avg_daily_steps, breaks = 2)),
    binwidth = 500,
    boundary = 0,
    closed = "left",
    color = "black",
    alpha = 0.8
  ) +
  scale_fill_manual(values = c("#4CAF50", "#FFC107"), guide = "none") +
  geom_vline(xintercept = mu_0, linetype = "dashed", color = "red", linewidth = 1) +
  labs(
    title = "Histogram of Average Daily Step Count (Active)",
    x = "Average Daily Steps",
    y = "Frequency"
  ) +
  active_plot_theme
print(steps_histogram)

cat("\n")

# Normal Q-Q plot with reference line
active_steps <- active_group$avg_daily_steps
qqnorm(active_steps, main = "Q-Q Plot: Daily Steps (Active)", col = "darkgreen")
qqline(active_steps, col = "blue", lwd = 2)

cat("\n")

**Task B: Small-Sample Paired t-Test**

In [None]:
# Task B visuals: weekday vs weekend average steps for the selected users.
# Relies on `step_summary` (columns Weekday / Weekend) from the Task B analysis.
weekday_steps <- step_summary$Weekday
weekend_steps <- step_summary$Weekend

# Per-panel settings for the histograms and Q-Q plots
panels <- list(
  list(
    values = weekday_steps,
    hist_title = "Weekday Steps Histogram",
    qq_title = "Q-Q Plot: Weekday Steps",
    fill = "skyblue",
    line_col = "red"
  ),
  list(
    values = weekend_steps,
    hist_title = "Weekend Steps Histogram",
    qq_title = "Q-Q Plot: Weekend Steps",
    fill = "lightgreen",
    line_col = "blue"
  )
)

# Boxplot of both conditions side by side
boxplot(
  weekday_steps, weekend_steps,
  names = c("Weekday", "Weekend"),
  main = "Boxplot of Step Count: Weekday vs Weekend",
  ylab = "Average Steps",
  col = c("skyblue", "lightgreen")
)
cat("\n")

# Histograms (side by side)
par(mfrow = c(1, 2))
for (panel in panels) {
  hist(panel$values, main = panel$hist_title,
       xlab = "Average Steps", col = panel$fill, border = "white")
}
cat("\n")

# Q-Q plots (side by side)
par(mfrow = c(1, 2))
for (panel in panels) {
  qqnorm(panel$values, main = panel$qq_title)
  qqline(panel$values, col = panel$line_col)
}
cat("\n")

# Reset plotting window to a single panel
par(mfrow = c(1, 1))


**Task C: Independent Two-Sample t-Test**

In [None]:
# Load necessary libraries
library(ggplot2)
library(dplyr)
library(readr)

# Task C visuals: average sleep duration for Sedentary vs Active participants.

# Read back the sample written by the Task C analysis cell
sample_data <- read_csv("task c_sample.csv")

# Fix the category order (Sedentary first) for axes, facets and legends
sample_data <- sample_data %>%
  mutate(
    lifestyle_category = factor(
      lifestyle_category,
      levels = c("Sedentary", "Active")
    )
  )

# === BOX PLOT ===
sleep_boxplot <- ggplot(
  sample_data,
  aes(x = lifestyle_category, y = avg_sleep_duration, fill = lifestyle_category)
) +
  geom_boxplot() +
  labs(
    title = "Boxplot of Average Sleep Duration",
    x = "Lifestyle Category",
    y = "Average Sleep Duration (hours)"
  ) +
  theme_minimal()
print(sleep_boxplot)

# === HISTOGRAMS ===
sleep_histogram <- ggplot(
  sample_data,
  aes(x = avg_sleep_duration, fill = lifestyle_category)
) +
  geom_histogram(position = "identity", alpha = 0.6, bins = 20) +
  facet_wrap(~ lifestyle_category) +
  labs(
    title = "Histogram of Average Sleep Duration by Lifestyle",
    x = "Average Sleep Duration (hours)",
    y = "Count"
  ) +
  theme_minimal()
print(sleep_histogram)

# === Q-Q PLOTS ===
# One normal Q-Q plot per group, each with its own reference-line colour
sleep_sedentary <- sample_data$avg_sleep_duration[
  sample_data$lifestyle_category == "Sedentary"
]
sleep_active <- sample_data$avg_sleep_duration[
  sample_data$lifestyle_category == "Active"
]

# Sedentary
qqnorm(sleep_sedentary, main = "Q-Q Plot: Sedentary Sleep Duration")
qqline(sleep_sedentary, col = "blue")

# Active
qqnorm(sleep_active, main = "Q-Q Plot: Active Sleep Duration")
qqline(sleep_active, col = "darkgreen")



**Task D: ANOVA (One-Way)**

In [None]:
# Task D: Phase 3 - Visualization
#
# Reload the per-participant table saved by the Task D analysis
# ("taskD_summary.csv") instead of using the global `summary_data`. The Task A
# visualization cell reassigns `summary_data` to the Phase 1 table, which has
# no `avg_sleep_quality` column, so running the notebook top to bottom would
# otherwise fail here.
task_d_data <- read.csv("taskD_summary.csv")

# Explicit category order, with one fill colour and one Q-Q line colour per
# category. Fixing the factor levels keeps the boxplot in the same order and
# colours as the histograms; default alphabetical levels would give
# "Sedentary" a different colour in the boxplot than in its histogram.
category_levels <- c("Sedentary", "Moderately Active", "Active")
category_fills <- c("skyblue", "lightgreen", "lightpink")
qq_line_cols <- c("red", "blue", "darkgreen")

task_d_data$lifestyle_category <- factor(
  task_d_data$lifestyle_category,
  levels = category_levels
)

# Sleep values per category, in `category_levels` order
sleep_by_category <- split(
  task_d_data$avg_sleep_quality,
  task_d_data$lifestyle_category
)

# Boxplot
boxplot(avg_sleep_quality ~ lifestyle_category, data = task_d_data,
        main = "Boxplot of Sleep Quality by Lifestyle Category",
        xlab = "Lifestyle Category",
        ylab = "Average Sleep Quality",
        col = category_fills)
cat("\n")

# Histograms (side by side): split plot window into 3 columns, remembering
# the previous layout so it can be restored at the end
old_par <- par(mfrow = c(1, 3))

for (i in seq_along(category_levels)) {
  hist(sleep_by_category[[i]],
       main = category_levels[i],
       xlab = "Avg Sleep Quality",
       col = category_fills[i], border = "white")
}

cat("\n")

# Q-Q Plots (side by side)
par(mfrow = c(1, 3))

for (i in seq_along(category_levels)) {
  qqnorm(sleep_by_category[[i]],
         main = paste("Q-Q Plot:", category_levels[i]))
  qqline(sleep_by_category[[i]], col = qq_line_cols[i])
}

# Restore the plotting layout that was active before this cell
par(old_par)
