# Meditation App Impact on Sleep Analysis (N=4)

**Project Goal:** To quantitatively test specific hypotheses about the effect of meditation apps on sleep latency and efficiency using data from a small user study (N=4).

**Methodology:**
1.  **Data Preparation:** Load data, calculate participant averages for key metrics (Sleep Latency, Sleep Efficiency) under different conditions (Baseline, Intervention, Guided Meditation, Music-based Meditation).
2.  **Descriptive Statistics & Exploration:** Analyze the calculated averages and visualize trends.
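3.  **Inferential Statistics:** Use the Wilcoxon Signed-Rank test (non-parametric paired test appropriate for N=4) to test hypotheses about changes between conditions. Note that with only 4 pairs the smallest p-value an exact one-sided test can produce is 1/16 = 0.0625, so these tests cannot reach significance at α = 0.05; their results should be read as exploratory, alongside the effect sizes.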
4.  **Correlation Analysis (Optional):** Examine the relationship between objective sleep metrics and subjective self-reports using Spearman correlation.

**Output:**
- All textual results will be saved to `analysis_results.txt`.
- All generated figures will be saved to the `figures/` directory.

**Libraries:**

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
from contextlib import redirect_stdout
import io # To capture df.info()

# Configure plots.
# The 'seaborn-v0_8-*' style names only exist in matplotlib >= 3.6; older
# releases ship the same sheet as 'seaborn-whitegrid'. Fall back to the old
# name rather than failing in the very first cell.
plot_style = 'seaborn-v0_8-whitegrid'
if plot_style not in plt.style.available:
    plot_style = 'seaborn-whitegrid'
plt.style.use(plot_style)
sns.set_context('notebook')

# --- Output Setup ---
output_txt_file = 'analysis_results.txt'
figures_dir = 'figures'

# Create figures directory if it doesn't exist
os.makedirs(figures_dir, exist_ok=True)
print(f"Text outputs will be saved to: {output_txt_file}")
print(f"Figures will be saved to: {figures_dir}/")

# Start the results file afresh. The encoding is pinned to UTF-8 because the
# report contains non-ASCII characters (e.g. "H₀") and the platform default
# encoding is not guaranteed to be able to represent them.
with open(output_txt_file, 'w', encoding='utf-8') as f_out:
    f_out.write("--- Analysis Results ---\n\n")

Text outputs will be saved to: analysis_results.txt
Figures will be saved to: figures/


## Section 1: Setup & Data Preparation

Load the dataset and prepare it for analysis. Output from this section will be appended to the results file.

In [2]:
# Section 1: load the CSV, split nights by condition, and build one row of
# averages per participant. Everything printed inside the `with` block is
# redirected into the results file. UTF-8 is requested explicitly because the
# dumped CSV content may contain characters the platform default encoding
# cannot represent.
with open(output_txt_file, 'a', encoding='utf-8') as f_out, redirect_stdout(f_out):
    print("\n--- Section 1: Setup & Data Preparation ---")

    # Define file path
    file_path = 'Participant Data.csv'
    print(f"\nUsing data file: {file_path}")

    # --- 1. Load Data ---
    # Only the read itself is guarded, so unrelated failures further down are
    # never mistaken for a missing file.
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        # Report on the console (stderr is not redirected) and in the results
        # file, then re-raise so the notebook stops here.
        error_msg = f"Error: The file '{file_path}' was not found. Please ensure the CSV file is in the correct directory."
        print(error_msg, file=sys.stderr)
        f_out.write(f"\n{error_msg}\n")
        raise

    print("\nSuccessfully loaded data.")
    print(f"Dataset shape: {df.shape}")

    f_out.write("\nFirst 5 rows:\n")
    f_out.write(df.head().to_string() + "\n")

    # df.info() prints rather than returns, so capture it in a string buffer.
    f_out.write("\nData Info:\n")
    buffer = io.StringIO()
    df.info(buf=buffer)
    info_output = buffer.getvalue()
    f_out.write(info_output + "\n")

    # Fail early, with one clear message, if a column used below is absent.
    metrics_to_average = ['Sleep Latency (m)', 'Sleep Efficiency']
    required_columns = ['Participant', 'Day', 'Meditation Used (Y/N)', 'Meditation Type', *metrics_to_average]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise KeyError(f"Missing required column(s) in '{file_path}': {missing_columns}")

    # --- 2. Define Conditions & Filter Data ---
    # Days 1-4 are baseline; days 5-10 count as intervention only on nights
    # where meditation was actually used.
    print("\n--- Filtering Data by Condition ---")
    baseline_df = df[df['Day'].isin([1, 2, 3, 4])].copy()
    intervention_df = df[(df['Day'].isin([5, 6, 7, 8, 9, 10])) & (df['Meditation Used (Y/N)'] == 'Y')].copy()
    guided_df = intervention_df[intervention_df['Meditation Type'] == 'Guided Voice'].copy()
    music_df = intervention_df[intervention_df['Meditation Type'] == 'Music-based'].copy()

    print(f"Baseline nights count: {len(baseline_df)}")
    print(f"Intervention nights (meditation used) count: {len(intervention_df)}")
    print(f"Guided Voice nights count: {len(guided_df)}")
    print(f"Music-based nights count: {len(music_df)}")

    # --- 3. Calculate Participant Averages per Condition ---
    print("\n--- Calculating Participant Averages ---")
    avg_baseline = (
        baseline_df.groupby('Participant')[metrics_to_average].mean().reset_index()
        .rename(columns={'Sleep Latency (m)': 'Avg_Baseline_Latency', 'Sleep Efficiency': 'Avg_Baseline_Efficiency'})
    )
    avg_intervention = (
        intervention_df.groupby('Participant')[metrics_to_average].mean().reset_index()
        .rename(columns={'Sleep Latency (m)': 'Avg_Intervention_Latency', 'Sleep Efficiency': 'Avg_Intervention_Efficiency'})
    )
    avg_guided = (
        guided_df.groupby('Participant')['Sleep Latency (m)'].mean().reset_index()
        .rename(columns={'Sleep Latency (m)': 'Avg_Guided_Latency'})
    )
    avg_music = (
        music_df.groupby('Participant')['Sleep Latency (m)'].mean().reset_index()
        .rename(columns={'Sleep Latency (m)': 'Avg_Music_Latency'})
    )

    # --- 4. Create Summary DataFrame ---
    # Outer merges keep a participant even when a condition has no nights for
    # them; the gap shows up as NaN and is reported below.
    print("\n--- Creating Summary DataFrame ---")
    summary_df = avg_baseline
    for condition_avg in (avg_intervention, avg_guided, avg_music):
        summary_df = pd.merge(summary_df, condition_avg, on='Participant', how='outer')
    summary_df = summary_df.set_index('Participant')

    if summary_df.isnull().any().any():
        print("\nWarning: Missing values detected in summary_df:")
        print(summary_df.isnull().sum())

    # Write summary_df to file
    f_out.write("\n--- Participant Summary Averages ---\n")
    f_out.write(summary_df.to_string() + "\n")

print(f"Section 1 processing complete. Results appended to {output_txt_file}")
# We need summary_df and df for later sections
global_summary_df = summary_df.copy()
global_df = df.copy()

Section 1 processing complete. Results appended to analysis_results.txt


## Section 2: Descriptive Statistics & Exploration

Examine descriptive statistics and visualize trends based on the participant averages. Output appended to results file, figures saved.

In [3]:
# Reuse the Section 1 summary. This binds the same object as
# global_summary_df (it is not a copy), so the delta columns added below are
# visible through both names.
summary_df = global_summary_df


def _plot_paired_panel(ax, left, right, tick_labels, ylabel, title):
    """Draw one paired (slope) panel: one line per participant from left to right.

    Args:
        ax: Matplotlib axes to draw on.
        left: Series of per-participant values plotted at x=0.
        right: Series of per-participant values plotted at x=1; expected to
            share ``left``'s index order.
        tick_labels: Two labels for the x ticks at 0 and 1.
        ylabel: Y-axis label.
        title: Panel title.
    """
    ax.plot([left, right], marker='o')
    ax.set_xticks([0, 1])
    ax.set_xticklabels(tick_labels)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    # Label both end points of every line with the participant identifier.
    for participant, left_value, right_value in zip(left.index, left, right):
        ax.text(0, left_value, f' {participant}', va='center')
        ax.text(1, right_value, f' {participant}', va='center')
    ax.set_xlim(-0.2, 1.2)


# UTF-8 is requested explicitly so the report does not depend on the platform
# default encoding.
with open(output_txt_file, 'a', encoding='utf-8') as f_out, redirect_stdout(f_out):
    print("\n\n--- Section 2: Descriptive Statistics & Exploration ---")

    # --- Descriptive Statistics for Participant Averages ---
    print("\n--- Descriptive Statistics for Participant Averages ---")
    f_out.write(summary_df.describe().to_string() + "\n")

    # --- Calculate Delta Scores ---
    print("\n--- Calculating Delta Scores (Changes) ---")
    summary_df['Latency_Delta (Intervention-Baseline)'] = summary_df['Avg_Intervention_Latency'] - summary_df['Avg_Baseline_Latency']
    summary_df['Efficiency_Delta (Intervention-Baseline)'] = summary_df['Avg_Intervention_Efficiency'] - summary_df['Avg_Baseline_Efficiency']
    summary_df['Latency_Delta (Guided-Music)'] = summary_df['Avg_Guided_Latency'] - summary_df['Avg_Music_Latency']

    print("\n--- Descriptive Statistics for Delta Scores ---")
    delta_cols = [
        'Latency_Delta (Intervention-Baseline)',
        'Efficiency_Delta (Intervention-Baseline)',
        'Latency_Delta (Guided-Music)',
    ]
    f_out.write(summary_df[delta_cols].describe().to_string() + "\n")

# --- Visualizations (done outside redirect_stdout so progress shows on the console) ---
print("\n--- Generating and Saving Plots for Section 2 ---")

# 1. Box Plots of Participant Averages: (columns, title, y-label) per panel.
box_panels = [
    (['Avg_Baseline_Latency', 'Avg_Intervention_Latency'],
     'Avg Latency: Baseline vs Intervention (N=4)', 'Average Sleep Latency (m)'),
    (['Avg_Baseline_Efficiency', 'Avg_Intervention_Efficiency'],
     'Avg Efficiency: Baseline vs Intervention (N=4)', 'Average Sleep Efficiency (%)'),
    (['Avg_Guided_Latency', 'Avg_Music_Latency'],
     'Avg Latency: Guided vs Music (N=4)', 'Average Sleep Latency (m)'),
]
fig_box, axes_box = plt.subplots(1, 3, figsize=(18, 5))
for ax, (columns, title, ylabel) in zip(axes_box, box_panels):
    sns.boxplot(data=summary_df[columns], ax=ax)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
fig_box.suptitle('Box Plots of Participant Averages Across Conditions', y=1.02)
fig_box.tight_layout()
boxplot_filename = os.path.join(figures_dir, 'boxplots_participant_averages.png')
fig_box.savefig(boxplot_filename, bbox_inches='tight')
plt.close(fig_box)  # Close the figure to free memory
print(f"Saved box plot to: {boxplot_filename}")

# 2. Paired Before-After Plots
fig_paired, axes_paired = plt.subplots(1, 3, figsize=(18, 5))

# Latency: Baseline vs Intervention
_plot_paired_panel(
    axes_paired[0],
    summary_df['Avg_Baseline_Latency'],
    summary_df['Avg_Intervention_Latency'],
    ['Baseline', 'Intervention'],
    'Average Sleep Latency (m)',
    'Paired Avg Latency: Baseline vs Intervention',
)

# Efficiency: Baseline vs Intervention
_plot_paired_panel(
    axes_paired[1],
    summary_df['Avg_Baseline_Efficiency'],
    summary_df['Avg_Intervention_Efficiency'],
    ['Baseline', 'Intervention'],
    'Average Sleep Efficiency (%)',
    'Paired Avg Efficiency: Baseline vs Intervention',
)

# Latency: Guided vs Music. Only participants with both averages can be paired.
valid_comparison_h3 = summary_df[['Avg_Guided_Latency', 'Avg_Music_Latency']].dropna()
if not valid_comparison_h3.empty:
    _plot_paired_panel(
        axes_paired[2],
        valid_comparison_h3['Avg_Music_Latency'],
        valid_comparison_h3['Avg_Guided_Latency'],
        ['Music', 'Guided'],
        'Average Sleep Latency (m)',
        f'Paired Avg Latency: Music vs Guided (N={len(valid_comparison_h3)})',
    )
else:
    axes_paired[2].text(0.5, 0.5, 'Not enough paired data\nfor Guided vs Music', ha='center', va='center')
    axes_paired[2].set_title('Paired Avg Latency: Music vs Guided')

fig_paired.suptitle('Paired Plots of Participant Averages Across Conditions', y=1.02)
fig_paired.tight_layout()
pairedplot_filename = os.path.join(figures_dir, 'pairedplots_participant_averages.png')
fig_paired.savefig(pairedplot_filename, bbox_inches='tight')
plt.close(fig_paired)  # Close the figure
print(f"Saved paired plot to: {pairedplot_filename}")

print(f"Section 2 processing complete. Results appended to {output_txt_file}")


--- Generating and Saving Plots for Section 2 ---
Saved box plot to: figures\boxplots_participant_averages.png
Saved paired plot to: figures\pairedplots_participant_averages.png
Section 2 processing complete. Results appended to analysis_results.txt


## Section 3: Inferential Statistics - Hypothesis Testing

Test the hypotheses using the Wilcoxon Signed-Rank test. Results are appended to the results file.

In [4]:
# Reuse the participant summary saved at the end of Section 1. This binds the
# same DataFrame object as global_summary_df; no copy is made here.
summary_df = global_summary_df 

# Define functions first (outside the context manager)
def calculate_effect_size_r(z_stat, n_pairs):
    """Return the effect size r = Z / sqrt(N) for a paired test.

    Args:
        z_stat: Z-statistic (normal approximation) of the test.
        n_pairs: Number of pairs the statistic was computed from.

    Returns:
        The effect size, or NaN when ``n_pairs`` is not positive or
        ``z_stat`` is missing.
    """
    if n_pairs <= 0 or pd.isna(z_stat):
        return np.nan
    return z_stat / np.sqrt(n_pairs)


def run_wilcoxon_and_print(data1, data2, alternative, hypothesis_name, f_out):
    """Run a paired Wilcoxon signed-rank test and write a report to ``f_out``.

    Pairs with a missing value on either side are dropped. The test is skipped
    (with a message) when fewer than 4 complete pairs remain.

    Args:
        data1: First sample (Series, aligned with ``data2`` by index).
        data2: Second sample.
        alternative: 'greater', 'less' or 'two-sided', passed on to
            ``scipy.stats.wilcoxon`` (tests ``data1 - data2``).
        hypothesis_name: Label used in the report header.
        f_out: Writable text stream that receives the report.

    Returns:
        None. All results are written to ``f_out``.
    """
    f_out.write(f"\n--- Testing Hypothesis: {hypothesis_name} ---\n")

    combined = pd.DataFrame({'data1': data1, 'data2': data2}).dropna()
    n_pairs = len(combined)

    if n_pairs < 4:
        f_out.write(f"Skipping test: Only {n_pairs} valid pairs found. Insufficient data.\n")
        return

    d1 = combined['data1']
    d2 = combined['data2']

    f_out.write(f"Comparing medians: {d1.median():.2f} vs {d2.median():.2f}\n")
    f_out.write(f"Number of pairs (N): {n_pairs}\n")

    # Guard only the scipy call. Writing the report must stay outside the
    # try block: an encoding error raised by f_out.write is a ValueError
    # subclass and would otherwise be misreported as a failed test.
    try:
        stat, p_value = stats.wilcoxon(d1, d2, alternative=alternative, zero_method='pratt', correction=False)
    except ValueError as e:
        f_out.write(f"Could not perform Wilcoxon test: {e}\n")
        return
    except Exception as e:
        f_out.write(f"An unexpected error occurred during Wilcoxon test: {e}\n")
        return

    # Normal approximation of the signed-rank statistic. With
    # zero_method='pratt' the zero differences take part in the ranking and
    # are then discarded, so their contribution is removed from the mean and
    # variance. With no zero differences this is the textbook n(n+1)/4 and
    # n(n+1)(2n+1)/24. No tie correction is applied.
    n_zero = int((d1 == d2).sum())
    mu_W = (n_pairs * (n_pairs + 1) - n_zero * (n_zero + 1)) / 4
    sigma_W = np.sqrt(
        (n_pairs * (n_pairs + 1) * (2 * n_pairs + 1)
         - n_zero * (n_zero + 1) * (2 * n_zero + 1)) / 24
    )

    if sigma_W > 0:
        z_stat_approx = (stat - mu_W) / sigma_W
        effect_size_r = calculate_effect_size_r(z_stat_approx, n_pairs)
        f_out.write(f"Approximate Z-statistic: {z_stat_approx:.3f}\n")
        f_out.write(f"Effect Size (r = Z/sqrt(N)): {effect_size_r:.3f}\n")
    else:
        z_stat_approx = np.nan
        effect_size_r = np.nan
        f_out.write("Warning: Could not calculate Z-statistic approximation (sigma_W=0).\n")
        f_out.write(f"Approximate Z-statistic: {z_stat_approx}\n")
        f_out.write(f"Effect Size (r = Z/sqrt(N)): {effect_size_r}\n")

    # Label the p-value according to the alternative actually tested.
    two_sided = alternative == 'two-sided'
    tail_label = "two-tailed" if two_sided else f"one-tailed, {alternative}"
    f_out.write(f"Wilcoxon Test Statistic (W): {stat:.2f}\n")
    f_out.write(f"P-value ({tail_label}): {p_value:.4f}\n")

    alpha = 0.05
    if p_value < alpha:
        f_out.write(f"Conclusion: Reject H₀ (p < {alpha}). Evidence supports the alternative hypothesis.\n")
    else:
        f_out.write(f"Conclusion: Fail to reject H₀ (p >= {alpha}). Insufficient evidence for the alternative hypothesis.\n")
        # An exact signed-rank test on n non-zero pairs has 2**n equally
        # likely sign patterns, so its p-value has a hard floor. Say so when
        # that floor is not below alpha, so a "fail to reject" is not read as
        # evidence of no effect.
        n_nonzero = n_pairs - n_zero
        min_exact_p = (2 if two_sided else 1) / 2 ** n_nonzero
        if min_exact_p >= alpha:
            f_out.write(
                f"Note: an exact Wilcoxon test on {n_nonzero} non-zero pairs cannot produce "
                f"p < {min_exact_p:.4f}, so rejection at alpha={alpha} was not attainable.\n"
            )

# Run the three planned comparisons. Each entry is
# (data1 column, data2 column, alternative, report label); the test is on
# data1 - data2, so 'greater' means data1 > data2.
planned_tests = [
    # H1: Intervention latency < Baseline latency --> Baseline > Intervention
    ('Avg_Baseline_Latency', 'Avg_Intervention_Latency', 'greater',
     "H1: Latency (Baseline vs Intervention)"),
    # H2: Intervention efficiency > Baseline efficiency
    ('Avg_Intervention_Efficiency', 'Avg_Baseline_Efficiency', 'greater',
     "H2: Efficiency (Intervention vs Baseline)"),
    # H3: Guided latency < Music latency
    ('Avg_Guided_Latency', 'Avg_Music_Latency', 'less',
     "H3: Latency (Guided vs Music)"),
]

# The report contains non-ASCII text ("H₀"), so the file is opened as UTF-8
# explicitly rather than with the platform default encoding, which may not be
# able to encode it. No redirect_stdout is needed: the function writes
# directly to f_out.
with open(output_txt_file, 'a', encoding='utf-8') as f_out:
    f_out.write("\n\n--- Section 3: Inferential Statistics - Hypothesis Testing ---\n")

    for first_col, second_col, direction, label in planned_tests:
        run_wilcoxon_and_print(
            summary_df[first_col],
            summary_df[second_col],
            alternative=direction,
            hypothesis_name=label,
            f_out=f_out,
        )

print(f"Section 3 processing complete. Results appended to {output_txt_file}")

Section 3 processing complete. Results appended to analysis_results.txt


## Section 4: Correlation Analysis

Test the relationship between objective and subjective metrics using Spearman correlation on per-night data. Results are appended to the results file and figures are saved.

In [5]:
# Reuse the per-night data saved at the end of Section 1 (same object as
# global_df, not a further copy).
df = global_df


def _report_spearman(x, y, label, alpha):
    """Print a Spearman correlation of ``x`` against self-reported quality ``y``.

    Args:
        x: Objective metric values.
        y: Self-reported quality values, paired with ``x``.
        label: Name of the objective metric used in the printed header.
        alpha: Significance threshold used for the printed conclusion.

    Returns:
        Tuple ``(rho, p_value)`` as returned by ``scipy.stats.spearmanr``.
    """
    print(f"\nCorrelation: {label} vs. Self-reported Quality")
    rho, p_value = stats.spearmanr(x, y, nan_policy='omit')
    print(f"Spearman's rho: {rho:.3f}")
    print(f"P-value: {p_value:.4f}")
    if p_value < alpha:
        print(f"Conclusion: Significant correlation found (p < {alpha}). Reject H₀.")
    else:
        print(f"Conclusion: No significant correlation found (p >= {alpha}). Fail to reject H₀.")
    return rho, p_value


# The conclusions printed below contain "H₀". stdout is redirected into this
# file, so it must be opened with an encoding that can represent that
# character; the platform default is not guaranteed to.
with open(output_txt_file, 'a', encoding='utf-8') as f_out, redirect_stdout(f_out):
    print("\n\n--- Section 4: Correlation Analysis (Optional) ---")

    print("\n--- Preparing Data for Correlation ---")
    quality_col = 'Self-reported Sleep Quality (1-10)'
    corr_df = df[['Sleep Latency (m)', 'Sleep Efficiency', quality_col]].copy()
    # Non-numeric quality entries become NaN and are dropped with the row.
    corr_df[quality_col] = pd.to_numeric(corr_df[quality_col], errors='coerce')
    corr_df_cleaned = corr_df.dropna()
    print(f"Number of nights with complete data for correlation: {len(corr_df_cleaned)}")

    if len(corr_df_cleaned) < 3:
        print("\nInsufficient data points (<3) for meaningful descriptive statistics or correlation after handling NaNs.")
    else:
        # --- Descriptive Statistics for Correlation Variables ---
        print("\n--- Descriptive Statistics for Variables in H4 (Per-Night Data) ---")
        f_out.write(corr_df_cleaned.describe().to_string() + "\n")

        # --- Spearman Correlation Tests ---
        # NOTE(review): nights are pooled across participants here, so the
        # observations are presumably not independent and the p-values should
        # be read with caution.
        print("\n--- Testing H4: Correlations (Objective vs. Subjective Quality) ---")
        alpha_corr = 0.05

        # Correlation 1: Latency vs. Subjective Quality
        rho_lat, p_lat = _report_spearman(
            corr_df_cleaned['Sleep Latency (m)'], corr_df_cleaned[quality_col],
            "Sleep Latency", alpha_corr,
        )

        # Correlation 2: Efficiency vs. Subjective Quality
        rho_eff, p_eff = _report_spearman(
            corr_df_cleaned['Sleep Efficiency'], corr_df_cleaned[quality_col],
            "Sleep Efficiency", alpha_corr,
        )

# --- Visualization for Correlation Variables (outside redirect_stdout so progress shows on the console) ---
print("\n--- Generating and Saving Plots for Section 4 ---")
if len(corr_df_cleaned) >= 3:
    fig_hist, axes_hist = plt.subplots(1, 3, figsize=(15, 4))
    sns.histplot(corr_df_cleaned['Sleep Latency (m)'], kde=True, ax=axes_hist[0])
    axes_hist[0].set_title('Distribution of Sleep Latency (m)')
    sns.histplot(corr_df_cleaned['Sleep Efficiency'], kde=True, ax=axes_hist[1])
    axes_hist[1].set_title('Distribution of Sleep Efficiency (%)')
    # Quality is an integer rating, so use discrete bins and no KDE.
    sns.histplot(corr_df_cleaned[quality_col], kde=False, discrete=True, ax=axes_hist[2])
    axes_hist[2].set_title('Distribution of Self-reported Quality')
    fig_hist.suptitle('Distributions of Variables Used in Correlation Analysis (Per-Night Data)', y=1.02)
    fig_hist.tight_layout()
    hist_filename = os.path.join(figures_dir, 'histograms_correlation_variables.png')
    fig_hist.savefig(hist_filename, bbox_inches='tight')
    plt.close(fig_hist)  # Close the figure
    print(f"Saved histogram plot to: {hist_filename}")
else:
    print("Skipping histogram generation due to insufficient data.")

print(f"Section 4 processing complete. Results appended to {output_txt_file}")
print("\n--- Analysis Finished --- \n")


--- Generating and Saving Plots for Section 4 ---
Saved histogram plot to: figures\histograms_correlation_variables.png
Section 4 processing complete. Results appended to analysis_results.txt

--- Analysis Finished --- 

