In [69]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import os

In [76]:
# Directory containing your amplitude files
file_dir = '/Users/macbookair/Desktop/ML4QS_Group125/data_aggregated/'
output_dir = '/Users/macbookair/Desktop/ML4QS_Group125/boxplots/amplitudes/'

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Get all amplitude files with .csv extension
file_paths = glob.glob(os.path.join(file_dir, 'amplitude_*.csv'))

In [78]:
# Loop through each file
for file_path in file_paths:
    print(f'Processing file: {file_path}')
    # Extract the Δt value from the file name
    delta_t = os.path.basename(file_path).split('_')[1].split('.')[0]
    
    # Load the data
    df = pd.read_csv(file_path)
    
    # Get unique values for trials, participants, languages, and tones
    trials = df['script'].unique()
    participants = df['participant'].unique()
    languages = df['language'].unique()
    tones = df['tone'].unique()
    
    # Combine participant and script into a single column for easy plotting
    df['participant_script'] = df['participant'].astype(str) + '-' + df['script']
    
    # Create a 2x2 subplot
    fig, axs = plt.subplots(2, 2, figsize=(12, 6))  # Adjust figsize to control overall plot size
    fig.tight_layout(pad=4.0)  # Adjust padding between plots
    
    # Flatten the axs array for easy indexing
    axs = axs.flatten()
    
    # Counter for subplot indexing
    plot_idx = 0
    
    # Loop through each unique combination of language and tone
    for tone in tones:
        for language in languages:
            # Filter dataframe for the current language and tone
            df_lang_tone = df[(df['tone'] == tone) & (df['language'] == language)]
            
            # Create boxplots with participant-script on the x-axis in the current subplot
            sns.boxplot(ax=axs[plot_idx], x='participant_script', y='amplitude_mean', data=df_lang_tone)
            if tone == 'bus':
                axs[plot_idx].set_title(f'{language.upper()} - {tone.capitalize()}iness')
            else:
                axs[plot_idx].set_title(f'{language.upper()} - {tone.capitalize()}')
            axs[plot_idx].set_xlabel("")  # Remove x-axis label
            axs[plot_idx].set_ylabel("Amplitude", fontsize=15)
            axs[plot_idx].tick_params(axis='x', rotation=10, labelsize=10)
            axs[plot_idx].tick_params(axis='y', labelsize=8)
        
            # Increment the subplot index
            plot_idx += 1
    
    # Put a title on the whole plot at the bottom
    plt.figtext(0.5, 0.01, f'Δt = {delta_t}s', ha='center', fontsize=20)
    
    # Save the plot
    output_file_path = os.path.join(output_dir, f'boxplot_dt_{delta_t}.png')
    plt.savefig(output_file_path)
    prınt(f'Plot saved to: {output_file_path}')
    # Close the plot to free memory
    plt.close(fig)

Processing file: /Users/macbookair/Desktop/ML4QS_Group125/data_aggregated/amplitude_0.1.csv
Processing file: /Users/macbookair/Desktop/ML4QS_Group125/data_aggregated/amplitude_0.5.csv
Processing file: /Users/macbookair/Desktop/ML4QS_Group125/data_aggregated/amplitude_2.csv
Processing file: /Users/macbookair/Desktop/ML4QS_Group125/data_aggregated/amplitude_1.csv
Processing file: /Users/macbookair/Desktop/ML4QS_Group125/data_aggregated/amplitude_5.csv
