# Post-hoc REPP Analysis Pipeline

This notebook performs post-hoc analysis of tapping data collected from PsyNet experiments using REPP.

## Overview

The pipeline processes participant audio recordings from tapping experiments and performs beat detection analysis to extract rhythmic patterns and onsets. It handles data preprocessing, stimulus information extraction, and generates visualization plots for each recording.

## Workflow

1. **Data Loading**: Loads participant data from a specified base directory containing:
   - `data/TapTrialMusic.csv`: Trial and stimulus metadata
   - `assets/<sub-directory>/participants/participant_<id>/`: Audio recordings (`.wav`) for each participant

2. **Audio Processing**: 
   - Converts stereo recordings to mono
   - Resamples audio to 44.1 kHz if needed
   - Saves processed WAV files in PCM_16 format

3. **Stimulus Information Extraction**:
   - Extracts trial metadata from CSV files
   - Saves stimulus information as JSON files for each recording

4. **REPP Beat Detection**:
   - Runs REPP analysis on each participant recording
   - Configures detection parameters (extraction thresholds, window sizes)
   - Generates beat detection plots and extracts onset times
   - Displays visualization plots for each analyzed recording

## Output

For each participant, the pipeline generates:
- Converted audio files (`.wav`)
- Stimulus information files (`.json`)
- Beat detection analysis plots (`.png`)


In [None]:
import json
import os
from importlib import reload

import librosa
import matplotlib.image as mpimg
import numpy as np
import pandas as pd
import soundfile as sf
from matplotlib import pyplot as plt
from repp.config import ConfigUpdater

from custom_config import sms_tapping
from post_repp_pipeline import load_stim_info_from_csv
from repp_beatfinding.beat_detection import (
    do_beat_detection_analysis,
)

# Set Parent directory path for Assets dir

In [None]:
# --- Input / output locations ---
# Set base directory here
base_dir = r"D:\pyspace\Djembe\psynet\data_2025\November-2025\mali-group1-nov9"
output_dir = r"output"

# Trial table exported with the experiment data.
TapTrialMusic_path = os.path.join(base_dir, "data", "TapTrialMusic.csv")
TapTrialMusic_df = pd.read_csv(TapTrialMusic_path)

# Show what is available so a sub-directory and a participant id can be chosen.
assets_root = os.path.join(base_dir, "assets")
print("Sub-directories of assets", os.listdir(assets_root))

participant_ids = TapTrialMusic_df['participant_id'].unique()
print("Participant Ids:", participant_ids)

## Choose sub dir and participant id

In [None]:
# Participant whose recordings will be processed; used to build the
# "participant_<id>" directory names.
choose_participant_id = 7
# Sub-directory of "<base_dir>/assets" that holds the participants' recordings.
choose_sub_dir = "Task 1"

In [None]:
# Where the chosen participant's source recordings live, and where the
# converted audio / stim-info / plots for that participant are written.
output_participant_dir = os.path.join(output_dir, f"participant_{choose_participant_id}")
participant_dir = os.path.join(
    base_dir, "assets", choose_sub_dir, "participants", f"participant_{choose_participant_id}"
)

# isdir rather than exists: a stray *file* with that name must be rejected
# here, otherwise os.listdir below would fail with a less helpful error.
if not os.path.isdir(participant_dir):
    raise ValueError(
        f"Participant directory does not exist: {participant_dir}. "
        "Choose another participant id."
    )

# os.listdir returns entries in arbitrary order; sort so recordings are
# always processed (and plotted) in the same order.
participant_audio_fnames = sorted(
    f for f in os.listdir(participant_dir) if f.endswith('.wav')
)
os.makedirs(output_participant_dir, exist_ok=True)

## Convert wav and extract stim_info per participant id

In [None]:
# Collects one (basename, recording file name, stim-info file name) tuple per
# recording for later analysis.
audio_stim_pairs = []
for audio_fname in participant_audio_fnames:
    # Drop only the extension. str.strip(".wav") treats its argument as a set
    # of characters and would also eat any leading/trailing '.', 'w', 'a' or
    # 'v' from the name itself (e.g. "...data.wav" -> "...dat").
    audio_basename = os.path.splitext(audio_fname)[0]

    # The second "__"-separated field is expected to look like
    # "<label>_<trial id>". Parsing the extension-free name keeps this working
    # even when that field is the last one in the file name.
    parts = audio_basename.split("__")
    # node_id  = int(parts[0].split("_")[1])
    trial_id = int(parts[1].split("_")[1])

    audio_path = os.path.join(participant_dir, audio_fname)
    stim_info_fname = f"{audio_basename}_stim_info.json"

    audio_stim_pairs.append((audio_basename, audio_fname, stim_info_fname))

    ###########  Convert and save WAV file
    output_audio_path = os.path.join(output_participant_dir, audio_fname)
    # Only decode/resample when the converted file is missing; an existing
    # output is never overwritten, so re-reading the source would be wasted work.
    if not os.path.exists(output_audio_path):
        data, fs = sf.read(audio_path)
        # Convert to mono if the recording has more than one channel
        # (2-D array): average across axis 1.
        if data.ndim == 2:
            data = np.mean(data, axis=1)

        if fs != 44100:
            data = librosa.resample(data, orig_sr=fs, target_sr=44100)
            fs = 44100

        sf.write(output_audio_path, data, fs, subtype='PCM_16')
        print(f"WAV converted and saved to {output_participant_dir}")

    ###########  Stims info from CSV
    stim_info = load_stim_info_from_csv(trial_id, TapTrialMusic_df)

    # save stim_info to json (skipped when the file already exists)
    stim_info_json_path = os.path.join(output_participant_dir, stim_info_fname)
    if not os.path.exists(stim_info_json_path):
        with open(stim_info_json_path, 'w', encoding='utf-8') as f:
            json.dump(stim_info, f, indent=4)
        print("stim_info saved:", stim_info)

participant_stim_info_fnames = [f for f in os.listdir(output_participant_dir) if f.endswith('.json')]

### REPP Analysis

In [None]:

# Beat Finding Analysis
# import repp_beatfinding.beat_detection as beat_detection
# reload(beat_detection)


# from repp.config import sms_tapping

long_tapping= ConfigUpdater.create_config(
    sms_tapping,
    {
        'EXTRACT_THRESH': [0.19, 0.2],
        'EXTRACT_COMPRESS_FACTOR': 1,
        'EXTRACT_FIRST_WINDOW': [18, 18],
        'EXTRACT_SECOND_WINDOW': [26, 60],
        ## TODO: add a parameter that extend the MARKER ERROR THRESHOLD to 20.

    }
)

for recording_basename, recording_fname, stim_info_fname in audio_stim_pairs:

    # Define filenames for outputs
    filenames = {
        'stim_info_file': stim_info_fname,
        'audio_filename': 'stim_audio.wav',
        'recording_filename': recording_fname,
        'plot_filename': f'{recording_basename}.png',
        'title_plot': 'Beat Finding Analysis'
    }


    with open(os.path.join(output_participant_dir, filenames['stim_info_file']), 'r') as f:
        stim_info = json.load(f)
    print("-------------------------------------------------\n Running REPP\n")

    output, extracted_onsets, stats = do_beat_detection_analysis(
        os.path.join(output_participant_dir, filenames['recording_filename']),
        filenames['title_plot'],
        os.path.join(output_participant_dir, filenames['plot_filename']),
        stim_info=stim_info, config=long_tapping
    )
    print("extracted onsets:-----------------------------\n")
    print(extracted_onsets)
    # and show the plot here
    print("-------------------------------------------------\n")

    %matplotlib inline
    plt.clf()
    plt.figure(figsize=(14, 12))  # Adjust these numbers as needed
    img = mpimg.imread(os.path.join(output_participant_dir, filenames['plot_filename']))
    imgplot = plt.imshow(img)
    plt.axis('off')

    plt.tight_layout()

