In [None]:
import os
import csv
import torchaudio
import pandas as pd

# Script purpose: for every performance listed in info.csv, load its audio,
# resample it to a common rate, and for each movement shift the note times,
# trim the audio, and write one audio file and one notes CSV per movement.

# Define the paths to the data files and folders
audio_csv_path = 'path/to/audio.csv'
info_csv_path = 'path/to/info.csv'
audio_folder_path = 'path/to/audio/'
alignments_folder_path = 'path/to/alignments/'
notes_folder_path = 'path/to/notes/'

# Sample rate (Hz) every performance is converted to before it is cut and saved
TARGET_SAMPLE_RATE = 16000

# Load the audio metadata from the audio.csv file into a pandas DataFrame.
# Nothing below reads it; it is kept because it is a module-level name.
audio_metadata = pd.read_csv(audio_csv_path)

# Load the performance metadata from the info.csv file into a pandas DataFrame
performance_metadata = pd.read_csv(info_csv_path)

# Loop through the rows of the performance metadata DataFrame
for _, row in performance_metadata.iterrows():
    # Load the audio file for this performance
    audio_path = os.path.join(audio_folder_path, row['filename'] + '.mp3')
    waveform, sample_rate = torchaudio.load(audio_path)
    # Downsample the audio to the target rate
    if sample_rate != TARGET_SAMPLE_RATE:
        waveform = torchaudio.transforms.Resample(sample_rate, TARGET_SAMPLE_RATE)(waveform)
        # Keep the rate in step with the samples: the frame arithmetic and
        # torchaudio.save below must use the rate the tensor is actually at.
        sample_rate = TARGET_SAMPLE_RATE
    total_frames = waveform.shape[1]

    # Loop through the movements of this performance
    for movement in range(1, row['m'] + 1):
        # Load the alignment for this movement. Only the first value of the
        # first alignment row is used further down.
        alignment_path = os.path.join(alignments_folder_path, row['filename'], 'movement{}.csv'.format(movement))
        with open(alignment_path, 'r', newline='') as f:
            alignment_reader = csv.reader(f)
            # `entry` (not `row`) so the metadata row is not visually shadowed
            alignment = [(float(entry[0]), float(entry[1])) for entry in alignment_reader]

        # Load the notes for this movement
        notes_path = os.path.join(notes_folder_path, row['filename'], 'movement{}.csv'.format(movement))
        notes = pd.read_csv(notes_path)

        # Align the notes to the audio
        # NOTE(review): this adds row['source_filename'] to numeric values; if
        # that column holds a file name this raises TypeError - confirm which
        # column was intended.
        # NOTE(review): row['repeat1'] / row['repeat2'] are sliced and indexed
        # per movement, so they are assumed to be numeric sequences - confirm
        # how info.csv stores them.
        movement_start = row['start'] + row['source_filename'] + sum(row['repeat1'][:movement-1])
        movement_end = movement_start + row['repeat1'][movement-1] + row['length'] + row['repeat2'][movement-1]
        alignment_offset = alignment[0][0]
        notes_start = movement_start + alignment_offset
        notes_end = movement_end + alignment_offset
        notes['onset'] += notes_start
        notes['offset'] += notes_start
        notes = notes[(notes['onset'] >= notes_start) & (notes['offset'] <= notes_end)]

        # Cut the audio to the same length as the notes
        # NOTE(review): these offsets reduce to max(0, alignment_offset) and
        # max(0, -alignment_offset), measured from the start and end of the
        # whole file rather than from the movement's position - confirm.
        start_offset = max(0, notes_start - movement_start)
        end_offset = max(0, movement_end - notes_end)
        start_frame = int(start_offset * sample_rate)
        end_frame = total_frames - int(end_offset * sample_rate)
        # Slice into a separate name: overwriting `waveform` would make every
        # later movement start from an already-trimmed signal.
        movement_waveform = waveform[:, start_frame:end_frame]

        # Save the aligned audio and notes for this movement
        audio_filename = '{}_{}_movement{}.mp3'.format(row['filename'], row['violinist'], movement)
        audio_path = os.path.join(audio_folder_path, audio_filename)
        torchaudio.save(audio_path, movement_waveform, sample_rate)
        notes_filename = '{}_{}_movement{}.csv'.format(row['filename'], row['violinist'], movement)
        notes_path = os.path.join(notes_folder_path, notes_filename)
        notes.to_csv(notes_path, index=False)
