# In [None]:
import os
from pydub import AudioSegment
import glob
from itertools import zip_longest

def group_files_in_pairs(file_list):
    """Group consecutive files into 2-tuples.

    Args:
        file_list: Iterable of file paths. It is materialized once, so
            generators and other non-sliceable iterables are accepted.

    Returns:
        A list of ``(first, second)`` tuples in input order. When the input
        holds an odd number of items the trailing item is kept, paired with
        ``None``: ``["a", "b", "c"]`` gives ``[("a", "b"), ("c", None)]``.
        An empty input gives an empty list.
    """
    items = list(file_list)
    # Even-indexed items are the first of each pair, odd-indexed the second;
    # zip_longest pads the missing partner of an unpaired last item with None.
    return list(zip_longest(items[::2], items[1::2], fillvalue=None))

def merge_flac_files(file1_path, file2_path, output_path):
    """Concatenate one or two FLAC files and export the result as FLAC.

    Args:
        file1_path: Path of the first FLAC file; always loaded.
        file2_path: Path of the second FLAC file, appended after the first.
            A falsy value (``None`` or ``""``) means there is no second file
            and the first one is exported on its own.
        output_path: Destination path of the exported file. Missing parent
            directories are created.
    """
    merged_audio = AudioSegment.from_file(file1_path, format="flac")

    if file2_path:
        # AudioSegment "+" concatenates the two segments end to end.
        merged_audio = merged_audio + AudioSegment.from_file(file2_path, format="flac")

    # os.path.dirname() returns "" for a bare filename and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when there is one.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    merged_audio.export(output_path, format="flac")

def main():
    """Merge every pair of ``*_merged.flac`` files found under the source tree.

    Walks ``source_dir`` recursively, pairs the matching files in traversal
    order and writes each pair into ``output_dir`` as
    ``<id1>_<id2>_merged.flac``, where each id is the name of the directory
    that contains the corresponding input file. An unpaired trailing file is
    exported alone with ``solo`` as its second id.
    """
    # Source directory path
    source_dir = "SpeechRAG/libriSQA/test-clean-merged"
    # Output directory path
    output_dir = "SpeechRAG/libriSQA/twomerged"

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Collect all matching FLAC files from the source tree.
    all_flac_files = []
    for root, dirs, files in os.walk(source_dir):
        # os.walk reports entries in filesystem order, which is not stable
        # across machines. Sorting dirs in place steers the (top-down) descent
        # and sorting files fixes the order within a directory, so the same
        # tree always produces the same pairs.
        dirs.sort()
        for file in sorted(files):
            if file.endswith("_merged.flac"):
                all_flac_files.append(os.path.join(root, file))

    if not all_flac_files:
        print(f"No *_merged.flac files found under {source_dir}")
        return

    # Group files in pairs; the second element is None for an unpaired file.
    file_pairs = group_files_in_pairs(all_flac_files)

    # Output names already written during this run, used to detect collisions.
    produced = set()

    for index, (file1, file2) in enumerate(file_pairs):
        # The id of a file is the name of its parent directory.
        id1 = os.path.basename(os.path.dirname(file1))
        id2 = os.path.basename(os.path.dirname(file2)) if file2 else "solo"

        output_filename = f"{id1}_{id2}_merged.flac"
        if output_filename in produced:
            # Several pairs can share the same parent directories; without a
            # distinguishing suffix the later pair would silently overwrite
            # the earlier one.
            output_filename = f"{id1}_{id2}_{index}_merged.flac"
        produced.add(output_filename)
        output_path = os.path.join(output_dir, output_filename)

        print(f"Merging files: {os.path.basename(file1)} + {os.path.basename(file2) if file2 else 'None'}")
        merge_flac_files(file1, file2, output_path)
        print(f"Created: {output_filename}")



    