In [13]:
from music21 import converter, key, interval
from tqdm import tqdm
import os

In [14]:
import os
from music21 import converter, key, interval

def transpose_to_c_major_a_minor_bulk(input_root, output_root):
    """
    Recursively transposes all MIDI files under the input directory to C major or A minor
    and saves them in the output directory, preserving the folder structure.

    Files are selected by extension (``.mid`` or ``.midi``), compared case-insensitively
    so that names such as ``SONG.MID`` are picked up as well. Each file is handed to
    ``transpose_to_c_major_a_minor_single``; an exception raised for one file is printed
    and the run continues with the next file.

    Args:
        input_root (str): Root directory containing the input files.
        output_root (str): Root directory to save the transposed files.
    """
    midi_extensions = ('.mid', '.midi')

    # Collect (input_path, output_path) pairs up front so the progress bar knows the total.
    midi_files = []
    for root, _, files in os.walk(input_root):
        # Lower-case the name before matching: a plain endswith('.mid') misses '.MID'.
        midi_names = [name for name in files if name.lower().endswith(midi_extensions)]
        if not midi_names:
            continue

        # Mirror the sub-folder (relative to input_root) below output_root; one makedirs
        # call per directory is enough for every file it contains.
        output_dir = os.path.join(output_root, os.path.relpath(root, input_root))
        os.makedirs(output_dir, exist_ok=True)

        for name in midi_names:
            midi_files.append((os.path.join(root, name), os.path.join(output_dir, name)))

    # Process files with a progress bar
    for input_path, output_path in tqdm(midi_files, desc="Processing files", unit="file"):
        try:
            transpose_to_c_major_a_minor_single(input_path, output_path)
        except Exception as e:
            # Best effort: report the failing file and keep going with the rest.
            print(f"Error processing {input_path}: {e}")
# end transpose_to_c_major_a_minor_bulk

def transpose_to_c_major_a_minor_single(input_path, output_path, save_untransposed=True):
    """
    Transposes a single MIDI file to C major or A minor and saves the result as MIDI.

    The key is estimated with ``score.analyze('key')``. A piece whose estimated mode is
    'major' is transposed so that its tonic becomes C; any other mode is transposed so
    that its tonic becomes A.

    Args:
        input_path (str): Path to the input MIDI file.
        output_path (str): Path to save the transposed MIDI file.
        save_untransposed (bool): What to do when the piece is already in C major or
            A minor. ``True`` (default) writes the score to ``output_path`` without
            transposing it, so a mirrored output tree contains every input file.
            ``False`` returns without writing anything.
    """
    # Load the MIDI file
    score = converter.parse(input_path)

    # Analyze the key
    original_key = score.analyze('key')

    already_in_target = (
        (original_key.tonic.name == 'C' and original_key.mode == 'major')
        or (original_key.tonic.name == 'A' and original_key.mode == 'minor')
    )

    if already_in_target:
        if not save_untransposed:
            return
        # Nothing to transpose; the score is still written below so it is not
        # missing from the output.
        transposed_score = score
    else:
        # Lower-case 'a' is A minor in music21 (upper-case 'A' would be A major).
        # Only the tonic is used below.
        target_key = key.Key('C') if original_key.mode == 'major' else key.Key('a')

        # NOTE(review): both tonics are built without an explicit octave, so the
        # direction and size of this interval depend on music21's default octave
        # handling and may not be the smallest possible shift - confirm if the
        # register of the output matters.
        transposition_interval = interval.Interval(original_key.tonic, target_key.tonic)
        transposed_score = score.transpose(transposition_interval)

    for harmony in transposed_score.recurse().getElementsByClass('Harmony'):
        figure = harmony.figure
        if '/' in figure:
            # Slash chord: keep the figure text exactly as it is. It is assigned
            # back unchanged rather than split on '/', because unpacking the split
            # into two names raises ValueError when the figure has several '/'.
            # NOTE(review): presumably the assignment makes music21 re-read the
            # figure - confirm.
            harmony.figure = figure
        elif harmony.bass():  # Ensure inversion info is retained
            harmony.figure = f"{figure}/{harmony.bass().name}"

    # Save the score to the output file
    transposed_score.write('midi', fp=output_path)
# end transpose_to_c_major_a_minor_single

In [15]:
# Example usage: convert the `key_train` tree into `CA_train`.
input_root, output_root = (
    '/media/maindisk/data/hooktheory_midi_hr/key_train',
    '/media/maindisk/data/hooktheory_midi_hr/CA_train',
)
transpose_to_c_major_a_minor_bulk(input_root=input_root, output_root=output_root)

Processing files: 100%|██████████| 16890/16890 [24:06<00:00, 11.67file/s]


In [16]:
# Same conversion for the `key_test` tree, written to `CA_test`.
input_root, output_root = (
    '/media/maindisk/data/hooktheory_midi_hr/key_test',
    '/media/maindisk/data/hooktheory_midi_hr/CA_test',
)
transpose_to_c_major_a_minor_bulk(input_root=input_root, output_root=output_root)

Processing files:   0%|          | 0/889 [00:00<?, ?file/s]

Processing files: 100%|██████████| 889/889 [01:15<00:00, 11.70file/s]
