In [1]:
# Same header that we use in colab to set up the environment
import os
import random
import csv
from miditok import Octuple
from miditok.data_augmentation import augment_midi_dataset
from pathlib import Path
import subprocess

# Optional machine-specific setup: the module is simply absent on hosts that
# do not need it, so its absence is reported rather than treated as fatal.
try:
  import masters_environment
except ImportError:
  # Only the "module not available" case is expected here; a bare except
  # would also hide Ctrl-C and genuine errors raised while importing.
  print("No local environment loaded")

No local environment loaded


In [2]:
import pandas as pd

In [3]:
def midi_to_abc(midi_file_path, abc_file_path):
    """Convert one MIDI file to ABC notation with the external ``midi2abc`` tool.

    Parameters
    ----------
    midi_file_path : str or os.PathLike
        MIDI file handed to ``midi2abc`` as its input.
    abc_file_path : str or os.PathLike
        Output location handed to ``midi2abc`` through ``-o``.

    Returns
    -------
    bool
        True when ``midi2abc`` ran and exited with status 0, False otherwise.

    Failures are printed instead of raised so that a batch conversion keeps
    going with the remaining files.
    """
    command = ["midi2abc", os.fspath(midi_file_path), "-o", os.fspath(abc_file_path)]
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        # midi2abc started but reported a non-zero exit status
        print(f"Error occurred during conversion: {e}")
        return False
    except OSError as e:
        # midi2abc could not be started at all (not installed / not on PATH)
        print(f"Error occurred during conversion: {e}")
        return False
    return True

In [4]:
def convert_midi_to_abc(source_folder, target_folder):
    """Convert every MIDI file under ``source_folder`` to ABC in ``target_folder``.

    The source tree is searched recursively for files ending in ``.mid`` or
    ``.midi`` (any letter case). Each file is converted with ``midi_to_abc``
    and written to the same relative location under ``target_folder`` with
    the extension replaced by ``.abc``; missing directories are created.

    Parameters
    ----------
    source_folder : str or os.PathLike
        Root of the tree that holds the MIDI files.
    target_folder : str or os.PathLike
        Root under which the ABC files are written.
    """
    source_path = Path(source_folder)
    target_path = Path(target_folder)

    # Ensure the target folder exists even when there is nothing to convert
    target_path.mkdir(parents=True, exist_ok=True)

    midi_suffixes = {'.mid', '.midi'}
    # Collect first (sorted for a reproducible processing order) so that files
    # written during conversion can never influence the directory walk.
    midi_paths = sorted(
        candidate
        for candidate in source_path.rglob('*')
        if candidate.is_file() and candidate.suffix.lower() in midi_suffixes
    )

    for midi_path in midi_paths:
        # Mirror the file's position inside the source tree under the target root
        relative_path = midi_path.relative_to(source_path)
        full_target_path = target_path / relative_path.with_suffix('.abc')

        full_target_path.parent.mkdir(parents=True, exist_ok=True)

        midi_to_abc(midi_path, full_target_path)

In [5]:
# Dataset folders, relative to the notebook's working directory:
# source MIDI files, their transposed copies, and the ABC conversions.
(
    scarlatti_midi_root,
    scarlatti_midi_transposed_root,
    scarlatti_abc_root,
) = (
    '../MusicLLM/data/Scarlatti',
    '../MusicLLM/data_transposed/Scarlatti',
    '../MusicLLM/data_abc/Scarlatti',
)

In [None]:
# Alternative tokenizer, kept for reference only:
#tokenizer = REMI()  # using default parameters (constants.py)
# Octuple tokenizer created with no arguments, i.e. its default configuration.
tokenizer = Octuple()
# Folder holding the source Scarlatti MIDI files, as a Path object.
data_path = Path(scarlatti_midi_root)

In [None]:
# Echo the configured MIDI folder as a quick sanity check.
data_path

In [None]:
# Try using mido
from mido import MidiFile, MidiTrack, Message

def transpose_midi(input_file, semitones):
    """Load a MIDI file and return a copy with every note shifted by ``semitones``.

    Parameters
    ----------
    input_file : str or os.PathLike
        Path of the MIDI file to read.
    semitones : int
        Signed pitch offset applied to each note number.

    Returns
    -------
    mido.MidiFile
        New, unsaved file with the same type, timing resolution and track
        layout as the source.

    Notes
    -----
    * ``note_on``, ``note_off`` and ``polytouch`` are shifted together, so a
      note is always released (and its aftertouch applied) at the same pitch
      it was started on.
    * A message whose shifted pitch falls outside 0..127 is dropped. Its delta
      time is added to the next message that is kept, so later events stay at
      their original position in time.
    * All other messages, including meta messages, are carried over unchanged.
    """
    note_message_types = ('note_on', 'note_off', 'polytouch')

    midi = MidiFile(input_file)
    # A bare MidiFile() uses mido's default ticks-per-beat and type; copying
    # them from the source keeps delta times meaning the same thing on save.
    transposed = MidiFile(type=midi.type, ticks_per_beat=midi.ticks_per_beat)

    for track in midi.tracks:
        transposed_track = MidiTrack()
        pending_time = 0  # delta time inherited from messages dropped just before
        for msg in track:
            if not msg.is_meta and msg.type in note_message_types:
                new_note = msg.note + semitones
                # Ensure that the new note is within the MIDI range
                if new_note < 0 or new_note > 127:
                    pending_time += msg.time
                    continue
                new_msg = msg.copy(note=new_note, time=msg.time + pending_time)
            elif pending_time:
                new_msg = msg.copy(time=msg.time + pending_time)
            else:
                new_msg = msg
            pending_time = 0
            transposed_track.append(new_msg)
        transposed.tracks.append(transposed_track)
    return transposed

def transpose_directory(input_dir, output_dir, semitone_offsets=range(-5, 7)):
    """Write transposed copies of every MIDI file found directly in ``input_dir``.

    For each ``.mid`` / ``.midi`` file (any letter case) and each offset, the
    file is transposed with ``transpose_midi`` and saved in ``output_dir`` as
    ``<original stem>#p<offset>.mid``.

    Parameters
    ----------
    input_dir : str
        Folder containing the source MIDI files (not searched recursively).
    output_dir : str
        Folder receiving the transposed files; created when missing.
    semitone_offsets : iterable of int, optional
        Pitch offsets to generate. The default, -5 through +6, covers all
        twelve keys: offset 0 is the untransposed copy (saved with the
        ``#p0`` tag) and the other eleven offsets are the remaining keys.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Sorted so that files are processed in a reproducible order
    for filename in sorted(os.listdir(input_dir)):
        if not filename.lower().endswith(('.mid', '.midi')):
            continue
        original_file = os.path.join(input_dir, filename)
        stem = os.path.splitext(filename)[0]
        for offset in semitone_offsets:
            transposed_midi = transpose_midi(original_file, offset)
            transposed_file_path = os.path.join(output_dir, f"{stem}#p{offset}.mid")
            transposed_midi.save(transposed_file_path)

# Generate the transposed copies of the Scarlatti MIDI files.
transpose_directory(
    input_dir=scarlatti_midi_root,
    output_dir=scarlatti_midi_transposed_root,
)

In [None]:
# Disabled alternative to the mido-based transposition: miditok's
# augment_midi_dataset with pitch offsets only (the velocity and duration
# offset lists are empty), copying the originals to the output folder as well.
# The call is wrapped in a string literal, so nothing in it is executed.
'''
midi_aug_path = Path(scarlatti_midi_transposed_root)

augment_midi_dataset(
    data_path,
    pitch_offsets=[ -5, -4, -3, -2, -1, 1, 2, 3, 4, 5, 6],
    velocity_offsets=[],
    duration_offsets=[],
    out_path=midi_aug_path,
    copy_original_in_new_location=True,
    save_data_aug_report=True
)
'''

In [None]:
# Convert the transposed MIDI files to ABC notation.
convert_midi_to_abc(
    source_folder=scarlatti_midi_transposed_root,
    target_folder=scarlatti_abc_root,
)

In [33]:
def get_files_non(directory):
    """Return the names of the entries in ``directory`` that contain ``#p0``.

    ``#p0`` is the tag ``transpose_directory`` gives to the copy transposed by
    zero semitones, so this selects the untransposed pieces.

    Parameters
    ----------
    directory : str or os.PathLike
        Folder to list (not searched recursively).

    Returns
    -------
    list of str
        Matching entry names (not full paths), sorted alphabetically.
        ``os.listdir`` returns entries in arbitrary order, so sorting keeps
        anything built from this list reproducible between runs and machines.
    """
    return sorted(name for name in os.listdir(directory) if "#p0" in name)

def get_all_files(directory):
    """Return the names of all entries in ``directory``.

    Parameters
    ----------
    directory : str or os.PathLike
        Folder to list (not searched recursively).

    Returns
    -------
    list of str
        Entry names (not full paths), sorted alphabetically. ``os.listdir``
        returns entries in arbitrary order, so sorting keeps anything built
        from this list reproducible between runs and machines.
    """
    return sorted(os.listdir(directory))

def get_abc_data(file_path):
    """Read an ABC file and return its cleaned-up text.

    Every line is stripped of surrounding whitespace, lines that start with
    ``%`` are discarded, and the remaining lines are joined with newlines.
    A backslash at the end of a line is then removed together with the
    newline after it, merging that line with the following one.
    """
    with open(file_path, 'r') as handle:
        stripped_lines = (raw_line.strip() for raw_line in handle)
        kept_lines = [entry for entry in stripped_lines if not entry.startswith('%')]
    joined = "\n".join(kept_lines)
    return joined.replace("\\\n", "")

def ff(file_path):
    """Return the text of ``file_path`` without its ``%`` lines.

    Each line is stripped of surrounding whitespace; lines that then start
    with ``%`` are left out and the rest are joined with newlines.
    """
    with open(file_path, 'r') as handle:
        kept_lines = [
            entry
            for entry in map(str.strip, handle)
            if not entry.startswith('%')
        ]
    return "\n".join(kept_lines)

def get_random_query():
    """Return a randomly chosen request for a Scarlatti-style piece.

    One of twenty fixed phrasings is picked with ``random.choice`` and
    wrapped as ``'Human: <request></s>'``.
    """
    prompts = (
        "Craft a musical arrangement echoing the elegance of Scarlatti.",
        "Develop a piece capturing the essence of Scarlatti's musical language.",
        "Construct a composition reminiscent of Scarlatti's classical style.",
        "Formulate a musical creation inspired by the genius of Scarlatti.",
        "Fashion a piece evoking the spirit of Scarlatti's compositions.",
        "Produce a composition in homage to Scarlatti's timeless style.",
        "Invent a musical work reflecting Scarlatti's signature elegance.",
        "Design a composition modeled after the tradition of Scarlatti.",
        "Conceive a piece that pays tribute to Scarlatti's musical legacy.",
        "Shape a composition in the vein of Scarlatti's classical masterpieces.",
        "Devise a musical arrangement embodying Scarlattiian grace and charm.",
        "Create a piece inspired by the classical structures favored by Scarlatti.",
        "Construct a composition resonating with the melodic brilliance of Scarlatti.",
        "Develop a musical work in the esteemed tradition of Scarlatti's compositions.",
        "Formulate a composition infused with the harmonic richness of Scarlatti's style.",
        "Craft a piece that captures the sophistication and refinement of Scarlatti's music.",
        "Produce a composition that echoes the grace and poise characteristic of Scarlatti.",
        "Design a musical arrangement paying homage to Scarlatti's classical genius.",
        "Invent a piece inspired by the timeless melodies of Scarlatti.",
        "Shape a composition that reflects the enduring legacy of Scarlatti's musical craftsmanship.",
    )
    return f"Human: {random.choice(prompts)}</s>"

In [34]:
# Smoke test: show one randomly chosen prompt in its final "Human: ...</s>" form.
print(get_random_query())

Human: Fashion a piece evoking the spirit of Scarlatti's compositions.</s>


In [40]:
# Use only the ABC files whose name contains "#p0"; switch to the
# commented-out line below to use every file in the folder instead.
scarlatti_files = get_files_non(scarlatti_abc_root)
#scarlatti_files = get_all_files(scarlatti_abc_root)

In [41]:
# Column layout of the instruction dataset.
column_names = ['instruction', 'input', 'output', 'src']

# Counters that are only reported by the summary print of the
# dataset-building cell; `missing` is not read anywhere in this notebook.
has_title = no_title = 0
missing = []

# Start from a frame that has the columns but no rows, and display it.
empty_df = pd.DataFrame(columns=column_names)
empty_df

Unnamed: 0,instruction,input,output,src


In [42]:
# Build the instruction dataset: one row per ABC file that has any content.
created = 0
records = []

for n in scarlatti_files:
    # Read from the configured ABC folder instead of a second hard-coded copy of its path
    data = get_abc_data(os.path.join(scarlatti_abc_root, n))

    # Row layout:  instruction         input    output                  src
    #              "Human: ...</s>"    (empty)  "Assistant: ...</s>"    source URL
    if len(data.strip()) > 0:
        records.append([
            get_random_query(),
            '',
            "Assistant: " + data + "</s>",
            "https://www.kunstderfuge.com/scarlatti.htm",
        ])
        created += 1
    else:
        print("Skipping file: ", n)

# Create the frame once from the collected rows. Growing it with pd.concat
# inside the loop copies all earlier rows on every iteration, and appending to
# the existing frame would duplicate every row when this cell is run again.
empty_df = pd.DataFrame(records, columns=column_names)

print("Match: ", has_title, " no match: ", no_title)
print("Files created: ", created)

Match:  0  no match:  0
Files created:  555


In [43]:
# Inspect the first generated row as a sanity check
# (raises IndexError when no rows were created).
empty_df.iloc[0]

instruction    Human: Design a musical arrangement paying hom...
input                                                           
output         Assistant: X: 1\nT: from ../MusicLLM/data_tran...
src                   https://www.kunstderfuge.com/scarlatti.htm
Name: 0, dtype: object

In [44]:
# Save the dataset as CSV with every field quoted and without the index column.
# Earlier export of the transposed variant, kept for reference:
#empty_df.to_csv('scarlatti_source_transposed.csv', mode='w', quoting=csv.QUOTE_ALL, index=False)
empty_df.to_csv(
    'scarlatti_source.csv',
    index=False,
    quoting=csv.QUOTE_ALL,
)

In [None]:
# Show the current working directory; all dataset paths above are relative to it.
!pwd
