In [11]:
import numpy as np
import pandas as pd
import os
import librosa
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [12]:
def list_dataset_files(directory, skip_suffix="midi"):
    """Recursively list every file under ``directory`` whose name does not end in ``skip_suffix``.

    Parameters
    ----------
    directory : str
        Root folder to walk. It is converted to an absolute path first.
    skip_suffix : str, default "midi"
        Files whose name ends with this string are left out.

    Returns
    -------
    list[tuple[str, str, str]]
        One ``(full_path, file_name, parent_dir)`` tuple per kept file.

    Notes
    -----
    ``os.walk`` yields entries in whatever order the file system returns them.
    The processing loop further down pairs neighbouring entries (even index =
    annotation ``.txt``, odd index = ``.wav``), so directories and file names are
    sorted here to make that pairing deterministic: for a shared stem,
    ``stem.txt`` always sorts immediately before ``stem.wav``.
    """
    collected = []
    for root, dirs, files in os.walk(os.path.abspath(directory)):
        dirs.sort()  # in-place sort => os.walk descends into sub-folders in a stable order
        for file in sorted(files):
            # Skip the raw MIDI files; only the .txt annotations and .wav audio are used.
            if not file.endswith(skip_suffix):
                collected.append((os.path.join(root, file), file, root))
    return collected


# directory =  r"G:\ML_Datasets\IS460_proj\maestro-v3.0.0"  #Set your directory here, add r in front if need to convert to raw string
directory =  r"/mnt/g/ML_Datasets/IS460_proj/maestro-v3.0.0"
# directory = r"G:\ML_Datasets\IS460_proj\maps\ENSTDkCl"
#directory = #r"G:\ML_Datasets\IS460_proj\maps\AkPnBcht"
#directory = r"G:\ML_Datasets\IS460_proj\maps\AkPnBcht\AkPnBcht\ISOL"

osdirectory = directory

# NOTE(review): any other non-MIDI file under the root (e.g. dataset metadata) is also
# collected and would shift the .txt/.wav pairing -- confirm the folder only holds
# .midi/.txt/.wav files.
filelist = list_dataset_files(osdirectory)


# filelist = filelist[1276:1914]

In [14]:
# Spot-check one collected entry: a (full_path, file_name, parent_dir) tuple.
filelist[1]

('/mnt/g/ML_Datasets/IS460_proj/maestro-v3.0.0/2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.txt',
 'MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.txt',
 '/mnt/g/ML_Datasets/IS460_proj/maestro-v3.0.0/2004')

In [7]:
# Full path (tuple element 0) of the entry at index 264 (132*2).
# NOTE(review): the recorded output below is a Windows-style path while `directory`
# currently points at /mnt/g/..., so this output looks stale -- re-run to refresh.
filelist[132*2][0]

'G:\\ML_Datasets\\IS460_proj\\maestro-v3.0.0\\2013\\ORIG-MIDI_02_7_7_13_Group__MID--AUDIO_19_R1_2013_wav--1.txt'

In [8]:
# Opens the entry at index 265 (132*2 + 1) in text mode.
# NOTE(review): the handle is never closed in this cell, and `txtfile` is rebound inside
# the processing loop. The loop also treats odd indices as .wav paths, so confirm this
# is the file that was meant to be opened (or delete this exploratory cell).
txtfile = open(filelist[132*2+1][0], "r")

In [9]:
# Feature-extraction parameters shared by the processing cell.
data_frame_size = 43  # spectrogram frames grouped into one dataset row
no_notes = 88         # number of label columns (one per key)
hop_len = 512         # hop length, in samples, between successive spectrogram frames

# Unused legacy settings, kept commented out for reference:
# n_bins_in = 252
# bins_octaves_in = 36
# win_step = 0.01
# num_cep_def = 40
# num_filt_def = 40
# length_per_file = 4000000

In [10]:
def load_note_labels(txt_path, timing_arr, n_notes):
    """Build a per-frame piano-roll label matrix from an annotation text file.

    Each non-blank line of the file is whitespace-split into
    ``onset_time offset_time pitch``; a header line whose first token is
    ``"OnsetTime"`` is skipped.

    Parameters
    ----------
    txt_path : str
        Path of the annotation file.
    timing_arr : np.ndarray
        1-D array holding the end time (seconds) of every spectrogram frame.
    n_notes : int
        Number of label columns.

    Returns
    -------
    np.ndarray
        ``(len(timing_arr), n_notes)`` array with 1 where a key is sounding.

    Raises
    ------
    FileNotFoundError
        If ``txt_path`` does not exist.
    """
    labels = np.zeros((timing_arr.size, n_notes))
    # `with` guarantees the handle is closed even if a line fails to parse.
    with open(txt_path, "r") as txtfile:
        for line in txtfile:
            line_split = line.strip().split()
            if not line_split or line_split[0] == "OnsetTime":  # blank line or header
                continue

            start_time = float(line_split[0])
            end_time = float(line_split[1])
            # Pitch 21 maps to column 0 (presumably MIDI 21 = lowest piano key -- TODO confirm).
            key_no = int(line_split[2]) - 21
            if not 0 <= key_no < n_notes:
                # A negative index would silently wrap to a different key and an index
                # >= n_notes would raise, so skip pitches outside the label range.
                print(f"Skipping out-of-range pitch {line_split[2]} in {txt_path}")
                continue

            index_min = np.where(timing_arr >= start_time)[0]      # frames ending at/after the onset
            index_max = np.where(timing_arr > end_time - 0.01)[0]  # frames ending after the offset (10 ms tolerance)
            if index_min.size > 0 and index_max.size > 0:
                labels[index_min[0]:index_max[0], key_no] = 1
            else:
                print(f"IndexError: Skipping iteration for {txt_path, start_time, end_time}")
    return labels


def split_into_chunks(melspecgram, labels, chunk_len):
    """Cut a spectrogram and its labels into consecutive, non-overlapping chunks.

    Parameters
    ----------
    melspecgram : np.ndarray
        ``(no_frames, n_mels)`` spectrogram, time on axis 0.
    labels : np.ndarray
        ``(no_frames, n_notes)`` per-frame labels.
    chunk_len : int
        Number of frames per chunk.

    Returns
    -------
    list[tuple[int, np.ndarray, np.ndarray]]
        ``(subset_number, frames, label_vector)`` per chunk. ``frames`` always has
        ``chunk_len`` rows (the last chunk is zero-padded); ``label_vector`` has one
        0/1 entry per key, set when the key sounds in any frame of the chunk.
    """
    no_frames = melspecgram.shape[0]
    chunks = []
    for start_idx in range(0, no_frames, chunk_len):
        end_idx = min(start_idx + chunk_len, no_frames)
        frames = melspecgram[start_idx:end_idx, :]
        pad_size = chunk_len - (end_idx - start_idx)
        if pad_size > 0:  # only the final, partial chunk needs padding
            frames = np.vstack((frames, np.zeros((pad_size, melspecgram.shape[1]))))
        # Zero padding cannot switch a key on, so the labels are reduced without padding.
        label_vector = np.where(np.sum(labels[start_idx:end_idx, :], axis=0) > 0, 1, 0)
        chunks.append((start_idx // chunk_len, frames, label_vector))
    return chunks


n_mels = 128  # mel bands per spectrogram frame

# Rows are collected in a plain list and turned into a DataFrame once at the end;
# appending with df.loc[len(df)] inside the loop re-allocates the frame on every row.
records = []

# filelist is expected to alternate (annotation .txt, audio .wav). Stopping at
# len - 1 keeps a dangling, unpaired last entry from indexing past the list.
for file in tqdm(range(0, len(filelist) - 1, 2)):
    txtfile_path = filelist[file][0]
    wavfile_path = filelist[file+1][0]
    file_stem = filelist[file][1][:-4]      # file name without its 4-character extension
    file_class = file_stem.split("_")[1]    # second underscore-separated token of the name
    wavrate = np.nan                        # stays NaN if the audio cannot be loaded

    try:
        wavsig, wavrate = librosa.load(path = wavfile_path, mono=True,  dtype=np.float32)
        melspecgram = librosa.feature.melspectrogram(y=wavsig, sr= wavrate, n_mels = n_mels, hop_length = hop_len, n_fft = 2048)
        # power_to_db(ref=np.max) puts the peak at 0 dB (floor at -80 dB with librosa's
        # default top_db), so (+80)/80 rescales to [0, 1]; transpose => (frames, n_mels).
        melspecgram = ((librosa.power_to_db(melspecgram, ref=np.max)+80)/80).transpose()

        win_len = hop_len/float(wavrate)   # seconds between successive frames
        no_frames = melspecgram.shape[0]   # one row per time step
        timing_arr = np.arange(1, no_frames + 1) * win_len  # element i is (i+1)*win_len

        labels = load_note_labels(txtfile_path, timing_arr, no_notes)

        for subset_no, temp_frame, temp_labels in split_into_chunks(melspecgram, labels, data_frame_size):
            records.append({
                "File Name" : file_stem,
                "Subset Number" : int(subset_no),
                "Sampling Rate": wavrate,
                "Class": file_class,
                "Midi Labels": temp_labels,
                "Wav Array" : temp_frame,
                "Status" : True
            })

    except FileNotFoundError as fnf_error:
        print(f"FileNotFoundError processing {filelist[file][0]}: {fnf_error}")
        # Record the failure without reading loop state from a previous file:
        # subset number is fixed at 0 and the sampling rate is NaN unless the
        # audio was loaded before the error occurred.
        records.append({
            "File Name" : file_stem,
            "Subset Number" : 0,
            "Sampling Rate": wavrate,
            "Class": file_class,
            "Midi Labels": np.nan,
            "Wav Array" : np.nan,
            "Status" : False
        })

df = pd.DataFrame(records, columns = ["File Name",
                                      "Subset Number",
                                      "Sampling Rate",
                                      "Class",
                                      "Midi Labels",
                                      "Wav Array",
                                      "Status"])

print("flag 10")

100%|██████████| 319/319 [44:25<00:00,  8.36s/it]  

flag 10





In [11]:
# Shape of the spectrogram chunk stored in the row labelled 0.
df.loc[0, "Wav Array"].shape

(43, 128)

In [12]:
# df.to_pickle("Vers6_ISOL_0_736_ENSTDKAM.pkl")
# Persist the processed rows to a pickle in the current working directory.
# NOTE(review): the file name refers to entries 1276-1914, which matches the
# `filelist[1276:1914]` slice that is commented out in the file-listing cell --
# confirm which slice was active when this was written, or derive the name from it.
df.to_pickle("Maestro_1276_1914_43frames_to1.pkl")

In [52]:
# Folder holding the previously processed pickles.
# NOTE(review): hard-coded absolute Windows path; the `\`-joined name below will not
# resolve on the /mnt/g/... (POSIX) setup used earlier in this notebook.
path = r"C:\Users\melov\OneDrive - Singapore Management University\SMU General\Y4S2\IS460\project\dataprocessing"
# Suffix passed to split_songs as `source`; it is appended to each song stem there.
source1 = "_ENSTDkAm"
# NOTE(review): this rebinds `df`, replacing the frame built by the processing cell with a
# different dataset. Only unpickle files you produced yourself (pickle can execute code).
df = pd.read_pickle(path + r"\Vers6_ENSTDKAM_2205frames_to1.pkl")

In [53]:
def split_songs(df, source, test_size, seed):
    """Split rows into train/test by song, stratified on the song's class.

    All rows of one file end up on the same side of the split, so chunks of a
    single recording never leak between train and test.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the string columns ``"File Name"`` and ``"Class"``.
    source : str
        Suffix appended to each song stem to rebuild the file names to select
        (the stem is the file name with its last ``_``-separated token removed).
    test_size : float or int
        Forwarded to ``train_test_split``.
    seed : int
        Forwarded to ``train_test_split`` as ``random_state``.

    Returns
    -------
    tuple[pd.DataFrame, pd.DataFrame]
        ``(train, test)`` subsets of ``df``.
    """
    # One row per file with its class (the class is taken as the max over the file's rows).
    song_df = df.groupby(['File Name'])[['Class']].max().reset_index()
    # Drop the trailing "_<token>" so the name identifies the song rather than the source.
    song_df['File Name'] = song_df['File Name'].apply(lambda name: "_".join(name.split("_")[:-1]))
    # Keep only the part of the class label before the first "-".
    song_df['Class'] = song_df['Class'].apply(lambda label: label.split("-")[0])
    # De-duplicate by song stem. Assigning `.unique()` back to the column (as before) is a
    # no-op when stems are already unique and a length-mismatch ValueError otherwise.
    song_df = song_df.drop_duplicates(subset='File Name').reset_index(drop=True)

    X_train, X_test, _, _ = train_test_split(song_df[['File Name']], song_df['Class'], stratify=song_df['Class'], test_size=test_size, random_state=seed)

    source_trainfiles = [stem + source for stem in X_train['File Name']]
    source_testfiles = [stem + source for stem in X_test['File Name']]

    train = df[df['File Name'].isin(source_trainfiles)]
    test = df[df['File Name'].isin(source_testfiles)]

    print(df['File Name'].unique().shape, train['File Name'].unique().shape, test['File Name'].unique().shape)
    return train, test

In [54]:
# Split by song (80/20, fixed seed), then pickle each subset in the working directory.
train, test = split_songs(df=df, source=source1, test_size=0.2, seed=1)
for subset, pkl_name in (
    (train, "ENSTDKAM_ver6_2205frame_to1_train.pkl"),
    (test, "ENSTDKAM_ver6_2205frame_to1_test.pkl"),
):
    subset.to_pickle(pkl_name)

(3735,) (2988,) (747,)
