In [1]:
import glob
import os
import librosa
import numpy as np
import pretty_midi

In [5]:
# Load the validation-split feature array (CQT features, judging by the file
# name) and display its shape. numpy is already imported as `np` in the
# notebook's import cell, so it is not re-imported here.
data = np.load('/Users/ronit/Documents/AIML_Project/DataSet_P/Valid/X_final_CQT_.npy')
data.shape

(1998, 38, 1, 252)

In [6]:
# Load the validation-split label array (judging by the file name) and display
# its shape. numpy is already imported as `np` in the notebook's import cell,
# so it is not re-imported here.
data = np.load('/Users/ronit/Documents/AIML_Project/DataSet_P/Valid/Y_final_Label_.npy')
data.shape

(1998, 38, 1, 88)

In [7]:
# Load the test-split feature array (CQT features, judging by the file name)
# and display its shape. numpy is already imported as `np` in the notebook's
# import cell, so it is not re-imported here.
data = np.load('/Users/ronit/Documents/AIML_Project/DataSet_P/Test/X_final_CQT_.npy')
data.shape

(15219, 38, 1, 252)

In [8]:
# Load the test-split label array (judging by the file name) and display its
# shape. numpy is already imported as `np` in the notebook's import cell, so
# it is not re-imported here.
data = np.load('/Users/ronit/Documents/AIML_Project/DataSet_P/Test/Y_final_Label_.npy')
data.shape

(15219, 38, 1, 88)

In [3]:
# Constants

# MIDI note numbers of the lowest (A0) and highest (C8) notes that are kept
# when slicing the piano roll.
MIDInotes_Range = [librosa.note_to_midi('A0'), librosa.note_to_midi('C8')]

# Audio loading and Constant-Q Transform settings.
Sample_Rate = 16000                    # passed as `sr` to librosa.load / librosa.cqt
Hop_length = 512                       # passed as `hop_length` to librosa.cqt
Bins_per_octave = 36
n_octaves = 7
n_Bins = n_octaves * Bins_per_octave   # total CQT bins: 7 * 36 = 252
spec_fmin = librosa.note_to_hz('A0')   # passed as `fmin` to librosa.cqt

# Settings that are not referenced by the cells visible in this part of the
# notebook. NOTE(review): presumably the validation split fraction and the
# windowing parameters of the model input -- confirm against their users.
val_rate = 1 / 7
win_width = 32
kernel_size = 7
overlap = True

# Allow pretty_midi to read MIDI files with absurdly high tick rates.
# Useful for reading the MAPS dataset.
# https://github.com/craffel/pretty-midi/issues/112
pretty_midi.pretty_midi.MAX_TICK = 1e10

In [4]:
def MIDI_to_mat(MIDI_path_train, length, len_of_CQT, Range_MIDI=MIDInotes_Range):
    """Build a boolean note-activity matrix from the first instrument of a MIDI file.

    Args:
        MIDI_path_train: Path of the MIDI file to read.
        length: Length of the matching audio signal (the caller passes
            ``len(x)``, i.e. the number of samples loaded at ``Sample_Rate``).
        len_of_CQT: Number of frames of the matching CQT; the piano roll is
            sampled so that the whole audio spans this many columns and is
            truncated to at most this many columns.
        Range_MIDI: Two-element sequence ``[lowest, highest]`` of MIDI note
            numbers; both ends are included in the result.

    Returns:
        Boolean array with one row per note in ``Range_MIDI`` and at most
        ``len_of_CQT`` columns; an entry is True where the piano roll value
        is greater than zero. Only ``instruments[0]`` is considered.
    """
    first_instrument = pretty_midi.PrettyMIDI(MIDI_path_train).instruments[0]

    # Piano-roll columns per second, chosen so that `length` audio samples at
    # Sample_Rate map onto `len_of_CQT` columns.
    frames_per_second = len_of_CQT * Sample_Rate / length
    roll = first_instrument.get_piano_roll(fs=frames_per_second)

    lowest_note = Range_MIDI[0]
    highest_note = Range_MIDI[1]
    # `+ 1` makes the highest note inclusive; the column slice caps the frame count.
    active = roll[lowest_note:highest_note + 1, :len_of_CQT] > 0
    return active

In [18]:
# Dataset splits (sub-directories of the dataset root) to inspect.
dataset = ['Test']

for data in dataset:
    # Directory holding the paired <name>.wav / <name>.mid files of this split.
    S_Path = os.path.join('/Users/ronit/Documents/AIML_Project/DataSet', data)
    print(S_Path)

    # os.listdir returns entries in arbitrary order: list the directory once
    # and sort the stems so recordings are processed in a reproducible order.
    entries = os.listdir(S_Path)
    WAVS = sorted(file[:-4] for file in entries if file.endswith('.wav'))
    MIDI = sorted(file[:-4] for file in entries if file.endswith('.mid'))
    midi_stems = set(MIDI)  # constant-time membership test for the pairing check

    # Every WAV must have a MIDI file with the same stem.
    pairs = []
    for file in WAVS:
        assert file in midi_stems, 'Both the MIDI and WAV files not found'
        pairs.append((file + '.wav', file + '.mid'))

    for wav, midi in pairs:
        wav_path = os.path.join(S_Path, wav)
        x, sr = librosa.load(wav_path, sr=Sample_Rate)

        # Load the audio from specific path and compute Constant-Q Transform.
        CQT_file = librosa.cqt(x,
                               sr=Sample_Rate,
                               fmin=spec_fmin,
                               hop_length=Hop_length,
                               n_bins=n_Bins,
                               bins_per_octave=Bins_per_octave,
                               scale=False
                               )
        # Magnitude of the (complex) Constant-Q Transform as a 2D NumPy array.
        CQT_abs = np.abs(CQT_file)
        # Convert the magnitudes to a logarithmic (dB) scale and transpose so
        # that rows are frames and columns are frequency bins.
        CQT = np.transpose(librosa.amplitude_to_db(CQT_abs))

        print("CQT.shape", CQT.shape)

        # Ground-truth note activity, transposed to (frames, notes).
        midi_path = os.path.join(S_Path, midi)
        Ground_Truth_mat = MIDI_to_mat(midi_path, len(x), CQT.shape[0])
        MIDI_Train = np.transpose(Ground_Truth_mat)

        print(MIDI_Train.shape)

        # MIDI_to_mat never returns more frames than the CQT has, so only the
        # "MIDI shorter than CQT" case needs handling: cut the CQT to match.
        if MIDI_Train.shape[0] < CQT.shape[0]:
            print("YUP")
            CQT = CQT[:MIDI_Train.shape[0], :]
        print("CQT.shape", CQT.shape)
        print(MIDI_Train.shape, end='\n\n')

        # Features and labels must be frame-aligned from here on.
        assert CQT.shape[0] == MIDI_Train.shape[0], 'CQT and MIDI frame counts differ'
        
        
            

/Users/ronit/Documents/AIML_Project/DataSet/Test
CQT.shape (23424, 252)
(23376, 88)
YUP
CQT.shape (23376, 252)
(23376, 88)

CQT.shape (16379, 252)
(16331, 88)
YUP
CQT.shape (16331, 252)
(16331, 88)

CQT.shape (10920, 252)
(10873, 88)
YUP
CQT.shape (10873, 252)
(10873, 88)

CQT.shape (8148, 252)
(8100, 88)
YUP
CQT.shape (8100, 252)
(8100, 88)

CQT.shape (2453, 252)
(2406, 88)
YUP
CQT.shape (2406, 252)
(2406, 88)

CQT.shape (4270, 252)
(4223, 88)
YUP
CQT.shape (4223, 252)
(4223, 88)

CQT.shape (2670, 252)
(2623, 88)
YUP
CQT.shape (2623, 252)
(2623, 88)

CQT.shape (3122, 252)
(3075, 88)
YUP
CQT.shape (3075, 252)
(3075, 88)

CQT.shape (8357, 252)
(8310, 88)
YUP
CQT.shape (8310, 252)
(8310, 88)

CQT.shape (2154, 252)
(2107, 88)
YUP
CQT.shape (2107, 252)
(2107, 88)

CQT.shape (10267, 252)
(10220, 88)
YUP
CQT.shape (10220, 252)
(10220, 88)

CQT.shape (16385, 252)
(16338, 88)
YUP
CQT.shape (16338, 252)
(16338, 88)

CQT.shape (6051, 252)
(6004, 88)
YUP
CQT.shape (6004, 252)
(6004, 88)

CQT.shap