In [23]:
import librosa
import madmom
from madmom.features.beats import RNNBeatProcessor, MultiModelSelectionProcessor
from scipy import signal
import numpy as np
import numpy as np2

In [24]:
def peak_picking(beat_times, total_samples, kernel_size, offset):

    # smoothing the beat function
    cut_off_norm = len(beat_times)/total_samples*100/2
    b, a = signal.butter(1, cut_off_norm)
    beat_times = signal.filtfilt(b, a, beat_times)

    # creating a list of samples for the rnn beats
    beat_samples = np2.linspace(0, total_samples, len(beat_times), endpoint=True, dtype=int)

    n_t_medians = signal.medfilt(beat_times, kernel_size=kernel_size)
    offset = 0.01
    peaks = []

    for i in range(len(beat_times)-1):
        if beat_times[i] > 0:
            if beat_times[i] > beat_times[i-1]:
                if beat_times[i] > beat_times[i+1]:
                    if beat_times[i] > (n_t_medians[i] + offset):
                        peaks.append(int(beat_samples[i]))
    return peaks

In [25]:
def analyze(data_audio, sample_rate):

    data_result = {}

    # sample rate
    data_result['sample_rate'] = sample_rate

    # getting duration in seconds
    data_result['duration'] = librosa.get_duration(y=data_audio, sr=sample_rate)


    rnn_processor = RNNBeatProcessor(post_processor=None)
    predictions = rnn_processor(data_audio)
    mm_processor = MultiModelSelectionProcessor(num_ref_predictions=None)
    beats = mm_processor(predictions)

    data_result['beat_samples'] = peak_picking(beats, len(data_audio), 5, 0.01)

    if len(data_result['beat_samples']) < 3:
        data_result['beat_samples'] = peak_picking(beats, len(data_audio), 25, 0.01)

    if data_result['beat_samples'] == []:
        data_result['beat_samples'] = [0]

    data_result['number_of_beats'] = len(data_result['beat_samples'])

    return data_result

In [26]:
def preprocessing_audio_in(rate, data, fft_size = 16384):
    """
    Convert the input audio sampled at the input rate
    to a list of HPCP vectors computed using the input fft_size
    (effectively outputing int(len(data)/fft_size)) HPCP vectors
    """
    output_samples = []
    for i in range(int(len(data)/fft_size)):
        ###Computing the DFT by taking a fragment of the audio 
        dft = np.fft.fft(data[fft_size*i:fft_size*(i+1)])
        ### Computiong the Harmonic pitch class profile
        HPCP = []
        f_ref = 130.80
        M = [round(12*np.log2(rate*l/(fft_size*f_ref))) %12 if l > 0 else -1 for l in range(int(fft_size/2))]
        M = np.array(M)
        for p in range(12):
            val = np.sum((np.absolute(dft[:int(fft_size/2)])**2)* (M == p).astype(int) )
            HPCP.append(val)
        HPCP = [x/sum(HPCP) for x in HPCP]
        output_sample = HPCP
        output_samples.append(output_sample)
     
    return output_samples

In [27]:
'''def analyze_file(path):
    data_audio, sample_rate = librosa.load(path, sr=44100)
    return analyze(data_audio, sample_rate)'''

'def analyze_file(path):\n    data_audio, sample_rate = librosa.load(path, sr=44100)\n    return analyze(data_audio, sample_rate)'

In [28]:
import tensorflow as tf

In [29]:
new_model = tf.keras.models.load_model('DLCords1.h5')
new_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 35)                455       
                                                                 
 dense_4 (Dense)             (None, 35)                1260      
                                                                 
 dense_5 (Dense)             (None, 10)                360       
                                                                 
Total params: 2,075
Trainable params: 2,075
Non-trainable params: 0
_________________________________________________________________


In [30]:
def convert_to_wav_and_filter_audio(path):
    split_cmd = "spleeter separate -p spleeter:2stems -o separate/ "+path+""
    #print(split_cmd)
    import os;os.system(split_cmd)

#convert_to_wav_and_filter_audio("quien-de-los-dos-sera.mp3")

In [31]:
def predict_chords(path_of_chords):
    data_audio, sample_rate = librosa.load(path_of_chords, sr=44100)
    samples_for_beat = analyze(data_audio, sample_rate)['beat_samples']
    seconds = []
    secondFinalOfChord = int()
    data_divide_in_chords = []
    
    for i in range(len(samples_for_beat)):
        if(i== len(samples_for_beat)-1):
            data_divide_in_chords.append(data_audio[samples_for_beat[i]:len(data_audio)])
        else:
            data_divide_in_chords.append(data_audio[samples_for_beat[i]:samples_for_beat[i+1]])
            
    pitchs_of_all_data = []
          
    for i in data_divide_in_chords:
        secondFinalOfChord = secondFinalOfChord  + int(len(i))
        seconds.append(secondFinalOfChord/44100)        
        pitch_s = preprocessing_audio_in(rate=sample_rate, data=i)
        if(len(pitch_s)>0):
            pitchs_of_all_data.append(pitch_s[0])#Solo enviamos el primer pitch
        
    #a1_data, a1_rate = librosa.load(path_of_chords='C:\\Users\\UPIIZ 35\\Documents\\DLChordsNotebook\\data\\Guitar_Only\\a\\a1.wav', sr=44100)
    #pitch_clean = preprocessing_audio_in(a1_rate, a1_data)
    #print(pitchs_of_all_data)

    prediction = new_model.predict(pitchs_of_all_data)
    prediction = np.argmax(prediction, axis=1).astype(int)
    
    labels_dict_reverse = {
    0:'a',
    1:'am',
    2:'bm',
    3:'c',
    4:'d',
    5:'dm',
    6:'e',
    7:'em',
    8:'f',
    9:'g'
    }

    prediction_in_string = []
    for i in prediction:   
        prediction_in_string.append(labels_dict_reverse[i])   
    
    #print(">>> ", seconds)
    #print(prediction, prediction_in_string)


    class Chord:
        def __init__(self, chord_result, time_init, time_final):
            self.chord_result = chord_result
            self.time_init = time_init
            self.time_final = time_final
    
    chords_objects = []

    for i in range(len(prediction_in_string)):
        if(i==0):
            chords_objects.append(Chord(str(prediction_in_string[i]), 0.00, round(seconds[0], 2)))
        else:
            chords_objects.append(Chord(str(prediction_in_string[i]), round(seconds[i-1], 2), round(seconds[i],2)))
           
    return chords_objects

In [32]:
def predict_song(path):
    print("Convirtiendo y filtrando audio...")
    convert_to_wav_and_filter_audio(path)#convertimos audio y filtramos

    path_whitout_extension = path.rsplit('.', 1)[0]
    #print(path_whitout_extension)
    path_of_chords = "separate\\" + path_whitout_extension + "\\accompaniment.wav"#ruta de archivo con extension .wav, solo con la melodia, sin voz.
    #print(path_of_chords)

    print("Prediccion de acordes en curso...")
    return predict_chords(path_of_chords)#Predecir acordes


In [33]:

chords_objects = predict_song("amarte-a-la-antigua.wav")
for chord_object in chords_objects:
    if(len(chord_object.chord_result)==2):
        print("CHORDISTO>>", chord_object.chord_result,"    TIEMPOS>> ", chord_object.time_init, "   -   ", chord_object.time_final)
    else:
        print("CHORDISTO>>", chord_object.chord_result,"     TIEMPOS>> ", chord_object.time_init, "   -   ", chord_object.time_final)

Convirtiendo y filtrando audio...
Prediccion de acordes en curso...


  HPCP = [x/sum(HPCP) for x in HPCP]


CHORDISTO>> am     TIEMPOS>>  0.0    -    0.49
CHORDISTO>> f      TIEMPOS>>  0.49    -    1.0
CHORDISTO>> dm     TIEMPOS>>  1.0    -    1.5
CHORDISTO>> f      TIEMPOS>>  1.5    -    1.9
CHORDISTO>> g      TIEMPOS>>  1.9    -    2.37
CHORDISTO>> em     TIEMPOS>>  2.37    -    2.85
CHORDISTO>> dm     TIEMPOS>>  2.85    -    3.17
CHORDISTO>> a      TIEMPOS>>  3.17    -    3.81
CHORDISTO>> f      TIEMPOS>>  3.81    -    4.46
CHORDISTO>> f      TIEMPOS>>  4.46    -    4.94
CHORDISTO>> em     TIEMPOS>>  4.94    -    5.41
CHORDISTO>> em     TIEMPOS>>  5.41    -    5.75
CHORDISTO>> g      TIEMPOS>>  5.75    -    7.04
CHORDISTO>> g      TIEMPOS>>  7.04    -    8.95
CHORDISTO>> g      TIEMPOS>>  8.95    -    9.95
CHORDISTO>> d      TIEMPOS>>  9.95    -    10.41
CHORDISTO>> d      TIEMPOS>>  10.41    -    10.86
CHORDISTO>> em     TIEMPOS>>  10.86    -    11.8
CHORDISTO>> em     TIEMPOS>>  11.8    -    12.72
CHORDISTO>> c      TIEMPOS>>  12.72    -    13.69
CHORDISTO>> c      TIEMPOS>>  13.69    -