In [122]:
import csv
import pandas as pd
import pickle
import soundfile as sf
import librosa
import numpy as np

# Tolerance used when matching a detected peak to an annotated drop: a peak
# counts as a hit when its time is within +/- THRESHOLD of the drop time.
# Presumably seconds, the same unit as the peak times -- TODO confirm.
THRESHOLD = 7.5
# Width, in MFCC frames, of the feature window cut around each peak
# (SAMPLE_LENGTH // 2 frames on either side of the peak frame).
SAMPLE_LENGTH = 1000

# Hop between consecutive MFCC frames, in milliseconds (converted to samples
# as STRIDE_SIZE / 1000 * sample_rate).
STRIDE_SIZE = 10.
# Analysis window (FFT size) of one MFCC frame, in milliseconds.
FRAME_SIZE = 25.
# Number of MFCC coefficients requested per frame.
N_MFCC = 16

from pathlib import Path
# Songs are read from a fixed folder under the current user's home directory.
home = str(Path.home())
SONG_DIR = home + "/Downloads/songdata_90/songdata/"

def get_mfcc_features(song_data, sample_rate, stride_size = STRIDE_SIZE, frame_size = FRAME_SIZE):
    """Compute MFCC features for an audio signal.

    Args:
        song_data: audio samples, handed to librosa as ``y``.
        sample_rate: sampling rate of ``song_data`` in Hz.
        stride_size: hop between consecutive frames, in milliseconds.
        frame_size: analysis window (FFT size), in milliseconds.

    Returns:
        The array produced by ``librosa.feature.mfcc`` with ``N_MFCC``
        coefficients per frame.
    """
    # Use the arguments rather than the module constants so that callers
    # overriding stride_size / frame_size actually get what they asked for.
    hop_length = int(stride_size / 1000. * sample_rate)
    n_fft = int(frame_size / 1000. * sample_rate)
    # Audio and sample rate are passed by keyword: recent librosa releases
    # made them keyword-only, and keywords also work on older releases.
    mfccs = librosa.feature.mfcc(y=song_data,
                                 sr=sample_rate,
                                 n_mfcc=N_MFCC,
                                 hop_length=hop_length,
                                 n_fft=n_fft)
    return mfccs

def load_song(song_file):
    """Read an audio file and return its MFCC features, one row per frame.

    Args:
        song_file: path of the audio file to read with ``soundfile``.

    Returns:
        A 2-D numpy array: the transpose of what ``get_mfcc_features``
        returns, so that the first axis indexes frames.
    """
    data, samplerate = sf.read(song_file)
    # soundfile returns a 1-D array for mono files and (frames, channels)
    # otherwise; only the multi-channel case needs the first channel picked.
    if data.ndim > 1:
        data = data[:, 0]
    mfccs = get_mfcc_features(data, samplerate)
    return np.asarray(mfccs).T

def _peak_windows(mfccs, peaks):
    """Cut one fixed-size MFCC window around every peak that fits in the song.

    Args:
        mfccs: 2-D array of MFCC frames (frames on the first axis).
        peaks: iterable of peak records whose first element is the peak time.

    Returns:
        A list of ``mfccs`` slices, each ``2 * (SAMPLE_LENGTH // 2)`` frames
        long. Peaks whose window would start before the first frame or end
        after the last one are skipped: a negative start index would wrap
        around to the end of the array and a late stop would yield a short
        window, so neither can produce a valid fixed-size sample.
    """
    half = SAMPLE_LENGTH // 2
    windows = []
    for p in peaks:
        # Convert the peak time to a frame index (times are scaled by the
        # stride expressed in seconds).
        n = int((p[0] - (FRAME_SIZE/1000.)) / (STRIDE_SIZE/1000.))
        start, stop = n - half, n + half
        if start < 0 or stop > len(mfccs):
            continue
        windows.append(mfccs[start:stop, :])
    return windows


def peaks_to_windows_flat(song_name, good_peaks, bad_peaks):
    """Return the MFCC windows around good and bad peaks, concatenated by frame.

    Args:
        song_name: path of the song file, passed to ``load_song``.
        good_peaks: peak records (time first) that matched a drop.
        bad_peaks: peak records (time first) that did not match a drop.

    Returns:
        ``(good_samples, bad_samples)``: two 2-D arrays in which the windows
        of each group are stacked end to end along the frame axis. A group
        with no usable peak yields an array with zero rows instead of raising.
    """
    mfccs = load_song(song_name)

    def _flatten(peaks):
        windows = _peak_windows(mfccs, peaks)
        if not windows:
            # np.concatenate refuses an empty list; return an empty result
            # with the right number of feature columns instead.
            return np.empty((0, mfccs.shape[1]), dtype=mfccs.dtype)
        return np.concatenate(windows)

    return _flatten(good_peaks), _flatten(bad_peaks)


def peaks_to_windows_mat(song_name, good_peaks, bad_peaks):
    """Return the MFCC windows around good and bad peaks as stacked matrices.

    Args:
        song_name: path of the song file, passed to ``load_song``.
        good_peaks: peak records (time first) that matched a drop.
        bad_peaks: peak records (time first) that did not match a drop.

    Returns:
        ``(good_samples, bad_samples)``: two 3-D arrays indexed as
        (window, frame, coefficient). Every window has the same length, so
        the result is never ragged; a group with no usable peak yields an
        array with zero windows.
    """
    mfccs = load_song(song_name)
    window_length = 2 * (SAMPLE_LENGTH // 2)

    def _stack(peaks):
        windows = _peak_windows(mfccs, peaks)
        if not windows:
            return np.empty((0, window_length, mfccs.shape[1]), dtype=mfccs.dtype)
        return np.asarray(windows)

    return _stack(good_peaks), _stack(bad_peaks)
        
    
    

In [123]:


# Load the pickled peak-detection results and order them by song name.
# NOTE: pickle.load can execute arbitrary code -- only load a results file
# that was produced locally by a trusted run.
with open('resultsFile', 'rb') as results_file:
    results = pickle.load(results_file)
results.sort(key=lambda x: x[0])

df = pd.read_csv("songs_fixed.csv")
# NOTE(review): row i of the CSV is paired with results[i] below, which
# assumes the CSV is ordered the same way as the sorted results -- confirm.

detection_count = 0
drop_count = 0
bad_samples_overall = []
good_samples_overall = []

for i in range(len(results)):
    # Positional access on both columns, so the pairing does not depend on
    # the DataFrame's index labels.
    drops = str(df["Drops"].iloc[i]).split(", ")
    songname = df["Song Name"].iloc[i]
    peaks = results[i][1]
    good_peaks = set()
    temp_detection_count = 0
    drops_found = []
    for d in drops:
        dval = float(d)
        for p in peaks:
            pval = float(p[0])
            # A drop is detected by the first peak within +/- THRESHOLD of it.
            if dval - THRESHOLD <= pval <= dval + THRESHOLD:
                temp_detection_count += 1
                drops_found.append(d)
                good_peaks.add(p)
                break
    # Report songs for which at least one annotated drop was missed.
    if temp_detection_count < len(drops):
        print(songname)
        print(drops_found)
    bad_peaks = set(peaks) - good_peaks
    drop_count += len(drops)
    detection_count += temp_detection_count

    good_samples, bad_samples = peaks_to_windows_mat(SONG_DIR + "/" + results[i][0], good_peaks, bad_peaks)
    # Iterating a window matrix yields one window at a time, so extend()
    # appends each window individually.
    bad_samples_overall.extend(bad_samples)
    good_samples_overall.extend(good_samples)

# Fraction of annotated drops that had a matching peak.
if drop_count:
    print(detection_count / drop_count)
else:
    print("No drops to evaluate")

Ariana_Grande___Side_To_Side_(TRU_Concept_Remix_ft._Romany)
['170.1']
0.9951690821256038


In [124]:
# from hmmlearn import hmm

# TEST_SIZE = 40

# model = hmm.GaussianHMM(n_components=5)
# lengths = [SAMPLE_LENGTH] * (int((np.shape(good_samples_overall)[0]) / SAMPLE_LENGTH) - TEST_SIZE)
# train_sample_count = int(np.shape(good_samples_overall)[0] - TEST_SIZE * SAMPLE_LENGTH)
# print(np.shape(lengths), train_sample_count)
# model = model.fit(np.asarray(good_samples_overall)[: train_sample_count], lengths)

In [125]:
# score = model.score(np.asarray(good_samples_overall)[: train_sample_count], lengths)
# print(score)

In [None]:
from sklearn.neural_network import MLPClassifier

# Number of shuffled samples held out as the test set; the remaining samples
# are used for training.
TEST_SIZE = 250

def unison_shuffled_copies(a, b):
    """Shuffle two equally long arrays with one shared random permutation.

    Both inputs are indexed with the same permutation, so element ``i`` of
    the first result still corresponds to element ``i`` of the second.
    Raises AssertionError when the lengths differ.
    """
    size = len(a)
    assert size == len(b)
    order = np.random.permutation(size)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

# Flatten every window into a single feature vector for the classifier.
good_flat = [g.flatten() for g in good_samples_overall]
bad_flat = [b.flatten() for b in bad_samples_overall]

# Label windows around matched peaks 1 and the remaining windows 0.
X = np.concatenate([good_flat, bad_flat])
Y = np.concatenate([[1] * len(good_flat), [0] * len(bad_flat)])

# NOTE(review): no random seed is set in the visible cells, so the shuffle
# (and therefore the train/test split) differs between runs.
X, Y = unison_shuffled_copies(X, Y)
print(np.shape(X), np.shape(Y))

# Hold out the first TEST_SIZE shuffled samples for evaluation.
X_test = X[:TEST_SIZE]
Y_test = Y[:TEST_SIZE]

X_train = X[TEST_SIZE:]
Y_train = Y[TEST_SIZE:]
# Printed only after the split: Y_train does not exist before this point, so
# printing it earlier fails on a fresh kernel.
print(Y_train)

clf = MLPClassifier(solver='lbfgs', alpha=1e-3, hidden_layer_sizes=(100, 10), random_state=30)
clf.fit(X_train, Y_train)

Y_pred = clf.predict(X_test)
print(Y_pred)
print("Done!")



(1252, 16000) (1252,)
[0 0 0 ... 0 0 0]


In [179]:
import math

def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


Y_prob = clf.predict_proba(X_test)

diffs = [x == y for x, y in zip(Y_pred, Y_test)]
print("Accuracy:", _safe_ratio(diffs.count(True), len(diffs)))

# Gap between the two predicted class probabilities for every test sample,
# grouped by outcome: correct, false positive (FP) or false negative (FN).
correct_probs = []
incorrect_probsFP = []
incorrect_probsFN = []
for i in range(len(Y_pred)):
    margin = abs(Y_prob[i][0] - Y_prob[i][1])
    mismatch = Y_pred[i] != Y_test[i]
    if mismatch and Y_pred[i] == 1:
        incorrect_probsFP.append(margin)
    elif mismatch and Y_pred[i] == 0:
        incorrect_probsFN.append(margin)
    else:
        correct_probs.append(margin)

# print(incorrect_probsFP)
# print(incorrect_probsFN)
# print(correct_probs)

# True positives are shared by recall and precision, so count them once.
true_positives = [x==1 and y==1 for x, y in zip(Y_pred, Y_test)].count(True)

# Each metric falls back to 0.0 when its denominator is zero (no positive
# labels, no positive predictions, or both metrics equal to zero) instead of
# raising ZeroDivisionError.
recall = _safe_ratio(true_positives, list(Y_test).count(1))
print("Recall:", recall)

precision = _safe_ratio(true_positives, list(Y_pred).count(1))
print("Precision:", precision)
print("F1:", _safe_ratio(2 * recall * precision, recall + precision))

Accuracy: 0.9
[0.9999997450987452, 0.999999996150942, 0.9999998007390889, 0.9635499074741063, 0.9982704012970451, 0.9999753079392835, 0.9987857278387566, 0.9999999996412807, 0.6185091059620091]
[0.14424246663449436, 0.7173303626650287, 0.9931944959027834, 0.999999999999432, 0.999989644549524, 0.9855493085484708, 1.0, 0.14424246663449436, 0.9999996796938744, 0.14424246663449436, 1.0, 0.5707658868538363, 0.9999999910518298, 0.9999995424899009, 0.9791799864819946, 0.14424246663449436]
[0.9999999999944267, 0.999999998296671, 0.999999999999361, 0.9999999999999885, 1.0, 1.0, 0.9999934830562238, 0.9999999999943379, 0.9999999913891058, 0.9999999893441509, 0.999999998439195, 1.0, 1.0, 0.9999999421691932, 1.0, 0.999999898562798, 1.0, 1.0, 1.0, 1.0, 0.9999996293112836, 0.9999999999999973, 0.9999650906566802, 1.0, 1.0, 1.0, 0.5400345503912796, 0.9999999999995102, 0.9999999999999865, 0.9999997664957099, 0.999999999999218, 0.9999999999999896, 0.9999516072977974, 0.999999999999984, 0.9999999614244501