In [None]:
import librosa #https://librosa.org/
import librosa.display
import librosa.beat
import sounddevice as sd  #https://anaconda.org/conda-forge/python-sounddevice
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import scipy.stats as scs
import scipy.fft as scf
import scipy.signal as scsg
from sys import getsizeof as sizeof
from scipy.spatial.distance import  euclidean, cosine, cityblock
from pprint import pprint

# Sampling rate (Hz) passed to every librosa.load call in this notebook.
sr = 22050
# Passed as the `mono` argument of every librosa.load call in this notebook.
mono = True
# NOTE(review): this silences every warning for the whole kernel session, including
# deprecation and numerical warnings raised by the libraries used below.
warnings.filterwarnings("ignore")

## Semana 2
### *Exercicio 2.1*

In [None]:
# Load the pre-computed top-100 feature table; the first CSV column becomes the index.
feat = pd.read_csv("dataset/Features - Audio MER/top100_features.csv", index_col=0)

In [None]:
def min_max_scale(y):
    """Rescale ``y`` linearly to the [0, 1] range.

    Parameters
    ----------
    y : numpy.ndarray or pandas.Series
        Values to rescale (anything exposing ``min``/``max`` and arithmetic).

    Returns
    -------
    Same container type as ``y``
        ``(y - min) / (max - min)``. When every value is identical the range is
        zero and zeros with the same shape as ``y`` are returned.
    """
    min_v = y.min()
    max_v = y.max()

    up = y - min_v
    down = max_v - min_v
    if down == 0:
        # Keep the input's shape: a bare scalar 0 breaks np.apply_along_axis when the
        # first column is constant and mixes scalars with Series in DataFrame.apply.
        return up * 0.0
    return up / down

In [None]:
def standardization(y):
    """Z-score ``y``: subtract its mean and divide by its standard deviation.

    The standard deviation is whatever ``y.std()`` returns for the container type
    (NOTE(review): NumPy and pandas use different default ``ddof`` values).

    Returns zeros with the same shape as ``y`` when the standard deviation is zero,
    instead of dividing by zero.
    """
    centered = y - y.mean()
    spread = y.std()
    if spread == 0:
        # Constant input: every value equals the mean, so every z-score is 0.
        return centered * 0.0
    return centered / spread

In [None]:
# Remove the "Quadrant" column before the remaining columns are scaled.
# NOTE(review): `feat` is rebound in place, so re-running this cell alone fails
# because the column no longer exists.
feat = feat.drop(columns=["Quadrant"])

In [None]:
# Scale every column to [0, 1]; the function is passed directly, no lambda needed.
feat = feat.apply(min_max_scale, axis=0)

In [None]:
# Convert to a plain NumPy array; later cells index it positionally (feat[i, :]).
feat = feat.to_numpy()

In [None]:
# Persist the scaled matrix as text.
# NOTE(review): np.savetxt is called without a delimiter argument, so despite the
# .csv extension the file uses savetxt's default separator -- confirm readers expect that.
np.savetxt("dataset/Features - Audio MER/top100_features_normalized.csv", feat)

### *Exercicio 2.2*

In [None]:
# First entry returned by os.listdir for `path` (listing order is not sorted here).
# NOTE(review): in the visible notebook `path` is only assigned in later cells, so on
# a fresh kernel (Restart & Run All) this cell raises NameError.
m1 = os.listdir(path)[0]
m1

In [None]:
# Load a single example track with the notebook-wide `sr` and `mono` settings;
# `fs` receives the sampling rate returned by librosa.load.
y,fs = librosa.load("dataset/MER_audio_taffc_dataset/Q1/MT0000040632.mp3", sr=sr, mono = mono)

In [None]:
# Frame-level librosa features for the single track loaded above, each row summarised
# by extract_features (7 statistics per row).
# NOTE(review): extract_features is defined in a later cell of this notebook, so this
# cell fails on a fresh kernel unless that cell has been run first.
mfcc = np.apply_along_axis(extract_features, 1, librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)).flatten()
spc_centroid= np.apply_along_axis(extract_features, 1, librosa.feature.spectral_centroid(y=y, sr=sr)).flatten()
spc_bdwth = np.apply_along_axis(extract_features, 1, librosa.feature.spectral_bandwidth(y=y, sr=sr)).flatten()
spc_contrast = np.apply_along_axis(extract_features, 1, librosa.feature.spectral_contrast(y=y, sr=sr)).flatten()
spc_flatness = np.apply_along_axis(extract_features, 1, librosa.feature.spectral_flatness(y=y)).flatten()
spc_rollof = np.apply_along_axis(extract_features, 1, librosa.feature.spectral_rolloff(y=y, sr=sr)).flatten()

# Fundamental frequency: frames equal to the upper search bound (11025 Hz) are zeroed
# BEFORE the statistics are computed, matching the batch-extraction loop. Masking the
# statistics afterwards (as this cell used to do) could only ever touch the summary
# values, never the frames. The bounds are passed by keyword, not by position.
f0 = librosa.yin(y, fmin=20, fmax=11025, sr=sr)
f0[f0==11025] = 0
f0 = np.apply_along_axis(extract_features, 0, f0)

rms = np.apply_along_axis(extract_features, 1, librosa.feature.rms(y=y)).flatten()
zcr = np.apply_along_axis(extract_features, 1, librosa.feature.zero_crossing_rate(y=y)).flatten()
tempo = librosa.beat.tempo(y=y, sr=sr)

print(
    f"""
    mfcc = {mfcc.shape}
    centroid = {spc_centroid.shape}
    bdwth = {spc_bdwth.shape}
    contrast = {spc_contrast.shape}
    flatness = {spc_flatness.shape}
    rollof = {spc_rollof.shape}
    f0 = {f0.shape}
    rms = {rms.shape}
    zcr = {zcr.shape}
    tempo = {tempo}
    """
)

In [None]:
# Join the per-feature statistics (and the tempo estimate) into one flat vector.
feature_vector = np.concatenate((mfcc,spc_centroid, spc_bdwth, spc_contrast, spc_flatness, spc_rollof,
                                    f0, rms, zcr, tempo))

In [None]:
# Display the length of the concatenated feature vector.
feature_vector.shape

In [None]:
def extract_features(signal : np.array):
    """Summarise a 1-D signal with seven descriptive statistics.

    Returns
    -------
    tuple
        ``(mean, standard deviation, skewness, kurtosis, median, maximum, minimum)``,
        in that order. Skewness and kurtosis come from ``scipy.stats`` with its
        default settings.
    """
    return (
        signal.mean(),
        signal.std(),
        scs.skew(signal),
        scs.kurtosis(signal),
        np.median(signal),
        signal.max(),
        signal.min(),
    )

In [None]:
path = "dataset/MER_audio_taffc_dataset/Q"
audio_files = sorted(os.listdir(path))

# One row per track, sized from the directory listing instead of a hard-coded 900.
# 190 columns = 7 statistics x 27 feature rows + 1 tempo value
# (27 = 13 MFCC + centroid + bandwidth + 7 spectral-contrast rows + flatness
#  + rolloff + f0 + rms + zcr; assumes librosa's default contrast band count).
feature_matrix = np.zeros((len(audio_files), 190))

for index, audio in enumerate(audio_files):
    print(index)
    y, fs = librosa.load(f"{path}/{audio}", sr=sr, mono = mono)

    # Spectral features, each frame-level row reduced to 7 statistics.
    # Named `mfcc_stats` so the array does not rebind the name of the `mfcc`
    # function defined later in this notebook.
    mfcc_stats = np.apply_along_axis(extract_features, 1, librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)).flatten()
    spc_centroid = np.apply_along_axis(extract_features, 1, librosa.feature.spectral_centroid(y=y, sr=sr)).flatten()
    spc_bdwth = np.apply_along_axis(extract_features, 1, librosa.feature.spectral_bandwidth(y=y, sr=sr)).flatten()
    spc_contrast = np.apply_along_axis(extract_features, 1, librosa.feature.spectral_contrast(y=y, sr=sr)).flatten()
    spc_flatness = np.apply_along_axis(extract_features, 1, librosa.feature.spectral_flatness(y=y)).flatten()
    spc_rollof = np.apply_along_axis(extract_features, 1, librosa.feature.spectral_rolloff(y=y, sr=sr)).flatten()

    # Fundamental frequency: frames equal to the upper search bound are zeroed
    # before the statistics are taken. Bounds are passed by keyword, not position.
    f0 = librosa.yin(y, fmin=20, fmax=11025, sr=sr)
    f0[f0==11025] = 0
    f0 = np.apply_along_axis(extract_features, 0, f0)

    # Temporal features.
    rms = np.apply_along_axis(extract_features, 1, librosa.feature.rms(y=y)).flatten()
    zcr = np.apply_along_axis(extract_features, 1, librosa.feature.zero_crossing_rate(y=y)).flatten()
    tempo = librosa.beat.tempo(y=y, sr=sr)

    feature_vector = np.concatenate((mfcc_stats, spc_centroid, spc_bdwth, spc_contrast, spc_flatness, spc_rollof,
                                    f0, rms, zcr, tempo))

    feature_matrix[index] = feature_vector

In [None]:
# Persist the raw (unscaled) librosa feature matrix as text.
np.savetxt("dataset/Features - Audio MER/extracted_features.csv", feature_matrix)

In [None]:
# Display the (tracks, features) shape of the extracted matrix.
feature_matrix.shape

In [None]:
# Min-max scale each column (axis 0) of the feature matrix independently.
feat_matrix_norm = np.apply_along_axis(min_max_scale, 0, feature_matrix)

In [None]:
# Persist the column-wise scaled librosa feature matrix as text.
np.savetxt("dataset/Features - Audio MER/extracted_features_normalized.csv", feat_matrix_norm)

### *Exercicio 2.3*
> Referências: <br>
    - https://stackoverflow.com/questions/37963042/python-librosa-what-is-the-default-frame-size-used-to-compute-the-mfcc-feature <br>
    - https://en.wikipedia.org/wiki/Mel_scale <br>
    - https://haythamfayek.com/2016/04/21/speech-processing-for-machine-learning.html <br>
    - https://gist.github.com/bmcfee/746e572232be36f3bd462749fb1796da <br>
    - https://github.com/librosa/librosa <br>
    - https://github.com/librosa/librosa/blob/main/librosa/feature/spectral.py <br>
    - https://www.researchgate.net/publication/220723537_Finding_An_Optimal_Segmentation_for_Audio_Genre_Classification#pf2 <br>
    - https://en.wikipedia.org/wiki/Octave_band <br>
    - https://en.wikipedia.org/wiki/Spectral_flatness <br>
    - https://en.wikipedia.org/wiki/Root_mean_square <br>
    - https://github.com/scipy/scipy <br>
    - http://practicalcryptography.com/miscellaneous/machine-learning/guide-mel-frequency-cepstral-coefficients-mfccs/

In [None]:
# Reload the example track used to try out the hand-written feature functions below.
y,fs = librosa.load("dataset/MER_audio_taffc_dataset/Q1/MT0000040632.mp3", sr=sr, mono = mono)

In [None]:
def hz2mel(f):
    """Convert a frequency in Hz (scalar or array) to mels: 2595 * log10(1 + f / 700)."""
    ratio = f / 700
    return 2595 * np.log10(1 + ratio)

In [None]:
def mel2hz(m):
    """Convert mels (scalar or array) back to Hz: 700 * (10 ** (m / 2595) - 1)."""
    exponent = m / 2595
    return 700 * (10 ** exponent - 1)

In [None]:
def get_time_frames(y, nperseg, noverlap):
    """Slice a signal into overlapping frames without copying (adapted from numpy/librosa).

    Parameters
    ----------
    y : numpy.ndarray
        Signal; frames are taken along the last axis.
    nperseg : int
        Samples per frame.
    noverlap : int
        Samples shared by consecutive frames; must be smaller than ``nperseg``.

    Returns
    -------
    numpy.ndarray
        For a 1-D ``y``: shape ``(nperseg, n_frames)``, one frame per column.
        A signal shorter than one frame yields zero frames.

    Raises
    ------
    ValueError
        If ``noverlap >= nperseg`` (the hop between frames would not be positive).

    Notes
    -----
    The result is a *writeable* strided view: overlapping frames share memory with
    each other and with ``y``, so writing to a frame modifies ``y`` and its
    neighbouring frames. Copy first if the frames are going to be modified.
    """
    step = nperseg - noverlap
    if step <= 0:
        raise ValueError(
            f"noverlap ({noverlap}) must be smaller than nperseg ({nperseg})"
        )

    # Same count as (len - nperseg) // step + 1, clamped so short signals give 0 frames
    # instead of a negative dimension.
    n_frames = max(0, (y.shape[-1] - noverlap) // step)
    shape = y.shape[:-1] + (n_frames, nperseg)
    # Moving one frame forward advances `step` samples; moving inside a frame, one sample.
    strides = y.strides[:-1] + (step * y.strides[-1], y.strides[-1])
    result = np.lib.stride_tricks.as_strided(
        y, shape=shape, strides=strides, writeable=True
    )
    # Put samples on axis 0 and frames on axis 1.
    return result.swapaxes(0, 1)

In [None]:
def mel_filterbank(sr, n_fft, frame_length, n=40, fmin=20, fmax=None):
    """Build ``n`` triangular filters spaced evenly on the mel scale.

    Parameters
    ----------
    sr : number
        Sampling rate in Hz.
    n_fft : int
        FFT size used to map frequencies to bin positions.
    frame_length : int
        Number of frequency bins per spectrum (columns of the returned matrix).
    n : int
        Number of filters.
    fmin, fmax : number
        Frequency range in Hz covered by the filters; ``fmax`` defaults to ``sr / 2``.

    Returns
    -------
    numpy.ndarray
        Shape ``(n, frame_length)``; row ``k`` rises linearly from 0 to 1 between
        its left and centre bins and falls back to 0 at its right bin.
    """
    if fmax is None:
        fmax = sr / 2
    min_mel, max_mel = hz2mel(fmin), hz2mel(fmax)

    # n filters need n + 2 edge points: each filter uses three consecutive ones.
    mel_freq = mel2hz(np.linspace(min_mel, max_mel, n + 2))
    bins = np.floor((n_fft + 1) / sr * mel_freq).astype(int)

    filterbank = np.zeros((n, frame_length))

    for row, (left_pos, center_pos, right_pos) in enumerate(zip(bins[:-2], bins[1:-1], bins[2:])):
        up = np.linspace(0, 1, center_pos - left_pos + 1)
        down = np.linspace(1, 0, right_pos - center_pos + 1)

        filterbank[row, left_pos:(center_pos + 1)] = up
        # Written second so the centre bin is always 1, even for zero-width slopes.
        filterbank[row, center_pos:(right_pos + 1)] = down

    return filterbank

In [None]:
def mfcc(mag, mel_filter, n=13):
    """Cepstral coefficients of one magnitude spectrum.

    The spectrum is weighted by every row of ``mel_filter`` and summed per filter,
    converted to ``20 * log10`` (filters with zero energy are treated as 1, i.e.
    0 after the log), and passed through ``scipy.fft.dct``. The first ``n``
    coefficients are returned.
    """
    band_energy = np.sum(mel_filter * mag, axis=1)
    # Avoid log10(0): an empty band contributes 0 after the logarithm.
    band_energy = np.where(band_energy == 0, 1, band_energy)
    log_energy = 20 * np.log10(band_energy)
    return scf.dct(log_energy)[:n]

In [None]:
def spectral_centroid(mag, freqs, smart=True):
    """Magnitude-weighted mean frequency of one spectrum.

    With ``smart=True`` magnitudes are floored at 1e-40 first, so an all-zero
    spectrum does not divide by zero. Returns 0 when the magnitudes sum to 0.
    """
    if smart:
        # Floor the magnitudes to get rid of exact zeros.
        mag = np.maximum(1e-40, mag)

    total = np.sum(mag)
    if total == 0:
        return 0
    return np.sum(mag * freqs) / total

In [None]:
def spectral_bandwith(mag, freq, p=2):
    """Order-``p`` spectral bandwidth of one spectrum: spread of energy around the centroid.

    Computed as ``(sum_k w_k * |f_k - centroid| ** p) ** (1 / p)`` where ``w_k`` are
    the magnitudes normalised to sum to 1.

    Two corrections over the previous formula:
    * the magnitudes are normalised, so the result is a frequency spread in Hz
      instead of a value that scales with the loudness of the frame;
    * the deviation is taken in absolute value, so odd ``p`` cannot produce a
      negative sum (and hence NaN from the fractional power).

    Returns 0.0 for an all-zero spectrum.
    """
    centroid = spectral_centroid(mag, freq)
    total = np.sum(mag)
    if total == 0:
        return 0.0
    weights = mag / total
    deviation = np.abs(freq - centroid)
    return np.sum(weights * deviation ** p) ** (1 / p)

In [None]:
def spectral_flatness(mag, smart=True):
    """Ratio of the geometric mean to the arithmetic mean of the power spectrum.

    The power spectrum is ``mag ** 2``. With ``smart=True`` it is floored at 1e-20
    so that zeros do not collapse the geometric mean. Returns 0 when the arithmetic
    mean is 0.
    """
    power_spec = np.square(mag)
    if smart:
        # Floor the power values to get rid of exact zeros.
        power_spec = np.maximum(1e-20, power_spec)

    arithmetic_mean = power_spec.mean()
    if arithmetic_mean == 0:
        return 0
    return scs.gmean(power_spec) / arithmetic_mean

In [None]:
def spectral_rollof(mag, freq, perc=0.85):
    """Roll-off frequency: first frequency at which the cumulative magnitude reaches
    ``perc`` of the total.

    Parameters
    ----------
    mag : numpy.ndarray
        Magnitude spectrum of one frame.
    freq : numpy.ndarray
        Frequency of each bin, same length as ``mag``.
    perc : float
        Fraction of the total magnitude to reach (previously ignored: 0.85 was
        hard-coded regardless of this argument).
    """
    cumulative_sum = np.cumsum(mag)
    threshold = perc * cumulative_sum[-1]
    # Number of bins strictly below the threshold == index of the first bin reaching it.
    index = np.count_nonzero(cumulative_sum < threshold)
    # Guard against perc > 1, where no bin ever reaches the threshold.
    index = min(index, len(freq) - 1)
    return freq[index]

In [None]:
def root_mean_square(y):
    """Root mean square of ``y``: square root of the mean of the squared values."""
    mean_of_squares = np.mean(np.square(y))
    return np.sqrt(mean_of_squares)

In [None]:
def zero_crossing_rate(y):
    """Fraction of consecutive sample pairs whose sign differs.

    Zero counts as positive. The input is left untouched: the previous version
    overwrote ``y`` with -1/+1 in place, which also corrupted the caller's signal
    when ``y`` was a view into it.

    Returns 0.0 for inputs with fewer than two samples.
    """
    y = np.asarray(y)
    if y.size < 2:
        return 0.0
    signs = np.where(y < 0, -1, 1)
    # A sign change gives |difference| == 2, hence the division by 2.
    return np.mean(np.abs(np.diff(signs))) / 2

In [None]:
def features(y, sr=22050, hop_length=512, n_mfcc=13, n_fft=2048):
    """Compute frame-level spectral and temporal features with the hand-written helpers.

    Parameters
    ----------
    y : numpy.ndarray
        Mono audio signal.
    sr : number
        Sampling rate in Hz.
    hop_length : int
        Samples between the starts of consecutive frames.
    n_mfcc : int
        Number of cepstral coefficients to keep (previously ignored: 13 was
        hard-coded).
    n_fft : int
        Frame / FFT size in samples.

    Returns
    -------
    tuple of numpy.ndarray
        ``(mfccs, centroid, bandwith, flatness, rollof, rms, zcr)``. ``mfccs`` has one
        row per coefficient; every other array has a single row. Columns are frames.
        NOTE(review): the spectral features are framed by scipy's stft while rms/zcr
        use get_time_frames, so the two groups may have different frame counts --
        confirm whether that matters downstream.
    """
    noverlap = n_fft - hop_length
    f, _, spec = scsg.stft(y, sr, nperseg=n_fft, noverlap=noverlap, window="hann", padded=False)
    # Frame a private copy: the frames are writeable views, so per-frame functions
    # must never be able to modify the caller's signal.
    time_frames = get_time_frames(y.copy(), n_fft, noverlap)
    # Scale the STFT output by n_fft / 2 before taking magnitudes.
    mag = np.abs(spec * (n_fft / 2))
    n_spec_frames = spec.shape[-1]
    n_time_frames = time_frames.shape[-1]

    mel_filter = mel_filterbank(sr=sr, n_fft=n_fft, frame_length=len(f))

    # Spectral features: one call per column (frame) of the magnitude spectrogram.
    mfccs = np.apply_along_axis(mfcc, 0, mag, mel_filter, n_mfcc).reshape(-1, n_spec_frames)
    centroid = np.apply_along_axis(spectral_centroid, 0, mag, f).reshape(1, n_spec_frames)
    bandwith = np.apply_along_axis(spectral_bandwith, 0, mag, f).reshape(1, n_spec_frames)
    flatness = np.apply_along_axis(spectral_flatness, 0, mag).reshape(1, n_spec_frames)
    rollof = np.apply_along_axis(spectral_rollof, 0, mag, f).reshape(1, n_spec_frames)

    # Temporal features: one call per column (frame) of the framed signal.
    rms = np.apply_along_axis(root_mean_square, 0, time_frames).reshape(1, n_time_frames)
    zcr = np.apply_along_axis(zero_crossing_rate, 0, time_frames).reshape(1, n_time_frames)
    return (mfccs,
            centroid,
            bandwith,
            flatness,
            rollof,
            rms,
            zcr)

In [None]:
# Scratch/debug cell: run the hand-written `features` on one track (index k of the
# sorted directory listing) and print the results before and after they are reduced
# to summary statistics with extract_features.
songs = sorted(os.listdir("dataset/MER_audio_taffc_dataset/Q"))

k=20
y,fs = librosa.load(f"dataset/MER_audio_taffc_dataset/Q/{songs[k]}", sr=sr, mono = mono)

# A copy of the signal is passed so the loaded `y` is not handed to `features` directly.
(mfccs,
 centroid, 
 bandwith,
 flatness,
 rollof,
 rms, 
 zcr) = features(y.copy())

# NOTE(review): the first four arguments print whole arrays while the last three print
# NaN checks; presumably every entry was meant to be a NaN check -- confirm.
print(#np.any((
     mfccs,
     centroid,
     bandwith,
     flatness,
     np.any(np.isnan(rollof)),
     np.any(np.isnan(rms)),
     np.any(np.isnan(zcr)),
    #)),
     sep ="\n",
     end="\n\n")

# Reduce each frame-level row to its 7 summary statistics.
mfccs = np.apply_along_axis(extract_features, 1, mfccs).flatten()
centroid= np.apply_along_axis(extract_features, 1, centroid).flatten()
bandwith = np.apply_along_axis(extract_features, 1, bandwith).flatten()
flatness = np.apply_along_axis(extract_features, 1, flatness).flatten()
rollof = np.apply_along_axis(extract_features, 1, rollof).flatten()
rms = np.apply_along_axis(extract_features, 1, rms).flatten()
zcr = np.apply_along_axis(extract_features, 1, zcr).flatten()

# NOTE(review): np.any(mfccs) tests for non-zero values, not for NaN like the checks
# below it -- confirm which was intended.
print(#np.any((
     np.any(mfccs),
     centroid,
     bandwith,
     flatness,
     np.any(np.isnan(rollof)),
     np.any(np.isnan(rms)),
     np.any(np.isnan(zcr)),
    #)),
     sep ="\n",
     end="\n\n")

In [None]:
path = "dataset/MER_audio_taffc_dataset/Q"
audio_files = sorted(os.listdir(path))

# One row per track, sized from the directory listing instead of a hard-coded 900.
# 133 columns = 7 statistics x (13 MFCC + centroid + bandwidth + flatness + rolloff
# + rms + zcr).
feature_matrix2 = np.zeros((len(audio_files), 133))

for index, audio in enumerate(audio_files):
    print(index)
    y, fs = librosa.load(f"{path}/{audio}", sr=sr, mono = mono)

    # Frame-level features from the hand-written implementation; a copy of the
    # signal is passed so the loaded `y` itself is never handed over.
    (mfccs,
     centroid,
     bandwith,
     flatness,
     rollof,
     rms,
     zcr) = features(y.copy())

    # Reduce every frame-level row to its 7 summary statistics and join them.
    feature_vector = np.concatenate([
        np.apply_along_axis(extract_features, 1, frame_feature).flatten()
        for frame_feature in (mfccs, centroid, bandwith, flatness, rollof, rms, zcr)
    ])

    feature_matrix2[index] = feature_vector

In [None]:
# Persist the raw (unscaled) hand-extracted feature matrix as text.
np.savetxt("dataset/Features - Audio MER/manual_extracted_features.csv", feature_matrix2)

In [None]:
# Min-max scale each column (axis 0) of the hand-extracted feature matrix independently.
feat_matrix_norm2 = np.apply_along_axis(min_max_scale, 0, feature_matrix2)

In [None]:
# Persist the column-wise scaled hand-extracted feature matrix as text.
np.savetxt("dataset/Features - Audio MER/manual_extracted_features_normalized.csv", feat_matrix_norm2)

## Semana 3
### *Exercicio 3.1*

In [None]:
def dist(v1, v2, dist_type):
    """Distance between two feature vectors.

    Parameters
    ----------
    v1, v2 : array-like
        1-D vectors of equal length.
    dist_type : str
        ``"e"`` for Euclidean, ``"m"`` for Manhattan (city block), ``"c"`` for cosine.

    Raises
    ------
    ValueError
        If ``dist_type`` is not one of the codes above (a subclass of the bare
        ``Exception`` raised before, now with an explanatory message).
    """
    metrics = {"e": euclidean, "m": cityblock, "c": cosine}
    try:
        metric = metrics[dist_type]
    except (KeyError, TypeError):
        raise ValueError(
            f"unknown dist_type {dist_type!r}; expected one of {sorted(metrics)}"
        ) from None
    return metric(v1, v2)

### *Exercicio 3.2*

In [None]:
def save_distance_matrices(feature_rows, prefix, out_dir="dataset/SimilarityMatrix"):
    """Compute and save the pairwise distance matrix of ``feature_rows`` for each metric.

    For every metric code understood by ``dist`` ("e", "m", "c") an
    ``(n_songs, n_songs)`` matrix is written to ``{out_dir}/{prefix}{code}.csv``.

    Parameters
    ----------
    feature_rows : numpy.ndarray
        2-D array with one row per song.
    prefix : str
        File-name prefix identifying the feature set.
    out_dir : str
        Directory receiving the matrices.
    """
    # Size comes from the data rather than a hard-coded 900.
    n_songs = feature_rows.shape[0]
    for dist_type in ("e", "m", "c"):
        similarity_matrix = np.zeros((n_songs, n_songs))
        for i in range(n_songs):
            # All three metrics are symmetric, so each pair is computed once and
            # mirrored; the diagonal (j == i) is still computed explicitly.
            for j in range(i, n_songs):
                d = dist(feature_rows[i, :], feature_rows[j, :], dist_type)
                similarity_matrix[i, j] = d
                similarity_matrix[j, i] = d
        print(dist_type, "done")
        np.savetxt(f"{out_dir}/{prefix}{dist_type}.csv", similarity_matrix, fmt="%f")


# One call per feature set; this replaces three copy-pasted cells that differed only
# in the input matrix and the output file prefix.
for prefix, feature_set in (
    ("d", feat_matrix_norm),       # librosa features
    ("top100", feat),              # provided top-100 features
    ("m", feat_matrix_norm2),      # hand-extracted features
):
    save_distance_matrices(feature_set, prefix)

### *Exercicio 3.3*

In [None]:
def get_song_index(song_name):
    """Position of ``song_name`` in the alphabetically sorted listing of the audio folder.

    Raises IndexError when the folder contains no file with that name.
    """
    songs = np.array(sorted(os.listdir("dataset/MER_audio_taffc_dataset/Q")))
    return np.flatnonzero(songs == song_name)[0]

In [None]:
def get_song_name(song_index):
    """File name at position ``song_index`` of the alphabetically sorted audio folder listing."""
    return sorted(os.listdir("dataset/MER_audio_taffc_dataset/Q"))[song_index]

In [None]:
def save_rankings(prefix, top_n=20):
    """Write the ``top_n`` nearest songs of every query, for each distance metric.

    Reads ``dataset/SimilarityMatrix/{prefix}{code}.csv`` for each metric code
    ("e", "m", "c") and writes the ranked file names to
    ``dataset/Rankings/{query}/{prefix}{code}.csv``.

    Parameters
    ----------
    prefix : str
        File-name prefix identifying the feature set.
    top_n : int
        Number of songs kept per ranking.
    """
    # Resolve every query's row once instead of once per metric.
    query_rows = {song: get_song_index(song) for song in os.listdir("Queries/")}

    for dist_type in ("e", "m", "c"):
        # Load each matrix once per metric rather than once per (query, metric).
        distances = np.loadtxt(f"dataset/SimilarityMatrix/{prefix}{dist_type}.csv")
        for song, i in query_rows.items():
            order = np.argsort(distances[i])
            # Drop the query explicitly: slicing [1:21] assumed it always sorts
            # first, which fails when another song is at distance 0 (or on NaNs).
            nearest = order[order != i][:top_n]
            top_songs_names = [get_song_name(song_index) for song_index in nearest]
            os.makedirs(f"dataset/Rankings/{song}", exist_ok=True)
            np.savetxt(f"dataset/Rankings/{song}/{prefix}{dist_type}.csv", top_songs_names, fmt="%s")


# One call per feature set; this replaces three copy-pasted cells that differed only
# in the file prefix ("d": librosa, "top100": provided, "m": hand-extracted).
for prefix in ("d", "top100", "m"):
    save_rankings(prefix)

## Semana 4


### *Exercicio 4.2.2*

In [None]:
def get_match_counts(data, index1, index2):
    """Count the metadata values two songs have in common.

    Every column of ``data`` is treated as a ``;``-separated list; the score is the
    number of distinct (whitespace-stripped) tokens shared by the two songs, summed
    over all columns. Columns where either value is not a string (e.g. missing
    values) contribute nothing.

    Parameters
    ----------
    data : pandas.DataFrame
        Metadata table indexed by song identifier.
    index1, index2 : index labels
        The two songs to compare.

    Returns
    -------
    int
    """
    song1 = data.loc[index1]
    song2 = data.loc[index2]

    points = 0
    for metric in data.columns:
        value1, value2 = song1[metric], song2[metric]
        # Explicit type check instead of a bare `except:` that hid every error.
        if not (isinstance(value1, str) and isinstance(value2, str)):
            continue
        # Sets make repeated tokens count once; the previous
        # np.intersect1d(..., assume_unique=True) miscounted when a token repeated.
        tokens1 = {token.strip() for token in value1.split(";")}
        tokens2 = {token.strip() for token in value2.split(";")}
        points += len(tokens1 & tokens2)
    return points

In [None]:
# Metadata columns compared by get_match_counts; the first CSV column becomes the index.
cols = ["Artist", "GenresStr", "Quadrant", "MoodsFoundStr"]
metadata = pd.read_csv("dataset/MER_audio_taffc_dataset/panda_dataset_taffc_metadata.csv", index_col=0)[cols]

In [None]:
# Metadata-based similarity: entry (i, j) is the number of metadata tokens shared by
# songs i and j, where i/j are positions in the sorted audio-folder listing.
song_ids = list(metadata.index)

# Resolve every song's matrix position once. Calling get_song_index inside the double
# loop re-listed and re-sorted the audio directory for every pair of songs.
positions = [get_song_index(song_id + ".mp3") for song_id in song_ids]

# Sized from the audio folder (the index space used by get_song_index) instead of a
# hard-coded 900.
n_songs = len(os.listdir("dataset/MER_audio_taffc_dataset/Q"))
similarity_matrix = np.zeros((n_songs, n_songs))

for a, song_1 in enumerate(song_ids):
    i = positions[a]
    # The match count is symmetric, so each pair is computed once and mirrored.
    for b in range(a, len(song_ids)):
        j = positions[b]
        sim_count = get_match_counts(metadata, song_1, song_ids[b])
        similarity_matrix[i, j] = sim_count
        similarity_matrix[j, i] = sim_count
    print(i)

np.savetxt("dataset/SimilarityMatrix/metadata_sim.csv", similarity_matrix, fmt="%d")

In [None]:
# Precision of every saved ranking against the metadata-based relevance: the share of
# the 20 most metadata-similar songs that also appear in the ranking.
for querie in os.listdir("dataset/Rankings/"):
    if "mp3" not in querie:
        continue

    i = get_song_index(querie)
    by_similarity = np.argsort(similarity_matrix[i])[::-1]
    # Remove the query BEFORE truncating, so 20 relevant songs always remain.
    # Truncating first left only 19 whenever the query was among its own top 20.
    relevant = by_similarity[by_similarity != i][:20]
    relevant = [get_song_name(song) for song in relevant]

    print(f"Music: {querie}")
    for rank in os.listdir(f"dataset/Rankings/{querie}"):
        ranking = np.loadtxt(f"dataset/Rankings/{querie}/{rank}", dtype=object)
        match = np.intersect1d(relevant, ranking)
        precision = len(match)/20

        print(f"\tDistance: {rank} --> Precision = {precision}")
    print()

### *Exercicio 4.2.1*
- Avaliação conjunto de 100 features, distância de cosseno

In [None]:
# Load one track at the notebook-wide sampling rate and play it with sounddevice
# for subjective listening; `fs` is not used afterwards in this cell.
song_name = "MT0014794891.mp3"
y, fs = librosa.load(f"dataset/MER_audio_taffc_dataset/Q/{song_name}", sr=sr)
sd.play(y, sr)

In [None]:
def summarize_evaluation(csv_path):
    """Print per-column and row-averaged std/mean of one subjective-evaluation CSV.

    "Isolated" results describe each column of the file separately; "Global"
    results describe the row-wise mean across columns.

    Returns the loaded DataFrame.
    """
    evaluations = pd.read_csv(csv_path)
    print("Isolated Results: \n")
    print(evaluations.describe().loc[["std", "mean"]])
    print("\n"+"-"*20+"\n")
    print("Global Results: \n")
    print(evaluations.mean(axis=1).describe()[["std", "mean"]])
    return evaluations


# One summary per evaluated query; this replaces four copy-pasted cells that differed
# only in the file name. The query name is printed first to tell the outputs apart.
for query in (
    "MT0000202045.mp3",
    "MT0000379144.mp3",
    "MT0000414517.mp3",
    "MT0000956340.mp3",
):
    print(f"Query: {query}")
    evaluations = summarize_evaluation(f"dataset/Rankings/Evaluation/Subjective/top100/{query}.csv")
    print()

In [None]:
# Pooled subjective precision: share of evaluated recommendations whose row-wise mean
# score is at least 2.5, over every evaluation file in the folder.
path = "dataset/Rankings/Evaluation/Subjective/top100/"

# Read every file first and concatenate once. This replaces the `del df` / bare
# `except:` dance that used NameError as control flow and hid any read error.
df = pd.concat(
    [pd.read_csv(f"{path}{song}") for song in sorted(os.listdir(path)) if "mp3" in song],
    ignore_index=True,
)

temp = df.mean(axis=1)
temp = temp[temp>=2.5]
print("Precision =", len(temp)/len(df))