In [1]:
import librosa #https://librosa.org/
import librosa.display
import librosa.beat
import sounddevice as sd  #https://anaconda.org/conda-forge/python-sounddevice
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import scipy.stats as scs
import scipy.fft as scf
import scipy.signal as scsg
from sys import getsizeof as sizeof
from scipy.spatial.distance import  euclidean, cosine, cityblock
from pprint import pprint
from IPython.display import display

sr = 22050
mono = True
warnings.filterwarnings("ignore")

In [2]:
# Alterar o valor de run para 1 ou True, para calcular as várias matrizes com os features e distâncias
run = 0

## Semana 2
### *Exercicio 2.1*

In [3]:
feat = pd.read_csv("dataset/Features - Audio MER/top100_features.csv", index_col=0)

In [4]:
def min_max_scale(y):
    min_v = y.min()
    max_v = y.max()
    
    up = (y - min_v)
    down = (max_v - min_v)
    return  up/down if down!=0 else 0 

In [5]:
def standardization(y):
    return (y - y.mean())/y.std()

In [6]:
feat = feat.drop(columns=["Quadrant"])

In [7]:
feat = feat.apply(lambda col: min_max_scale(col), axis = 0)

In [8]:
feat = feat.to_numpy()

In [9]:
if run: np.savetxt("dataset/Features - Audio MER/top100_features_normalized.csv", feat)

### *Exercicio 2.2*

In [10]:
y,fs = librosa.load("dataset/MER_audio_taffc_dataset/Q1/MT0000040632.mp3", sr=sr, mono = mono)

In [11]:
def extract_features(signal : np.array):
    mean = signal.mean()
    stdDev = signal.std()
    skewness = scs.skew(signal)
    kurtosis = scs.kurtosis(signal)
    median = np.median(signal)
    max_value = signal.max()
    min_value = signal.min()
    return (mean, stdDev, skewness, kurtosis, median, max_value, min_value)

In [12]:
mfcc = np.apply_along_axis(extract_features, 1, librosa.feature.mfcc(y=y, n_mfcc=13)).flatten()
spc_centroid= np.apply_along_axis(extract_features, 1, librosa.feature.spectral_centroid(y=y)).flatten()
spc_bdwth = np.apply_along_axis(extract_features, 1, librosa.feature.spectral_bandwidth(y=y)).flatten()
spc_contrast = np.apply_along_axis(extract_features, 1, librosa.feature.spectral_contrast(y=y)).flatten()
spc_flatness = np.apply_along_axis(extract_features, 1, librosa.feature.spectral_flatness(y=y)).flatten()
spc_rollof = np.apply_along_axis(extract_features, 1, librosa.feature.spectral_rolloff(y=y)).flatten()
f0 = np.apply_along_axis(extract_features, 0, librosa.yin(y, 20, 11025))
f0[f0==11025] = 0;
rms = np.apply_along_axis(extract_features, 1, librosa.feature.rms(y=y)).flatten()
zcr = np.apply_along_axis(extract_features, 1, librosa.feature.zero_crossing_rate(y=y)).flatten()
tempo = librosa.beat.tempo(y=y)

print(
    f"""
    mfcc = {mfcc.shape}
    centroid = {spc_centroid.shape}
    bdwth = {spc_bdwth.shape}
    contrast = {spc_contrast.shape}
    flatness = {spc_flatness.shape}
    rollof = {spc_rollof.shape}
    f0 = {f0.shape}
    rms = {rms.shape}
    zcr = {zcr.shape}
    tempo = {tempo}
    """
)


    mfcc = (91,)
    centroid = (7,)
    bdwth = (7,)
    contrast = (49,)
    flatness = (7,)
    rollof = (7,)
    f0 = (7,)
    rms = (7,)
    zcr = (7,)
    tempo = [99.38401442]
    


In [13]:
feature_vector = np.concatenate((mfcc,spc_centroid, spc_bdwth, spc_contrast, spc_flatness, spc_rollof,
                                    f0, rms, zcr, tempo))

In [14]:
feature_vector.shape

(190,)

In [15]:
if run:
    path = "dataset/MER_audio_taffc_dataset/Q"
    #910x190
    feature_matrix = np.zeros((900, 190))
    index=0
    for audio in sorted(os.listdir(path)):
        print(index)
        y, fs = librosa.load(f"{path}/{audio}", sr=sr, mono = mono)
        #Spectral features extraction
        mfcc = np.apply_along_axis(extract_features, 1, librosa.feature.mfcc(y=y, n_mfcc=13)).flatten()
        spc_centroid= np.apply_along_axis(extract_features, 1, librosa.feature.spectral_centroid(y=y)).flatten()
        spc_bdwth = np.apply_along_axis(extract_features, 1, librosa.feature.spectral_bandwidth(y=y)).flatten()
        spc_contrast = np.apply_along_axis(extract_features, 1, librosa.feature.spectral_contrast(y=y)).flatten()
        spc_flatness = np.apply_along_axis(extract_features, 1, librosa.feature.spectral_flatness(y=y)).flatten()
        spc_rollof = np.apply_along_axis(extract_features, 1, librosa.feature.spectral_rolloff(y=y)).flatten()
        f0 = librosa.yin(y, 20, 11025)
        f0[f0==11025] = 0
        f0 = np.apply_along_axis(extract_features, 0, f0)
        rms = np.apply_along_axis(extract_features, 1, librosa.feature.rms(y=y)).flatten()
        zcr = np.apply_along_axis(extract_features, 1, librosa.feature.zero_crossing_rate(y=y)).flatten()
        tempo = librosa.beat.tempo(y=y)

        feature_vector = np.concatenate((mfcc,spc_centroid, spc_bdwth, spc_contrast, spc_flatness, spc_rollof,
                                        f0, rms, zcr, tempo))

        feature_matrix[index]=feature_vector;
        index+=1;

#np.apply_along_axis(librosa.util.normalize, 1, feature_matrix)
#feature_matrix.tofile("exercise2_features.csv", sep = ";")
#del feature_matrix

In [16]:
if run: np.savetxt("dataset/Features - Audio MER/extracted_features.csv", feature_matrix)

In [17]:
if run: feature_matrix.shape

In [18]:
if run: feat_matrix_norm = np.apply_along_axis(min_max_scale, 0, feature_matrix)

In [19]:
if run: np.savetxt("dataset/Features - Audio MER/extracted_features_normalized.csv", feat_matrix_norm)

In [20]:
feature_matrix = np.loadtxt("dataset/Features - Audio MER/extracted_features.csv")
feat_matrix_norm = np.loadtxt("dataset/Features - Audio MER/extracted_features_normalized.csv")

### *Exercicio 2.3*
> Referências: <br>
    - https://stackoverflow.com/questions/37963042/python-librosa-what-is-the-default-frame-size-used-to-compute-the-mfcc-feature <br>
    - https://en.wikipedia.org/wiki/Mel_scale <br>
    - https://haythamfayek.com/2016/04/21/speech-processing-for-machine-learning.html <br>
    - https://gist.github.com/bmcfee/746e572232be36f3bd462749fb1796da <br>
    - https://github.com/librosa/librosa <br>
    - https://www.researchgate.net/publication/220723537_Finding_An_Optimal_Segmentation_for_Audio_Genre_Classification#pf2 <br>
    - https://en.wikipedia.org/wiki/Octave_band <br>
    - https://en.wikipedia.org/wiki/Spectral_flatness <br>
    - https://en.wikipedia.org/wiki/Root_mean_square <br>
    - https://github.com/scipy/scipy <br>
    - http://practicalcryptography.com/miscellaneous/machine-learning/guide-mel-frequency-cepstral-coefficients-mfccs/

In [21]:
y,fs = librosa.load("dataset/MER_audio_taffc_dataset/Q1/MT0000040632.mp3", sr=sr, mono = mono)

In [22]:
def hz2mel(f):
    return 2595 * np.log10(1 + f / 700)

In [23]:
def mel2hz(m):
    return 700 * (10 ** (m / 2595) - 1)

In [24]:
def get_time_frames(y, nperseg, noverlap):
    #from numpy/librosa
    step = nperseg - noverlap
    shape = y.shape[:-1] + ((y.shape[-1] - noverlap) // step, nperseg)
    strides = y.strides[:-1] + (step * y.strides[-1], y.strides[-1])
    result = np.lib.stride_tricks.as_strided(
        y, shape=shape, strides=strides, writeable=True
    )
    result = result.swapaxes(0, 1)
    return result

In [25]:
def mel_filterbank(sr, n_fft, frame_length, n=40, fmin=20, fmax=None):
    if fmax == None:
        fmax = sr/2
    min_mel, max_mel = hz2mel(fmin), hz2mel(fmax)
    
    mel_freq = mel2hz(np.linspace(min_mel, max_mel, n+2))
    bins = np.floor((n_fft + 1) / sr * mel_freq).astype(int)
    
    filterbank = np.zeros((n, frame_length))
    
    for i in range(1, n+1):
        left_pos = bins[i-1]
        center_pos = bins[i]
        right_pos = bins[i+1]
        
        up = np.linspace(0, 1, center_pos - left_pos + 1)
        down = np.linspace(1, 0, right_pos - center_pos + 1)
        
        filterbank[i-1, left_pos:(center_pos+1)] = up
        filterbank[i-1, center_pos:(right_pos+1)] = down
        
    return filterbank

In [26]:
def mfcc(mag, mel_filter, n=13):
    filter_coef = (mel_filter * mag).sum(axis = 1)
    filter_coef[filter_coef == 0] = 1
    logged_coefs = 20*np.log10(filter_coef)
    ceps_coefs = scf.dct(logged_coefs)
    return ceps_coefs[:n]

In [27]:
def spectral_centroid(mag, freqs, smart=True):
    min_value = 1e-40
    if smart:
        #remove zeros
        mag = np.maximum(min_value, mag)
        
    div = np.sum(mag)
    return np.sum(mag * freqs) / div if div != 0 else 0

In [28]:
def spectral_bandwith(mag, freq, p=2):
    centroid = spectral_centroid(mag, freq)
    return np.sum( mag * (freq - centroid)**p ) ** (1/p)

In [29]:
def spectral_flatness(mag, smart=True):
    power_spec=mag**2
    min_value = 1e-20
    if smart:
        #to remove zeros
        power_spec = np.maximum(min_value, power_spec)
    div = power_spec.mean()
    return scs.gmean(power_spec) / div if div!=0 else 0

In [30]:
def spectral_rollof(mag, freq, perc=0.85):
    cumulative_sum = np.cumsum(mag)
    threshold = 0.85 * cumulative_sum[-1]
    index = len(cumulative_sum[cumulative_sum < threshold])
    return freq[index]

In [31]:
def root_mean_square(y):
    return np.sqrt((y**2).mean())

In [32]:
def zero_crossing_rate(y):
    y[y<0] = -1
    y[y>=0] = 1
    return np.mean(np.abs(y[1:] - y[:-1]))/2

In [33]:
def features(y, sr=22050, hop_length=512, n_mfcc=13, n_fft=2048):
    f, t, spec = scsg.stft( y, sr, nperseg=n_fft, noverlap=n_fft - hop_length, window="hann", padded=False )
    time_frames = get_time_frames(y, n_fft, n_fft - hop_length)
    spec *= n_fft / 2  
    mag = np.abs(spec)
    
    mel_filter = mel_filterbank(sr=sr, n_fft=n_fft, frame_length=len(f))
    
    mfccs = np.apply_along_axis(mfcc, 0, mag, mel_filter).reshape(13, spec.shape[-1])
    centroid = np.apply_along_axis(spectral_centroid, 0, mag, f).reshape(1, spec.shape[-1])
    bandwith = np.apply_along_axis(spectral_bandwith, 0, mag, f).reshape(1, spec.shape[-1])
    flatness = np.apply_along_axis(spectral_flatness, 0, mag).reshape(1, spec.shape[-1])
    rollof = np.apply_along_axis(spectral_rollof, 0, mag, f).reshape(1, spec.shape[-1])
    
    rms = np.apply_along_axis(root_mean_square, 0, time_frames).reshape(1, time_frames.shape[-1])
    zcr = np.apply_along_axis(zero_crossing_rate, 0, time_frames).reshape(1, time_frames.shape[-1])
    return (mfccs,
            centroid, 
            bandwith, 
            flatness,
            rollof, 
            rms, 
            zcr)

In [34]:
songs = sorted(os.listdir("dataset/MER_audio_taffc_dataset/Q"))

k=20
y,fs = librosa.load(f"dataset/MER_audio_taffc_dataset/Q/{songs[k]}", sr=sr, mono = mono)

(mfccs,
 centroid, 
 bandwith,
 flatness,
 rollof,
 rms, 
 zcr) = features(y.copy())

print(#np.any((
     mfccs,
     centroid,
     bandwith,
     flatness,
     np.any(np.isnan(rollof)),
     np.any(np.isnan(rms)),
     np.any(np.isnan(zcr)),
    #)),
     sep ="\n",
     end="\n\n")

mfccs = np.apply_along_axis(extract_features, 1, mfccs).flatten()
centroid= np.apply_along_axis(extract_features, 1, centroid).flatten()
bandwith = np.apply_along_axis(extract_features, 1, bandwith).flatten()
flatness = np.apply_along_axis(extract_features, 1, flatness).flatten()
rollof = np.apply_along_axis(extract_features, 1, rollof).flatten()
rms = np.apply_along_axis(extract_features, 1, rms).flatten()
zcr = np.apply_along_axis(extract_features, 1, zcr).flatten()

print(#np.any((
     np.any(np.isnan(mfccs)),
     np.any(np.isnan(centroid)),
     np.any(np.isnan(bandwith)),
     np.any(np.isnan(flatness)),
     np.any(np.isnan(rollof)),
     np.any(np.isnan(rms)),
     np.any(np.isnan(zcr)),
    #)),
     sep ="\n",
     end="\n\n")

[[ 0.00000000e+00  0.00000000e+00 -3.17948625e+03 ...  1.02451118e+03
   5.61967440e+02 -6.74623957e+02]
 [ 0.00000000e+00  0.00000000e+00  1.40487534e+02 ...  1.08662451e+02
   1.08302334e+02  3.62996122e+01]
 [ 0.00000000e+00  0.00000000e+00  2.11672882e+02 ...  1.38183488e+01
   2.35107490e+01  1.43527028e+02]
 ...
 [ 0.00000000e+00  0.00000000e+00 -6.07910228e+01 ...  2.09368195e+00
   7.23438542e+00 -7.83129299e+00]
 [ 0.00000000e+00  0.00000000e+00  2.52346272e+01 ...  5.26763021e+01
   3.02826056e+01 -3.45040255e+00]
 [ 0.00000000e+00  0.00000000e+00 -3.21052719e+01 ...  4.70293848e+01
   5.20744176e+01  4.09081341e+01]]
[[5512.50015073 5512.50015073 2175.56555029 ... 2249.50704408
  2293.83838047 2648.2864036 ]]
[[    0.             0.          2091.27563384 ... 37097.42202401
  26456.05375657 12334.78582425]]
[[1.0000014  1.0000014  0.01688097 ... 0.01609812 0.01694409 0.02282196]]
False
False
False

False
False
False
False
False
False
False



In [35]:
if run:
    path = "dataset/MER_audio_taffc_dataset/Q"
    #900x190
    feature_matrix2 = np.zeros((900, 133))
    index=0
    for audio in sorted(os.listdir(path)):
        print(index)
        y, fs = librosa.load(f"{path}/{audio}", sr=sr, mono = mono)

        (mfccs,
         centroid, 
         bandwith,
         flatness,
         rollof,
         rms, 
         zcr) = features(y.copy())

        #Spectral features extraction
        mfccs = np.apply_along_axis(extract_features, 1, mfccs).flatten()
        centroid= np.apply_along_axis(extract_features, 1, centroid).flatten()
        bandwith = np.apply_along_axis(extract_features, 1, bandwith).flatten()
        flatness = np.apply_along_axis(extract_features, 1, flatness).flatten()
        rollof = np.apply_along_axis(extract_features, 1, rollof).flatten()
        rms = np.apply_along_axis(extract_features, 1, rms).flatten()
        zcr = np.apply_along_axis(extract_features, 1, zcr).flatten()

        feature_vector = np.concatenate((mfccs, centroid, bandwith, flatness, rollof, rms, zcr))

        feature_matrix2[index]=feature_vector;
        index+=1;


In [36]:
if run:
    np.savetxt("dataset/Features - Audio MER/manual_extracted_features.csv", feature_matrix2)

In [37]:
if run:
    feat_matrix_norm2 = np.apply_along_axis(min_max_scale, 0, feature_matrix2)

In [38]:
if run:
    np.savetxt("dataset/Features - Audio MER/manual_extracted_features_normalized.csv", feat_matrix_norm2)

In [39]:
feature_matrix2 = np.loadtxt("dataset/Features - Audio MER/manual_extracted_features.csv")
feat_matrix_norm2 = np.loadtxt("dataset/Features - Audio MER/manual_extracted_features_normalized.csv")

Para validar os features calculados manualmente, foram comparados os gráficos resultantes das funções manuais com as funções de librosa, de modo a perceber se existia diferenças significativas e foram ainda comparadas algumas seções das matrizes resultantes de cada função. Foram vistos os valores iniciais e valores numa posição mais interna. Na maioria dos features, as diferenças começavam a verificar-se a partir da 3ª casa decimal, devido a erros de precisão.

## Semana 3
### Exercicio 3.2

In [40]:
def dist(v1, v2, dist_type):
    if dist_type == "e":
        return euclidean(v1, v2)
    elif dist_type == "m":
        return cityblock(v1, v2)
    elif dist_type == "c":
        return cosine(v1, v2)
    else:
        raise Exception

### *Exercicio 3.2*

In [41]:
if run:
    for dist_type in ("e", "m", "c"):
        similarity_matrix = np.zeros((900,900))
        for i in range(900):
            for j in range(i+1, 900):
                d = dist(feat_matrix_norm[i, :], feat_matrix_norm[j, :], dist_type)
                similarity_matrix[i, j] = d
                similarity_matrix[j, i] = d
        print(dist_type, "done")
        np.savetxt(f"dataset/SimilarityMatrix/d{dist_type}.csv", similarity_matrix, fmt="%f")


In [42]:
if run:
    for dist_type in ("e", "m", "c"):
        similarity_matrix = np.zeros((900,900))
        for i in range(900):
            for j in range(i+1, 900):
                d = dist(feat[i, :], feat[j, :], dist_type)
                similarity_matrix[i, j] = d
                similarity_matrix[j, i] = d
        print(dist_type, "done")
        np.savetxt(f"dataset/SimilarityMatrix/top100{dist_type}.csv", similarity_matrix, fmt="%f")


In [43]:
if run:
    for dist_type in ("e", "m", "c"):
        similarity_matrix = np.zeros((900,900))
        for i in range(900):
            for j in range(i+1, 900):
                d = dist(feat_matrix_norm2[i, :], feat_matrix_norm2[j, :], dist_type)
                similarity_matrix[i, j] = d
                similarity_matrix[j, i] = d
        print(dist_type, "done")
        np.savetxt(f"dataset/SimilarityMatrix/m{dist_type}.csv", similarity_matrix, fmt="%f")


### *Exercicio 3.3*

In [44]:
def get_song_index(song_name):
    songs = os.listdir("dataset/MER_audio_taffc_dataset/Q")
    songs.sort()
    return np.where(np.array(songs)==song_name)[0][0]

In [45]:
def get_song_name(song_index):
    songs = os.listdir("dataset/MER_audio_taffc_dataset/Q")
    songs.sort()
    return songs[song_index]

In [46]:
if run:
    for song in os.listdir("Queries/"):
        i = get_song_index(song)
        for dist_type in ("e", "m", "c"):
            song_row = np.loadtxt(f"dataset/SimilarityMatrix/d{dist_type}.csv")[i]
            sorted_indexes = np.argsort(song_row)[1:21]
            top_songs_names = list(map(lambda song_index: get_song_name(song_index), sorted_indexes))
            os.makedirs(f"dataset/Rankings/{song}", exist_ok=True)
            np.savetxt(f"dataset/Rankings/{song}/d{dist_type}.csv", top_songs_names, fmt="%s")

In [47]:
if run:
    for song in os.listdir("Queries/"):
        i = get_song_index(song)
        for dist_type in ("e", "m", "c"):
            song_row = np.loadtxt(f"dataset/SimilarityMatrix/top100{dist_type}.csv")[i]
            sorted_indexes = np.argsort(song_row)[1:21]
            top_songs_names = list(map(lambda song_index: get_song_name(song_index), sorted_indexes))
            os.makedirs(f"dataset/Rankings/{song}", exist_ok=True)
            np.savetxt(f"dataset/Rankings/{song}/top100{dist_type}.csv", top_songs_names, fmt="%s")

In [48]:
if run:
    for song in os.listdir("Queries/"):
        i = get_song_index(song)
        for dist_type in ("e", "m", "c"):
            song_row = np.loadtxt(f"dataset/SimilarityMatrix/m{dist_type}.csv")[i]
            sorted_indexes = np.argsort(song_row)[1:21]
            top_songs_names = list(map(lambda song_index: get_song_name(song_index), sorted_indexes))
            os.makedirs(f"dataset/Rankings/{song}", exist_ok=True)
            np.savetxt(f"dataset/Rankings/{song}/m{dist_type}.csv", top_songs_names, fmt="%s")

In [49]:
queries = {
    0 : "MT0000202045.mp3",
    1 : "MT0000379144.mp3",
    2 : "MT0000414517.mp3",
    3 : "MT0000956340.mp3"
}

In [50]:
song_name = queries[0]
print(f"Recomendations overlap for song {song_name}:")
for feat in ("d", "top100", "m"):
    path = f"dataset/Rankings/{song_name}/"

    lib_euc = np.loadtxt(path+f"{feat}e.csv", dtype=object)
    lib_man = np.loadtxt(path+f"{feat}c.csv", dtype=object)
    lib_cos = np.loadtxt(path+f"{feat}m.csv", dtype=object)

    print(
        f"{len(np.intersect1d( np.intersect1d(lib_euc, lib_man), lib_cos )) / len(lib_euc)*100:.2f}", "%"
    )

Recomendations overlap for song MT0000202045.mp3:
75.00 %
60.00 %
75.00 %


In [51]:
song_name = queries[1]
print(f"Recomendations overlap for song {song_name}:")
for feat in ("d", "top100", "m"):
    path = f"dataset/Rankings/{song_name}/"

    lib_euc = np.loadtxt(path+f"{feat}e.csv", dtype=object)
    lib_man = np.loadtxt(path+f"{feat}c.csv", dtype=object)
    lib_cos = np.loadtxt(path+f"{feat}m.csv", dtype=object)

    print(
        f"{len(np.intersect1d( np.intersect1d(lib_euc, lib_man), lib_cos )) / len(lib_euc)*100:.2f}", "%"
    )

Recomendations overlap for song MT0000379144.mp3:
70.00 %
75.00 %
55.00 %


In [52]:
song_name = queries[2]
print(f"Recomendations overlap for song {song_name}:")
for feat in ("d", "top100", "m"):
    path = f"dataset/Rankings/{song_name}/"

    lib_euc = np.loadtxt(path+f"{feat}e.csv", dtype=object)
    lib_man = np.loadtxt(path+f"{feat}c.csv", dtype=object)
    lib_cos = np.loadtxt(path+f"{feat}m.csv", dtype=object)

    print(
        f"{len(np.intersect1d( np.intersect1d(lib_euc, lib_man), lib_cos )) / len(lib_euc)*100:.2f}", "%"
    )

Recomendations overlap for song MT0000414517.mp3:
55.00 %
50.00 %
55.00 %


In [53]:
song_name = queries[3]
print(f"Recomendations overlap for song {song_name}:")
for feat in ("d", "top100", "m"):
    path = f"dataset/Rankings/{song_name}/"

    lib_euc = np.loadtxt(path+f"{feat}e.csv", dtype=object)
    lib_man = np.loadtxt(path+f"{feat}c.csv", dtype=object)
    lib_cos = np.loadtxt(path+f"{feat}m.csv", dtype=object)

    print(
        f"{len(np.intersect1d( np.intersect1d(lib_euc, lib_man), lib_cos )) / len(lib_euc) * 100:.2f}", "%"
    )

Recomendations overlap for song MT0000956340.mp3:
75.00 %
80.00 %
90.00 %


In [54]:
total = 0
for song in range(4):
    song_name = queries[song]
    path = f"dataset/Rankings/{song_name}/"
    for feat in ("d", "top100", "m"):
        lib_euc = np.loadtxt(path+f"{feat}e.csv", dtype=object)
        lib_man = np.loadtxt(path+f"{feat}c.csv", dtype=object)
        lib_cos = np.loadtxt(path+f"{feat}m.csv", dtype=object)

        total+=len(np.intersect1d( np.intersect1d(lib_euc, lib_man), lib_cos )) / len(lib_euc)
print(f"Recomendations overlap mean:", end = " ")
print(f"{total/4/3*100:.2f}", "%")

Recomendations overlap mean: 67.92 %


### Exercicio 3.4


Os diferentes tipos de distância podem gerar recomendações diferentes, como é possível verificar nas células anteriores, onde se calcula, para cada querie, a percentagem de interceção entre os conjuntos gerados pelas diferentes métricas. <br>
Em nenhum caso os conjuntos são coincidentes. No entanto, as interceções são significativas, existindo um mínimo de 50% overlap nos rankings. Isto é, para cada querie, pelo menos 10 musicas surgiram nas 3 diferentes recomendações. <br>

Assim, certos tipos de distância podem gerar melhores recomendações que outros. <br>
É possivel que a melhor métrica de distância dependa do contexto do problema e dos descritores retirados para o resolver. <br>

Deste modo, a melhor medida de distância deve ser inferida retirando partido do dataset que se tem. Através de métodos de validação é possível estimar qual o modelo que obtém melhores classificações. A partir destes resultados são retiradas as conclusões necessárias para apoiar a decisão.

## Semana 4


### *Exercicio 4.1*

In [55]:
def get_match_counts(data, index1, index2):
    song1 = data.loc[index1]
    song2 = data.loc[index2]
    
    points = 0
    for metric in data.columns:
        try:
            v1 = np.array(list(map(lambda string: string.strip(), song1[metric].split(";"))))
            v2 = np.array(list(map(lambda string: string.strip(), song2[metric].split(";"))))
            matches = np.intersect1d(v1, v2, assume_unique=True)
            points += len(matches)
        except:
            points+=0
    return points

In [56]:
cols = ["Artist", "GenresStr", "Quadrant", "MoodsFoundStr"]
metadata = pd.read_csv("dataset/MER_audio_taffc_dataset/panda_dataset_taffc_metadata.csv", index_col=0)[cols]

In [57]:
if run:
    similarity_matrix = np.zeros((900,900))
    for song_1 in metadata.index:
        i = get_song_index(song_1 + ".mp3")
        for song_2 in metadata.index:
            sim_count = get_match_counts(metadata, song_1, song_2)
            j = get_song_index(song_2 + ".mp3")
            similarity_matrix[i,j] = sim_count
        print(i)

    np.savetxt(f"dataset/SimilarityMatrix/metadata_sim.csv", similarity_matrix, fmt="%d")

In [58]:
similarity_matrix = np.loadtxt("dataset/SimilarityMatrix/metadata_sim.csv")

In [59]:
for querie in os.listdir("dataset/Rankings/"):
    if "mp3" in querie:
        i = get_song_index(querie)
        relevant = (np.argsort(similarity_matrix[i])[::-1])[:21]
        relevant = relevant[relevant!=i]
        relevant = list(map(lambda song: get_song_name(song), relevant))
        
        #pprint(relevant)
        print(f"Music: {querie}")
        for rank in os.listdir(f"dataset/Rankings/{querie}"):
            if "metadados" not in rank:
                ranking = np.loadtxt(f"dataset/Rankings/{querie}/{rank}", dtype=object)
                match = np.intersect1d(relevant, ranking)
                precision = len(match)/20

                print(f"\tDistance: {rank} --> Precision = {precision}")
        print()   

Music: MT0000202045.mp3
	Distance: dm.csv --> Precision = 0.0
	Distance: dc.csv --> Precision = 0.05
	Distance: top100m.csv --> Precision = 0.0
	Distance: mm.csv --> Precision = 0.0
	Distance: de.csv --> Precision = 0.05
	Distance: mc.csv --> Precision = 0.05
	Distance: me.csv --> Precision = 0.05
	Distance: top100c.csv --> Precision = 0.0
	Distance: top100e.csv --> Precision = 0.0

Music: MT0000956340.mp3
	Distance: dm.csv --> Precision = 0.0
	Distance: dc.csv --> Precision = 0.0
	Distance: top100m.csv --> Precision = 0.05
	Distance: mm.csv --> Precision = 0.0
	Distance: de.csv --> Precision = 0.0
	Distance: mc.csv --> Precision = 0.0
	Distance: me.csv --> Precision = 0.0
	Distance: top100c.csv --> Precision = 0.1
	Distance: top100e.csv --> Precision = 0.1

Music: MT0000379144.mp3
	Distance: dm.csv --> Precision = 0.0
	Distance: dc.csv --> Precision = 0.0
	Distance: top100m.csv --> Precision = 0.0
	Distance: mm.csv --> Precision = 0.0
	Distance: de.csv --> Precision = 0.0
	Distance: m

### *Exercicio 4.1.4*

   É possível verificar que as recomendações feitas pelos metadados e as recomendações feitas pelos top100 features pouco ou nada têm a ver. <br>
   As recomendações pelo top100 features têm como base uma procura por emoção. Isto é, pesquisam por músicas que aparentem transmitir uma emoção similar à da querie original. Isto pode traduzir-se uma música auditivamente similar, ou completamente diferente. Por exemplo, o Rap está muitas vezes associado a música agressiva, tal como o Metal, no entanto não são semelhantes. <br>
   Por outro lado, as recomendações com base nos metadados são sugestões que se baseiam no conteúdo. No caso específico, a recomendação retira informações sobre: <br>
   &nbsp; 1) autor; <br>
   &nbsp; 2) Género; <br>
   &nbsp; 3) Quadrante; <br>
   &nbsp; 4) Emoção. <br>
   Constata-se que nos metadados a emoção é também considerada. No entanto, no top 100 features a emoção é definida por um conjunto de descritores. Depois, a recomendação é feita com uma métrica de distância. Nos metadados a emoção está previamente classificada em valores discretos bem definidos. Estes valores são apenas comparados de modo a verificar o tamanho do conjunto da interseção. 

### *Exercicio 4.2.2 - top100*
- Avaliação conjunto de 100 features, distância de cosseno

In [60]:
song_name = "MT0014794891.mp3"
y, fs = librosa.load(f"dataset/MER_audio_taffc_dataset/Q/{song_name}", sr=sr)
#sd.play(y, sr)

In [61]:
evaluations = pd.read_csv("dataset/Rankings/Evaluation/Subjective/top100/MT0000202045.mp3.csv")
print("\t-- Querie 1 --")
print("-- Estatisticas por utilizador --")
display(evaluations.describe().loc[["mean", "std"]])

print("-- Estatisticas por musica --")
display(evaluations.T.describe().loc[["mean","std"]])

print("-- Estatisticas Gerais --")
print(
    "mean=" + str(evaluations.to_numpy().mean()),
    "std =" + str(evaluations.to_numpy().std()),
    sep="\n"
)

	-- Querie 1 --
-- Estatisticas por utilizador --


Unnamed: 0,Joao,Joel,Tomas
mean,1.4,1.5,1.75
std,0.820783,1.147079,1.208522


-- Estatisticas por musica --


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
mean,1.0,1.0,1.333333,1.0,1.0,1.333333,1.0,4.333333,1.0,3.333333,1.0,1.0,1.0,1.333333,2.666667,2.0,1.0,2.666667,1.0,1.0
std,0.0,0.0,0.57735,0.0,0.0,0.57735,0.0,1.154701,0.0,1.154701,0.0,0.0,0.0,0.57735,0.57735,1.0,0.0,1.527525,0.0,0.0


-- Estatisticas Gerais --
mean=1.55
std =1.0555409355712675


In [62]:
evaluations = pd.read_csv("dataset/Rankings/Evaluation/Subjective/top100/MT0000379144.mp3.csv")
print("\t-- Querie 2 --")
print("-- Estatisticas por utilizador --")
display(evaluations.describe().loc[["mean", "std"]])

print("-- Estatisticas por musica --")
display(evaluations.T.describe().loc[["mean","std"]])

print("-- Estatisticas Gerais --")
print(
    "mean=" + str(evaluations.to_numpy().mean()),
    "std =" + str(evaluations.to_numpy().std()),
    sep="\n"
)

	-- Querie 2 --
-- Estatisticas por utilizador --


Unnamed: 0,Joao,Joel,Tomas
mean,2.3,1.7,1.9
std,1.41793,0.978721,0.967906


-- Estatisticas por musica --


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
mean,1.666667,1.333333,3.333333,3.333333,1.333333,4.0,3.666667,1.666667,1.0,1.333333,1.0,1.0,2.0,2.0,3.0,2.333333,1.0,1.666667,1.333333,1.333333
std,0.57735,0.57735,1.154701,0.57735,0.57735,1.0,0.57735,0.57735,0.0,0.57735,0.0,0.0,1.0,1.732051,1.0,1.154701,0.0,0.57735,0.57735,0.57735


-- Estatisticas Gerais --
mean=1.9666666666666666
std =1.1396880664852505


In [63]:
evaluations = pd.read_csv("dataset/Rankings/Evaluation/Subjective/top100/MT0000414517.mp3.csv")
print("\t-- Querie 3 --")
print("-- Estatisticas por utilizador --")
display(evaluations.describe().loc[["mean", "std"]])

print("-- Estatisticas por musica --")
display(evaluations.T.describe().loc[["mean","std"]])

print("-- Estatisticas Gerais --")
print(
    "mean=" + str(evaluations.to_numpy().mean()),
    "std =" + str(evaluations.to_numpy().std()),
    sep="\n"
)

	-- Querie 3 --
-- Estatisticas por utilizador --


Unnamed: 0,Joao,Joel,Tomas
mean,1.35,1.45,1.65
std,0.74516,0.944513,1.03999


-- Estatisticas por musica --


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
mean,2.333333,1.333333,1.0,1.0,1.333333,1.333333,1.0,1.0,2.0,1.666667,1.0,1.666667,1.0,1.0,2.666667,1.0,4.0,1.333333,1.0,1.0
std,1.154701,0.57735,0.0,0.0,0.57735,0.57735,0.0,0.0,1.0,1.154701,0.0,1.154701,0.0,0.0,0.57735,0.0,1.0,0.57735,0.0,0.0


-- Estatisticas Gerais --
mean=1.4833333333333334
std =0.9035424112286533


In [64]:
evaluations = pd.read_csv("dataset/Rankings/Evaluation/Subjective/top100/MT0000956340.mp3.csv")
print("\t-- Querie 4 --")
print("-- Estatisticas por utilizador --")
display(evaluations.describe().loc[["mean", "std"]])

print("-- Estatisticas por musica --")
display(evaluations.T.describe().loc[["mean","std"]])

print("-- Estatisticas Gerais --")
print(
    "mean=" + str(evaluations.to_numpy().mean()),
    "std =" + str(evaluations.to_numpy().std()),
    sep="\n"
)

	-- Querie 4 --
-- Estatisticas por utilizador --


Unnamed: 0,Joao,Joel,Tomas
mean,2.85,2.6,3.2
std,1.386969,1.313893,1.399248


-- Estatisticas por musica --


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
mean,4.333333,4.333333,4.333333,3.666667,3.666667,1.0,4.0,2.0,4.0,1.0,2.666667,3.666667,1.0,4.333333,1.0,1.0,3.333333,2.333333,3.333333,2.666667
std,0.57735,0.57735,0.57735,0.57735,0.57735,0.0,0.0,1.0,0.0,0.0,0.57735,0.57735,0.0,0.57735,0.0,0.0,1.154701,0.57735,0.57735,1.154701


-- Estatisticas Gerais --
mean=2.8833333333333333
std =1.3551342696902358


In [65]:
path = "dataset/Rankings/Evaluation/Subjective/top100/"
try:
    del df
except:
    pass
    
for song in sorted(os.listdir(path)):
    if "mp3" in song:
        temp_df = pd.read_csv(f"{path}{song}")
        try:
            df = pd.concat((df, temp_df)).reset_index(drop=True)
        except:
            df = temp_df
            
temp = df.mean(axis=1)
temp = temp[temp>=2.5]
print("Precision =", len(temp)/len(df))

Precision = 0.3


### *Exercicio 4.2.2 - metadados*
- Avaliação similaridade com base nos metadados 

In [66]:
evaluations = pd.read_csv("dataset/Rankings/Evaluation/Subjective/top100/MT0000202045.mp3.csv")
print("\t-- Querie 1 --")
print("-- Estatisticas por utilizador --")
display(evaluations.describe().loc[["mean", "std"]])

print("-- Estatisticas por musica --")
display(evaluations.T.describe().loc[["mean","std"]])

print("-- Estatisticas Gerais --")
print(
    "mean=" + str(evaluations.to_numpy().mean()),
    "std =" + str(evaluations.to_numpy().std()),
    sep="\n"
)

	-- Querie 1 --
-- Estatisticas por utilizador --


Unnamed: 0,Joao,Joel,Tomas
mean,1.4,1.5,1.75
std,0.820783,1.147079,1.208522


-- Estatisticas por musica --


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
mean,1.0,1.0,1.333333,1.0,1.0,1.333333,1.0,4.333333,1.0,3.333333,1.0,1.0,1.0,1.333333,2.666667,2.0,1.0,2.666667,1.0,1.0
std,0.0,0.0,0.57735,0.0,0.0,0.57735,0.0,1.154701,0.0,1.154701,0.0,0.0,0.0,0.57735,0.57735,1.0,0.0,1.527525,0.0,0.0


-- Estatisticas Gerais --
mean=1.55
std =1.0555409355712675


In [67]:
evaluations = pd.read_csv("dataset/Rankings/Evaluation/Subjective/metadados/MT0000379144.mp3.csv")
print("\t-- Querie 2 --")
print("-- Estatisticas por utilizador --")
display(evaluations.describe().loc[["mean", "std"]])

print("-- Estatisticas por musica --")
display(evaluations.T.describe().loc[["mean","std"]])

print(
    "mean=" + str(evaluations.to_numpy().mean()),
    "std =" + str(evaluations.to_numpy().std()),
    sep="\n"
)

	-- Querie 2 --
-- Estatisticas por utilizador --


Unnamed: 0,Joao,Joel,Tomas
mean,2.45,2.65,3.2
std,0.998683,0.933302,1.151658


-- Estatisticas por musica --


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
mean,3.333333,3.666667,2.0,3.0,3.333333,3.0,3.333333,4.0,3.0,2.0,1.666667,2.0,2.0,2.333333,2.666667,3.0,3.333333,1.0,4.333333,2.333333
std,1.154701,0.57735,0.0,1.0,1.527525,0.0,1.154701,1.0,1.0,1.0,0.57735,1.0,0.0,0.57735,0.57735,1.0,0.57735,0.0,0.57735,0.57735


mean=2.7666666666666666
std =1.0546194679704248


In [68]:
evaluations = pd.read_csv("dataset/Rankings/Evaluation/Subjective/metadados/MT0000414517.mp3.csv")
print("\t-- Querie 3 --")
print("-- Estatisticas por utilizador --")
display(evaluations.describe().loc[["mean", "std"]])


print("-- Estatisticas por musica --")
display(evaluations.T.describe().loc[["mean","std"]])

print(
    "mean=" + str(evaluations.to_numpy().mean()),
    "std =" + str(evaluations.to_numpy().std()),
    sep="\n"
)

	-- Querie 3 --
-- Estatisticas por utilizador --


Unnamed: 0,Joao,Joel,Tomas
mean,2.4,2.4,2.45
std,1.231174,1.187656,1.356272


-- Estatisticas por musica --


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
mean,4.333333,3.666667,3.666667,3.0,3.0,5.0,2.333333,2.0,2.333333,1.333333,2.333333,2.0,1.333333,1.0,1.0,1.333333,1.333333,1.0,2.666667,3.666667
std,0.57735,0.57735,0.57735,0.0,0.0,0.0,0.57735,0.0,0.57735,0.57735,1.154701,0.0,0.57735,0.0,0.0,0.57735,0.57735,0.0,0.57735,0.57735


mean=2.4166666666666665
std =1.2287075413711035


In [69]:
evaluations = pd.read_csv("dataset/Rankings/Evaluation/Subjective/metadados/MT0000956340.mp3.csv")
print("\t-- Querie 4 --")
print("-- Estatisticas por utilizador --")
display(evaluations.describe().loc[["mean", "std"]])


print("-- Estatisticas por musica --")
display(evaluations.T.describe().loc[["mean","std"]])

print(
    "mean = " + str(evaluations.to_numpy().mean()),
    "std = " + str(evaluations.to_numpy().std()),
    sep="\n"
)

	-- Querie 4 --
-- Estatisticas por utilizador --


Unnamed: 0,Joao,Joel,Tomas
mean,2.25,1.9,2.45
std,1.292692,1.209611,1.431782


-- Estatisticas por musica --


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
mean,3.666667,3.333333,4.333333,3.666667,1.0,1.333333,1.666667,1.333333,1.0,3.666667,4.0,1.666667,1.0,3.333333,3.0,1.0,1.0,2.0,1.0,1.0
std,0.57735,1.527525,0.57735,0.57735,0.0,0.57735,0.57735,0.57735,0.0,0.57735,0.0,0.57735,0.0,0.57735,0.0,0.0,0.0,1.0,0.0,0.0


mean = 2.2
std = 1.3012814197295421


In [70]:
path = "dataset/Rankings/Evaluation/Subjective/metadados/"
try:
    del df
except:
    pass
    
for song in sorted(os.listdir(path)):
    if "mp3" in song:
        temp_df = pd.read_csv(f"{path}{song}")
        try:
            df = pd.concat((df, temp_df)).reset_index(drop=True)
        except:
            df = temp_df
            
temp = df.mean(axis=1)
temp = temp[temp>=2.5]
print("Precision =", len(temp)/len(df))

Precision = 0.3625


### *Exercicio 4.2.3*
   
   O modelo de recomendação, quer através dos top100 features, quer através dos metadados, apresentou resultados que podem ser classificados como maus, tendo uma precisão de cerca de 30%. Isto pode dever-se a vários fatores, mas o que nos parece mais relevante é o facto de o ouvinte se ter baseado na similaridade da música e não na emoção subjacente a esta. 
   Este fator justificaria também o facto de a recomendação pelos metadados ter tido melhor pontuação. Como foi referido no ponto 4.1.4, a recomendação com base nos metadados é mais próxima da recomendação por conteúdo do que a recomendação através do conjunto de 100 features. Apesar disto, a precisão obtida indica que a recomendação com base nos metadados pode não ter bons resultados.<br>
   
   Outra causa pode ser o próprio dataset. Não foi feita nenhuma verificação relativa à distribuição dos tipos de música presentes na base de dados, quer por emoção, quer por género. O modelo pode não conseguir calcular boas recomendações pelo facto de não as ter na sua posse e, assim, erradamente, demonstrar maus resultados. Se este for o caso, é representativo do ditado "Garbage in, garbage out". 
