In [1]:
import pandas as pd
import numpy as np
from joblib import dump
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm

In [2]:
# Load the feature table and turn the label column into a categorical so it
# can be encoded as integer class codes further down.
data_path = '../data/features_3_sec.csv'
df = pd.read_csv(data_path)
df['label'] = df['label'].astype('category')

# Min-max scale every feature column from 'chroma_stft_mean' through
# 'mfcc20_var' (label-based slice, both ends inclusive).
# NOTE(review): the scaler is fit on all rows *before* the train/test split
# below, so held-out rows influence the scaling parameters. Left unchanged
# because this fitted scaler is the one persisted later in the notebook and
# the covariance file presumably lives in the same scaled space -- confirm.
scaler = MinMaxScaler()
x_scaled = scaler.fit_transform(df.loc[:, 'chroma_stft_mean':'mfcc20_var'].values)
y = df['label'].cat.codes.values

# Precomputed covariance matrix, read as a header-less numeric table.
cov = pd.read_csv('../data/kur_covariance_scaled.csv', header=None).values

# Squared Mahalanobis distance of every row to the column-wise mean:
#     d_i = v_i @ inv(cov) @ v_i
# Computed row by row via einsum. The previous form,
# np.diag(v @ inv(cov) @ v.T), materialised an (n_samples x n_samples)
# matrix only to keep its diagonal.
v = x_scaled - x_scaled.mean(axis=0)
cov_inv = np.linalg.inv(cov)
mahalanobis_distances = np.einsum('ij,ij->i', v @ cov_inv, v)

# Outlier fence: third quartile plus three interquartile ranges.
p25, p75 = np.percentile(mahalanobis_distances, [25, 75])
threshold = p75 + 3 * (p75 - p25)

# Keep only rows strictly below the fence; one mask is shared by features and
# labels so they stay aligned.
inlier_mask = mahalanobis_distances < threshold
x_filtered = x_scaled[inlier_mask]
y_filtered = y[inlier_mask]

# Default train_test_split proportions, fixed seed for a repeatable split.
x_train, x_test, y_train, y_test = train_test_split(x_filtered, y_filtered, random_state=42)

In [3]:
# 1-nearest-neighbour classifier using the Minkowski metric with p=1
# (Manhattan distance), parallelised over all available cores.
knn = KNeighborsClassifier(n_neighbors=1, p=1, n_jobs=-1)
knn.fit(x_train, y_train)

# Mean accuracy on the training and the held-out partitions.
knn_train_fn, knn_test_fn = knn.score(x_train, y_train), knn.score(x_test, y_test)

# Caption and value each go on their own line.
print('Score de entrenamiento: ', knn_train_fn, 'Score de Testeo: ', knn_test_fn, sep='\n')

Score de entrenamiento: 
0.9991865509761388
Score de Testeo: 
0.9459129727531517


In [4]:
# Persist the fitted k-NN classifier to disk; the cell displays the list of
# written file names returned by joblib.
dump(value=knn, filename='../data/models/knn_937.joblib')

['../data/models/knn_937.joblib']

In [5]:
# Persist the fitted MinMaxScaler next to the k-NN model so the same scaling
# can be re-applied to new samples before prediction.
dump(value=scaler, filename='../data/models/knn_937_scaler.joblib')

['../data/models/knn_937_scaler.joblib']

In [None]:
# TODO: try the L1 norm for outlier filtering

In [18]:
# SVC experiment with a large C and a large gamma (default kernel).
# The output recorded with this cell shows the outcome: ~0.999 training
# accuracy against ~0.13 test accuracy, i.e. this setting does not generalise.
svmc = svm.SVC(C=250, gamma=4)
svmc.fit(x_train, y_train)

# Mean accuracy on the training and the held-out partitions.
svmc_train_fn, svmc_test_fn = svmc.score(x_train, y_train), svmc.score(x_test, y_test)

# Caption and value each go on their own line.
print('Score de entrenamiento: ', svmc_train_fn, 'Score de Testeo: ', svmc_test_fn, sep='\n')

Score de entrenamiento: 
0.9991710417242332
Score de Testeo: 
0.129714048901782


In [56]:
# SVC (default kernel) with C=80 and a much smaller gamma; this rebinds
# `svmc`, replacing any classifier previously stored under that name.
svmc = svm.SVC(C=80, gamma=0.0295)
svmc.fit(x_train, y_train)

# Mean accuracy on the training and the held-out partitions.
svmc_train_fn, svmc_test_fn = svmc.score(x_train, y_train), svmc.score(x_test, y_test)

# Caption and value each go on their own line.
print('Score de entrenamiento: ', svmc_train_fn, 'Score de Testeo: ', svmc_test_fn, sep='\n')

Score de entrenamiento: 
0.9991710417242332
Score de Testeo: 
0.9365934521342727


In [60]:
# Extra-Trees ensemble with 10,000 trees, trained on all available cores.
# random_state is pinned (same seed as the train/test split) because tree
# construction is randomized; without it the scores below change on every
# run and the recorded output cannot be reproduced.
etc = ExtraTreesClassifier(n_estimators=10000, n_jobs=-1, random_state=42)
etc.fit(x_train, y_train)

# Mean accuracy on the training and the held-out partitions.
etc_train = etc.score(x_train, y_train)
etc_test = etc.score(x_test, y_test)

# Caption and value each go on their own line.
print('Score de entrenamiento: ', etc_train, 'Score de Testeo: ', etc_test, sep='\n')

Score de entrenamiento: 
0.9991710417242332
Score de Testeo: 
0.904268545379196


In [63]:
# Random forest with 1,000 trees, trained on all available cores.
# random_state is pinned (same seed as the train/test split) because
# bootstrap sampling and feature sub-sampling are randomized; without it the
# scores below change on every run and the recorded output cannot be
# reproduced.
rfc = RandomForestClassifier(n_estimators=1000, n_jobs=-1, random_state=42)
rfc.fit(x_train, y_train)

# Mean accuracy on the training and the held-out partitions.
rfc_train_fn = rfc.score(x_train, y_train)
rfc_test_fn = rfc.score(x_test, y_test)

# Caption and value each go on their own line.
print('Score de entrenamiento: ', rfc_train_fn, 'Score de Testeo: ', rfc_test_fn, sep='\n')

Score de entrenamiento: 
0.9991710417242332
Score de Testeo: 
0.8736013261500207
