In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from learn.classifier import DT, KNN, RandomForest
from learn.metrics import accuracy, confusion_matrix

In [2]:
df = pd.read_csv('./data/final_genres.csv')
print(df.shape)
df.head()

(500, 51)


Unnamed: 0,Target,chroma_stft_mean_1,chroma_stft_mean_10,chroma_stft_mean_11,chroma_stft_mean_12,chroma_stft_mean_2,chroma_stft_mean_3,chroma_stft_mean_4,chroma_stft_mean_5,chroma_stft_mean_6,...,mfcc_std_2,mfcc_std_3,mfcc_std_4,mfcc_std_5,mfcc_std_6,mfcc_std_7,mfcc_std_8,mfcc_std_9,rmse_mean_1,rmse_std_1
0,classical,0.124398,0.321564,0.146449,0.31265,0.297825,0.411158,0.20606,0.34024,0.159909,...,13.754662,15.204769,13.0022,8.452141,8.637547,10.714485,8.11754,8.694667,0.036255,0.01486
1,classical,0.163998,0.176087,0.217041,0.4684,0.240299,0.388455,0.153389,0.147619,0.1182,...,18.254653,13.690039,12.309498,11.403011,11.812307,11.098268,9.0671,8.759368,0.030628,0.012692
2,classical,0.197351,0.28669,0.178152,0.460257,0.423289,0.361857,0.23214,0.392983,0.146053,...,14.050104,12.320599,11.65873,7.176022,9.036495,8.918298,7.712881,7.130923,0.043869,0.01388
3,classical,0.334221,0.200719,0.167926,0.202627,0.139895,0.348492,0.253649,0.260229,0.266748,...,17.603713,21.511297,11.291489,11.453227,10.807599,8.537981,10.013286,9.646251,0.02945,0.018888
4,classical,0.31924,0.309454,0.182653,0.315585,0.141492,0.248884,0.128845,0.352041,0.232046,...,14.853837,11.946413,8.000764,8.721288,7.653882,7.51565,10.428358,9.820378,0.0423,0.014464


In [3]:
def get_ten_fold_cv(df, label):
    k = 10
    m = len(df)
    t = m // k
    validations = []
    for i in range(10):
        df = df.sample(frac=1).reset_index(drop=True)
        train_indexes = [_ for _ in range(t*i)] + [_ for _ in range(t*(i+1), m)]
        test_indexes = [_ for _ in range(t*i, t*(i+1))]
        X = df.drop(columns=label)
        y = df[label]
        X_train = X.iloc[train_indexes, :]
        X_test = X.iloc[test_indexes, :]
        y_train = y.iloc[train_indexes]
        y_test = y.iloc[test_indexes]
        validations.append((X_train, X_test, y_train, y_test))
    return validations

In [4]:
def mean_accuracy(clf_name, df, label):
    clfs = {
        'dt': DT(), 
        'dtt': DecisionTreeClassifier(),
        'knn': KNN(k=5),
        'knnn': KNeighborsClassifier(n_neighbors=5),
        'rf': RandomForest(n_estimators=100), 
        'rff':RandomForestClassifier(n_estimators=100)
    }
    acc = 0
    X = df.drop(columns=label)
    y = df[label]
    k = 10
    for i in range(k):
        clf = clfs[clf_name]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
        clf.fit(X_train, y_train)
        acc += accuracy(y_test, clf.predict(X_test))
    acc = acc/k * 100
    return ("Accuracy : {0}%".format(round(acc, 3)))

In [5]:
mean_accuracy('dt', df, 'Target')

'Accuracy : 71.667%'

In [6]:
mean_accuracy('dtt', df, 'Target')

'Accuracy : 71.4%'

In [7]:
mean_accuracy('knn', df, 'Target')

'Accuracy : 79.333%'

In [8]:
mean_accuracy('knnn', df, 'Target')

'Accuracy : 79.333%'

In [9]:
mean_accuracy('rf', df, 'Target')

'Accuracy : 55.933%'

In [10]:
mean_accuracy('rff', df, 'Target')

'Accuracy : 90.933%'