In [1]:
import pandas as pd
import numpy as np
import time
import pickle as pkl

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

from learn.preprocessing import MinMaxScaler
from learn.decomposition import PCA
from learn.classifier import DT, KNN, RandomForest
from learn.metrics import accuracy, confusion_matrix

In [2]:
df = pd.read_csv('./data/final_genres.csv') #import data dari final genres (hanya 4 genres)
print(df.shape) 
df.head()

(400, 91)


Unnamed: 0,chroma_stft_mean_1,chroma_stft_mean_2,chroma_stft_mean_3,chroma_stft_mean_4,chroma_stft_mean_5,chroma_stft_mean_6,chroma_stft_mean_7,chroma_stft_mean_8,chroma_stft_mean_9,chroma_stft_mean_10,...,mfcc_std_4,mfcc_std_5,mfcc_std_6,mfcc_std_7,mfcc_std_8,mfcc_std_9,mfcc_std_10,mfcc_std_11,mfcc_std_12,Target
0,0.07687,0.188126,0.245781,0.285505,0.586038,0.223212,0.478323,0.319455,0.206357,0.259557,...,10.449984,4.951737,5.491462,5.740409,5.737721,6.549459,8.040053,7.897353,5.728275,classical
1,0.437475,0.275345,0.334226,0.168873,0.285993,0.197081,0.425991,0.232011,0.075662,0.149319,...,8.07445,9.536843,9.428475,9.755775,6.885186,4.31446,5.736244,7.462625,6.262827,classical
2,0.140473,0.351503,0.399898,0.203435,0.36252,0.138056,0.228721,0.126469,0.260466,0.332882,...,7.539508,7.392303,9.626102,8.751103,7.530446,4.8909,10.118975,9.565813,9.361697,classical
3,0.277341,0.051879,0.022297,0.08276,0.607411,0.600071,0.102452,0.107664,0.103249,0.312747,...,13.475181,6.26689,7.107491,8.874654,8.108906,9.1868,9.688565,6.611019,9.762531,classical
4,0.430561,0.165279,0.354566,0.112191,0.154739,0.188823,0.15476,0.169602,0.143798,0.515868,...,5.895025,8.782835,5.336773,5.41345,4.457804,9.115865,7.333367,8.406437,10.167446,classical


In [3]:
X = df.drop(columns="Target") #semua data kecuali kolom target
y = df["Target"] #hanya colom target
print(X.shape, y.shape)

(400, 90) (400,)


In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [5]:
dt_learn = DT(algo="c4.5")
dt_sklearn = DecisionTreeClassifier()

t_learn_start = time.time() 
dt_learn.fit(X_train, y_train)
t_learn = time.time() - t_learn_start

t_sklearn_start = time.time()
dt_sklearn.fit(X_train, y_train)
t_sklearn = time.time() - t_sklearn_start

y_pred_learn = dt_learn.predict(X_test)
y_pred_sklearn = dt_sklearn.predict(X_test)

acc_learn = round(accuracy(y_test, y_pred_learn) * 100, 3)
acc_sklearn = round(accuracy(y_test, y_pred_sklearn) * 100, 3)

print("Accuracy using created package (learn)\t: {0}%, ({1} seconds)".format(acc_learn, t_learn))
print("Accuracy using sklearn package\t\t: {0}%, ({1} seconds)".format(acc_sklearn, t_sklearn))
matrix, label = confusion_matrix(y_test, y_pred_learn, show_label=True)
print(matrix)
print(label)

Accuracy using created package (learn)	: 74.167%, (7.806397914886475 seconds)
Accuracy using sklearn package		: 76.667%, (0.04507327079772949 seconds)
[[30.  6.  1.  0.]
 [ 0. 16.  5.  4.]
 [ 1.  3. 24.  2.]
 [ 0.  2.  7. 19.]]
['classical', 'country', 'hiphop', 'metal']


In [6]:
dt_learn = DT(algo="cart")
dt_sklearn = DecisionTreeClassifier()

t_learn_start = time.time() 
dt_learn.fit(X_train, y_train)
t_learn = time.time() - t_learn_start

t_sklearn_start = time.time()
dt_sklearn.fit(X_train, y_train)
t_sklearn = time.time() - t_sklearn_start

y_pred_learn = dt_learn.predict(X_test)
y_pred_sklearn = dt_sklearn.predict(X_test)

acc_learn = round(accuracy(y_test, y_pred_learn) * 100, 3)
acc_sklearn = round(accuracy(y_test, y_pred_sklearn) * 100, 3)

print("Accuracy using created package (learn)\t: {0}%, ({1} seconds)".format(acc_learn, t_learn))
print("Accuracy using sklearn package\t\t: {0}%, ({1} seconds)".format(acc_sklearn, t_sklearn))
matrix, label = confusion_matrix(y_test, y_pred_learn, show_label=True)
print(matrix)
print(label)

Accuracy using created package (learn)	: 84.167%, (0.8595638275146484 seconds)
Accuracy using sklearn package		: 75.0%, (0.04054403305053711 seconds)
[[37.  0.  0.  0.]
 [ 3. 19.  2.  1.]
 [ 0.  5. 21.  4.]
 [ 0.  1.  3. 24.]]
['classical', 'country', 'hiphop', 'metal']


In [7]:
knn_learn = KNN(k=4)
knn_sklearn = KNeighborsClassifier(n_neighbors=4)

scaler = MinMaxScaler()
X_train_dtm = scaler.fit_transform(X_train)
X_test_dtm = scaler.transform(X_test)

pca = PCA(n_components=3)
X_train_dtm = pca.fit_transform(X_train_dtm)
X_test_dtm = pca.transform(X_test_dtm)

t_learn_start = time.time() 
knn_learn.fit(X_train_dtm, y_train)
t_learn = time.time() - t_learn_start

t_sklearn_start = time.time()
knn_sklearn.fit(X_train_dtm, y_train)
t_sklearn = time.time() - t_sklearn_start

y_pred_learn = knn_learn.predict(X_test_dtm)
y_pred_sklearn = knn_sklearn.predict(X_test_dtm)

acc_learn = round(accuracy(y_test, y_pred_learn) * 100, 3)
acc_sklearn = round(accuracy(y_test, y_pred_sklearn) * 100, 3)

print("Accuracy using created package (learn)\t: {0}%, ({1} seconds)".format(acc_learn, t_learn))
print("Accuracy using sklearn package\t\t: {0}%, ({1} seconds)".format(acc_sklearn, t_sklearn))
matrix, label = confusion_matrix(y_test, y_pred_learn, show_label=True)
print(matrix)
print(label)

Accuracy using created package (learn)	: 87.5%, (0.0 seconds)
Accuracy using sklearn package		: 85.0%, (0.008633852005004883 seconds)
[[34.  3.  0.  0.]
 [ 1. 22.  1.  1.]
 [ 0.  1. 24.  5.]
 [ 0.  1.  2. 25.]]
['classical', 'country', 'hiphop', 'metal']


In [8]:
rf_learn = RandomForest(n_estimators=100)
rf_sklearn = RandomForestClassifier(n_estimators=100)

t_learn_start = time.time() 
rf_learn.fit(X_train, y_train)
t_learn = time.time() - t_learn_start

t_sklearn_start = time.time()
rf_sklearn.fit(X_train, y_train)
t_sklearn = time.time() - t_sklearn_start

y_pred_learn = rf_learn.predict(X_test)
y_pred_sklearn = rf_sklearn.predict(X_test)

acc_learn = round(accuracy(y_test, y_pred_learn) * 100, 3)
acc_sklearn = round(accuracy(y_test, y_pred_sklearn) * 100, 3)

print("Accuracy using created package (learn)\t: {0}%, ({1} seconds)".format(acc_learn, t_learn))
print("Accuracy using sklearn package\t\t: {0}%, ({1} seconds)".format(acc_sklearn, t_sklearn))
matrix, label = confusion_matrix(y_test, y_pred_learn, show_label=True)
print(matrix)
print(label)

Accuracy using created package (learn)	: 88.333%, (6.454908609390259 seconds)
Accuracy using sklearn package		: 88.333%, (0.5375247001647949 seconds)
[[34.  3.  0.  0.]
 [ 0. 20.  3.  2.]
 [ 0.  2. 27.  1.]
 [ 0.  0.  3. 25.]]
['classical', 'country', 'hiphop', 'metal']


In [9]:
pkl.dump(rf_learn, open("model.pkl", "wb"))