In [1]:
from ds4400_final_project.dataset.constants import DATASET_FOLDER
from pathlib import Path
from typing import Tuple, Dict
from sklearn import svm
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
def load_data_from_file(csv_filename: str) -> Tuple[np.array, np.array, Dict[int, str], Dict[str, int]]:
	""" Load the CSV file from the dataset folder. """
	file = str(Path(DATASET_FOLDER) / csv_filename)
	features_list = np.genfromtxt(file, dtype=None, encoding=None, delimiter=",", skip_header=1, usecols=range(2, 60))
	features = np.array([list(x) for x in features_list])

	# Create a mapping between a numeric value and genre
	index_genre_map = {i: genre for i, genre in enumerate(np.unique(features[:,-1]))}
	genre_index_map = {value: key for key, value in index_genre_map.items()}

	# split the inputs and their labels
	x = normalize(features[:,:57])
	y = normalize(np.array([genre_index_map[genre] for genre in features[:,-1]]).reshape(-1, 1)).flatten()

	return x, y, index_genre_map, genre_index_map

In [3]:
# import the data from the 3 seconds and 30 seconds features CSV
X_3, y_3, index_genre_map_3, genre_index_map_3 = load_data_from_file("features_3_sec.csv")
X_30, y_30, index_genre_map_30, genre_index_map_30 = load_data_from_file("features_30_sec.csv")

In [4]:
# split all the data into training and testing sets
x_3_train, x_3_test, y_3_train, y_3_test = train_test_split(X_3, y_3, test_size=0.33, random_state=42)
x_30_train, x_30_test, y_30_train, y_30_test = train_test_split(X_30, y_30, test_size=0.33, random_state=42)

In [7]:
C = 0.1
kernel = 'sigmoid'
classifier_3 = svm.SVC(C=C, kernel=kernel)
classifier_30 = svm.SVC(C=C, kernel=kernel)
classifier_3.fit(x_3_train, y_3_train)
classifier_30.fit(x_30_train, y_30_train)

SVC(C=0.1, kernel='sigmoid')

In [8]:
train_3_acc = classifier_3.score(x_3_train, y_3_train)
test_3_acc = classifier_3.score(x_3_test, y_3_test)
train_30_acc = classifier_30.score(x_30_train, y_30_train)
test_30_acc = classifier_30.score(x_30_test, y_30_test)
print("Train accuracy, 3 sec: ", train_3_acc)
print("Test accuracy, 3 sec: ", test_3_acc)
print("Train accuracy, 30 sec: ", train_30_acc)
print("Test accuracy, 30 sec: ", test_30_acc)

Train accuracy, 3 sec:  0.9021365605856866
Test accuracy, 3 sec:  0.8953594176524113
Train accuracy, 30 sec:  0.9044776119402985
Test accuracy, 30 sec:  0.8909090909090909
