In [9]:
import sys
sys.path.append('../')
from ds4400_final_project.dataset.constants import DATASET_FOLDER
from pathlib import Path
from typing import Tuple, Dict
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
import numpy as np

In [10]:
def load_data_from_file(csv_filename: str) -> Tuple[np.array, np.array, Dict[int, str], Dict[str, int]]:
	""" Load the CSV file from the dataset folder. """
	file = str(Path(DATASET_FOLDER) / csv_filename)
	features_list = np.genfromtxt(file, dtype=None, encoding=None, delimiter=",", skip_header=1, usecols=range(2, 60))
	features = np.array([list(x) for x in features_list])

	# Create a mapping between a numeric value and genre
	index_genre_map = {i: genre for i, genre in enumerate(np.unique(features[:,-1]))}
	genre_index_map = {value: key for key, value in index_genre_map.items()}

	# split the inputs and their labels
	x = features[:,:57]
	y = np.array([genre_index_map[genre] for genre in features[:,-1]])

	return x, y, index_genre_map, genre_index_map

In [20]:
# import the data from the 3 seconds features CSV
X, y, index_genre_map, genre_index_map = load_data_from_file("features_3_sec.csv")

In [21]:
X = normalize(X, axis=0)

In [22]:
X

array([[ 0.00860081,  0.01066405,  0.00883209, ...,  0.00487959,
        -0.00045597,  0.00593612],
       [ 0.00879721,  0.01008993,  0.00763291, ...,  0.0140838 ,
         0.0108522 ,  0.0081292 ],
       [ 0.00889336,  0.01080397,  0.00894034, ...,  0.00770021,
         0.00472316,  0.00448956],
       ...,
       [ 0.00891044,  0.01042643,  0.00354915, ...,  0.00705162,
        -0.00056201,  0.00563983],
       [ 0.00993735,  0.00993403,  0.00449917, ...,  0.00360599,
         0.001268  ,  0.00173422],
       [ 0.00946976,  0.01016163,  0.0034219 , ...,  0.00573129,
        -0.00640268,  0.00430273]])

In [23]:
# split all the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [24]:
classifier = svm.SVC()
classifier.fit(X, y)

SVC()

In [25]:
classifier.score(X, y)
# classifier.score(x_train, y_train)
# classifier.score(x_test, y_test)

0.8871871871871871