In [126]:
import pandas as pd
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC, SVC
from sklearn.multiclass import OneVsRestClassifier
import numpy as np
import math
import librosa

def accuracy_estimate(genres, y_test):
    """Return the fraction of predictions that differ from the true labels.

    Despite the name, the value is an error rate (mismatches / number of
    samples), so lower is better and 0.0 means every prediction matched.

    Parameters
    ----------
    genres : array-like
        Predicted labels.
    y_test : array-like
        True labels; must have the same shape as ``genres``.

    Returns
    -------
    numpy.float64
        Mismatch fraction for 1-D inputs. For 2-D inputs the mismatches are
        counted along the first axis, giving one fraction per column.

    Raises
    ------
    ValueError
        If the two inputs differ in shape or contain no samples.
    """
    # Accept plain lists/tuples as well as ndarrays.
    genres = np.asarray(genres)
    y_test = np.asarray(y_test)

    if genres.shape != y_test.shape:
        raise ValueError(
            f"shape mismatch: predictions {genres.shape} vs labels {y_test.shape}"
        )

    m = y_test.shape[0]
    if m == 0:
        raise ValueError("cannot compute an error rate from zero samples")

    # Vectorised count of mismatches instead of Python-level sum() over the array.
    cost = (genres != y_test).sum(axis=0) / m
    return cost

def take_m_rows_every_n(data, m, n):
    """Split ``data`` along its first axis into two interleaved parts.

    Rows are viewed in consecutive groups of ``n``. Within each group the
    first ``m`` rows (those with ``index % n < m``) go to the first output and
    the remaining ``n - m`` rows go to the second. A trailing incomplete group
    is handled by the same rule, so the input length need not be a multiple
    of ``n``.

    Parameters
    ----------
    data : array-like
        1-D or N-D numeric data; the split is along axis 0.
    m : int
        Number of rows per group assigned to the first output (0 <= m <= n).
    n : int
        Group size (n > 0).

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The two parts, both as float64 arrays. The float conversion is kept
        because earlier versions of this function always returned float64.

    Raises
    ------
    ValueError
        If ``n`` is not positive or ``m`` is outside ``[0, n]``.
    """
    if n <= 0:
        raise ValueError(f"n must be positive, got {n}")
    if not 0 <= m <= n:
        raise ValueError(f"m must satisfy 0 <= m <= n, got m={m}, n={n}")

    data = np.asarray(data, dtype=np.float64)

    # A boolean mask gives each output exactly the rows it owns, so no output
    # size has to be predicted (and possibly mis-predicted) up front.
    in_first = (np.arange(data.shape[0]) % n) < m
    return data[in_first], data[~in_first]



def read_csv_extract_features(name):
    """Load a feature CSV and return a positional train/test split.

    Column layout relied on by the slicing below: the first two columns and
    the last column are excluded from the features (the original note names
    them as filename, length and label), and the last column holds the genre
    name of each row.

    Genre names are mapped to integer codes 0-9; a name missing from the
    mapping raises ``KeyError``. The split is delegated to
    ``take_m_rows_every_n(..., 90, 100)``, i.e. the first 90 rows of every
    100 are used for training and the rest for testing.

    The shapes of the full and split arrays are printed as a sanity check.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, y_test)``
    """
    table = pd.read_csv(name).to_numpy()

    # Feature columns: everything between the two leading columns and the label.
    x = table[:, 2:-1]
    print(f"x shape:{x.shape}")

    # Genre name of every row, taken from the last column.
    y_as_word = table[:, -1]
    print(f"y shape:{y_as_word.shape}")

    # Position in this tuple is the integer code of the genre.
    genre_names = (
        "blues",
        "classical",
        "country",
        "disco",
        "hiphop",
        "jazz",
        "metal",
        "pop",
        "reggae",
        "rock",
    )
    classes_dict = {genre: code for code, genre in enumerate(genre_names)}
    y = np.array([classes_dict[word] for word in y_as_word])

    x_train, x_test = take_m_rows_every_n(x, 90, 100)
    y_train, y_test = take_m_rows_every_n(y, 90, 100)

    for label, part in (
        ("x train", x_train),
        ("y train", y_train),
        ("x test", x_test),
        ("y test", y_test),
    ):
        print(f"{label} shape:{part.shape}")

    return x_train, y_train, x_test, y_test


# Entry point: load features_30_sec.csv and keep the resulting split in the
# module-level names x_train / y_train / x_test / y_test, which the
# model-fitting statements further down read.
if __name__ == "__main__":
    x_train, y_train, x_test, y_test = read_csv_extract_features(r'features_30_sec.csv')

x shape:(1000, 57)
y shape:(1000,)
x train shape:(900, 57)
y train shape:(900,)
x test shape:(100, 57)
y test shape:(100,)


In [145]:
    # Linear-kernel SVM on standardised features (author's note: from reading,
    # this solver appears to be somewhat optimised).
    clf = make_pipeline(
        StandardScaler(),
        LinearSVC(random_state=0, tol=1e-5, dual=False, C=0.01),
    )
    clf.fit(x_train, y_train)

    # Held-out rows: the column with the highest decision score is used as the
    # predicted genre code.
    results = clf.decision_function(x_test)
    genres = np.argmax(results, axis=1)
    print(genres)
    # accuracy_estimate returns the misclassified fraction, so lower is better.
    print(accuracy_estimate(genres, y_test))

    # Same measurement on the training rows, to compare against the held-out
    # figure (author's note: overfitting again).
    results = clf.decision_function(x_train)
    genres = np.argmax(results, axis=1)
    print(accuracy_estimate(genres, y_train))

[0 2 0 1 0 0 2 2 6 5 1 1 1 1 1 1 1 1 1 1 2 2 6 2 2 2 2 2 2 0 6 3 6 6 3 6 6
 3 3 3 3 6 4 6 8 6 4 4 4 4 5 1 5 5 5 5 5 5 5 5 5 6 0 6 6 6 6 6 6 6 7 7 7 7
 7 7 7 7 7 7 8 8 8 2 2 7 8 5 8 6 5 6 6 9 6 2 6 6 2 6]
0.35
0.24


In [158]:
    # Gaussian (RBF) kernel SVM; one-vs-rest decision scores by default.
    # Author's note: tuning gamma and C seems to be beyond the scope here, and
    # is essentially useless without "covariance samples"
    # (presumably a cross-validation set is meant; TODO confirm).
    clf = make_pipeline(
        StandardScaler(),
        SVC(kernel="rbf", C=1, gamma=1/30),
    )
    clf.fit(x_train, y_train)

    # Held-out rows: highest-scoring column is taken as the predicted genre code.
    results = clf.decision_function(x_test)
    genres = np.argmax(results, axis=1)
    print(genres)
    # Misclassified fraction on the held-out rows (lower is better).
    print(accuracy_estimate(genres, y_test))

    # Misclassified fraction on the training rows, for comparison
    # (author's note: seems to be overfitting).
    results = clf.decision_function(x_train)
    genres = np.argmax(results, axis=1)
    print(accuracy_estimate(genres, y_train))

[0 2 0 5 0 0 0 2 3 0 1 1 1 1 1 1 1 1 1 5 2 2 6 2 2 2 0 2 2 0 3 3 3 3 9 3 3
 3 3 3 4 4 4 4 8 6 0 4 4 4 5 5 5 5 5 5 5 5 5 5 9 9 9 6 6 6 6 6 3 6 7 7 7 7
 7 7 7 7 7 7 4 8 8 8 9 9 8 5 8 6 4 6 9 9 6 9 9 9 9 9]
0.24
0.05444444444444444


In [79]:
    # librosa install is broken on 3.10 atm :/
    import os

    # Build the path with os.path.join so it is not tied to Windows separators.
    rootdir = os.path.join('Data', 'genres_original')

    # One row per audio file: [filename, sample count, MFCC means..., genre],
    # i.e. the same column layout as the earlier features CSV, which
    # read_csv_extract_features slices by position.
    lst = []
    for subdir, dirs, files in os.walk(rootdir):
        # Sorting in place fixes the traversal order. The train/test split used
        # later is purely positional, so the row order must be reproducible.
        dirs.sort()
        for file in sorted(files):
            try:
                y, sr = librosa.load(os.path.join(subdir, file))
            except Exception as exc:
                # Best effort: report the unreadable file with the reason and
                # move on. Catching Exception (not a bare except) keeps
                # Ctrl-C / kernel interrupts working.
                print(f"failed extraction on {file}: {exc!r}")
                continue
            # Average the MFCC matrix over axis 1 to get one value per coefficient.
            feat = np.mean(librosa.feature.mfcc(y=y, sr=sr), axis=1)
            # The genre label is the part of the file name before the first dot.
            row = [file, y.size] + feat.tolist() + [file.split(".")[0]]
            lst.append(row)

    df = pd.DataFrame(lst)
    df.to_csv('20melfeatures.csv', index = False)



failed extraction on jazz.00054.wav


In [111]:
    # Rebind the train/test split to the MFCC table (20melfeatures.csv); every
    # model fitted after this point uses these features instead of the
    # features_30_sec.csv ones.
    # NOTE(review): the recorded output shows 999 rows (one file failed to
    # load), so the positional 90-of-every-100 split presumably no longer lines
    # up with per-genre blocks of 100 files. Confirm whether that matters.
    x_train, y_train, x_test, y_test = read_csv_extract_features(r'20melfeatures.csv')

x shape:(999, 20)
y shape:(999,)
x train shape:(900, 20)
y train shape:(900,)
x test shape:(99, 20)
y test shape:(99,)


In [115]:
    # RBF-kernel SVM with the library's default gamma, fitted on whatever split
    # is currently bound to x_train / y_train.
    clf = make_pipeline(
        StandardScaler(),
        SVC(kernel="rbf", C=1),
    )
    clf.fit(x_train, y_train)

    # Held-out rows: highest-scoring column is taken as the predicted genre code.
    results = clf.decision_function(x_test)
    genres = np.argmax(results, axis=1)
    print(genres)
    # Misclassified fraction on the held-out rows (lower is better).
    print(accuracy_estimate(genres, y_test))

    # Misclassified fraction on the training rows, for comparison
    # (author's note: again overfitting).
    results = clf.decision_function(x_train)
    genres = np.argmax(results, axis=1)
    print(accuracy_estimate(genres, y_train))

[0 2 0 0 2 9 2 0 6 2 5 8 1 1 1 1 1 5 1 5 2 2 6 2 0 0 2 0 2 9 4 9 4 3 3 4 4
 3 9 9 4 4 4 6 4 6 9 3 4 9 8 5 5 5 5 5 5 8 5 4 9 9 6 6 6 6 6 6 6 7 7 7 7 7
 7 7 7 7 7 3 8 8 7 2 2 8 2 8 4 9 6 9 9 6 9 9 6 2 0]
0.43434343434343436
0.23777777777777778
