In [62]:
import copy
from pathlib import Path

import scipy.io.wavfile as wav
import numpy as np
from sklearn import preprocessing

from python_speech_features import mfcc
from python_speech_features import delta

import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier

In [75]:
# Mapping from genre name to the integer class label used for training.
# A genre's label is its position in the list below (reggae=0, ..., disco=9).
genres = {
    name: label
    for label, name in enumerate([
        'reggae', 'metal', 'blues', 'country', 'classical',
        'pop', 'jazz', 'rock', 'hiphop', 'disco',
    ])
}

# Sample rate (Hz) assumed for every signal during feature extraction.
def_rate = 22050

def readSongs(duration, data_dir='../genres', train_fraction=0.7, seed=None):
    """Load every ``.wav`` file under ``data_dir`` and split it per genre.

    Each file is read with ``scipy.io.wavfile.read``, truncated to its first
    ``duration`` seconds (using the file's own sample rate) and filed under
    the genre named by the first directory component below ``data_dir``.
    The songs of every genre are then shuffled and split into a training
    part and a test part.

    Parameters
    ----------
    duration : int
        Number of seconds to keep from the start of each recording.
    data_dir : str or Path, optional
        Directory that contains one sub-directory per genre.
    train_fraction : float, optional
        Share of each genre's songs placed in the training split. The default
        of 0.7 gives the 70/30 split for a genre with 100 songs.
    seed : int or None, optional
        Seed for the shuffle. With ``None`` NumPy's global random state is
        used, so the split differs between runs.

    Returns
    -------
    (train_sigs, test_sigs) : tuple of dict
        Two dicts mapping each genre name to a NumPy array of its signals.

    Raises
    ------
    KeyError
        If a file lies in a directory that is not one of the known genres.
    """
    genre_names = ('reggae', 'metal', 'blues', 'country', 'classical',
                   'pop', 'jazz', 'rock', 'hiphop', 'disco')
    song_sigs = {name: [] for name in genre_names}

    root = Path(data_dir)
    # Sorted so that a given seed always yields the same split, regardless of
    # the order in which the file system happens to list the files.
    for path in sorted(root.glob('**/*.wav')):
        # First directory below the data root; unlike splitting the string on
        # '/', this does not depend on the OS separator or on the root's depth.
        genre = path.relative_to(root).parts[0]
        rate, sig = wav.read(str(path))
        song_sigs[genre].append(sig[:rate * duration])

    # np.random and RandomState both expose permutation(); the former keeps
    # the old global-state behaviour when no seed is requested.
    rng = np.random if seed is None else np.random.RandomState(seed)

    train_sigs = {}
    test_sigs = {}
    for key, sigs in song_sigs.items():
        # Size the split from the songs actually found instead of assuming
        # exactly 100 per genre: a smaller genre used to raise IndexError and
        # songs beyond the first 100 could never be selected.
        n_songs = len(sigs)
        n_train = int(round(train_fraction * n_songs))
        perm = rng.permutation(n_songs)

        stacked = np.array(sigs)
        train_sigs[key] = stacked[perm[:n_train]]
        test_sigs[key] = stacked[perm[n_train:]]
    return train_sigs, test_sigs

def extractFeatures(signals, sample_len):
    """Turn per-genre signals into flat MFCC feature vectors with labels.

    Every signal is cut into consecutive, non-overlapping parts of
    ``sample_len`` seconds (measured at the module-level ``def_rate``); a
    trailing remainder shorter than one part is dropped. For each part the
    MFCCs, their deltas and their delta-deltas are computed and flattened
    into a single vector.

    Parameters
    ----------
    signals : dict
        Maps a genre name to an iterable of 1-D signals.
    sample_len : int
        Length of each part in seconds.

    Returns
    -------
    (features, labels) : tuple of list
        ``features[i]`` is the feature vector of one part and ``labels[i]``
        the integer label of its genre, looked up in ``genres``.
    """
    features, labels = [], []
    for genre, genre_sigs in signals.items():
        # Progress indicator: one line per genre.
        print(genre)
        for sig in genre_sigs:
            no_parts = int((len(sig) / def_rate) / sample_len)
            part_len = def_rate * sample_len
            for idx in range(no_parts):
                lo = idx * part_len
                part = sig[lo:lo + part_len]

                coeffs = mfcc(part, def_rate, nfft=551)
                d_coeffs = delta(coeffs, 2)
                dd_coeffs = delta(d_coeffs, 2)

                # One flat vector: MFCCs, then deltas, then delta-deltas.
                sample = np.hstack(
                    (coeffs.flatten(), d_coeffs.flatten(), dd_coeffs.flatten())
                )
                features.append(sample)
                labels.append(genres[genre])

    return features, labels

def getData(duration, sample_len):
    """Build standardised train/test feature matrices with their labels.

    Reads and splits the songs with ``readSongs(duration)``, extracts features
    from both splits with ``extractFeatures(..., sample_len)`` and scales them
    with a ``StandardScaler`` fitted on the training features only, so no
    test-set statistics leak into the scaling.

    Returns
    -------
    tuple
        ``(train_feat, train_label, test_feat, test_label)`` where the feature
        entries are the scaled arrays and the label entries are lists of
        integer genre labels.
    """
    train_sigs, test_sigs = readSongs(duration)

    train_raw, train_label = extractFeatures(train_sigs, sample_len)
    test_raw, test_label = extractFeatures(test_sigs, sample_len)

    # Fit on the training split only, then apply the same scaling to both.
    scaler = preprocessing.StandardScaler()
    scaler.fit(train_raw)

    return (
        scaler.transform(train_raw),
        train_label,
        scaler.transform(test_raw),
        test_label,
    )

# Build the data set from the first 21 seconds of every song, cut into
# 3-second parts: trf/trl are the scaled training features and labels,
# tef/tel the scaled test features and labels.
trf_21_3, trl_21_3, tef_21_3, tel_21_3 = getData(21, 3)
    
# train_signal, test_signal = readSongs(28)

# train_feat, train_label = extractFeatures(train_signal, 28)
# test_feat, test_label = extractFeatures(test_signal, 28)

# scaler = preprocessing.StandardScaler().fit(train_feat)
# train_feat = scaler.transform(train_feat)
# test_feat = scaler.transform(test_feat)

reggae
metal
blues
country
classical
pop
jazz
rock
hiphop
disco
reggae
metal
blues
country
classical
pop
jazz
rock
hiphop
disco


In [None]:
# Random forest with 1000 trees, trained on all CPU cores (n_jobs=-1).
# oob_score=True keeps the out-of-bag accuracy estimate, so an error rate can
# be reported without touching the test set.
rf = RandomForestClassifier(
    n_estimators=1000,
    max_features='sqrt',
    oob_score=True,
    n_jobs=-1,
).fit(trf_21_3, trl_21_3)
print("RF Out-of-bag error rate: ", 1 - rf.oob_score_)

# Mean accuracy on the held-out test features (shown as the cell's result).
rf.score(tef_21_3, tel_21_3)