In [1]:
import copy
from pathlib import Path

import scipy.io.wavfile as wav
import numpy as np
from sklearn import preprocessing

from python_speech_features import mfcc
from python_speech_features import delta

import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import precision_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import precision_recall_fscore_support

In [None]:
# Genre name -> integer class label. A genre's label is its position in the
# tuple below (reggae is 0, metal is 1, ... disco is 9).
genres = dict(zip(
    ('reggae', 'metal', 'blues', 'country', 'classical',
     'pop', 'jazz', 'rock', 'hiphop', 'disco'),
    range(10),
))

# Sample rate used when cutting signals into segments and computing MFCCs.
def_rate = 22050

def readSongs(duration, root='../genres', train_frac=0.7, seed=None):
    """Load every ``.wav`` below ``root`` and split it per genre into train/test.

    Each file is truncated to its first ``duration`` seconds (``rate * duration``
    samples, using the rate stored in the file). The genre is taken from the
    first directory level below ``root`` and must be a key of ``genres``.

    Args:
        duration: Number of seconds to keep from the start of every clip.
        root: Directory that contains one sub-directory per genre.
        train_frac: Fraction of each genre's songs that goes to the training
            set; the remainder goes to the test set. With 100 songs per genre
            the default reproduces a 70/30 split.
        seed: Optional seed for the per-genre shuffles. ``None`` draws from
            NumPy's global random state.

    Returns:
        ``(train_sigs, train_labels, test_sigs, test_labels)`` where the
        ``*_sigs`` lists hold the truncated sample arrays and the ``*_labels``
        lists hold the matching integer ids from ``genres``.

    Raises:
        KeyError: If a file sits in a directory that is not a known genre.
    """
    root = Path(root)
    song_sigs = {genre: [] for genre in genres}

    # sorted() makes the traversal independent of filesystem ordering, so a
    # fixed seed always yields the same split.
    for path in sorted(root.glob('**/*.wav')):
        # Use path components rather than splitting on '/' so this also works
        # with other path separators and other root depths.
        genre = path.relative_to(root).parts[0]
        rate, sig = wav.read(str(path))
        song_sigs[genre].append(sig[:rate * duration])

    if seed is None:
        permute = np.random.permutation
    else:
        permute = np.random.RandomState(seed).permutation

    train_sigs, train_labels = [], []
    test_sigs, test_labels = [], []

    for genre, sigs in song_sigs.items():
        # Size the split from the songs actually found instead of assuming
        # exactly 100 per genre.
        n_songs = len(sigs)
        n_train = int(round(n_songs * train_frac))
        order = permute(n_songs)
        label = genres[genre]

        # Index the Python list directly: stacking the clips into one array
        # fails when clips have different lengths.
        train_sigs.extend(sigs[i] for i in order[:n_train])
        train_labels.extend([label] * n_train)

        test_sigs.extend(sigs[i] for i in order[n_train:])
        test_labels.extend([label] * (n_songs - n_train))

    return train_sigs, train_labels, test_sigs, test_labels

def extractFeatures(signals, labels, sample_len):
    """Cut every signal into fixed-length segments and describe each by MFCCs.

    The number of segments per signal is derived from the length of the FIRST
    signal, so all signals are expected to be at least that long. For every
    segment the MFCC matrix, its delta and its delta-delta are flattened and
    joined into one feature vector.

    Args:
        signals: Sequence of 1-D sample arrays (sampled at ``def_rate``).
        labels: Sequence with one label per entry of ``signals``.
        sample_len: Segment length in seconds.

    Returns:
        ``(features, f_labels)``: a list of 1-D feature vectors and a list of
        the same length that repeats each signal's label once per segment.
        Both lists are empty when ``signals`` is empty.
    """
    features = []
    f_labels = []

    # Nothing to do for empty input; without this guard signals[0] raises.
    if len(signals) == 0:
        return features, f_labels

    no_samples = int((len(signals[0]) / def_rate) / sample_len)
    part_len = int(def_rate * sample_len)

    for i, sig in enumerate(signals):
        lab = labels[i]

        for j in range(no_samples):
            part = sig[j * part_len:(j + 1) * part_len]

            # NOTE(review): nfft=551 presumably covers the library's default
            # analysis window at def_rate - confirm against the mfcc docs.
            mfcc_feat = mfcc(part, def_rate, nfft=551)
            d_mfcc_feat = delta(mfcc_feat, 2)
            dd_mfcc_feat = delta(d_mfcc_feat, 2)

            # One flat vector per segment: [mfcc | delta | delta-delta].
            sample = np.concatenate((mfcc_feat.ravel(),
                                     d_mfcc_feat.ravel(),
                                     dd_mfcc_feat.ravel()))
            features.append(sample)
            f_labels.append(lab)

    return features, f_labels

def createForest(feat, labels, n_estimators=50, random_state=None):
    """Train a random forest on the given feature rows.

    Args:
        feat: 2-D array-like with one feature vector per row.
        labels: Class label for every row of ``feat``.
        n_estimators: Number of trees in the forest (default 50).
        random_state: Optional seed forwarded to the classifier so that a run
            can be reproduced. ``None`` leaves the forest unseeded.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    rf = RandomForestClassifier(
        n_jobs=-1,  # use all available cores
        n_estimators=n_estimators,
        max_features='sqrt',
        random_state=random_state,
    )
    rf.fit(feat, labels)

    return rf

def getData(signals, labels, duration, sample_len):
    """Extract per-segment features and standardize them.

    Args:
        signals: Sequence of sample arrays, forwarded to ``extractFeatures``.
        labels: One label per signal, forwarded to ``extractFeatures``.
        duration: Unused here; kept so existing callers keep working.
        sample_len: Segment length in seconds, forwarded to ``extractFeatures``.

    Returns:
        ``(features, labels)`` as NumPy arrays, one row/entry per segment.

    Note:
        The scaler is fitted on every row passed in, so its statistics are
        computed from all of the supplied data.
    """
    raw_feat, seg_labels = extractFeatures(signals, labels, sample_len)

    standardizer = preprocessing.StandardScaler()
    standardizer.fit(raw_feat)
    scaled_feat = standardizer.transform(raw_feat)

    return np.array(scaled_feat), np.array(seg_labels)

def validateModel(signals, labels, duration, sample_len):
    """Run 7-fold stratified cross-validation of the random forest.

    Folds are drawn at song level, so all segments of one song land on the same
    side of a split. Song indices are then expanded to the feature rows of the
    song's segments. For every fold a forest is trained and the per-class
    precision, recall, F-score and support are printed.

    Args:
        signals: Sequence of sample arrays, one per song.
        labels: One class label per song.
        duration: Length of every song in seconds.
        sample_len: Segment length in seconds.

    Raises:
        ValueError: If the number of extracted feature rows does not equal
            ``len(signals) * int(duration / sample_len)``.

    Note:
        ``getData`` standardizes the features using all rows before the folds
        are drawn, so scaling statistics include the held-out songs.
    """
    skf = StratifiedKFold(n_splits=7, shuffle=True)

    print('start features')
    trf, trl = getData(signals, labels, duration, sample_len)
    print('end features')

    no_samples = int(duration / sample_len)
    if len(trf) != len(signals) * no_samples:
        # Without this check the index table below would silently address the
        # wrong rows (or run past the end of the feature matrix).
        raise ValueError(
            'expected %d feature rows (%d songs x %d segments) but got %d'
            % (len(signals) * no_samples, len(signals), no_samples, len(trf)))

    # Row k lists the feature-row indices that belong to song k.
    seg_rows = (np.arange(len(signals))[:, None] * no_samples
                + np.arange(no_samples))

    for tr_idx, te_idx in skf.split(signals, labels):
        new_tridx = seg_rows[tr_idx].flatten()
        new_teidx = seg_rows[te_idx].flatten()

        print('start forest')
        rf = createForest(trf[new_tridx], trl[new_tridx])
        print('end forest')
        pred = rf.predict(trf[new_teidx])
        # Score against the labels of the predicted segment rows. Indexing
        # trl with the song-level te_idx picks the wrong rows and the wrong
        # number of them.
        print(precision_recall_fscore_support(trl[new_teidx], pred))
        
        
# Seconds kept from the start of every clip, and length in seconds of the
# segments each clip is cut into. The clip length is named once because
# loading and validation must agree on it.
CLIP_SECONDS = 28
SEGMENT_SECONDS = 14

trs, trl, tes, tel = readSongs(CLIP_SECONDS)

validateModel(trs, trl, CLIP_SECONDS, SEGMENT_SECONDS)

In [None]:
# Scratch cell: a 7-fold stratified splitter with shuffling enabled.
skf = StratifiedKFold(n_splits=7, shuffle=True)

# Disabled sanity check: iterate the folds over the training labels `trl` and
# print the size of each training fold together with the test indices.
# for train_index, test_index in skf.split(trl, trl):
#     print("TRAIN:", len(train_index), "TEST:", test_index)