In [138]:
import copy
from pathlib import Path

import scipy.io.wavfile as wav
import numpy as np
from sklearn import preprocessing

from python_speech_features import mfcc
from python_speech_features import delta

import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import precision_score


In [151]:
# Genre name -> integer class label. A genre's position in the tuple below
# is its label: 0 reggae, 1 metal, 2 blues, 3 country, 4 classical,
# 5 pop, 6 jazz, 7 rock, 8 hiphop, 9 disco.
genres = {name: label for label, name in enumerate((
    'reggae', 'metal', 'blues', 'country', 'classical',
    'pop', 'jazz', 'rock', 'hiphop', 'disco',
))}

# Sample rate (samples per second) used when cutting signals into windows
# and when computing MFCC features.
def_rate = 22050

def readSongs(duration, data_dir='../genres', train_frac=0.7, seed=None):
    """Load every ``.wav`` clip under ``data_dir`` and split it per genre.

    Each file is read with ``scipy.io.wavfile``, cut to its first
    ``duration`` seconds (using the file's own sample rate) and filed under
    the name of the first directory below ``data_dir``. Every genre is then
    shuffled and split into a training part and a test part.

    Parameters
    ----------
    duration : int
        Number of seconds kept from the start of every clip.
    data_dir : str or Path, optional
        Root folder holding one sub-folder per genre.
    train_frac : float, optional
        Fraction of each genre's clips assigned to the training split. The
        default 0.7 gives the 70/30 split for a genre with 100 clips.
    seed : int or None, optional
        Seed for the shuffle. ``None`` draws from NumPy's global random
        state, so an earlier ``np.random.seed`` call is still honoured.

    Returns
    -------
    (train_sigs, test_sigs) : tuple of dict
        Both map genre name -> ``numpy`` array of clips.

    Raises
    ------
    KeyError
        If a ``.wav`` file sits in a folder that is not a known genre.
    """
    root = Path(data_dir)
    genre_names = ('reggae', 'metal', 'blues', 'country', 'classical',
                   'pop', 'jazz', 'rock', 'hiphop', 'disco')
    song_sigs = {name: [] for name in genre_names}

    # glob() order is unspecified; sorting makes a seeded split reproducible.
    for path in sorted(root.glob('**/*.wav')):
        # The genre is the first path component below the root. This works
        # with any path separator and any depth of ``data_dir``.
        genre = path.relative_to(root).parts[0]
        rate, sig = wav.read(str(path))
        song_sigs[genre].append(sig[:rate*duration])

    rng = np.random if seed is None else np.random.RandomState(seed)

    train_sigs = {}
    test_sigs = {}
    for key in song_sigs:
        # Split on the number of clips actually found rather than assuming
        # exactly 100 per genre.
        n_songs = len(song_sigs[key])
        n_train = int(round(train_frac*n_songs))
        perm = rng.permutation(n_songs)

        # NOTE: np.array needs clips of equal length to form a 2-D array,
        # i.e. every file should be at least ``duration`` seconds long.
        sig_arr = np.array(song_sigs[key])
        train_sigs[key] = sig_arr[perm[:n_train]]
        test_sigs[key] = sig_arr[perm[n_train:]]
    return train_sigs, test_sigs

def extractFeatures(signals, sample_len):
    """Turn per-genre signals into flat MFCC feature vectors with labels.

    Every signal is cut into consecutive windows of ``sample_len`` seconds
    (at ``def_rate`` samples per second). For each window the MFCCs, their
    deltas and the deltas of those deltas are computed, flattened and joined
    into a single vector.

    Parameters
    ----------
    signals : dict
        Genre name -> iterable of 1-D signals.
    sample_len : int
        Window length in seconds.

    Returns
    -------
    (features, labels) : tuple of list
        One feature vector and one integer genre label per window.
    """
    feature_rows, label_ids = [], []
    for genre_name, clips in signals.items():
        # Progress output: one line per genre.
        print(genre_name)
        for clip in clips:
            n_windows = int((len(clip)/def_rate)/sample_len)
            window = def_rate*sample_len
            for k in range(n_windows):
                chunk = clip[k*window:(k + 1)*window]
                base = mfcc(chunk, def_rate, nfft=551)
                first_delta = delta(base, 2)
                second_delta = delta(first_delta, 2)

                # Join the three flattened matrices into one vector.
                flat_parts = (base.flatten(),
                              first_delta.flatten(),
                              second_delta.flatten())
                feature_rows.append(np.concatenate(flat_parts))
                label_ids.append(genres[genre_name])

    return feature_rows, label_ids

def getData(duration, sample_len):
    """Build standardized train/test feature matrices.

    Reads the songs, extracts features for both splits and scales them with
    a ``StandardScaler`` fitted on the training features only.

    Parameters
    ----------
    duration : int
        Seconds of audio kept per song (forwarded to ``readSongs``).
    sample_len : int
        Window length in seconds (forwarded to ``extractFeatures``).

    Returns
    -------
    (train_feat, train_label, test_feat, test_label)
    """
    train_sigs, test_sigs = readSongs(duration)

    x_train, y_train = extractFeatures(train_sigs, sample_len)
    x_test, y_test = extractFeatures(test_sigs, sample_len)

    # Fit the scaler on training data only, then apply it to both splits.
    scaler = preprocessing.StandardScaler()
    scaler.fit(x_train)
    x_train = scaler.transform(x_train)
    x_test = scaler.transform(x_test)

    return x_train, y_train, x_test, y_test

def createForest(duration, sample_len):
    """Train a random forest on the training split.

    Parameters
    ----------
    duration : int
        Seconds of audio kept per song.
    sample_len : int
        Window length in seconds.

    Returns
    -------
    (rf, test_feat, test_label)
        The fitted classifier together with the held-out features/labels.
    """
    x_train, y_train, x_test, y_test = getData(duration, sample_len)

    forest_params = dict(n_estimators=100, max_features='sqrt',
                         oob_score=True, n_jobs=-1)
    forest = RandomForestClassifier(**forest_params)
    forest.fit(x_train, y_train)

    return forest, x_test, y_test

def songWeightedPred(rf, feat, label, no_samples):
    """Predict one genre per song by averaging window probabilities.

    The rows of ``feat`` are taken in consecutive groups of ``no_samples``;
    each group is treated as one song. Rows left over after the last full
    group are ignored.

    Parameters
    ----------
    rf : classifier
        Object providing ``predict_proba``. If it exposes ``classes_``, the
        winning probability column is translated to the matching class label.
    feat : numpy.ndarray
        2-D feature matrix, one row per window.
    label : sequence
        True label for every row of ``feat``.
    no_samples : int
        Number of windows per song.

    Returns
    -------
    (truth, pred) : tuple of list
        True label (taken from the first window) and predicted label for
        every song.
    """
    # predict_proba columns follow rf.classes_, not necessarily 0..k-1.
    classes = getattr(rf, 'classes_', None)

    truth = []
    pred = []
    for i in range(feat.shape[0] // no_samples):
        # Progress output every 10 songs.
        if i % 10 == 0:
            print(i)
        start = i*no_samples
        truth.append(label[start])

        pr = rf.predict_proba(feat[start:start + no_samples, :])
        pr = np.sum(pr, axis=0)/no_samples
        best = np.argmax(pr)
        pred.append(best if classes is None else classes[best])
    return truth, pred

def testModel(duration, sample_len):
    """Train a forest and print window-level and song-level evaluation.

    Prints, in order: the confusion matrix and classification report for
    individual windows, the song-level accuracy, and the confusion matrix
    and classification report for the per-song predictions.

    Parameters
    ----------
    duration : int
        Seconds of audio kept per song.
    sample_len : int
        Window length in seconds.
    """
    rf, tef, tel = createForest(duration, sample_len)

    # Order names by label id instead of relying on dict insertion order,
    # and pass the label ids explicitly so the reports keep a fixed layout
    # even if a genre is absent from the test data.
    tar_names = sorted(genres, key=genres.get)
    label_ids = [genres[name] for name in tar_names]

    pred = rf.predict(tef)
    print(confusion_matrix(tel, pred, labels=label_ids))
    print(classification_report(tel, pred, labels=label_ids,
                                target_names=tar_names))

    tr, pr = songWeightedPred(rf, tef, tel, int(duration/sample_len))
    # Accuracy over the songs actually evaluated, not a fixed count of 300.
    n_songs = len(tr)
    n_correct = int(np.sum(np.array(tr) == np.array(pr)))
    print(n_correct/n_songs if n_songs else 0.0)

    print(confusion_matrix(tr, pr, labels=label_ids))
    print(classification_report(tr, pr, labels=label_ids,
                                target_names=tar_names))


# (duration, sample_len) pairs to evaluate, in seconds.
experiment_grid = [(28, 1), (28, 2), (27, 3), (25, 5), (20, 10), (28, 14), (28, 28)]

for run_idx, (run_duration, run_sample_len) in enumerate(experiment_grid):
    # Blank-line separator between consecutive runs (not before the first).
    if run_idx:
        print('\n\n\n\n')
    print('=============== {}/{} ================='.format(run_duration, run_sample_len))
    testModel(run_duration, run_sample_len)


reggae
metal
blues
country
classical
pop
jazz
rock
hiphop
disco
reggae
metal
blues
country
classical
pop
jazz
rock
hiphop
disco
[[363  15  44  78   4  89  36  53 111  47]
 [ 31 637  31   8   0   7   8  69  20  29]
 [ 25 142 349  76   4   0  58 128  17  41]
 [ 48  25 100 227   8 138  86 112  14  82]
 [  2   0   2  52 693   0  57  29   0   5]
 [ 33   1   0  24   6 620  43   8  68  37]
 [ 22  16  46  96  76  70 442  36   5  31]
 [ 55 125  82  96  14  62  70 194  32 110]
 [ 80 114  26  14  12 115  16  68 307  88]
 [ 60 104  52  40  21 153  20 126  67 197]]
             precision    recall  f1-score   support

     reggae       0.50      0.43      0.47       840
      metal       0.54      0.76      0.63       840
      blues       0.48      0.42      0.44       840
    country       0.32      0.27      0.29       840
  classical       0.83      0.82      0.83       840
        pop       0.49      0.74      0.59       840
       jazz       0.53      0.53      0.53       840
       rock     

10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
0.49
[[10  1  3  4  0  3  4  1  2  2]
 [ 1 26  0  0  0  0  0  1  1  1]
 [ 0  9 12  2  0  0  2  3  0  2]
 [ 1  1  4  8  0  2  7  6  0  1]
 [ 0  0  0  0 28  0  1  1  0  0]
 [ 0  0  0  4  0 22  1  0  1  2]
 [ 1  0  0  2  2  5 18  1  1  0]
 [ 1  3  4  5  0  1  3  9  0  4]
 [ 2  6  0  0  0  7  0  2  8  5]
 [ 2  3  0  3  0  6  1  6  3  6]]
             precision    recall  f1-score   support

     reggae       0.56      0.33      0.42        30
      metal       0.53      0.87      0.66        30
      blues       0.52      0.40      0.45        30
    country       0.29      0.27      0.28        30
  classical       0.93      0.93      0.93        30
        pop       0.48      0.73      0.58        30
       jazz       0.49      0.60      0.54        30
       rock       0.30      0.30      0.30        30
     hiphop       0.50      0.27      0.35        30
      disco       0.26    