In [1]:
import logging
import os
import warnings

import librosa as lr
import numpy as np
import sklearn as skl
import tensorflow as tf

In [2]:
logging.warnings.filterwarnings('ignore')

### Genre Selection:

In [3]:
# Genre name -> integer class id. Ids follow the order of the tuple below,
# so editing the tuple renumbers every genre listed after the edit point.
genres = {
    name: class_id
    for class_id, name in enumerate((
        'blues', 'classical', 'country', 'disco', 'hiphop',
        'jazz', 'metal', 'pop', 'reggae', 'rock',
    ))
}

### Hyperparameters:

In [4]:
# Settings shared by feature extraction, dataset slicing and the model.
hparams = dict(
    samplerate=22050,  # handed to librosa as `sr`
    seq_length=256,    # number of frames in one training segment
    hop_length=512,    # handed to librosa as `hop_length`
    fft_window=2048,   # handed to librosa as `n_fft`
)
# Derived from the genre table so the two cannot drift apart.
hparams['num_classes'] = len(genres)

### Extract Features:

In [5]:
def extract_features(filepath, hparams):
    """Load one audio file and return its stacked frame-level features.

    Args:
        filepath: Path of the audio file handed to ``librosa.load``.
        hparams: Mapping providing ``'samplerate'``, ``'hop_length'`` and
            ``'fft_window'``.

    Returns:
        The MFCC, chroma-STFT, spectral-centroid and spectral-contrast
        arrays joined with ``np.vstack`` in that order. The rest of the
        notebook treats the result as 40 rows by one column per frame.
    """
    # librosa.load returns a (samples, sample_rate) pair; only the samples
    # are needed because the rate is already fixed by hparams.
    waveform, _ = lr.load(filepath, sr=hparams['samplerate'], mono=True)

    # Every extractor receives the same signal and framing arguments.
    shared_kwargs = dict(
        y=waveform,
        sr=hparams['samplerate'],
        hop_length=hparams['hop_length'],
        n_fft=hparams['fft_window'],
    )
    extractors = (
        lr.feature.mfcc,
        lr.feature.chroma_stft,
        lr.feature.spectral_centroid,
        lr.feature.spectral_contrast,
    )

    # NOTE: per-feature scaling (sklearn `scale` / `minmax_scale`) was tried
    # at this point and is currently disabled; raw feature values are returned.
    return np.vstack([extract(**shared_kwargs) for extract in extractors])

### Creating Dataset:

In [6]:
# Pre-allocated feature tensors shaped (segments, 40 feature rows, seq_length).
# Every file contributes 5 segments; the splits take 70 / 20 / 10 files per genre.
train_x, val_x, test_x = (
    np.zeros(shape=(5 * files_per_genre * hparams['num_classes'], 40, hparams['seq_length']))
    for files_per_genre in (70, 20, 10)
)

##### Generate the Train Dataset if it does not exist:

In [7]:
if not os.path.exists('./prepared/train_x.npy'):
    # No cached array yet: extract features from the first 70 files (sorted by
    # name) of every genre and cut each file into 5 consecutive segments.
    m = hparams['seq_length']  # frames per segment; loop-invariant, so set once
    train_counter = 0
    for genre in genres.keys():
        genre_dir = os.path.join('./dataset', genre)
        train_filenames = sorted(os.listdir(genre_dir))[0:70]
        # Indexing over a fixed range keeps the IndexError for a genre with
        # fewer than 70 files, instead of leaving zero rows in train_x.
        for i in range(70):
            train_example = extract_features(
                os.path.join(genre_dir, train_filenames[i]),
                hparams
            )
            for j in range(5):
                # Segment j covers frames [j*m, (j+1)*m) of this file.
                train_x[train_counter, :] = train_example[:, (j*m):(j+1)*m]
                train_counter += 1
            print('Train Files Loaded:', train_counter // 5)

    # exist_ok=True creates the cache folder only when it is missing, without
    # a separate listdir() check beforehand.
    os.makedirs('prepared', exist_ok=True)
    np.save('./prepared/train_x.npy', train_x, allow_pickle=False)
else:
    # Cached features exist: load them instead of re-running extraction.
    train_x = np.load('./prepared/train_x.npy', allow_pickle=False)

Train Files Loaded: 1
Train Files Loaded: 2
Train Files Loaded: 3
Train Files Loaded: 4
Train Files Loaded: 5
Train Files Loaded: 6
Train Files Loaded: 7
Train Files Loaded: 8
Train Files Loaded: 9
Train Files Loaded: 10
Train Files Loaded: 11
Train Files Loaded: 12
Train Files Loaded: 13
Train Files Loaded: 14
Train Files Loaded: 15
Train Files Loaded: 16
Train Files Loaded: 17
Train Files Loaded: 18
Train Files Loaded: 19
Train Files Loaded: 20
Train Files Loaded: 21
Train Files Loaded: 22
Train Files Loaded: 23
Train Files Loaded: 24
Train Files Loaded: 25
Train Files Loaded: 26
Train Files Loaded: 27
Train Files Loaded: 28
Train Files Loaded: 29
Train Files Loaded: 30
Train Files Loaded: 31
Train Files Loaded: 32
Train Files Loaded: 33
Train Files Loaded: 34
Train Files Loaded: 35
Train Files Loaded: 36
Train Files Loaded: 37
Train Files Loaded: 38
Train Files Loaded: 39
Train Files Loaded: 40
Train Files Loaded: 41
Train Files Loaded: 42
Train Files Loaded: 43
Train Files Loaded: 

Train Files Loaded: 347
Train Files Loaded: 348
Train Files Loaded: 349
Train Files Loaded: 350
Train Files Loaded: 351
Train Files Loaded: 352
Train Files Loaded: 353
Train Files Loaded: 354
Train Files Loaded: 355
Train Files Loaded: 356
Train Files Loaded: 357
Train Files Loaded: 358
Train Files Loaded: 359
Train Files Loaded: 360
Train Files Loaded: 361
Train Files Loaded: 362
Train Files Loaded: 363
Train Files Loaded: 364
Train Files Loaded: 365
Train Files Loaded: 366
Train Files Loaded: 367
Train Files Loaded: 368
Train Files Loaded: 369
Train Files Loaded: 370
Train Files Loaded: 371
Train Files Loaded: 372
Train Files Loaded: 373
Train Files Loaded: 374
Train Files Loaded: 375
Train Files Loaded: 376
Train Files Loaded: 377
Train Files Loaded: 378
Train Files Loaded: 379
Train Files Loaded: 380
Train Files Loaded: 381
Train Files Loaded: 382
Train Files Loaded: 383
Train Files Loaded: 384
Train Files Loaded: 385
Train Files Loaded: 386
Train Files Loaded: 387
Train Files Load

Train Files Loaded: 689
Train Files Loaded: 690
Train Files Loaded: 691
Train Files Loaded: 692
Train Files Loaded: 693
Train Files Loaded: 694
Train Files Loaded: 695
Train Files Loaded: 696
Train Files Loaded: 697
Train Files Loaded: 698
Train Files Loaded: 699
Train Files Loaded: 700


##### Generate the Validation Dataset if it does not exist:

In [8]:
if not os.path.exists('./prepared/val_x.npy'):
    # No cached array yet: extract features from files 70-89 (sorted by name)
    # of every genre and cut each file into 5 consecutive segments.
    m = hparams['seq_length']  # frames per segment; loop-invariant, so set once
    val_counter = 0
    for genre in genres.keys():
        genre_dir = os.path.join('./dataset', genre)
        val_filenames = sorted(os.listdir(genre_dir))[70:90]
        # Indexing over a fixed range keeps the IndexError for a genre with
        # too few files, instead of leaving zero rows in val_x.
        for i in range(20):
            val_example = extract_features(
                os.path.join(genre_dir, val_filenames[i]),
                hparams
            )
            for j in range(5):
                # Segment j covers frames [j*m, (j+1)*m) of this file.
                val_x[val_counter, :] = val_example[:, (j*m):(j+1)*m]
                val_counter += 1
            print('Validation Files Loaded:', val_counter // 5)

    # exist_ok=True creates the cache folder only when it is missing, without
    # a separate listdir() check beforehand.
    os.makedirs('prepared', exist_ok=True)
    np.save('./prepared/val_x.npy', val_x, allow_pickle=False)
else:
    # Cached features exist: load them instead of re-running extraction.
    val_x = np.load('./prepared/val_x.npy', allow_pickle=False)

Validation Files Loaded: 1
Validation Files Loaded: 2
Validation Files Loaded: 3
Validation Files Loaded: 4
Validation Files Loaded: 5
Validation Files Loaded: 6
Validation Files Loaded: 7
Validation Files Loaded: 8
Validation Files Loaded: 9
Validation Files Loaded: 10
Validation Files Loaded: 11
Validation Files Loaded: 12
Validation Files Loaded: 13
Validation Files Loaded: 14
Validation Files Loaded: 15
Validation Files Loaded: 16
Validation Files Loaded: 17
Validation Files Loaded: 18
Validation Files Loaded: 19
Validation Files Loaded: 20
Validation Files Loaded: 21
Validation Files Loaded: 22
Validation Files Loaded: 23
Validation Files Loaded: 24
Validation Files Loaded: 25
Validation Files Loaded: 26
Validation Files Loaded: 27
Validation Files Loaded: 28
Validation Files Loaded: 29
Validation Files Loaded: 30
Validation Files Loaded: 31
Validation Files Loaded: 32
Validation Files Loaded: 33
Validation Files Loaded: 34
Validation Files Loaded: 35
Validation Files Loaded: 36
V

##### Generate the Test Dataset if it does not exist:

In [9]:
if not os.path.exists('./prepared/test_x.npy'):
    # No cached array yet: extract features from files 90-99 (sorted by name)
    # of every genre and cut each file into 5 consecutive segments.
    m = hparams['seq_length']  # frames per segment; loop-invariant, so set once
    test_counter = 0
    for genre in genres.keys():
        genre_dir = os.path.join('./dataset', genre)
        test_filenames = sorted(os.listdir(genre_dir))[90:100]
        # Indexing over a fixed range keeps the IndexError for a genre with
        # too few files, instead of leaving zero rows in test_x.
        for i in range(10):
            test_example = extract_features(
                os.path.join(genre_dir, test_filenames[i]),
                hparams
            )
            for j in range(5):
                # Segment j covers frames [j*m, (j+1)*m) of this file.
                test_x[test_counter, :] = test_example[:, (j*m):(j+1)*m]
                test_counter += 1
            # Message previously read 'testidation', a leftover from copying
            # the validation cell.
            print('Test Files Loaded:', test_counter // 5)

    # exist_ok=True creates the cache folder only when it is missing, without
    # a separate listdir() check beforehand.
    os.makedirs('prepared', exist_ok=True)
    np.save('./prepared/test_x.npy', test_x, allow_pickle=False)
else:
    # Cached features exist: load them instead of re-running extraction.
    test_x = np.load('./prepared/test_x.npy', allow_pickle=False)

testidation Files Loaded: 1
testidation Files Loaded: 2
testidation Files Loaded: 3
testidation Files Loaded: 4
testidation Files Loaded: 5
testidation Files Loaded: 6
testidation Files Loaded: 7
testidation Files Loaded: 8
testidation Files Loaded: 9
testidation Files Loaded: 10
testidation Files Loaded: 11
testidation Files Loaded: 12
testidation Files Loaded: 13
testidation Files Loaded: 14
testidation Files Loaded: 15
testidation Files Loaded: 16
testidation Files Loaded: 17
testidation Files Loaded: 18
testidation Files Loaded: 19
testidation Files Loaded: 20
testidation Files Loaded: 21
testidation Files Loaded: 22
testidation Files Loaded: 23
testidation Files Loaded: 24
testidation Files Loaded: 25
testidation Files Loaded: 26
testidation Files Loaded: 27
testidation Files Loaded: 28
testidation Files Loaded: 29
testidation Files Loaded: 30
testidation Files Loaded: 31
testidation Files Loaded: 32
testidation Files Loaded: 33
testidation Files Loaded: 34
testidation Files Loade

### Ground Truth Labels:

In [10]:
# One-hot ground-truth labels, one row per segment.
# The feature arrays are filled genre by genre, so each genre owns one
# contiguous run of rows (5 segments x 70 / 20 / 10 files) and row r belongs to
# class r // run_length. Repeating every row of the identity matrix
# run_length times builds exactly that layout in a single NumPy call per
# split, instead of one to_categorical call per row.
_one_hot_rows = np.eye(hparams['num_classes'], dtype=float)

train_y = np.repeat(_one_hot_rows, 5 * 70, axis=0)
val_y   = np.repeat(_one_hot_rows, 5 * 20, axis=0)
test_y  = np.repeat(_one_hot_rows, 5 * 10, axis=0)

# Row counts per split, kept under the counter names this cell has always
# left behind so any cell reading them still sees the same final values.
train_counter = len(train_y)
val_counter   = len(val_y)
test_counter  = len(test_y)

In [11]:
def _to_time_major(batch, seq_length):
    """Return `batch` with the time axis second: (N, seq_length, features).

    The arrays arrive as (N, features, seq_length). Swapping axes 1 and 2 is
    its own inverse, so an unconditional swap would undo itself if this cell
    were executed twice. A batch that is already time-major is therefore
    returned unchanged. When both axes have length `seq_length` the two
    layouts cannot be told apart, and the swap is always applied.
    """
    already_time_major = (
        batch.shape[1] == seq_length and batch.shape[2] != seq_length
    )
    return batch if already_time_major else np.moveaxis(batch, 1, 2)


# The LSTM layers consume (batch, time, features); re-running this cell is a no-op.
train_x = _to_time_major(train_x, hparams['seq_length'])
val_x = _to_time_major(val_x, hparams['seq_length'])
test_x = _to_time_major(test_x, hparams['seq_length'])

In [12]:
class GenreClassifierModel(tf.keras.Model):
    """Two stacked LSTMs followed by a dense softmax classification head.

    Layers in call order: LSTM(64, full sequence) -> LSTM(64, last output) ->
    Dense(64, relu) -> Dropout(0.3) -> Dense(num_classes, softmax).
    The number of output units is read from the global `hparams`.
    """

    def __init__(self):
        super().__init__()

        # First LSTM returns the whole sequence so the second one can consume it.
        self.L1 = tf.keras.layers.LSTM(64, return_sequences=True)
        # Second LSTM keeps only its final output.
        self.L2 = tf.keras.layers.LSTM(64, return_sequences=False)
        self.L3 = tf.keras.layers.Dense(64, activation='relu')
        self.L4 = tf.keras.layers.Dropout(0.3)
        self.L5 = tf.keras.layers.Dense(units=hparams['num_classes'], activation="softmax")

    def call(self, inputs, training=None):
        """Run the forward pass.

        Args:
            inputs: Batch fed to the first LSTM layer.
            training: Optional training flag forwarded to the Dropout layer;
                ``None`` lets Keras decide from the calling context.

        Returns:
            Output of the final softmax layer.
        """
        # Intermediate activations stay in a local variable: assigning them to
        # an attribute would put per-call state on the model and keep the last
        # batch's tensors alive after the call returns.
        x = self.L1(inputs)
        x = self.L2(x)
        x = self.L3(x)
        x = self.L4(x, training=training)
        return self.L5(x)

In [13]:
# Create a fresh, untrained GenreClassifierModel instance.
model = GenreClassifierModel()

In [14]:
# Configure training: RMSprop optimizer, categorical cross-entropy loss
# (the labels are one-hot rows), and accuracy reported as a metric.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [15]:
# Train for 30 epochs. The validation split is passed in so Keras reports
# val_loss / val_accuracy after every epoch, which makes over-fitting visible
# while training; validation data is never used to update the weights.
hist = model.fit(train_x, train_y, validation_data=(val_x, val_y), epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [16]:
# Score the trained model on the validation split. With the loss and the
# single 'accuracy' metric configured in compile, the result is [loss, accuracy].
model.evaluate(val_x, val_y)



[3.924561023712158, 0.367000013589859]

In [17]:
# Index of the genre to inspect; change it to look at another genre.
gno = 0

g = list(genres.keys())[gno]
print('Actual Genre', g, gno)

m = hparams['seq_length']  # frames per segment

# Files numbered 90-99 of the chosen genre. The count (10) comes from the file
# numbering, not from the number of classes, so it is stated explicitly.
# NOTE(review): presumably these are the same files as the test split - confirm.
for i in range(10):

    # Zero-padded five-digit index, e.g. './dataset/blues/blues.00090.wav'.
    example = extract_features('./dataset/{0}/{0}.{1:05d}.wav'.format(g, 90 + i), hparams)

    # (features, frames) -> (frames, features); the first 5*m frames are then
    # viewed as five consecutive (m, 40) segments forming one batch, so the
    # model is called once per file instead of once per segment.
    segments = np.moveaxis(example, 0, 1)[:5*m, :].reshape(5, m, 40)
    predicted = np.argmax(model.predict(segments), axis=1)

    print('===============================================')
    for s in range(5):
        print('Segment-{}:'.format(s), predicted[s])

Actual Genre blues 0
Segment-0: 0
Segment-1: 0
Segment-2: 9
Segment-3: 9
Segment-4: 0
Segment-0: 8
Segment-1: 2
Segment-2: 0
Segment-3: 8
Segment-4: 2
Segment-0: 0
Segment-1: 0
Segment-2: 5
Segment-3: 0
Segment-4: 0
Segment-0: 5
Segment-1: 1
Segment-2: 0
Segment-3: 5
Segment-4: 0
Segment-0: 9
Segment-1: 0
Segment-2: 8
Segment-3: 4
Segment-4: 0
Segment-0: 5
Segment-1: 3
Segment-2: 0
Segment-3: 2
Segment-4: 2
Segment-0: 1
Segment-1: 1
Segment-2: 2
Segment-3: 5
Segment-4: 2
Segment-0: 8
Segment-1: 8
Segment-2: 8
Segment-3: 9
Segment-4: 9
Segment-0: 6
Segment-1: 0
Segment-2: 9
Segment-3: 0
Segment-4: 9
Segment-0: 9
Segment-1: 0
Segment-2: 8
Segment-3: 2
Segment-4: 9
