In [32]:
from data_pipeline_v4 import DataGen
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.utils import class_weight
import os

Set up the dataset from our generator:

In [34]:
def _fixup_shape(x, y):
  x.set_shape([None, 259, 128]) # n, h, w, c
  y.set_shape([None]) # n, nb_classes
  return x, y

batch_size = 64
split_seed = 42  # fixed seed so the train/test split is identical on every run
tracks = pd.read_csv('./data/processed_genres_mel.csv')

# Set up a reproducible 80/20 train/test split over row positions.
# NOTE(review): the split is not stratified; very rare genres may end up
# almost entirely in one side of the split.
all_idxs = np.random.default_rng(split_seed).permutation(len(tracks)).tolist()
num_train = int(len(all_idxs) * 0.8)
train_idxs = all_idxs[:num_train]
test_idxs = all_idxs[num_train:]

# Parse filepaths
# NOTE(review): plain concatenation assumes every 'fpath' value starts with a
# path separator -- confirm against the CSV.
track_fpaths = ['./data/fma_medium' + fpath for fpath in tracks['fpath']]

# Set up generator processing function
gen = DataGen()


def _make_dataset(fpaths, labels, sample_fn, batch_size, shuffle):
  """Build a batched, prefetched tf.data pipeline from file paths and labels.

  Args:
    fpaths: list of file paths, one per sample.
    labels: list of integer labels aligned with `fpaths`.
    sample_fn: Python callable invoked through tf.py_function as
      sample_fn(fpath, label); must return a (float32, int32) pair.
    batch_size: number of samples per batch.
    shuffle: if True, reshuffle the samples on every pass over the dataset.

  Returns:
    A tf.data.Dataset yielding (features, labels) batches with static
    shapes restored by `_fixup_shape`.
  """
  dataset = tf.data.Dataset.from_tensor_slices((fpaths, labels))
  if shuffle:
    # Shuffle the cheap (path, label) pairs *before* decoding: a full-size
    # buffer placed after the map would have to hold every decoded sample in
    # memory and be completely filled before the first batch of each epoch.
    dataset = dataset.shuffle(buffer_size=len(fpaths))
  dataset = dataset.map(
      lambda fpath, label: tuple(tf.py_function(sample_fn, [fpath, label], [tf.float32, tf.int32])),
      num_parallel_calls=tf.data.AUTOTUNE)
  return dataset.batch(batch_size).map(_fixup_shape).prefetch(tf.data.AUTOTUNE)


# Labels are selected by position (.iloc) so the split stays correct even if
# the DataFrame index is not the default 0..N-1 range.
genre_ids = tracks['parent_genre_id']

# Set up train data
train_data = ([track_fpaths[i] for i in train_idxs],
              list(genre_ids.iloc[train_idxs]))
train_dataset = _make_dataset(train_data[0], train_data[1], gen.get_sample,
                              batch_size, shuffle=True)

# Set up test data (not shuffled: evaluation metrics do not depend on order)
test_data = ([track_fpaths[i] for i in test_idxs],
             list(genre_ids.iloc[test_idxs]))
test_dataset = _make_dataset(test_data[0], test_data[1], gen.get_sample,
                             batch_size, shuffle=False)

Compute the class weights for balancing:

In [35]:
# Compute 'balanced' per-class weights (inversely proportional to class
# frequency) to counteract genre imbalance during training.
# NOTE(review): the weights are computed over all tracks, including the ones
# held out for testing -- consider restricting this to the training rows.
genres = np.array(tracks['parent_genre_id'])
genre_classes = np.unique(genres)
balanced_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                     classes=genre_classes,
                                                     y=genres)

# Key each weight by its actual class label rather than by its position in
# the sorted label list; the two only coincide when labels are exactly 0..K-1.
# Keras expects a {class index: weight} mapping.
# NOTE(review): int() assumes the genre ids are integer-valued -- confirm.
class_weights = {int(label): float(weight)
                 for label, weight in zip(genre_classes, balanced_weights)}

class_weights

array([ 0.7109482 ,  1.31339587,  0.21964009,  1.0261446 ,  0.69322986,
        4.05647786,  0.24701673,  1.54993781, 10.1148539 , 21.04983108,
       13.20074153,  8.75105337,  2.5328252 ,  3.20511831,  1.15555453,
       74.17559524])

Build and train the model:

In [36]:
# Bidirectional LSTM over inputs of shape (259, 128): the LSTM treats the
# first axis as time steps and the second as per-step features
# (presumably mel bands -- confirm against the data pipeline).
# The LSTM is followed by a funnel of dense layers; the final layer emits
# 16 raw logits, one per class.
# NOTE(review): a non-zero recurrent_dropout prevents Keras from using the
# fused cuDNN LSTM kernel, which makes GPU training much slower.
model = tf.keras.models.Sequential([
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(259, dropout=0.2, recurrent_dropout=0.2), input_shape=(259, 128)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(256, activation="relu"),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dense(16)
])

model.summary()
# from_logits=True because the last Dense layer has no softmax activation.
model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=["acc"], optimizer='adam')

# steps_per_epoch is deliberately not passed: train_dataset is finite and not
# repeated, so Keras iterates it in full (and reshuffles it) every epoch.
# Passing a step count that differs from the dataset's cardinality makes
# Keras keep a single iterator across epochs, which runs out of data during
# the second epoch and stops training early.
history = model.fit(x=train_dataset, epochs=50,
                    validation_data=test_dataset, class_weight=class_weights)

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_4 (Bidirectio  (None, 518)              803936    
 nal)                                                            
                                                                 
 dropout_16 (Dropout)        (None, 518)               0         
                                                                 
 dense_20 (Dense)            (None, 256)               132864    
                                                                 
 dropout_17 (Dropout)        (None, 256)               0         
                                                                 
 dense_21 (Dense)            (None, 128)               32896     
                                                                 
 dropout_18 (Dropout)        (None, 128)               0         
                                                      