In [8]:
from data_pipeline_new import DataGen
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.utils import class_weight

Set up the dataset from our generator:

In [9]:
# Number of passes (epochs) over the data.
num_epochs = 50

In [10]:
def _fixup_shape(x, y):
  x.set_shape([None, 512, 512]) # n, h, w, c
  y.set_shape([None]) # n, nb_classes
  return x, y

batch_size = 16
tracks = pd.read_csv('./data/processed_genres.csv')

# Set up train/test split (80% / 20% of the row positions in `tracks`).
# The permutation is seeded so that restarting the kernel reproduces the same
# split instead of moving previously held-out rows into the training set.
split_seed = 0
all_idxs = np.random.default_rng(split_seed).permutation(len(tracks)).tolist()
num_train = int(len(all_idxs) * 0.8)
train_idxs = all_idxs[:num_train]
test_idxs = all_idxs[num_train:]

# Set up generator processing function
gen = DataGen(tracks, batch_size=batch_size)


def _build_dataset(idxs, training):
  """Build the batched tf.data pipeline for one split.

  Args:
    idxs: list of integer indices; each one is passed to ``gen.get_sample``.
    training: if True the indices are reshuffled on every pass and the stream
      repeats forever; if False the dataset is a single ordered, finite pass.

  Returns:
    A ``tf.data.Dataset`` of ``(x, y)`` batches of size ``batch_size`` whose
    static shapes have been set by ``_fixup_shape``.
  """
  # int32 rather than the narrower uint16, which would silently wrap around
  # once the table grows past 65,535 rows.
  ds = tf.data.Dataset.from_tensor_slices(np.asarray(idxs, dtype=np.int32))
  if training:
    ds = ds.shuffle(buffer_size=len(idxs),
                    seed=0, reshuffle_each_iteration=True)

  def _load_sample(i):
    # get_sample is plain Python, so it has to run through tf.py_function.
    return tf.py_function(func=gen.get_sample,
                          inp=[i],
                          Tout=[tf.float32, tf.int32])

  ds = ds.map(_load_sample, num_parallel_calls=tf.data.AUTOTUNE)
  if training:
    # Repeat before batching: every batch is full and the stream never runs
    # dry; the length of an epoch is set by the caller (steps_per_epoch).
    ds = ds.repeat()
  ds = ds.batch(batch_size).map(_fixup_shape)
  return ds.prefetch(tf.data.AUTOTUNE)


# Set up train data
train_dataset = _build_dataset(train_idxs, training=True)

# Set up test data: one unshuffled, finite pass. Keras evaluates
# validation_data until it is exhausted, so repeating it here would multiply
# the cost of every validation run without changing the metrics.
test_dataset = _build_dataset(test_idxs, training=False)

19941
Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Compute the class weights for balancing:

In [11]:
# Balanced class weights computed over every row of `tracks`: sklearn's
# 'balanced' mode weights each class by n_samples / (n_classes * class_count).
genres = np.array(tracks['parent_genre_id'])
genre_ids = np.unique(genres)
balanced_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                     classes=genre_ids,
                                                     y=genres)

# Keys are positions in the sorted unique-genre array, not the genre ids
# themselves. NOTE(review): this assumes the labels fed to the model are
# 0..K-1 in that same order -- confirm against DataGen.get_sample.
class_weights = {position: weight
                 for position, weight in enumerate(balanced_weights)}

class_weights

{0: 0.7104138166894665,
 1: 1.3136066610455313,
 2: 0.21967533840947548,
 3: 1.0263092885375493,
 4: 0.6930326957295374,
 5: 4.05712890625,
 6: 0.24705637488106566,
 7: 1.550186567164179,
 8: 10.116477272727273,
 9: 21.05320945945946,
 10: 13.202860169491526,
 11: 8.75245786516854,
 12: 2.533231707317073,
 13: 3.205632716049383,
 14: 1.1548832468495183,
 15: 74.1875}

Build and train the model:

In [12]:
# Two conv -> max-pool -> dropout stages followed by a small dense head that
# emits 16 raw logits (the final Dense layer has no activation).
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(16, (16,16), input_shape=(512, 512, 1), activation="relu"),
    tf.keras.layers.MaxPooling2D((4, 4)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Conv2D(32, (16, 16), activation="relu"),
    tf.keras.layers.MaxPooling2D((4, 4)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(32, activation="relu"), 
    tf.keras.layers.Dense(16)
])
    
model.summary()
# from_logits=True matches the activation-free output layer above; labels are
# integer class ids (sparse), not one-hot vectors.
model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=["accuracy"], optimizer='adam')

# Bound every validation run to one sweep of the held-out indices (rounded up
# so a final partial batch is counted). Without validation_steps Keras keeps
# evaluating until validation_data is exhausted.
validation_steps = (len(test_idxs) + batch_size - 1) // batch_size

# epochs comes from the shared num_epochs setting instead of a second
# hard-coded 50, so the two cannot drift apart.
history = model.fit(x=train_dataset, epochs=num_epochs,
                    validation_data=test_dataset, class_weight=class_weights,
                    steps_per_epoch=len(train_idxs) // batch_size,
                    validation_steps=validation_steps)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 497, 497, 16)      4112      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 124, 124, 16)     0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 124, 124, 16)      0         
                                                                 
 conv2d_1 (Conv2D)           (None, 109, 109, 32)      131104    
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 27, 27, 32)       0         
 2D)                                                             
                                                                 
 dropout_1 (Dropout)         (None, 27, 27, 32)        0