In [1]:
import numpy as np
import pandas as pd
import os

In [2]:
# Load the precomputed MFCC features. The array is indexed later as
# MFCCs[idx, :, :] with the same permutation as the per-song labels, so it is
# 3-D with one entry per song along axis 0.
# NOTE(review): axes 1 and 2 are presumably (coefficient, time frame) — confirm.
MFCCs = np.load("../preprocessing/MFCCs/MFCC.npy")

In [3]:
# Standardise with a single global mean and standard deviation. The NaN-aware
# reductions skip NaN entries, which are left as NaN in the result.
mfcc_mean = np.nanmean(MFCCs)
mfcc_std = np.nanstd(MFCCs)
MFCCs = (MFCCs - mfcc_mean) / mfcc_std

In [4]:
# Spot-check the standardised values of one entry (index 2).
print(MFCCs[2])

[[-7.49531220e+00 -7.49531220e+00 -7.49531220e+00 ... -2.28470204e+00
  -2.28154364e+00 -2.46218506e+00]
 [ 3.78452600e-03  3.78452600e-03  3.78452600e-03 ...  2.66772499e+00
   2.75375372e+00  2.64647346e+00]
 [ 3.78452600e-03  3.78452600e-03  3.78452600e-03 ... -3.14520361e-01
  -2.91229857e-01 -4.12438540e-01]
 ...
 [ 3.78452600e-03  3.78452600e-03  3.78452600e-03 ...  3.09898681e-02
   4.16713754e-03 -1.56268668e-03]
 [ 3.78452600e-03  3.78452600e-03  3.78452600e-03 ...  5.42445234e-02
   1.41780647e-01  9.51679247e-02]
 [ 3.78452600e-03  3.78452600e-03  3.78452600e-03 ...  4.73457277e-02
   6.71201664e-02  1.52584397e-01]]


In [5]:
# Per-track metadata; the "Track ID" and "Duration" columns are used below.
info_songs = pd.read_csv("../Info/info.csv")

In [6]:
# Per-track labels; the column named by select_label becomes the training target.
label_songs = pd.read_csv("../preprocessing/labels.csv")

In [7]:
# Name of the column in label_songs that is used as the classification target.
select_label = "genre"

# Integer class ids for every supported label column.
label_encodings = {
    "genre": {"classical": 0, "electronic": 1, "pop": 2, "rock": 3},
}

# Fail with a clear message instead of a NameError on an undefined `labels`.
if select_label not in label_encodings:
    raise ValueError(
        f"Unsupported select_label {select_label!r}; "
        f"expected one of {sorted(label_encodings)}"
    )

labels = label_songs[select_label].map(label_encodings[select_label])

# Series.map turns values that are missing from the mapping into NaN, which
# would otherwise be passed on silently as a class id.
if labels.isna().any():
    unknown_values = sorted(
        label_songs.loc[labels.isna(), select_label].astype(str).unique()
    )
    raise ValueError(f"Unmapped {select_label} values: {unknown_values}")

labels = labels.to_numpy()

In [8]:
# Per-track length table: the duration scaled by 10 and truncated to int.
# NOTE(review): the factor 10 is presumably frames per second — confirm.
song_length = (info_songs["Duration"] * 10).astype(int)
length_songs = info_songs[["Track ID"]].assign(**{"Song length": song_length})
length_songs

Unnamed: 0,Track ID,Song length
0,1,600
1,10,600
2,100,600
3,11,600
4,12,600
...,...,...
395,395,600
396,396,600
397,397,600
398,398,600


In [9]:
# Fixed seed so the shuffle, and the train/validation split derived from it,
# is reproducible across kernel restarts.
SHUFFLE_SEED = 42

n_data = len(length_songs)
idx = np.random.default_rng(SHUFFLE_SEED).permutation(n_data)

# Reduce the table to its length column once; the guard keeps this cell
# re-runnable after length_songs has already been turned into an array.
if isinstance(length_songs, pd.DataFrame):
    length_songs = length_songs["Song length"].to_numpy()

# Apply the same positional permutation to all three arrays so that features,
# labels and lengths stay aligned.
length_songs = length_songs[idx]
MFCCs = MFCCs[idx, :, :]
labels = labels[idx]

In [10]:
import tensorflow as tf

In [11]:
# Hold out the trailing fraction of the (already shuffled) data for validation.
fraction_validation = 0.25
n_test = int(fraction_validation * len(labels))
n_train = len(labels) - n_test

# Everything before n_train is used for training, the remainder for validation.
train_features, validation_features = MFCCs[:n_train], MFCCs[n_train:]
train_labels, validation_labels = labels[:n_train], labels[n_train:]
train_length, validation_length = length_songs[:n_train], length_songs[n_train:]

In [12]:
# Each dataset element is a (features, label, length) triple for one song.
train_arrays = (train_features, train_labels, train_length)
validation_arrays = (validation_features, validation_labels, validation_length)

train_dataset = tf.data.Dataset.from_tensor_slices(train_arrays)
validation_dataset = tf.data.Dataset.from_tensor_slices(validation_arrays)

In [13]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras import Input, layers
from tensorflow.keras import backend as K

# Shape of one network input. The first two entries are also the crop extent
# that slice_accordingly uses along axes 1 and 2 of a batch.
# Alternative setting: sample_size = (20, 50, 1)
sample_size = (20, 100, 1)

drop_out_rate = 0.2

# Small CNN built with the Keras functional API: two convolutions whose kernels
# span only the second spatial axis, 2x2 max-pooling, and a dense head that
# outputs probabilities for 4 classes.
input_tensor = Input(sample_size)
x = layers.Conv2D(16, (1, 10), padding="valid", activation="relu", strides=1)(input_tensor)
x = layers.Conv2D(32, (1, 5), padding="valid", activation="relu", strides=1)(x)
x = layers.Dropout(drop_out_rate)(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)
x = layers.Flatten()(x)
# The Dense output is already 2-D (batch, units), so no second Flatten is needed.
x = layers.Dense(50, activation="relu")(x)
x = layers.Dropout(drop_out_rate)(x)
output_tensor = layers.Dense(4, activation="softmax")(x)

model_1 = tf.keras.Model(input_tensor, output_tensor)

In [14]:
# Print the layer-by-layer output shapes and parameter counts of model_1.
model_1.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 20, 100, 1)]      0         
_________________________________________________________________
conv2d (Conv2D)              (None, 20, 91, 16)        176       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 20, 87, 32)        2592      
_________________________________________________________________
dropout (Dropout)            (None, 20, 87, 32)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 10, 43, 32)        0         
_________________________________________________________________
flatten (Flatten)            (None, 13760)             0         
_________________________________________________________________
dense (Dense)                (None, 50)                688050

In [15]:
# Deeper variant: three convolutions with wide kernels along the second spatial
# axis only, 2x2 max-pooling, one 2x2 convolution, then a dense head that
# outputs probabilities for 4 classes.
input_tensor = Input(sample_size)
x = layers.Conv2D(16, (1, 25), padding="valid", activation="relu", strides=1)(input_tensor)
x = layers.Conv2D(32, (1, 15), padding="valid", activation="relu", strides=1)(x)
x = layers.Conv2D(32, (1, 15), padding="valid", activation="relu", strides=1)(x)
x = layers.Dropout(drop_out_rate)(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)
x = layers.Conv2D(32, (2, 2), padding="valid", activation="relu", strides=1)(x)
x = layers.Flatten()(x)
# Heavier dropout (twice the base rate) on the flattened convolutional features.
x = layers.Dropout(2 * drop_out_rate)(x)
# The Dense output is already 2-D (batch, units), so no second Flatten is needed.
x = layers.Dense(32, activation="relu")(x)
x = layers.Dropout(drop_out_rate)(x)
output_tensor = layers.Dense(4, activation="softmax")(x)

model_2 = tf.keras.Model(input_tensor, output_tensor)
model_2.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 20, 100, 1)]      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 20, 76, 16)        416       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 20, 62, 32)        7712      
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 20, 48, 32)        15392     
_________________________________________________________________
dropout_2 (Dropout)          (None, 20, 48, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 10, 24, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 9, 23, 32)         4128

In [16]:
# Variant with four convolution + max-pooling stages, followed by a dense head
# that outputs probabilities for 4 classes.
input_tensor = Input(sample_size)
x = layers.Conv2D(16, (3, 3), padding="valid", activation="relu", strides=1)(input_tensor)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)

x = layers.Conv2D(32, (3, 3), padding="valid", activation="relu", strides=1)(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)

# This stage convolves and pools along the second spatial axis only.
x = layers.Conv2D(32, (1, 2), padding="valid", activation="relu", strides=1)(x)
x = layers.MaxPooling2D(pool_size=(1, 2))(x)

x = layers.Conv2D(16, (2, 2), padding="valid", activation="relu", strides=1)(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)

x = layers.Flatten()(x)

# Heavier dropout (twice the base rate) on the flattened convolutional features.
x = layers.Dropout(2 * drop_out_rate)(x)
# The Dense output is already 2-D (batch, units), so no second Flatten is needed.
x = layers.Dense(32, activation="relu")(x)
x = layers.Dropout(drop_out_rate)(x)
output_tensor = layers.Dense(4, activation="softmax")(x)

model_3 = tf.keras.Model(input_tensor, output_tensor)
model_3.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 20, 100, 1)]      0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 18, 98, 16)        160       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 9, 49, 16)         0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 7, 47, 32)         4640      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 3, 23, 32)         0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 3, 22, 32)         2080      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 3, 11, 32)         0   

In [17]:
# Architecture to train; model_1 and model_3 are the alternatives.
model = model_2

# The targets are integer class ids and the network ends in a softmax, so the
# sparse cross-entropy is configured for probabilities rather than logits.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
model.compile(optimizer="adam", loss=loss_fn, metrics=["accuracy"])

In [18]:
def slice_accordingly(input_tensor, labels, size=sample_size):
    """Randomly crop a batch and drop every sample whose crop contains NaN.

    Args:
        input_tensor: Eager float tensor with at least three axes; axis 0 is
            treated as the batch axis and is never cropped.
        labels: Tensor whose axis 0 lines up with the batch axis of
            ``input_tensor``.
        size: Sequence whose first two entries give the crop extent along
            axes 1 and 2. Any further entries are ignored.

    Returns:
        Tuple ``(sliced_tensor, labels)`` with the NaN-containing samples
        removed from both.
    """
    # .numpy() needs eager execution; the result is a writable copy of the shape.
    crop_shape = tf.shape(input_tensor).numpy()
    crop_shape[1], crop_shape[2] = size[0], size[1]

    # A single crop of the whole batch tensor, full extent along axis 0.
    sliced_tensor = tf.image.random_crop(input_tensor, crop_shape)

    # A sample is kept only if no element of its crop is NaN. Building the mask
    # in TensorFlow avoids the np.bool alias that was removed in NumPy 1.24.
    non_batch_axes = list(range(1, len(crop_shape)))
    has_nan = tf.reduce_any(tf.math.is_nan(sliced_tensor), axis=non_batch_axes)
    keep = tf.logical_not(has_nan)

    sliced_tensor = tf.boolean_mask(sliced_tensor, keep, axis=0)
    labels = tf.boolean_mask(labels, keep, axis=0)

    return sliced_tensor, labels

In [19]:
# Manual training loop: every epoch re-shuffles the data, takes a fresh random
# crop of the validation set, and calls fit() once per training batch.
n_epochs = 25
batch_size = 50

for epoch in range(n_epochs):
    print("Epoch", epoch)
    # The validation set is emitted as a single batch holding all n_test items.
    train_ds = train_dataset.shuffle(n_train).batch(batch_size)
    val_ds = validation_dataset.shuffle(n_test).batch(n_test)
    
    # Crop the validation batch and drop its NaN-containing samples.
    x_val, y_val, length_val = next(iter(val_ds))
    x_val, y_val = slice_accordingly(x_val, y_val)
    
    # NOTE(review): `labels` (and `features`) here rebind the notebook-level
    # names, so after this cell `labels` holds the last training batch rather
    # than the full label array. `lengths` and `length_val` are not used here.
    for features, labels, lengths in train_ds:
        features, labels = slice_accordingly(features, labels)
        
        # One fit() call per batch; validation runs at the end of each call
        # and `history` keeps only the most recent call's result.
        history = model.fit(features, labels,
                           validation_data=(x_val, y_val),verbose=1)
        # NOTE(review): `predictions` is not used in the visible cells — confirm
        # whether this extra forward pass per batch is needed.
        predictions = model.predict(features)

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19
Epoch 20
Epoch 21
Epoch 22
Epoch 23
Epoch 24
