In [14]:
import keras.callbacks
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.layers import Dropout
%matplotlib inline
import tensorflow as tf
import warnings

warnings.filterwarnings('ignore')
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from keras import layers, models

In [15]:
def prepareData():
    """Load the feature table and build normalised inputs plus one-hot genre labels.

    Reads ``data/balanced_6000.csv`` (three header rows, first column used as the
    index) and ``data/genres_no_zeroes.csv`` (first column used as the index),
    both relative to the current working directory.

    Returns:
        tuple: ``(data_x, genres)``. ``data_x`` is the feature DataFrame with
        integer column labels, standardised column-wise to zero mean and unit
        standard deviation. ``genres`` is a DataFrame holding one one-hot row
        per sample, sharing the index of the label column read from the file.
    """
    allData = pd.read_csv("data/balanced_6000.csv", index_col=0, header=[0, 1, 2])
    genres_index = pd.read_csv("data/genres_no_zeroes.csv", index_col=0)

    # Work on a copy of the label sub-frame so the column edits below never
    # write through to (or warn about) a slice of allData.
    genres = allData["genre_id"].copy()
    print(genres.shape)
    genres.columns = range(genres.shape[1])
    print(genres.columns)

    # Everything except the label columns is a feature; flatten the
    # multi-level header into plain integer column labels.
    data_x = allData.drop(columns=["genre_id"])
    data_x.columns = range(data_x.shape[1])

    # Map each raw genre id (in order of first appearance) onto the
    # corresponding entry of the 'genre_index' column.
    # NOTE(review): this pairing is purely positional and assumes the order of
    # first appearance in the data matches the row order of
    # genres_no_zeroes.csv -- confirm against the data files.
    curr = genres_index['genre_index']
    late = genres[0].unique().tolist()
    update_genres = {late[i]: curr[i] for i in range(len(curr))}

    # Column-wise standardisation. A constant column has sigma == 0 and would
    # become NaN (0 / 0); dividing by 1 instead leaves it at exactly 0.
    # NOTE(review): mu/sigma are computed over the whole table, i.e. before any
    # train/validation/test split performed by the caller.
    mu = np.mean(data_x, axis=0)
    sigma = np.std(data_x, axis=0)
    sigma = sigma.where(sigma != 0, 1.0)
    data_x = (data_x - mu) / sigma

    # Creating the right labels: remap ids, then pick the one-hot row for each.
    genres[0] = genres[0].map(update_genres)
    targets = OneHotEncoder().fit_transform(genres_index).toarray()
    # Mapped ids are treated as 1-based, hence the "- 1" when indexing targets.
    data_y = [targets[i - 1] for i in genres[0]]

    # One column per one-hot position; the width comes from the encoder output
    # instead of a hard-coded 16.
    genres = pd.DataFrame(data_y, columns=range(targets.shape[1]), index=genres.index)
    return data_x, genres

In [16]:
# Fixed seed so the train/validation/test partition is identical on every
# Restart & Run All (train_test_split shuffles randomly otherwise).
SPLIT_SEED = 42

data_x, labels = prepareData()

# 80% train; the remaining 20% is halved into test and validation.
x_train, x_rest, y_train, y_rest = train_test_split(
    data_x, labels, train_size=0.8, random_state=SPLIT_SEED)
x_test, x_valid, y_test, y_valid = train_test_split(
    x_rest, y_rest, train_size=0.5, random_state=SPLIT_SEED)

# One print per statement: "print(a), print(b)" builds a tuple, which the
# notebook then echoes as a stray "(None, None)" cell result.
print(x_train.shape)
print(len(y_train))
print(x_valid.shape)
print(len(y_valid))
print(x_test.shape)
print(len(y_test))

(95998, 1)
RangeIndex(start=0, stop=1, step=1)
(76798, 518)
76798
(9600, 518)
9600
(9600, 518)
9600


(None, None)

In [17]:
# Talia
# Input width and number of classes are taken from the training split so the
# network follows the data instead of hard-coded sizes.
n_features, n_outputs = x_train.shape[1], y_train.shape[1]

# 1-D CNN over the feature vector, treated as a length-n_features sequence
# with a single channel.
CNN = models.Sequential([
    layers.Conv1D(filters=32, kernel_size=3, activation='relu',
                  input_shape=(n_features, 1), padding='same'),
    layers.MaxPooling1D(pool_size=2),
    layers.Conv1D(filters=64, kernel_size=3, activation='relu'),
    layers.Dropout(0.4),
    layers.Conv1D(filters=128, kernel_size=5, activation='relu'),
    layers.MaxPooling1D(pool_size=2),
    layers.Dropout(0.5),  # 0.6
    layers.Flatten(),
    # Fully connected head narrowing down to the class scores.
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    # One softmax unit per label column (previously hard-coded to 16).
    layers.Dense(n_outputs, activation='softmax'),
])


In [18]:
# Print the layer-by-layer output shapes and parameter counts of the network.
CNN.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_3 (Conv1D)           (None, 518, 32)           128       
                                                                 
 max_pooling1d_2 (MaxPooling  (None, 259, 32)          0         
 1D)                                                             
                                                                 
 conv1d_4 (Conv1D)           (None, 257, 64)           6208      
                                                                 
 dropout_2 (Dropout)         (None, 257, 64)           0         
                                                                 
 conv1d_5 (Conv1D)           (None, 253, 128)          41088     
                                                                 
 max_pooling1d_3 (MaxPooling  (None, 126, 128)         0         
 1D)                                                  

In [19]:
# Training configuration: Adam optimiser, categorical cross-entropy loss
# (labels are one-hot rows), and accuracy reported alongside the loss.
compile_options = {
    'optimizer': 'adam',
    'loss': 'categorical_crossentropy',
    'metrics': ['accuracy'],
}
CNN.compile(**compile_options)

In [None]:
# Early stopping must watch a held-out metric: training accuracy keeps rising
# while the model overfits, so monitoring it practically never triggers a stop.
# restore_best_weights rolls the model back to the best validation epoch
# instead of keeping the weights from the last (possibly worse) one.
callback = [keras.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', patience=10,
                                          restore_best_weights=True)]
training = CNN.fit(x_train, y_train, epochs=30, validation_data=(x_valid, y_valid),
                   batch_size=400, callbacks=callback)

Epoch 1/30
Epoch 2/30
Epoch 3/30

In [None]:
# Per-epoch training history as a table, then one figure for the loss curves
# and one for the accuracy curves (training vs. validation in each).
metrics_df = pd.DataFrame(training.history)
loss_columns = ["loss", "val_loss"]
accuracy_columns = ["accuracy", "val_accuracy"]
metrics_df[loss_columns].plot()
metrics_df[accuracy_columns].plot()

In [None]:
# Score the network on the held-out test split. evaluate() returns the loss
# followed by the compiled metric, so position 1 is the accuracy.
accuracy_test = CNN.evaluate(x_test, y_test, verbose=0)[1]
print(accuracy_test)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report

# Class probabilities from the softmax layer, one row per test sample.
ypred = CNN.predict(x_test)

# Softmax scores one class per sample, so the prediction is the arg-max column.
# Thresholding at 0.5 left rows with *no* predicted class whenever the top
# probability was below 0.5, which deflated every metric computed from y_pred.
# The result is kept as a one-hot boolean matrix so it still lines up with the
# one-hot ground truth.
y_pred = np.eye(ypred.shape[1], dtype=bool)[ypred.argmax(axis=1)]

# Ground truth as a boolean indicator matrix.
y = y_test.values
y = (y > 0.5)

In [None]:
# Headline scores on the test split. Precision, recall and F1 are averaged
# across classes weighted by support; the per-class breakdown follows.
weighted_scorers = (
    ('Precision score : ', precision_score),
    ('Recall score : ', recall_score),
    ('F1 score : ', f1_score),
)
print('Accuracy score : ', accuracy_score(y, y_pred))
for heading, scorer in weighted_scorers:
    print(heading, scorer(y, y_pred, average='weighted'))
print(classification_report(y_test, y_pred))

In [None]:
# Validation accuracy per epoch for this network; curves for the other
# networks can be drawn on the same axes for comparison.
fig, ax = plt.subplots(figsize=(5, 2.7), layout='constrained')

# Plot against the recorded epoch index rather than a fixed range(30): early
# stopping can end training before 30 epochs, and a length mismatch between
# x and y makes ax.plot raise.
ax.plot(metrics_df.index, metrics_df["val_accuracy"], label='Talia')
# ax.plot(metrics_df.index, metrics_df["val_accuracy"], label='Lior')
# ax.plot(metrics_df.index, metrics_df["val_accuracy"], label='Netanel')
ax.set_xlabel('iteration')  # Add an x-label to the axes.
ax.set_ylabel('accuracy')  # Add a y-label to the axes.
ax.set_title("Networks Plot")  # Add a title to the axes.
ax.legend();

In [None]:
from keras.utils import plot_model
from IPython.display import Image
# Render the architecture (with tensor shapes) to a PNG and show it inline.
model_diagram_path = 'model.png'
plot_model(CNN, to_file=model_diagram_path, show_shapes=True)
Image(model_diagram_path)