# CNN Exploration

In [1]:
# Imports
import os
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.python.keras import utils
from keras.utils import to_categorical

Using TensorFlow backend.


In [2]:
# Reading in the data
mel_specs = pd.read_csv('../data/genre_mel_specs_clean.csv')

In [3]:
# First 5 rows for reference
mel_specs.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,84472,84473,84474,84475,84476,84477,84478,84479,labels,y
0,-38.71436,-33.474228,-27.310455,-25.299803,-28.430004,-28.678144,-27.830578,-26.89418,-34.463097,-30.501217,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,classical,2
1,-21.302162,-39.085693,-28.659452,-31.36457,-30.419193,-40.327023,-28.70608,-43.529984,-33.345123,-33.197315,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,rock,10
2,-15.267654,-14.026318,-14.920742,-16.21959,-16.906425,-20.542664,-25.68327,-10.716038,-21.445236,-18.547516,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pop,8
3,-31.311068,-36.68953,-42.98152,-38.595932,-35.907497,-39.644302,-43.886433,-42.308525,-35.456673,-33.849125,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,jazz,6
4,-18.864574,-22.681887,-24.525406,-29.33086,-28.273998,-29.35329,-31.51618,-25.65717,-27.893257,-30.826773,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,metal,7


## Data Preprocessing

### Function to Get a Subset of the Genres

In [4]:
def get_genre_subset(data, genre_subset):
    '''
    This function takes in a dataframe and a list of genres and returns a new dataframe only including
    the genres in the given list. Its index is reset and new labels are created so that the labels are 0 
    through one less than the number of genres. 
    '''
    
    # Filtering the dataframe for the subset of the genres and resetting the index
    df = data.loc[data['labels'].isin(genre_subset)]
    df = df.reset_index().drop(columns=['index'])
    
    # Creating a new label dictionary
    new_label_dict = {}
    for i in range(len(genre_subset)):
        new_label_dict[genre_subset[i]] = i
    
    # Changing labels to be the new labels
    df['y'] = df['labels'].map(new_label_dict)

    return df

### Function to Preprocess the Features and Targets

In [5]:
def preprocess_mel_spec_data(data, genre_subset):
    '''
    This function takes in a dataframe of audio files and a list of genres,
    calls the function get_genre_subset to get a dataframe including only the given genres,
    and completes all of the data preprocessing steps needed to run a neural network.
    
    Preprecessing steps include:
    1. Reshaping the mel spectrograms to their original form (128 x 660)
    2. Defining the array of targets
    3. Train test split
    4. Standardizing the data
    5. Reshaping the data to be 128 x 660 x 1, where the 1 represents a single color channel
    6. One-hot-encoding target data
    
    Parameters:
    data (DataFrame): a dataframe of audio files, flattened mel spectrograms, and genre labels
    genre_subset (list): a list of genres included in the dataframe
    
    Returns:
    X_train (array): training set of features
    X_test (array): testing set of features
    y_train (array): training set of targets
    y_test (array): testing set of targets
    '''
    
    # Getting a subset of the genres using our genre_subset function
    subset = get_genre_subset(data, genre_subset)
    
    # Dropping label columns to prepare our feature vector
    specs = subset.drop(columns=['labels', 'y'])
    
    # Reshaping the arrays to their original "image" form
    X = []
    for i in range(len(genre_subset)*100):
        X.append(np.array(specs.iloc[i]).reshape(128,660))
        
    # Converting list X to an array
    X = np.array(X)
    
    # Defining our targets
    y = subset.loc[subset['labels'].isin(genre_subset), 'y'].values
    
    # train test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y, test_size=.2)
    
    # Scaling our data to be between 0 and 1
    X_train /= -80
    X_test /= -80
    
    # Reshaping images to be 128 x 660 x 1
    X_train = X_train.reshape(X_train.shape[0], 128, 660, 1)
    X_test = X_test.reshape(X_test.shape[0], 128, 660, 1)
    
    # One hot encoding our labels
    y_train = to_categorical(y_train, len(genre_subset))
    y_test = to_categorical(y_test, len(genre_subset))
    
    return X_train, X_test, y_train, y_test

In [6]:
# List of all the genres
genre_list = {
    'classical': 0,
    'hiphop': 1,
    'jazz': 2,
    'metal': 3,
    'pop': 4,
    'rock': 5
}

In [7]:
# List of a subset of the genres
genre_subset = [
    'hiphop',
    'jazz',
    'metal',
    'pop'
]

In [8]:
# Using our function to get our features and targets
X_train, X_test, y_train, y_test = preprocess_mel_spec_data(mel_specs, genre_subset)

## CNN Model for Subset of Genres

In [9]:
np.random.seed(23456)

# Initiating an empty neural network
cnn_model = Sequential(name='cnn_1')

# Adding convolutional layer
cnn_model.add(Conv2D(filters=16,
                     kernel_size=(3,3),
                     activation='relu',
                     input_shape=(128,660,1)))

# Adding max pooling layer
cnn_model.add(MaxPooling2D(pool_size=(2,4)))

# Adding convolutional layer
cnn_model.add(Conv2D(filters=32,
                     kernel_size=(3,3),
                     activation='relu'))

# Adding max pooling layer
cnn_model.add(MaxPooling2D(pool_size=(2,4)))

# Adding a flattened layer to input our image data
cnn_model.add(Flatten())

# Adding a dense layer with 64 neurons
cnn_model.add(Dense(64, activation='relu'))

# Adding a dropout layer for regularization
cnn_model.add(Dropout(0.25))

# Adding an output layer
cnn_model.add(Dense(7, activation='softmax'))

# Compiling our neural network
cnn_model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

# Fitting our neural network
history = cnn_model.fit(X_train,
                        y_train, 
                        batch_size=16,
                        validation_data=(X_test, y_test),
                        epochs=15)

ValueError: A target array with shape (320, 4) was passed for an output of shape (None, 7) while using as loss `categorical_crossentropy`. This loss expects targets to have the same shape as the output.

In [None]:
# Checking the model summary
cnn_model.summary()

In [None]:
# The code in this cell was adapted from a lecture at General Assembly

# Check out our train loss and test loss over epochs.
train_loss = history.history['loss']
test_loss = history.history['val_loss']

# Set figure size.
plt.figure(figsize=(12, 8))

# Generate line plot of training, testing loss over epochs.
plt.plot(train_loss, label='Training Loss', color='blue')
plt.plot(test_loss, label='Testing Loss', color='red')

# Set title
plt.title('Training and Testing Loss by Epoch', fontsize = 25)
plt.xlabel('Epoch', fontsize = 18)
plt.ylabel('Categorical Crossentropy', fontsize = 18)
plt.xticks(range(1,11), range(1,11))

plt.legend(fontsize = 18);

In [None]:
# Making predictions from the cnn model
predictions = cnn_model.predict(X_test, verbose=1)

### Confusion Matrix

In [None]:
# Calculating the confusion matrix 
# row: actual
# columns: predicted
conf_matrix = confusion_matrix(np.argmax(y_test, 1), np.argmax(predictions, 1))
conf_matrix

In [None]:
# Creating a dataframe of the confusion matrix with labels for readability 
confusion_df = pd.DataFrame(conf_matrix)
confusion_df

In [None]:
# List of a subset of the genres
genre_subset = [
    0:'hiphop',
    1:'jazz',
    2:'meta',
    3:'pop'
]


In [None]:
# Renaming rows and columns with labes
confusion_df = confusion_df.rename(columns=genre_labels)
confusion_df.index = confusion_df.columns
confusion_df

## CNN Model for Binary Classification of Genres

In [None]:
# List of a subset of the genres
genre_subset_2 = [
    'metal',
    'classical'
]

In [None]:
# Using our function to get our features and targets
X_train, X_test, y_train, y_test = preprocess_mel_spec_data(mel_specs, genre_subset_2)

In [None]:
np.random.seed(23456)

# Initiating an empty neural network
cnn_model_2 = Sequential(name='cnn_2')

# Adding convolutional layer
cnn_model_2.add(Conv2D(filters=16,
                     kernel_size=(3,3),
                     activation='relu',
                     input_shape=(128,660,1)))

# Adding max pooling layer
cnn_model_2.add(MaxPooling2D(pool_size=(2,4)))

# Adding convolutional layer
cnn_model_2.add(Conv2D(filters=32,
                     kernel_size=(3,3),
                     activation='relu'))

# Adding max pooling layer
cnn_model_2.add(MaxPooling2D(pool_size=(2,4)))

# Adding a flattened layer to input our image data
cnn_model_2.add(Flatten())

# Adding a dense layer with 64 neurons
cnn_model_2.add(Dense(64, activation='relu'))

# Adding a dropout layer for regularization
cnn_model_2.add(Dropout(0.25))

# Adding an output layer
cnn_model_2.add(Dense(2, activation='softmax'))

# Compiling our neural network
cnn_model_2.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

# Fitting our neural network
history = cnn_model_2.fit(X_train,
                        y_train, 
                        batch_size=16,
                        validation_data=(X_test, y_test),
                        epochs=15)

In [None]:
# Checking the model summary
cnn_model_2.summary()

In [None]:
# The code in this cell was adapted from a lecture at General Assembly

# Check out our train loss and test loss over epochs.
train_loss = history.history['loss']
test_loss = history.history['val_loss']

# Set figure size.
plt.figure(figsize=(12, 8))

# Generate line plot of training, testing loss over epochs.
plt.plot(train_loss, label='Training Loss', color='blue')
plt.plot(test_loss, label='Testing Loss', color='red')

# Set title
plt.title('Training and Testing Loss by Epoch', fontsize = 25)
plt.xlabel('Epoch', fontsize = 18)
plt.ylabel('Categorical Crossentropy', fontsize = 18)
plt.xticks(range(1,11), range(1,11))

plt.legend(fontsize = 18);

In [None]:
# Making predictions from the cnn model
predictions_2 = cnn_model_2.predict(X_test, verbose=1)

### Confusion Matrix

In [None]:
# Calculating the confusion matrix 
# row: actual
# columns: predicted
conf_matrix_2 = confusion_matrix(np.argmax(y_test, 1), np.argmax(predictions_2, 1))
conf_matrix_2

In [None]:
# Creating a dataframe of the confusion matrix with labels for readability 
confusion_df_2 = pd.DataFrame(conf_matrix_2)

In [None]:
# List of a subset of the genres
genre_labels_2 = {
    0:'metal',
    1:'classical'
}

In [None]:
# Renaming rows and columns with labes
confusion_df_2 = confusion_df_2.rename(columns=genre_labels_2)
confusion_df_2.index = confusion_df_2.columns
confusion_df_2