# IMPORTS FOR MODEL HANDLING
"Models" here refers to models specifically for genre classification.

In [1]:
# For handling neural networks:
import keras

# For handling NumPy arrays:
import numpy as np

# For plotting:
import matplotlib.pyplot as plt

**Some preliminary issues faced & their solutions**

**ISSUE**: Dependencies for `keras`

I was unable to import `keras` without having `tensorflow` installed.

---

**ISSUE**: Changing certain OS settings to install `tensorflow`

I was unable to install `tensorflow` without referring to the following:

https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=powershell#enable-long-paths-in-windows-10-version-1607-and-later

I followed the PowerShell solution given.

---

**ISSUE**: Procedure entry point not in the dynamic link library

I faced the following error from the Windows OS (in a dialog box):

```
The procedure entry point could not be located in the dynamic link library <DLL path>.
```

NOTE: `<DLL path>` is a placeholder for the actual path.

To solve this, I simply restarted and updated the OS. To verify the integrity of the system files, I ran:

```
sfc /scannow
```

NOTE: The above needs to be run as an administrator in Command Prompt.

This solution was found here:

https://www.drivereasy.com/knowledge/fixed-entry-point-not-found-error-in-windows/

# Model architectures

## 3-second MFCCs input CNN

In [None]:
def get_cnn_3_sec_mfcc(input_shape, n_classes=10, lr=0.0001): # Default of 10 classes = 10 genres
    '''
    Builds and compiles the CNN used for 3-second MFCC inputs.

    Input parameters:
    - `input_shape (tuple)`: Shape of one input sample (passed to `keras.layers.Input`)
    - `n_classes`: Number of units in the softmax output layer
    - `lr`: Learning rate handed to the Adam optimizer

    Return values:
    - `model`: Compiled `keras.Sequential` model
    '''

    # (convolution kernel, pooling window) of each convolutional block, in order:
    block_specs = [
        ((3, 3), (3, 3)),
        ((3, 3), (3, 3)),
        ((2, 2), (2, 2))]

    #------------------------------------
    # ASSEMBLING THE LAYER STACK

    stack = [keras.layers.Input(input_shape)]

    # Every block is convolution -> max-pooling -> batch normalization:
    for kernel_size, pool_size in block_specs:
        stack.append(keras.layers.Conv2D(32, kernel_size, activation='relu'))
        stack.append(keras.layers.MaxPooling2D(pool_size=pool_size, strides=(2, 2), padding='same'))
        stack.append(keras.layers.BatchNormalization())

    # Dropout closes the convolutional part:
    stack.append(keras.layers.Dropout(0.3))

    # Dense head (flatten, hidden layer, softmax output layer):
    stack.append(keras.layers.Flatten())
    stack.append(keras.layers.Dense(64, activation='relu'))
    stack.append(keras.layers.Dense(n_classes, activation='softmax'))

    model = keras.Sequential(stack)

    #------------------------------------
    # COMPILING WITH ADAM + CATEGORICAL CROSS-ENTROPY

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss=keras.losses.CategoricalCrossentropy(),
        metrics=['accuracy'])

    return model

## 5-second MFCCs input CNN

In [None]:
def get_cnn_5_sec_mfcc(input_shape, n_classes=10, lr=0.0001): # Default of 10 classes = 10 genres
    '''
    Builds and compiles the CNN used for 5-second MFCC inputs.

    Input parameters:
    - `input_shape (tuple)`: Shape of one input sample (passed to `keras.layers.Input`)
    - `n_classes`: Number of units in the softmax output layer
    - `lr`: Learning rate handed to the Adam optimizer

    Return values:
    - `model`: Compiled `keras.Sequential` model
    '''

    # (convolution kernel, pooling window) of each convolutional block, in order:
    block_specs = [
        ((3, 3), (3, 3)),
        ((3, 3), (3, 3)),
        ((2, 2), (2, 2)),
        ((2, 2), (2, 2)),
        ((2, 2), (2, 2))]

    #------------------------------------
    # ASSEMBLING THE LAYER STACK

    # Input, identity layer, then batch normalization ahead of the convolutional blocks:
    stack = [
        keras.layers.Input(input_shape),
        keras.layers.Identity(),
        keras.layers.BatchNormalization()]

    # Every block is convolution -> max-pooling (no per-block normalization here):
    for kernel_size, pool_size in block_specs:
        stack.append(keras.layers.Conv2D(32, kernel_size, activation='relu'))
        stack.append(keras.layers.MaxPooling2D(pool_size=pool_size, strides=(2, 2), padding='same'))

    # Single normalization + dropout after the last block
    # (dropout previously sat after the 1st dense layer with a rate of 0.5):
    stack.append(keras.layers.BatchNormalization())
    stack.append(keras.layers.Dropout(0.3))

    # Dense head (flatten, hidden layer, softmax output layer):
    stack.append(keras.layers.Flatten())
    stack.append(keras.layers.Dense(64, activation='relu'))
    stack.append(keras.layers.Dense(n_classes, activation='softmax'))

    model = keras.Sequential(stack)

    #------------------------------------
    # COMPILING WITH ADAM + CATEGORICAL CROSS-ENTROPY

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss=keras.losses.CategoricalCrossentropy(),
        metrics=['accuracy'])

    return model

## 3-second segment melspectrogram input CNN

In [None]:
def get_cnn_3_sec_melspectrogram(input_shape, n_classes=10, lr=0.0001): # Default of 10 classes = 10 genres
    '''
    Builds and compiles the CNN used for 3-second melspectrogram inputs.

    Input parameters:
    - `input_shape (tuple)`: Shape of one input sample (passed to `keras.layers.Input`)
    - `n_classes`: Number of units in the softmax output layer
    - `lr`: Learning rate handed to the Adam optimizer

    Return values:
    - `model`: Compiled `keras.Sequential` model
    '''

    # (convolution kernel, pooling window) of each convolutional block, in order:
    block_specs = [
        ((3, 3), (3, 3)),
        ((3, 3), (3, 3)),
        ((2, 2), (2, 2)),
        ((2, 2), (2, 2))]

    #------------------------------------
    # ASSEMBLING THE LAYER STACK

    # Input followed directly by batch normalization:
    stack = [
        keras.layers.Input(input_shape),
        keras.layers.BatchNormalization()]

    # Every block is convolution -> max-pooling:
    for kernel_size, pool_size in block_specs:
        stack.append(keras.layers.Conv2D(32, kernel_size, activation='relu'))
        stack.append(keras.layers.MaxPooling2D(pool_size=pool_size, strides=(2, 2), padding='same'))

    # Single normalization after the last block:
    stack.append(keras.layers.BatchNormalization())

    # Dense head; in this model the dropout sits between the two dense layers:
    stack.append(keras.layers.Flatten())
    stack.append(keras.layers.Dense(128, activation='relu'))
    stack.append(keras.layers.Dropout(0.5))
    stack.append(keras.layers.Dense(n_classes, activation='softmax'))

    model = keras.Sequential(stack)

    #------------------------------------
    # COMPILING WITH ADAM + CATEGORICAL CROSS-ENTROPY

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss=keras.losses.CategoricalCrossentropy(),
        metrics=['accuracy'])

    return model

## 5-second segment melspectrogram input CNN

In [None]:
def get_cnn_5_sec_melspectrogram(input_shape, n_classes=10, lr=0.0001): # Default of 10 classes = 10 genres
    '''
    Builds and compiles the CNN used for 5-second melspectrogram inputs.

    Input parameters:
    - `input_shape (tuple)`: Shape of one input sample (passed to `keras.layers.Input`)
    - `n_classes`: Number of units in the softmax output layer
    - `lr`: Learning rate handed to the Adam optimizer

    Return values:
    - `model`: Compiled `keras.Sequential` model
    '''

    # Pooling strides of the 8 convolutional blocks, in order
    # (the first six use a stride of 2, the last two a stride of 1):
    pool_strides = [(2, 2)] * 6 + [(1, 1)] * 2

    #------------------------------------
    # ASSEMBLING THE LAYER STACK

    # Input, identity layer, then batch normalization ahead of the convolutional blocks:
    stack = [
        keras.layers.Input(input_shape),
        keras.layers.Identity(),
        keras.layers.BatchNormalization()]

    # Every block is a 2x2 convolution -> 2x2 max-pooling; only the pooling stride varies:
    for strides in pool_strides:
        stack.append(keras.layers.Conv2D(32, (2, 2), activation='relu'))
        stack.append(keras.layers.MaxPooling2D(pool_size=(2, 2), strides=strides, padding='same'))

    # Single normalization + dropout after the last block:
    stack.append(keras.layers.BatchNormalization())
    stack.append(keras.layers.Dropout(0.3))

    # Dense head (flatten, hidden layer, softmax output layer):
    stack.append(keras.layers.Flatten())
    stack.append(keras.layers.Dense(64, activation='relu'))
    stack.append(keras.layers.Dense(n_classes, activation='softmax'))

    model = keras.Sequential(stack)

    #------------------------------------
    # COMPILING WITH ADAM + CATEGORICAL CROSS-ENTROPY

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss=keras.losses.CategoricalCrossentropy(),
        metrics=['accuracy'])

    return model

# Saving and loading models
**_Specifically model weights_**

In [None]:
def save_model(model, file_name):
    '''
    Writes the weights of `model` to `file_name` using `np.save`.

    Input parameters:
    - `model`: Object exposing `get_weights()` (e.g. a Keras model)
    - `file_name`: Destination handed to `np.save`

    The weights are stored as one dictionary mapping each weight array's
    position in `model.get_weights()` to the array itself.
    '''
    indexed_weights = dict(enumerate(model.get_weights()))
    np.save(file_name, indexed_weights)

def load_model(model, file_name):
    '''
    Loads weights written by `save_model` from `file_name` into `model` (in place).

    Input parameters:
    - `model`: Object exposing `set_weights(list_of_arrays)` (e.g. a Keras model);
      its architecture is expected to match the one the weights were saved from
    - `file_name`: Path of the weights file. If nothing exists at this exact path
      and it does not already end in `.npy`, `file_name + '.npy'` is tried as well,
      because `np.save` appends that extension when it is missing. This lets the
      same `file_name` be used for both `save_model` and `load_model`.

    Raises:
    - `FileNotFoundError`: If neither candidate path exists
    '''
    # SECURITY NOTE: `allow_pickle=True` unpickles the file contents, which can run
    # arbitrary code. Only load weight files from a trusted source.
    try:
        stored = np.load(file_name, allow_pickle=True)
    except FileNotFoundError:
        if str(file_name).endswith('.npy'):
            raise
        stored = np.load(f'{file_name}.npy', allow_pickle=True)

    # The file holds a single dictionary {position: weight array}:
    weights_by_index = stored.tolist()

    # Rebuild the list in positional order, as expected by `set_weights`:
    model.set_weights([weights_by_index[i] for i in range(len(weights_by_index))])

# Reading and displaying model training logs and history
(Logs made using `keras.callbacks.CSVLogger`)

In [None]:
def read_logs(logs_file_path):
    '''
    Reads a comma-separated training log into a dictionary of per-column arrays.

    Input parameters:
    - `logs_file_path`: Path of the log file; the 1st line holds the column
      names and every following non-blank line holds one row of numeric values

    Return values:
    - `history (dict)`: Maps each column name to a NumPy array with that
      column's values, one entry per logged row. A log with a header but no
      rows gives empty arrays; a completely empty file gives an empty dictionary.

    Raises:
    - `ValueError`: If a value in a data row cannot be converted to `float`
    '''
    # `with` guarantees the file is closed even if parsing fails below:
    with open(logs_file_path, 'r') as f:
        lines = f.read().splitlines()

    # Nothing to parse at all:
    if not lines:
        return {}

    # 1st row is the header; the rest are data rows:
    headers = lines[0].split(',')

    # Each non-blank row becomes a list of floats (blank lines are skipped):
    rows = [
        [float(value) for value in line.split(',')]
        for line in lines[1:]
        if line.strip()]

    # Keep the array 2-D even without rows, so column slicing below still works:
    if rows:
        logs = np.array(rows)
    else:
        logs = np.empty((0, len(headers)))

    # Storing logs as a dictionary for convenience:
    return {header: logs[:, i] for i, header in enumerate(headers)}

In [None]:
# Plots training vs. validation curves from a training history:
def plot_model_history(history):
    '''
    Shows two figures, one after the other: accuracy across epochs, then loss across epochs.

    Input parameters:
    - `history`: Mapping with the keys `'accuracy'`, `'val_accuracy'`, `'loss'`
      and `'val_loss'`, each holding one value per epoch
    '''
    # (training key, validation key, figure title, y-axis label) per figure:
    figures = (
        ('accuracy', 'val_accuracy', 'Model accuracy', 'accuracy'),
        ('loss', 'val_loss', 'Model loss', 'loss'))

    for training_key, validation_key, title, y_label in figures:
        # Training curve first, validation curve second (matches the legend order):
        plt.plot(history[training_key])
        plt.plot(history[validation_key])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('epoch')
        plt.legend(['training', 'validation'])
        plt.show()

# Predicting from trained model
**NOTE**: We assume the input is one or more MFCC arrays or melspectrograms for one or more appropriately-sized segments.

### Important constants and variables

- List of all genres
- Genre-to-integer maps and inverse maps

In [None]:
# All genres; a genre's position in this list is its integer label:
genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

# Genre-to-integer:
genre_to_integer_map = {name: index for index, name in enumerate(genres)}

# Integer-to-genre (inverse of the map above):
integer_to_genre_map = dict(enumerate(genres))

### Prediction processing functions

In [None]:
# Per-segment predictions, returned as a list of genre names:
def get_predictions_per_segment(model, melspectrograms):
    '''
    Predicts one genre name per input segment.

    Input parameters:
    - `model`: Callable that returns one row of class scores per segment
    - `melspectrograms`: Input for one or more segments; a 2-dimensional input
      is treated as a single segment

    Return values:
    - `predictions (list)`: Genre name predicted for each segment, looked up in
      `integer_to_genre_map`
    '''
    # A single segment gets a leading axis so that it forms a batch of one:
    is_single_segment = len(melspectrograms.shape) == 2
    batch = np.expand_dims(melspectrograms, axis=0) if is_single_segment else melspectrograms

    # Position of the highest score in each row = integer label of the predicted class:
    scores = model(batch)
    class_indices = np.argmax(scores, axis=1)

    # Translating integer labels into genre names:
    return [integer_to_genre_map[index] for index in class_indices]

#================================================
# Function to get the proportion of each predicted class from the overall predictions:
def get_predictions_overall(model, melspectrograms):
    '''
    Computes which share of the per-segment predictions falls on each genre.

    Input parameters:
    - `model`, `melspectrograms`: Forwarded unchanged to `get_predictions_per_segment`

    Return values:
    - `predictions_tracker (dict)`: Maps each predicted genre name to its
      proportion of all predictions; genres that were never predicted are absent,
      and no predictions at all gives an empty dictionary
    '''
    predictions = get_predictions_per_segment(model, melspectrograms)

    # Getting counts for each prediction
    # (`dict.get` supplies 0 for a first occurrence, so no exception handling is needed):
    counts = {}
    for genre in predictions:
        counts[genre] = counts.get(genre, 0) + 1

    # Getting as proportions:
    total = float(len(predictions))
    return {genre: count / total for genre, count in counts.items()}

#================================================
# Prints the output of `get_predictions_overall` and optionally hands it back:
def get_predictions_summary(model, melspectrograms, return_value=True):
    '''
    Prints one line per predicted genre with its proportion of all predictions.

    Input parameters:
    - `model`, `melspectrograms`: Forwarded unchanged to `get_predictions_overall`
    - `return_value (bool)`: Whether to also return the proportions

    Return values:
    - The dictionary from `get_predictions_overall` if `return_value` is truthy,
      otherwise `None`
    '''
    proportions = get_predictions_overall(model, melspectrograms)

    for key, share in proportions.items():
        print(f'Proportion of total predictions for class "{key}" = {share}')

    return proportions if return_value else None