# Music Genre Classifier


In [33]:
%pip install wget
import os
import cv2
import wget
import random
import shutil
import tarfile
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from pandas import DataFrame
from keras import Sequential
from sklearn.model_selection import train_test_split, KFold
from keras.layers import Conv2D, UpSampling2D, MaxPooling2D, Input, Cropping2D, Cropping3D, Flatten, Dense, Reshape

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [34]:
SEED_VALUE = 42

# Fix seed to make training deterministic.
random.seed(SEED_VALUE)
np.random.seed(SEED_VALUE)
tf.random.set_seed(SEED_VALUE)

## Load the GTZAN Dataset


In [35]:
if os.getenv('COLAB_RELEASE_TAG'):
    from google.colab import drive 
    drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [36]:
PREPROCESSING = False

try:
    if 'dataset' not in os.listdir('/content'):
        os.mkdir('/content/dataset/')
    pwd = os.getcwd()
    os.chdir('/content/dataset/')

    if 'preprocessing.tar.gz' not in os.listdir('.'):
        if os.getenv('COLAB_RELEASE_TAG'):
            if 'preprocessing.tar.gz' in os.listdir('/content/gdrive/MyDrive'):
                shutil.copy2('/content/gdrive/MyDrive/preprocessing.tar.gz', '.')
            else:
                # make sure to download the GTZAN dataset from "https://drive.google.com/file/d/1UdmqcrBw71EgOtCLy6ic_C6EDBz9KQt_/view?usp=share_link"
                # upload it to your own google drive for it to be copied in the previous if statement block
                pass
        else:
            if pwd != '/content/dataset':
                if 'preprocessing.tar.gz' in os.listdir(f'{pwd}/dataset'):
                    shutil.copy2(f'{pwd}/dataset/preprocessing.tar.gz', '.')
                else:
                    raise Exception("Download the GTZAN dataset preprocessed.")

        tar = tarfile.open('preprocessing.tar.gz', 'r:gz')
        tar.extractall()
        tar.close()

finally:
    GENRES = os.listdir('/content/dataset/preprocessing/mfcc')


## Dataset


In [37]:
def load_data(src: str, feature: str, 
              random_state: float = SEED_VALUE, shuffle: bool = True, 
              stratify: list = None):
    dataset = []
    for genre in os.listdir(f'{src}/{feature}'):
        for img in os.listdir(f'{src}/{feature}/{genre}'):
            img = cv2.imread(f'{src}/{feature}/{genre}/{img}')
            # img = cv2.resize(img, (256, 192))
            img = np.array(img, dtype=np.float32)
            dataset.append([img, genre])

    df = DataFrame(data=np.array(dataset, dtype=object), columns=[feature, 'genre'])

    one_hot = pd.get_dummies(df['genre'])

    df = pd.concat([df, one_hot], axis=1)
    df.drop(['genre'], axis=1, inplace=True)
    
    strat = df[stratify] if stratify else None
    train_set, test_set = train_test_split(df, test_size=0.5, random_state=random_state, 
                     shuffle=shuffle, stratify=strat)

    return (np.array([tf.convert_to_tensor(img) for img in train_set['mfcc']]), train_set[GENRES])\
          , (np.array([tf.convert_to_tensor(img) for img in train_set['mfcc']]), test_set[GENRES])

In [38]:
(X_train, _), (X_test, _) = load_data('./preprocessing/', 'mfcc', stratify=GENRES)

# for cross validation purposes it is encouraged to concatenate all input/output data, in this case, only input
X_train = np.concatenate((X_train, X_test), axis=0)
# this is just to normalize the values between 0 and 1
X_train = X_train / 255

## CNN Autoencoder


In [53]:
def generate_autoencoder():
    input_l=Input(shape=(217,334,3))

    maxp_ini=MaxPooling2D((4,6), padding='same')(input_l)
    encoding_1=Conv2D(12, (3,3), activation='relu',padding='same')(maxp_ini)

    maxp_1=MaxPooling2D((2,2), padding='same')(encoding_1)
    encoding_2=Conv2D(6, (3,3), activation='relu',padding='same')(maxp_1)

    maxp_2=MaxPooling2D((2,2), padding='same')(encoding_2)
    encoding_3=Conv2D(3, (3,3), activation='relu',padding='same')(maxp_2)

    bottleneck=MaxPooling2D((2,2), padding='same')(encoding_3)

    decoding_2=Conv2D(3, (3,3), activation='relu', padding='same')(bottleneck)
    Up_2=UpSampling2D((2,2))(decoding_2)

    decoding_3=Conv2D(6, (3,3), activation='relu', padding='same')(Up_2)
    Up_3=UpSampling2D((2,2))(decoding_3)

    decoding_4=Conv2D(12, (3,3), activation='relu', padding='same')(Up_3)
    Up_4=UpSampling2D((2,2))(decoding_4)
    crop_1=Cropping2D(((0,1), (0,0)))(Up_4)

    decoding_5= Conv2D(3,(3,3),activation='sigmoid',padding='same')(crop_1)
    up_out=UpSampling2D((4,6))(decoding_5)
    output_l=Cropping2D(((0,3), (0,2)))(up_out)

    autoencoder=Model(inputs=[input_l],outputs=[output_l])

    autoencoder.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])

    return autoencoder

In [54]:
generate_autoencoder().summary()

Model: "model_28"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_32 (InputLayer)       [(None, 217, 334, 3)]     0         
                                                                 
 max_pooling2d_16 (MaxPoolin  (None, 55, 56, 3)        0         
 g2D)                                                            
                                                                 
 conv2d_28 (Conv2D)          (None, 55, 56, 12)        336       
                                                                 
 max_pooling2d_17 (MaxPoolin  (None, 28, 28, 12)       0         
 g2D)                                                            
                                                                 
 conv2d_29 (Conv2D)          (None, 28, 28, 6)         654       
                                                                 
 max_pooling2d_18 (MaxPoolin  (None, 14, 14, 6)        0  

In [42]:
model_storage = '/content'

if os.getenv('COLAB_RELEASE_TAG'):
    model_storage = '/content/gdrive/MyDrive'

if 'model_storage' not in os.listdir(model_storage):
    os.mkdir(f'{model_storage}/model_storage')

## Training and Cross Validating

In [None]:
history = []

autoencoder = generate_autoencoder()
last_iter = 0

if len(os.listdir(f'{model_storage}/model_storage')) > 0:
    last_iter = sorted([int(autoencoder.removeprefix(f'autoencoder_model_').removesuffix('.keras'))
        for autoencoder in os.listdir(f'{model_storage}/model_storage') if autoencoder[0]=='a'], reverse=True)[0]
    autoencoder = tf.keras.saving.load_model(f'{model_storage}/model_storage/autoencoder_model_{last_iter}.keras')
    last_iter+=1 #this is for not overwriting the last saved model

for i in range(last_iter, 50):
    history.append(autoencoder.fit(x=X_train, y=X_train, epochs=10))
    autoencoder.save(f'{model_storage}/model_storage/autoencoder_model_{i}.keras')

kfold=KFold(n_splits=10, shuffle=True)
fold_no=1
final_errors_per_fold=[]

for train, test in kfold.split(X_train, X_train):
    last_iter = 0

    autoencoder = generate_autoencoder()

    if f'fold_{fold_no}' not in os.listdir(f'{model_storage}/model_storage'):
        os.mkdir(f'{model_storage}/model_storage/fold_{fold_no}')

    if len(os.listdir(f'{model_storage}/model_storage/fold_{fold_no}')) > 0:
        last_iter = sorted([int(autoencoder.removeprefix(f'autoencoder_model_').removesuffix('.keras'))
            for autoencoder in os.listdir(f'{model_storage}/model_storage/fold_{fold_no}')], reverse=True)[0]
        autoencoder = tf.keras.saving.load_model(f'{model_storage}/model_storage/fold_{fold_no}/autoencoder_model_{last_iter}.keras')
        last_iter+=1 #this is for not overwriting the last saved model

    for i in range(last_iter, 50):
        history.append(autoencoder.fit(x=X_train[train], y=X_train[train], epochs=10))
        autoencoder.save(f'{model_storage}/model_storage/fold_{fold_no}/autoencoder_model_{i}.keras')

    # Generate generalization metrics
    scores = autoencoder.evaluate(X_train[test], X_train[test], verbose=0)
    print(f'Score for fold {fold_no}: {autoencoder.metrics_names[0]} of {scores[0]}')
    final_errors_per_fold.append(scores[0])

    # Increase fold number
    fold_no = fold_no + 1

Score for fold 1: loss of 0.002612202661111951
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10

## Encoder

In [52]:
# if necessary, download this precise autoencoder model from https://drive.google.com/file/d/1oWJtDdYOnT9rVYcnzlTBC3SJEBq41p-H/view?usp=sharing
if 'autoencoder_model_49.keras' not in os.listdir(f'{model_storage}/model_storage'):
    print(f'Not finished optimizing: autoencoder_model_49.keras not found in {model_storage}/model_storage.')
else:
    autoencoder = tf.keras.saving.load_model(f'{model_storage}/model_storage/autoencoder_model_49.keras')
    encoder_input_l = Input(shape=(217,334,3))
    res_1 = autoencoder.layers[1](encoder_input_l)
    res_2 = autoencoder.layers[2](res_1)
    res_3 = autoencoder.layers[3](res_2)
    res_4 = autoencoder.layers[4](res_3)
    res_5 = autoencoder.layers[5](res_4)
    res_6 = autoencoder.layers[6](res_5)
    res_7 = autoencoder.layers[7](res_6)
    # For some reason the first channel of the every encoding returns 0, so we'll take it out
    reshape_l1 = Reshape(target_shape=(7, 7, 3, 1)) (res_7)
    cropping_l = Cropping3D(((0, 0), (0, 0), (1, 0))) (reshape_l1)
    reshape_l2 = Reshape(target_shape=(7, 7, 2, 1)) (cropping_l)
    encoder_output_l = Flatten()(reshape_l2)
    encoder=Model(inputs=[encoder_input_l],outputs=[encoder_output_l])
    # encoder is already saved at https://drive.google.com/file/d/1rijbXEhqughprwFi-N00csI1tRkLr-p8/view?usp=sharing
    encoder.save(f'{model_storage}/model_storage/encoder_model.keras')

    print(encoder(X_train[:1])) # test




tf.Tensor(
[[0.48886928 0.42168018 0.48452303 0.3872243  0.4562108  0.38743228
  0.46328247 0.40348303 0.4656453  0.4006004  0.468429   0.39216167
  0.49295703 0.43239418 0.38917184 0.5591275  0.37219787 0.5076032
  0.36908293 0.49033305 0.35652226 0.4892758  0.34592035 0.494827
  0.34394217 0.49516535 0.34770793 0.50194806 0.44187835 0.63676345
  0.38193563 0.6137222  0.39638764 0.5849194  0.41019332 0.5839056
  0.41821408 0.57456815 0.39890087 0.5881196  0.4075479  0.5838381
  0.48673102 0.7150291  0.47722837 0.68987393 0.4776282  0.6995964
  0.4931775  0.7197758  0.48436156 0.726348   0.466303   0.7223091
  0.4516628  0.7179196  0.5034404  0.76586473 0.52083033 0.7368886
  0.5622438  0.7444346  0.5575884  0.7239367  0.5556451  0.7423751
  0.5439823  0.74527556 0.54464996 0.7154896  0.52632797 0.47846657
  0.4837418  0.37268925 0.51215875 0.35443795 0.5183003  0.3947485
  0.5279131  0.3932616  0.54134476 0.34726214 0.5170207  0.33671856
  0.4031814  0.21641904 0.39854756 0.25519097 0