# Music Genre Classifier


In [2]:
%pip install wget
import os
import cv2
import wget
import random
import shutil
import tarfile
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from pandas import DataFrame
from keras import Sequential
from sklearn.model_selection import train_test_split, KFold
from keras.layers import Conv2D, UpSampling2D, MaxPooling2D, Input, Cropping2D, Flatten, Dense, Reshape

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
SEED_VALUE = 42

# Seed Python's `random`, NumPy and TensorFlow (in that order) with the same
# value so that repeated runs of the notebook draw the same random numbers.
for _seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_rng(SEED_VALUE)

## Load the GTZAN Dataset


In [4]:
# COLAB_RELEASE_TAG is used throughout this notebook as the signal that we are
# running on Google Colab; only then is Google Drive mounted.
if os.environ.get('COLAB_RELEASE_TAG'):
    # Imported lazily: the import is only attempted when the Colab check passes.
    from google.colab import drive
    drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [5]:
PREPROCESSING = False

ARCHIVE_NAME = 'preprocessing.tar.gz'
DATASET_DIR = '/content/dataset'
MFCC_DIR = f'{DATASET_DIR}/preprocessing/mfcc'

# Fetch the preprocessed GTZAN archive into DATASET_DIR (from Google Drive on
# Colab, otherwise from "<starting directory>/dataset"), unpack it, and record
# the genre sub-directories in GENRES.
try:
    # exist_ok avoids the listdir()+mkdir() race and tolerates re-running the cell.
    os.makedirs(DATASET_DIR, exist_ok=True)
    pwd = os.getcwd()
    # NOTE: the working directory is deliberately left at DATASET_DIR; later
    # cells load data through the relative path './preprocessing/'.
    os.chdir(DATASET_DIR)

    archive_was_missing = not os.path.isfile(ARCHIVE_NAME)
    if archive_was_missing:
        if os.getenv('COLAB_RELEASE_TAG'):
            drive_archive = f'/content/gdrive/MyDrive/{ARCHIVE_NAME}'
            if not os.path.isfile(drive_archive):
                # Fail here with an actionable message instead of letting
                # tarfile.open() raise an unexplained FileNotFoundError below.
                raise FileNotFoundError(
                    f'{drive_archive} not found. Download the GTZAN dataset from '
                    '"https://drive.google.com/file/d/1UdmqcrBw71EgOtCLy6ic_C6EDBz9KQt_/view?usp=share_link" '
                    'and upload it to the root of your own Google Drive.'
                )
            shutil.copy2(drive_archive, '.')
        else:
            local_archive = f'{pwd}/dataset/{ARCHIVE_NAME}'
            if pwd == DATASET_DIR or not os.path.isfile(local_archive):
                raise FileNotFoundError(
                    "Download the GTZAN dataset preprocessed."
                    f" Expected the archive at {local_archive}."
                )
            shutil.copy2(local_archive, '.')

    # Unpack when the archive was just fetched, and also when an earlier run
    # copied the archive but never finished extracting it.
    if archive_was_missing or not os.path.isdir(MFCC_DIR):
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; only use archives obtained from a trusted source.
        with tarfile.open(ARCHIVE_NAME, 'r:gz') as tar:
            tar.extractall()

finally:
    # GENRES is bound whenever the extracted data is present, even if a step
    # above failed. The listing is skipped when the directory is missing so a
    # second FileNotFoundError does not mask the original exception.
    if os.path.isdir(MFCC_DIR):
        GENRES = os.listdir(MFCC_DIR)

# Reached only when nothing above raised: the archive unpacked without error
# but did not contain the expected layout.
if not os.path.isdir(MFCC_DIR):
    raise FileNotFoundError(f'Expected extracted features at {MFCC_DIR}')


## Dataset


In [6]:
def load_data(src: str, feature: str,
              random_state: int = SEED_VALUE, shuffle: bool = True,
              stratify: list = None, test_size: float = 0.5):
    """Load the images of one feature type and split them into train/test sets.

    Images are read from ``{src}/{feature}/{genre}/`` where every
    sub-directory name is used as the genre label.

    Args:
        src: Root directory of the preprocessed dataset.
        feature: Name of the feature sub-directory to read (e.g. ``'mfcc'``).
        random_state: Seed forwarded to ``train_test_split``.
        shuffle: Whether ``train_test_split`` shuffles before splitting.
        stratify: Names of one-hot label columns to stratify on, or ``None``
            for an unstratified split.
        test_size: Fraction of samples placed in the test split.

    Returns:
        ``((train_images, train_labels), (test_images, test_labels))`` where
        the image entries are float32 NumPy arrays with the samples stacked
        along axis 0 and the label entries are one-hot ``DataFrame`` slices
        with one column per genre directory.

    Raises:
        ValueError: If a file cannot be decoded as an image or no images are
            found under ``{src}/{feature}``.
    """
    feature_dir = os.path.join(src, feature)
    genres = os.listdir(feature_dir)

    images, labels = [], []
    for genre in genres:
        genre_dir = os.path.join(feature_dir, genre)
        for file_name in os.listdir(genre_dir):
            path = os.path.join(genre_dir, file_name)
            img = cv2.imread(path)
            # cv2.imread signals failure by returning None instead of raising.
            if img is None:
                raise ValueError(f'Could not read image: {path}')
            images.append(np.asarray(img, dtype=np.float32))
            labels.append(genre)

    if not images:
        raise ValueError(f'No images found under {feature_dir}')

    one_hot = pd.get_dummies(pd.Series(labels, name='genre'))
    # Keep the directory-listing order for the label columns, derived from the
    # directory actually read rather than from a notebook-level global.
    label_columns = [genre for genre in genres if genre in one_hot.columns]

    # Split row positions rather than the images themselves, so that images and
    # labels are guaranteed to be sliced consistently.
    positions = np.arange(len(images))
    strat = one_hot[stratify] if stratify else None
    train_idx, test_idx = train_test_split(positions, test_size=test_size,
                                           random_state=random_state,
                                           shuffle=shuffle, stratify=strat)

    stacked = np.stack(images)
    labels_df = one_hot[label_columns]

    # The test images come from the test positions; the original built both
    # image arrays from the training split.
    return (stacked[train_idx], labels_df.iloc[train_idx]), \
           (stacked[test_idx], labels_df.iloc[test_idx])

In [7]:
# Load the MFCC images, stratified on the genre columns; the label frames are
# discarded here. The relative path resolves against the current working
# directory, which the dataset-preparation cell changed to /content/dataset.
(X_train, _), (X_test, _) = load_data('./preprocessing/', 'mfcc', stratify=GENRES)

## CNN Autoencoder


In [8]:
def generate_autoencoder():
    """Build and compile the convolutional autoencoder.

    The network takes ``(217, 334, 3)`` inputs, compresses them through three
    pool/convolution stages plus a final pooling step (the bottleneck), then
    mirrors that with convolution/upsampling stages. Two cropping layers trim
    the padding introduced by ``padding='same'`` pooling so the output has the
    same spatial size as the input.

    Returns:
        A compiled Keras ``Model`` using the Adam optimizer with mean squared
        error as both loss and metric.
    """
    kernel = (3, 3)
    image_in = Input(shape=(217, 334, 3))

    # Encoder: (pool size, conv filters) per stage -> 55x56, 28x28, 14x14.
    features = image_in
    for pool_size, n_filters in (((4, 6), 12), ((2, 2), 6), ((2, 2), 3)):
        features = MaxPooling2D(pool_size, padding='same')(features)
        features = Conv2D(n_filters, kernel, activation='relu', padding='same')(features)

    # Bottleneck: one more 2x2 pooling step.
    features = MaxPooling2D((2, 2), padding='same')(features)

    # Decoder: convolution followed by 2x upsampling, widening the channels.
    for n_filters in (3, 6, 12):
        features = Conv2D(n_filters, kernel, activation='relu', padding='same')(features)
        features = UpSampling2D((2, 2))(features)

    # Drop the extra row so the map matches the first encoder stage again.
    features = Cropping2D(((0, 1), (0, 0)))(features)

    features = Conv2D(3, kernel, activation='sigmoid', padding='same')(features)
    features = UpSampling2D((4, 6))(features)
    # Trim the surplus rows/columns left by the final upsampling.
    image_out = Cropping2D(((0, 3), (0, 2)))(features)

    model = Model(inputs=[image_in], outputs=[image_out])
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])
    return model

In [9]:
# Build a throwaway model just to print its layer-by-layer output shapes and
# parameter counts.
generate_autoencoder().summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 217, 334, 3)]     0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 55, 56, 3)        0         
 )                                                               
                                                                 
 conv2d (Conv2D)             (None, 55, 56, 12)        336       
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 28, 28, 12)       0         
 2D)                                                             
                                                                 
 conv2d_1 (Conv2D)           (None, 28, 28, 6)         654       
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 14, 14, 6)        0     

In [10]:
# Merge both splits into a single training array and divide by 255.
# NOTE: this cell rebinds X_train from its own previous value, so it is not
# idempotent: re-running it appends X_test again and rescales a second time.
X_train = np.concatenate([X_train, X_test]) / 255

In [None]:
N_ROUNDS = 50           # checkpointed training rounds per model
EPOCHS_PER_ROUND = 10   # epochs run between two checkpoints
N_FOLDS = 10


def _latest_checkpoint_index(directory, prefix):
    """Return the highest N among files named ``{prefix}{N}.keras``, or None.

    Files that do not match the pattern exactly (for example checkpoints that
    belong to another model or fold) are ignored instead of breaking parsing.
    """
    suffix = '.keras'
    indices = []
    for name in os.listdir(directory):
        if name.startswith(prefix) and name.endswith(suffix):
            stem = name[len(prefix):-len(suffix)]
            if stem.isdecimal():
                indices.append(int(stem))
    return max(indices, default=None)


def _fit_with_checkpoints(data, directory, prefix,
                          rounds=N_ROUNDS, epochs=EPOCHS_PER_ROUND):
    """Train an autoencoder on ``data``, resuming from the newest checkpoint.

    A checkpoint ``{prefix}{i}.keras`` is written after round ``i`` finishes,
    so resuming continues with round ``i + 1``.

    Returns:
        ``(model, histories)`` with one Keras ``History`` per round run now.
    """
    last_done = _latest_checkpoint_index(directory, prefix)
    if last_done is None:
        model = generate_autoencoder()
        first_round = 0
    else:
        model = tf.keras.saving.load_model(f'{directory}/{prefix}{last_done}.keras')
        first_round = last_done + 1

    histories = []
    for i in range(first_round, rounds):
        histories.append(model.fit(x=data, y=data, epochs=epochs))
        model.save(f'{directory}/{prefix}{i}.keras')
    return model, histories


history = []

model_storage = '/content'

if os.getenv('COLAB_RELEASE_TAG'):
    model_storage = '/content/gdrive/MyDrive'

checkpoint_dir = f'{model_storage}/model_storage'
os.makedirs(checkpoint_dir, exist_ok=True)

# Phase 1: train on the full data set.
autoencoder, round_histories = _fit_with_checkpoints(
    X_train, checkpoint_dir, 'autoencoder_model_')
history.extend(round_histories)

# Phase 2: k-fold cross-validation. random_state makes the folds identical
# across runs, which is required for per-fold checkpoints to stay meaningful
# when training is resumed after a kernel restart.
kfold = KFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED_VALUE)
fold_no = 1
final_errors_per_fold = []

for train, val in kfold.split(X_train):
    # Per-fold prefix: fold checkpoints neither overwrite the full-data
    # checkpoints nor each other, and match the name used when resuming.
    autoencoder, round_histories = _fit_with_checkpoints(
        X_train[train], checkpoint_dir, f'autoencoder_model_{fold_no}_')
    history.extend(round_histories)

    # Generate generalization metrics on the held-out part of the fold only.
    scores = autoencoder.evaluate(X_train[val], X_train[val], verbose=0)
    print(f'Score for fold {fold_no}: {autoencoder.metrics_names[0]} of {scores[0]}')
    final_errors_per_fold.append(scores[0])

    # Increase fold number
    fold_no = fold_no + 1

# NOTE: as in the original cell, `autoencoder` now refers to the model of the
# last fold, not to the model trained on the full data set in phase 1.

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10