# Music Genre Classifier

In [None]:
%pip install wget essentia

In [None]:
import os
import cv2
import wget
import random
import shutil
import tarfile
import numpy as np
import pandas as pd
import tensorflow as tf
from pylab import imshow
from essentia import Pool 
from pandas import DataFrame
from keras import Sequential
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from keras.losses import SparseCategoricalCrossentropy
from keras.layers import Flatten, Dense, Conv2D, MaxPool2D
from essentia.standard import FrameGenerator, MonoLoader, \
    Windowing, Spectrum, MFCC, UnaryOperator

In [None]:
# Single seed shared by every random number generator used in this notebook.
SEED_VALUE = 42

# Seed Python's, NumPy's and TensorFlow's generators with the same value so
# that repeated runs draw the same pseudo-random numbers.
for _seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_rng(SEED_VALUE)

## Load the GTZAN Dataset

In [None]:
# Google Drive can only be mounted on Colab. The dataset cell below already
# keys its Colab-specific behaviour on COLAB_RELEASE_TAG, so the same check is
# used here to keep this cell from failing on import everywhere else.
if os.getenv('COLAB_RELEASE_TAG'):
    from google.colab import drive
    drive.mount('/content/gdrive')

In [None]:
# Switched to True below when ./dataset/preprocessing does not exist yet, i.e.
# when the feature images still have to be generated.
PREPROCESSING = False

# Remember the starting directory so it can always be restored, even when
# creating or entering ./dataset fails part-way through.
_START_DIR = os.getcwd()

try:
    if 'dataset' not in os.listdir('.'):
        os.mkdir('./dataset/')
    os.chdir('./dataset/')

    if 'genres.tar.gz' not in os.listdir('.'):
        if os.getenv('COLAB_RELEASE_TAG'):
            if 'preprocessing.tar.gz' in os.listdir('../gdrive/MyDrive'):
                # Reuse the archive cached on Google Drive.
                # NOTE(review): this path never fetches genres.tar.gz, so the
                # extraction step below only succeeds if the cached archive
                # also provides ./genres - confirm the archive layout.
                shutil.copy2('../gdrive/MyDrive/preprocessing.tar.gz', '.')
                with tarfile.open('preprocessing.tar.gz', 'r:gz') as tar:
                    tar.extractall()
            else:
                # download the GTZAN dataset
                wget.download("https://huggingface.co/datasets/marsyas/gtzan/resolve/main/data/genres.tar.gz")
        else:
            # Outside Colab the archive has to be supplied by hand.
            raise Exception("Download the GTZAN dataset.")

    # extract all dataset
    if 'genres' not in os.listdir('.'):
        with tarfile.open('genres.tar.gz', 'r:gz') as tar:
            tar.extractall()

    if 'preprocessing' not in os.listdir('.'):
        PREPROCESSING = True

    if PREPROCESSING:
        _feature_dirs = ('mfcc', 'mfcc_bands', 'mfcc_bands_log')

        os.mkdir('./preprocessing')
        for _feature in _feature_dirs:
            os.mkdir(f'./preprocessing/{_feature}')

        # remove all unnecessary files
        for genre in os.listdir('./genres'):
            if genre.startswith('.'):
                os.remove(f'./genres/{genre}')
                continue

            # One output folder per genre under every feature type.
            for _feature in _feature_dirs:
                os.mkdir(f'./preprocessing/{_feature}/{genre}')

            for wav in os.listdir(f'./genres/{genre}'):
                if wav.startswith('._'):
                    os.remove(f'./genres/{genre}/{wav}')

        for file in os.listdir('.'):
            if file.startswith('._'):
                os.remove(file)

finally:
    # Always return to the starting directory, then list the genre folders;
    # later cells use GENRES as the label column names.
    os.chdir(_START_DIR)
    GENRES = os.listdir('./dataset/genres/')


## Feature Extraction

In [None]:
# Default size, in inches, of every pyplot figure created after this point;
# extract_mfcc below renders and saves its feature images through pyplot.
plt.rcParams['figure.figsize'] = (16, 9)

def _save_feature_image(matrix, path: str) -> None:
    """Render `matrix` as an axis-free image and write it to `path`."""
    # A fresh figure per image keeps earlier images from piling up on the
    # shared pyplot axes; closing it afterwards releases its memory.
    figure = plt.figure()
    try:
        imshow(matrix, aspect='auto', origin='lower', interpolation='none')
        plt.axis('off')
        plt.savefig(path, bbox_inches='tight', pad_inches=0)
    finally:
        plt.close(figure)


def extract_mfcc(src: str, dst: str, name: str, genre: str) -> None:
    """
        Compute MFCC features for one audio file and save them as images.

        The audio at `src` is cut into frames of 1024 samples with a hop of
        512 samples; every frame is Hann-windowed, turned into a magnitude
        spectrum and passed to MFCC. Three PNG files are written:

            {dst}/mfcc/{genre}/{name}.png            MFCC coefficients
            {dst}/mfcc_bands/{genre}/{name}.png      mel band energies
            {dst}/mfcc_bands_log/{genre}/{name}.png  log of the band energies

        The target folders must already exist.
    """
    # Instantiate the loader and immediately run it to get the samples.
    audio = MonoLoader(filename=src)()

    w = Windowing(type='hann')
    spectrum = Spectrum()  # FFT() would return the complex FFT, here we just want the magnitude spectrum
    mfcc = MFCC()
    logNorm = UnaryOperator(type='log')

    pool = Pool()

    for frame in FrameGenerator(audio, frameSize=1024, hopSize=512, startFromZero=True):
        mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame)))
        pool.add('lowlevel.mfcc', mfcc_coeffs)
        pool.add('lowlevel.mfcc_bands', mfcc_bands)
        pool.add('lowlevel.mfcc_bands_log', logNorm(mfcc_bands))

    # Transposed so that frames run along the x axis; the [1:, :] slice drops
    # the first MFCC coefficient from the picture.
    _save_feature_image(pool['lowlevel.mfcc'].T[1:, :],
                        f'{dst}/mfcc/{genre}/{name}.png')
    _save_feature_image(pool['lowlevel.mfcc_bands'].T,
                        f'{dst}/mfcc_bands/{genre}/{name}.png')
    _save_feature_image(pool['lowlevel.mfcc_bands_log'].T,
                        f'{dst}/mfcc_bands_log/{genre}/{name}.png')

## Dataset Preprocessing

In [None]:
# Running number of successfully processed files, used in the progress output.
count = 1

# extract features from the data set
# NOTE(review): only 'jazz' and 'rock' are processed here while later cells
# work with every entry of GENRES - confirm this restriction is intended.
for genre in ['jazz', 'rock']:
    for wav in os.listdir(f'./dataset/genres/{genre}'):
        if not PREPROCESSING:
            continue

        try:
            extract_mfcc(
                src=f'./dataset/genres/{genre}/{wav}',
                dst=f'./dataset/preprocessing',
                name=wav.removesuffix('.wav'),
                genre=genre
            )
        except Exception as err:
            # Keep going when a single file cannot be processed, but report it
            # instead of hiding the failure. KeyboardInterrupt is not caught,
            # so the cell can still be interrupted.
            print(f'{wav} FAILED: {err!r}')
        else:
            print(f'{count} - {wav} EXTRACTED')
            count += 1


# The feature images exist now; re-running this cell must not regenerate them.
PREPROCESSING = False

In [None]:
def load_data(src: str, feature: str, test_size: float = 0.3,
              random_state: int = SEED_VALUE, shuffle: bool = True,
              stratify: list = None):
    """
        Divide the data set into three subsets, the training set,
        the test set and the validation set.

        Images are read from `{src}/{feature}/<genre>/`; the name of each
        sub-folder is used as the label of the images inside it.

        Parameters:
            src: folder that contains one sub-folder per feature type.
            feature: feature type to load (name of the sub-folder of `src`).
            test_size: fraction of the samples held out of the training set;
                the held-out part is split in half into test and validation.
            random_state: seed forwarded to both train_test_split calls.
            shuffle: forwarded to both train_test_split calls.
            stratify: names of the one-hot label columns to stratify on, or
                None to split without stratification.

        Returns:
            (X_train, y_train), (X_test, y_test), (X_val, y_val), where every
            X is the stacked float32 image array of the subset and every y
            holds the one-hot label columns listed in the module-level GENRES.
    """
    images, labels = [], []

    for genre in os.listdir(f'{src}/{feature}'):
        for file_name in os.listdir(f'{src}/{feature}/{genre}'):
            path = f'{src}/{feature}/{genre}/{file_name}'
            pixels = cv2.imread(path)
            if pixels is None:
                # cv2.imread returns None for files it cannot decode; such an
                # entry would otherwise end up as a NaN scalar in the data.
                print(f'{path} SKIPPED: not a readable image')
                continue
            images.append(np.array(pixels, dtype=np.float32))
            labels.append(genre)

    # Keep every image as one object cell. Building a single array out of
    # [image, label] rows is ragged and is rejected by recent NumPy versions.
    image_column = np.empty(len(images), dtype=object)
    for position, pixels in enumerate(images):
        image_column[position] = pixels

    df = DataFrame({feature: image_column, 'genre': labels})

    # Replace the textual label by one indicator column per genre.
    one_hot = pd.get_dummies(df['genre'])
    df = pd.concat([df.drop(columns=['genre']), one_hot], axis=1)

    strat = df[stratify] if stratify else None
    train_set, test_set = train_test_split(df, test_size=test_size, random_state=random_state,
                                           shuffle=shuffle, stratify=strat)

    # Second split: halve the held-out part into test and validation.
    strat = test_set[stratify] if stratify else None
    test_set, val_set = train_test_split(test_set, test_size=0.5, random_state=random_state,
                                         shuffle=shuffle, stratify=strat)

    def _as_xy(subset):
        # Use the requested feature column rather than a fixed 'mfcc'.
        return np.stack(list(subset[feature])), subset[GENRES]

    return _as_xy(train_set), _as_xy(test_set), _as_xy(val_set)

## Dataset

In [None]:
# Load the MFCC images and split them into train / test / validation sets,
# stratifying on the genre columns.
(X_train, y_train), (X_test, y_test), (X_val, y_val) = load_data(
    src='./dataset/preprocessing/',
    feature='mfcc',
    stratify=GENRES,
)

## CNN

In [None]:
# Two convolution + max-pooling stages followed by a small dense classifier.
# NOTE(review): input_shape is presumably the pixel size of the saved feature
# images and units=10 the number of genres - confirm both against the data.
model = Sequential([
    Conv2D(filters=32, kernel_size=(3,3), input_shape=(326, 837, 3), activation='relu'),
    MaxPool2D(pool_size=(2,2)),
    Conv2D(filters=64, kernel_size=(3,3), activation='relu'),
    MaxPool2D(pool_size=(2,2)),
    Flatten(),
    Dense(units=64, activation='relu'),
    Dense(units=10, activation='softmax'),
])

# load_data returns one-hot encoded labels (one column per genre), which is
# the format categorical cross-entropy expects; the sparse variant is meant
# for integer class ids.
model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the network.
model.summary()

In [None]:

# Validate on the dedicated validation split so the test split stays unseen
# during training and can still give an unbiased final evaluation.
history = model.fit(x=X_train, y=y_train, epochs=10, validation_data=(X_val, y_val))