<a href="https://colab.research.google.com/github/konsteva/LSTM_Music_Genre_Classification/blob/main/New_Copy_of_Genre_Classification_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Check device type

In [None]:
import tensorflow as tf
from tensorflow.python.client import device_lib

# Only the last expression of a cell is echoed, so the GPU name has to be
# printed explicitly. An empty string means TensorFlow found no GPU.
gpu_name = tf.test.gpu_device_name()
print("GPU device:", gpu_name if gpu_name else "none found")

# Echo every device that TensorFlow can see.
device_lib.list_local_devices()

# Mount Drive

In [None]:
# Mount Google Drive at /content/drive so that later cells can read files
# stored under /content/drive/MyDrive (the Kaggle credentials are copied from there).
# Relies on the google.colab package, i.e. this cell is meant to run inside Colab.
from google.colab import drive
drive.mount('/content/drive')

# Set up, Connect to Kaggle API and Download the Dataset

In [None]:
# Install the Kaggle command-line client used below to download the dataset.
!pip install kaggle

In [None]:
# Create the directory that will hold kaggle.json.
# -p makes the cell safe to re-run: no error if the directory already exists.
! mkdir -p ~/.kaggle

In [None]:
# Copy the Kaggle API credentials file from the mounted Drive into ~/.kaggle/
# (presumably the location the kaggle CLI reads it from - confirm against the Kaggle docs).
! cp /content/drive/MyDrive/kaggle.json ~/.kaggle/

In [None]:
# Restrict the credentials file to owner read/write only (mode 600).
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the GTZAN archive with the Kaggle CLI.
! kaggle datasets download andradaolteanu/gtzan-dataset-music-genre-classification
# Extract the archive.
#   -q  do not list every extracted file in the cell output
#   -n  never overwrite existing files, so re-running the cell does not stop
#       at an interactive "replace?" prompt
! unzip -q -n /content/gtzan-dataset-music-genre-classification.zip

# Initializing parameters and extracting MFCC features

In [None]:
import math

# --- Locations ---------------------------------------------------------------
dataset_path = "/content/Data/genres_original"  # one sub-folder per genre
json_path = "/content/extracted_data.json"      # extracted features are written here

# --- Audio / MFCC settings ---------------------------------------------------
sample_rate = 22050
n_fft = 2048
num_mfcc = 13
hop_length = 512
track_duration = 30  # measured in seconds
samples_per_track = sample_rate * track_duration

# --- Segmentation ------------------------------------------------------------
# Every track is cut into `num_segments` equally long pieces.
num_segments = 10
samples_per_segment = samples_per_track // num_segments

# Number of MFCC frames librosa returns for one segment. With its default
# centred framing the count is 1 + n // hop_length. The previous
# ceil(n / hop_length) gives the same value except when n is an exact multiple
# of hop_length, where it is one frame short - and the extraction loop would
# then discard every segment because of its length check.
num_mfcc_vectors_per_segment = 1 + samples_per_segment // hop_length

In [None]:
import os
import librosa

# dictionary to save the extracted features
data = {
    "mapping": [],  # genre names; the list index of a genre is its integer label
    "labels": [],   # one integer label per stored segment
    "mfcc": []      # one (frames x num_mfcc) matrix per stored segment, as nested lists
}

# loop through all genre sub-folders
for dirpath, dirnames, filenames in os.walk(dataset_path):

    # os.walk honours in-place changes to dirnames: sorting makes the traversal
    # order, and therefore the label numbering, reproducible between runs
    dirnames.sort()

    # the dataset root itself holds no audio, only the genre sub-folders do
    # (compare by value; `is` would compare object identity)
    if dirpath == dataset_path:
        continue

    # save genre label (i.e., sub-folder name) in the mapping;
    # os.path.basename works with both "/" and "\\" separators
    semantic_label = os.path.basename(dirpath)
    data["mapping"].append(semantic_label)
    label = len(data["mapping"]) - 1  # index of this genre in the mapping
    print("\nProcessing: {}".format(semantic_label))
    print("-"*70)

    # process all audio files in genre sub-dir
    for f in sorted(filenames):
        file_path = os.path.join(dirpath, f)
        try:
            # load audio file; the returned rate is ignored so the global
            # `sample_rate` setting is not rebound inside the loop
            signal, _ = librosa.load(file_path, sr=sample_rate)

            # process all segments of audio file
            for d in range(num_segments):

                # calculate start and finish sample for current segment
                start = samples_per_segment * d
                finish = start + samples_per_segment

                # extract mfcc; the audio is passed as keyword `y` because
                # recent librosa releases no longer accept it positionally
                mfcc = librosa.feature.mfcc(y=signal[start:finish], sr=sample_rate,
                                            n_mfcc=num_mfcc, n_fft=n_fft,
                                            hop_length=hop_length)
                mfcc = mfcc.T  # -> (frames, coefficients)

                # store only mfcc feature with expected number of vectors
                # (shorter segments, e.g. at the end of a short track, are dropped)
                if len(mfcc) == num_mfcc_vectors_per_segment:
                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(label)
                    print("{}, segment:{}".format(f, d + 1))
        except Exception as exc:
            # best effort: an unreadable file must not abort the whole run,
            # but report which file was skipped and why
            print('Skipped song {}: {}'.format(f, exc))


In [None]:
import json

# save MFCCs to json file
print('Writing the data in the .json file. This might take several minutes!')
with open(json_path, "w") as fp:
    # Written without indentation: indent=4 would place every single MFCC
    # value on its own indented line and inflate the file considerably.
    # json.load reads both layouts identically.
    json.dump(data, fp)

Writing the data in the .json file. This might take several minutes!


# Load and prepare the data

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

# Read the extracted features back from disk
with open(json_path, "r") as f:
    data = json.load(f)

# Make numpy arrays from dictionary
# X is 3D: (segment, MFCC frame, MFCC coefficient); y holds one integer label per segment
X = np.array(data["mfcc"])
y = np.array(data["labels"])

# NOTE(review): the split is done per segment, so segments of one track can end
# up in different subsets - confirm whether that is acceptable for evaluation.

# Split data (train=80%, test=20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0, stratify=y)
# Split the remaining 80% again: 25% of it becomes validation
# (overall train=60%, validation=20%). Stratified like the first split so the
# validation set keeps the same genre proportions.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.25, random_state=0, stratify=y_train)

# Shape of a single sample (frames, coefficients); used as the model's input shape
input_shape = X_train[0].shape

# Build and save model

In [None]:
import keras
from keras import regularizers
from tensorflow.keras.layers import LSTM, Dense, Dropout
from keras.models import load_model

# One output unit per genre recorded during feature extraction
# (the integer labels index into data["mapping"]), instead of a hard-coded 24.
num_classes = len(data["mapping"])

model = keras.Sequential()
# Input Layer: LSTM over the MFCC frame sequence of one segment
model.add(LSTM(128, input_shape=input_shape))
model.add(Dropout(0.2))
# 1st Hidden Layer
model.add(Dense(128, activation='relu'))
# 2nd Hidden Layer
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.4))
# 3rd Hidden Layer
model.add(Dense(48, activation='relu'))
model.add(Dropout(0.4))
# Output Layer: softmax over the genres
model.add(Dense(num_classes, activation='softmax'))

model.summary()

# Sparse categorical cross-entropy because the labels are plain integers, not one-hot vectors
model.compile(optimizer='adam', loss='SparseCategoricalCrossentropy', metrics=['accuracy'])

# shuffle=False: the training samples are fed in the same order every epoch
history = model.fit(X_train, y_train, epochs=150, batch_size=20, validation_data=(X_val, y_val), shuffle=False)

# Final check on the held-out test set
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print('\nTest accuracy:', test_acc)

# Persist the trained model in HDF5 format in the current working directory
model_name = 'Genre_model.h5'
model.save(model_name)

# Visualizing the results

In [None]:
from matplotlib import pyplot as plt

def plot_history_metric(history_dict, fig_num, train_key, val_key,
                        train_label, val_label, y_label, title):
    """Plot one training metric and its validation counterpart per epoch.

    Args:
        history_dict: mapping from metric name to a list of per-epoch values
            (here ``history.history`` returned by ``model.fit``).
        fig_num: matplotlib figure number to draw into.
        train_key: key of the training curve in ``history_dict``.
        val_key: key of the validation curve in ``history_dict``.
        train_label: legend entry for the training curve.
        val_label: legend entry for the validation curve.
        y_label: label of the y axis.
        title: figure title.
    """
    plt.figure(fig_num)
    plt.plot(history_dict[train_key], label=train_label)
    plt.plot(history_dict[val_key], label=val_label)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()


# Plot train and validation set accuracy
plot_history_metric(history.history, 0, 'accuracy', 'val_accuracy',
                    'Train Accuracy', 'Validation Accuracy', 'Accuracy', 'Accuracy Plot')

# Plot train and validation set error
plot_history_metric(history.history, 1, 'loss', 'val_loss',
                    'Train Loss', 'Validation Loss', 'Error', 'Error Plot')

# Render the figures explicitly so the cell does not echo a Legend object repr
plt.show()