<a href="https://colab.research.google.com/github/shrestha-bikash/Music_Genre_Classification/blob/main/Music_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Prepare and Preprocess Dataset

In [None]:
# Download dataset

! wget http://opihi.cs.uvic.ca/sound/genres.tar.gz

--2021-04-01 19:40:29--  http://opihi.cs.uvic.ca/sound/genres.tar.gz
Resolving opihi.cs.uvic.ca (opihi.cs.uvic.ca)... 142.104.68.135
Connecting to opihi.cs.uvic.ca (opihi.cs.uvic.ca)|142.104.68.135|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1225571541 (1.1G) [application/x-gzip]
Saving to: ‘genres.tar.gz’


2021-04-01 19:51:12 (1.82 MB/s) - ‘genres.tar.gz’ saved [1225571541/1225571541]



In [None]:
! tar -xzf genres.tar.gz

In [None]:
import librosa, librosa.display
import matplotlib.pyplot as plt
import numpy as np
import os
import math
import json

In [None]:
npy_path = 'music_genre.json'
SAMPLE_RATE = 22050
TRACK_DURATION = 30 # measured in seconds
SAMPLES_PER_TRACK = SAMPLE_RATE * TRACK_DURATION

def save_mfcc(dataset_path, json_path, num_mfcc=13, n_fft=2048, hop_length=512, num_segments=5):
    """Extract MFCC features for every audio file under ``dataset_path`` and save them as JSON.

    Every directory found below ``dataset_path`` is treated as one class: its
    name is appended to ``"mapping"`` and its position in that list is the
    integer label of the samples taken from it. Each track is cut into
    ``num_segments`` equal slices of ``SAMPLES_PER_TRACK / num_segments``
    samples, and one MFCC matrix (frames x coefficients) is stored per slice.
    Slices that do not yield the expected number of frames (e.g. because the
    track is shorter than ``TRACK_DURATION``) are skipped.

    Args:
        dataset_path: Root directory of the dataset (``str`` or ``os.PathLike``).
        json_path: File the resulting dictionary is written to.
        num_mfcc: Number of MFCC coefficients per frame.
        n_fft: FFT window size handed to librosa.
        hop_length: Hop between successive frames, in samples.
        num_segments: Number of slices each track is divided into.

    Returns:
        None. The dictionary with keys ``"mapping"``, ``"labels"`` and
        ``"mfcc"`` is written to ``json_path``.
    """
    data = {
        "mapping": [],
        "labels": [],
        "mfcc": []
    }

    samples_per_segment = int(SAMPLES_PER_TRACK/num_segments)
    # librosa's default (centered) framing produces 1 + n // hop_length frames
    # for n samples. ceil(n / hop_length) gives the same number except when n
    # is an exact multiple of hop_length, where it is one too small and every
    # segment would be rejected by the length check below.
    expected_frames = 1 + samples_per_segment // hop_length

    # Normalise to str so the root can be compared by value; the previous
    # identity test (`is not`) only worked when os.walk returned the very same
    # object that was passed in.
    root = os.fspath(dataset_path)

    for dirpath, _dirnames, filenames in os.walk(root):
        if dirpath == root:
            # The root only contains the class directories, not samples.
            continue

        mapping_label = os.path.basename(dirpath)
        data['mapping'].append(mapping_label)
        # Label is the index of this class in data['mapping'], independent of
        # how many directories the walk has visited.
        label_index = len(data['mapping']) - 1
        print('For label:', mapping_label)

        for f in filenames:
            # load audio file
            file_path = os.path.join(dirpath, f)
            signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

            # process all segments of audio file
            for d in range(num_segments):

                # calculate start and finish sample for current segment
                start = samples_per_segment * d
                finish = start + samples_per_segment

                # generate mfcc; the audio must be passed as the keyword `y`
                # (positional audio is not accepted by newer librosa releases)
                mfcc = librosa.feature.mfcc(y=signal[start:finish], sr=sr, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
                mfcc = mfcc.T

                # keep only full-length segments so all samples share one shape
                if len(mfcc) == expected_frames:
                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(label_index)

    with open(json_path, 'w') as f:
        json.dump(data, f, indent=4)


save_mfcc('genres/', npy_path)

For label: classical
For label: reggae
For label: pop
For label: hiphop
For label: blues
For label: jazz
For label: rock
For label: disco
For label: country
For label: metal


In [None]:
import numpy as np

def get_data(path='music_genre.json'):
    """Read the saved feature file and return its features and labels as arrays.

    Args:
        path: JSON file holding at least the keys ``"mfcc"`` and ``"labels"``.

    Returns:
        Tuple ``(X, Y)`` where ``X`` is built from the ``"mfcc"`` entry and
        ``Y`` from the ``"labels"`` entry, both as NumPy arrays.
    """
    with open(path, 'r') as handle:
        payload = json.load(handle)

    features, targets = (np.array(payload[key]) for key in ("mfcc", "labels"))
    return features, targets

X, Y = get_data()

In [None]:
print(X.shape)
print(len(Y))

(4998, 259, 13)
4998


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing, then 20% of the remainder for
# validation. A fixed random_state makes the split identical after a kernel
# restart, and stratifying on the labels keeps the class proportions the same
# in every subset.
SPLIT_SEED = 42

x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=SPLIT_SEED, stratify=Y)
x_train, x_validation, y_train, y_validation = train_test_split(
    x_train, y_train, test_size=0.2, random_state=SPLIT_SEED, stratify=y_train)

print(x_train.shape)
print(x_validation.shape)
print(x_test.shape)

(3198, 259, 13)
(800, 259, 13)
(1000, 259, 13)


In [None]:
# Append a trailing axis of size 1 so each sample becomes a 3-D array, which is
# the per-sample shape the 2-D convolution model below is built with.
# The ndim guards make the cell idempotent: running it a second time leaves the
# arrays unchanged instead of stacking another axis.
if x_train.ndim == 3:
    x_train = x_train[..., np.newaxis]
if x_validation.ndim == 3:
    x_validation = x_validation[..., np.newaxis]
if x_test.ndim == 3:
    x_test = x_test[..., np.newaxis]

print(x_train.shape)
print(x_validation.shape)
print(x_test.shape)

(3198, 259, 13, 1)
(800, 259, 13, 1)
(1000, 259, 13, 1)


### Create Deep Learning Models

In [None]:
from tensorflow.keras.models import Sequential
# Import layers from the public `tensorflow.keras` namespace, the same one
# `Sequential` comes from. `tensorflow.python.keras` is a private module and is
# not a supported API, and mixing the two Keras implementations should be avoided.
from tensorflow.keras.layers import Input, Convolution2D, Flatten, Dense, Activation, MaxPooling2D, add, Dropout, BatchNormalization, GlobalAveragePooling2D, GlobalMaxPool2D

def cnn_model(input_shape, num_classes=10):
    """Build the (uncompiled) convolutional classifier.

    The network stacks three blocks of batch normalisation, a 32-filter
    ReLU convolution and 2x2 max pooling, followed by a 64-unit dense layer
    with dropout and a softmax output layer.

    Args:
        input_shape: Shape of a single sample, without the batch dimension.
        num_classes: Width of the softmax output layer. Defaults to 10, the
            value that was previously hard-coded.

    Returns:
        A ``Sequential`` model; the caller is responsible for compiling it.
    """
    model = Sequential()

    # Block 1: 3x3 convolution
    model.add(BatchNormalization(input_shape=input_shape))
    model.add(Convolution2D(32, 3, padding='same', activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2), padding='same'))

    # Block 2: 3x3 convolution
    model.add(BatchNormalization())
    model.add(Convolution2D(32, 3, padding = 'same', activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2), padding='same'))

    # Block 3: 2x2 convolution
    model.add(BatchNormalization())
    model.add(Convolution2D(32, 2, padding = 'same', activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2), padding='same'))

    # Classifier head
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.3))

    model.add(Dense(num_classes, activation='softmax'))

    return model


In [None]:
# Split the 4-D training array shape into the sample count (n) and the three
# per-sample dimensions; only the latter describe the network input.
(n, a, b, c) = x_train.shape
input_shape = (a, b, c)

model = cnn_model(input_shape)
# The sparse variant of categorical cross-entropy is used because the targets
# are integer class indices rather than one-hot vectors.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Print the layer-by-layer overview of the compiled model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization (BatchNo (None, 259, 13, 1)        4         
_________________________________________________________________
conv2d (Conv2D)              (None, 259, 13, 32)       320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 130, 7, 32)        0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 130, 7, 32)        128       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 130, 7, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 65, 4, 32)         0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 65, 4, 32)         1

In [None]:
# Training hyperparameters: mini-batch size and number of passes over the training set.
batch = 32
epochs = 50

# Fit on the training split, passing the validation split as validation_data.
# The value returned by fit() is kept in `history` for later inspection.
history = model.fit(x_train, y_train, validation_data=(x_validation, y_validation), batch_size=batch, epochs=epochs)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
def predict_data(model, x, y):
    """Print the true labels next to the classes the model predicts.

    Args:
        model: Object exposing ``predict(x)`` that returns one row of
            per-class scores for every sample in ``x``.
        x: Batch of input samples handed to ``model.predict``.
        y: True labels for ``x``; printed unchanged for comparison.

    Returns:
        None. Both label arrays are written to standard output.
    """
    # The predicted class of each sample is the column with the highest score.
    predicted_class = np.argmax(model.predict(x), axis=1)

    for caption, labels in (('Actual class:', y), ('Predicted class:', predicted_class)):
        print(caption, labels)

# Compare predictions against the true labels on the held-out test split.
x = x_test
y = y_test

predict_data(model, x, y)

Actual class: [0 3 9 3 8 8 1 1 9 4 5 7 5 2 5 6 2 2 3 0 6 6 5 2 5 0 5 4 1 5 5 8 0 2 4 9 7
 7 6 4 5 3 0 1 3 0 6 2 8 3 1 7 5 8 7 5 3 5 4 3 8 1 0 9 6 3 9 9 9 4 3 3 1 2
 0 7 6 9 8 7 8 4 8 6 4 0 1 2 0 9 5 8 9 8 6 8 6 6 3 8 5 0 2 4 2 3 3 5 8 7 2
 7 4 6 0 7 7 8 2 5 5 5 9 3 1 4 6 7 7 8 6 3 6 7 3 7 0 5 9 0 8 7 0 8 8 0 8 9
 1 0 7 9 6 7 3 9 5 3 2 8 0 4 4 4 8 1 6 7 8 7 2 3 4 5 0 1 8 8 9 4 6 8 8 0 5
 5 0 9 8 6 2 7 0 3 6 8 8 3 5 3 3 2 4 7 2 3 1 5 5 1 9 9 7 1 7 2 1 2 0 6 2 5
 4 6 6 4 1 6 0 7 7 9 2 7 1 7 4 2 8 1 9 2 6 0 1 7 4 1 1 1 7 4 8 8 4 1 6 5 3
 9 9 6 4 7 2 3 3 7 8 0 6 4 5 2 9 6 8 0 4 5 7 2 0 9 8 4 5 9 1 7 2 0 8 5 1 2
 6 1 3 8 5 1 6 8 1 7 5 8 8 1 1 5 4 7 1 4 8 2 1 0 3 2 5 3 1 8 1 1 8 3 5 6 5
 9 9 6 6 7 6 5 1 5 6 2 3 2 9 0 8 0 4 7 2 3 7 3 0 4 9 4 6 0 7 6 3 4 5 5 4 3
 4 1 0 7 9 6 5 3 7 5 7 5 7 1 2 5 7 6 3 5 5 9 3 5 8 8 6 8 6 4 4 7 1 5 4 1 9
 0 5 6 0 9 5 4 4 0 2 7 3 5 6 6 4 6 8 9 1 4 4 6 3 4 4 4 7 9 2 4 8 5 5 7 1 9
 8 3 2 9 6 2 1 2 8 7 0 6 5 7 7 8 0 4 1 4 9 0 8 2 0 7 5 4 6 4 7 5 2 3 0 2 6
 4 5 1 7 8 