In [8]:
import sklearn
import json
import numpy as np

# JSON dataset file handed to load_data(); a relative path, so it is resolved
# against the notebook's current working directory.
DATASET_PATH = "../../musicDataf.json"

def load_data(dataset_path):
    """Read a JSON dataset and return its features and labels as arrays.

    :param dataset_path: path to a JSON file containing "mfcc" and "labels" entries
    :return: tuple ``(inputs, targets)`` of numpy arrays built from those two entries
    """
    with open(dataset_path, "r") as handle:
        payload = json.load(handle)

    # JSON can only hold nested lists, so rebuild ndarrays from both entries
    return np.array(payload["mfcc"]), np.array(payload["labels"])

In [9]:
# Read the whole dataset into memory as a (features, labels) pair of arrays.
inputs, targets = load_data(DATASET_PATH)

In [10]:
# Display the dimensions of the feature array.
# presumably (samples, frames per sample, MFCC coefficients) — TODO confirm
inputs.shape

(9996, 130, 13)

In [11]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples for testing.
# NOTE(review): no random_state is passed, so the split changes on every run, and
# in_train / in_test / tar_train / tar_test are not referenced again in the cells
# visible here — confirm whether this cell is still needed.
in_train, in_test, tar_train, tar_test = train_test_split(inputs, targets, test_size=0.3)

In [12]:
import tensorflow as tf
from tensorflow import keras

In [13]:
from sklearn.model_selection import train_test_split
def prepare_dataset(test_size, validation_size, random_state=None):
    """Load the dataset and split it into train, validation and test sets.

    The test set is split off first; the validation set is then taken from the
    remaining training data. ``validation_size`` is therefore relative to what
    is left after the test split, not to the whole dataset.

    :param test_size: ``test_size`` forwarded to the first train_test_split call
        (portion of the whole dataset held out for testing)
    :param validation_size: ``test_size`` forwarded to the second split
        (portion of the remaining training data held out for validation)
    :param random_state: optional seed forwarded to both splits so the partition
        can be reproduced; the default (None) keeps the original behaviour of a
        different random split on every call
    :return: X_train, X_validation, X_test, y_train, y_validation, y_test, where
        every X array has one extra trailing axis of length 1
    """
    X, y = load_data(DATASET_PATH)

    # create train, validation and test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_train, y_train, test_size=validation_size, random_state=random_state
    )

    # add a trailing axis to the input sets (the Conv2D layers used in this
    # notebook take samples with an explicit channel dimension)
    X_train = X_train[..., np.newaxis]
    X_validation = X_validation[..., np.newaxis]
    X_test = X_test[..., np.newaxis]

    return X_train, X_validation, X_test, y_train, y_validation, y_test

In [14]:
def build_model(input_shape, num_classes=10):
    """Build the (uncompiled) convolutional classifier.

    Three Conv2D -> MaxPooling2D -> BatchNormalization stages are followed by a
    64-unit dense layer with dropout and a softmax output layer.

    :param input_shape: shape of a single input sample, given to the first Conv2D layer
    :param num_classes: number of units in the softmax output layer; defaults to
        10, the value that was previously hard-coded
    :return: a keras.Sequential model; compiling and training are left to the caller
    """
    model = keras.Sequential()

    # 1st conv layer
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    # 2nd conv layer
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu'))
    model.add(keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    # 3rd conv layer (smaller kernel and pooling window than the first two)
    model.add(keras.layers.Conv2D(32, (2, 2), activation='relu'))
    model.add(keras.layers.MaxPooling2D((2, 2), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    # flatten output and feed it into dense layer
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dropout(0.3))

    # output layer: one softmax unit per class
    model.add(keras.layers.Dense(num_classes, activation='softmax'))

    return model

In [15]:
# Split the data: 0.25 is the test share of the whole dataset, 0.2 the validation
# share of what remains after the test split (see prepare_dataset).
X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_dataset(0.25, 0.2)

# show the shapes of the three input splits
X_train.shape, X_test.shape, X_validation.shape

((5997, 130, 13, 1), (2499, 130, 13, 1), (1500, 130, 13, 1))

In [16]:
# one sample = the two middle axes of X_train plus a single channel
input_shape = (*X_train.shape[1:3], 1)
model = build_model(input_shape)

# configure training: Adam optimiser, integer-label cross-entropy, accuracy metric
optimiser = keras.optimizers.Adam(learning_rate=0.0001)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimiser,
    metrics=['accuracy'],
)

In [17]:
# Train the network, validating on the held-out validation split after each epoch.
# The History object returned by fit() is kept in `history` so the per-epoch
# loss/accuracy values (history.history) remain available after training instead
# of being discarded once the cell output is shown.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_validation, y_validation),
    batch_size=32,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x2459c6eee50>

In [18]:
# Score the model on the test split; the model was compiled with one loss and
# metrics=['accuracy'], so the two values shown are presumably [loss, accuracy].
model.evaluate(X_test,y_test)



[0.8150926828384399, 0.7591036558151245]

In [22]:
# Write the trained model to disk under "cnn_mfcc_model_final".
model.save("cnn_mfcc_model_final")

INFO:tensorflow:Assets written to: cnn_mfcc_model_final\assets


INFO:tensorflow:Assets written to: cnn_mfcc_model_final\assets


In [6]:
import tensorflow as tf
# Restore the model previously written to "cnn_mfcc_model_final".
svmodel = tf.keras.models.load_model("cnn_mfcc_model_final")

In [1]:
import pickle
# pickle.dump(svmodel, open("./model_saved",'wb'))

In [2]:
# Restore the pickled model. The file is opened in a `with` block so the handle
# is closed as soon as loading finishes.
# NOTE(review): pickle.load can execute arbitrary code — only load files you
# created yourself. tf.keras.models.load_model (used elsewhere in this notebook)
# is the safer way to restore a Keras model.
with open("./model_saved", "rb") as model_file:
    model_loaded = pickle.load(model_file)



In [7]:
# NOTE(review): the recorded output shows this cell raising AttributeError because
# model_loaded was None when it ran (cells were executed out of order). Make sure
# the cell that loads model_loaded has run first; a restored model presumably does
# not carry the training curves either — TODO confirm.
model_loaded.history

AttributeError: 'NoneType' object has no attribute 'history'

In [3]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the restored model.
model_loaded.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 128, 11, 32)       320       
                                                                 
 max_pooling2d (MaxPooling2  (None, 64, 6, 32)         0         
 D)                                                              
                                                                 
 batch_normalization (Batch  (None, 64, 6, 32)         128       
 Normalization)                                                  
                                                                 
 conv2d_1 (Conv2D)           (None, 62, 4, 32)         9248      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 31, 2, 32)         0         
 g2D)                                                            
                                                        

In [1]:
import numpy as np
import librosa
import os
import json
import math

# --- audio loading settings ---
SAMPLE_RATE = 22050  # sampling rate requested from librosa.load
DURATION = 30  # presumably the nominal track length in seconds — TODO confirm
SAMPLE_PER_TRACK = DURATION * SAMPLE_RATE

# --- segmentation / MFCC settings ---
num_segments = 10
n_mfcc = 13
n_fft = 2048
hop_length = 512

# every track is cut into num_segments equally sized chunks of samples
num_samples_per_segment = SAMPLE_PER_TRACK // num_segments
# number of MFCC frames a full-length chunk is expected to produce
expected_num_mfcc_vectors_per_segment = math.ceil(num_samples_per_segment / hop_length)

In [14]:
import soundfile as sf

In [2]:
# Load a local recording at SAMPLE_RATE and print the raw sample array.
# NOTE(review): absolute, machine-specific path — this cell only runs on the
# author's machine; consider a path relative to a configurable data directory.
file_path = "C:/Users/Anurag/Downloads/wellerman.wav"
signal, sr = librosa.load(file_path,sr=SAMPLE_RATE)
print(signal)

[0. 0. 0. ... 0. 0. 0.]


In [13]:
# Classify one audio file: cut it into segments, extract MFCCs for each segment,
# predict a class per segment and report the class that wins the vote.
file_path = "C:/anurag_work/MusicGenreClassification/data/genres_original/classical/classical.00004.wav"

# Fail loudly if the file cannot be read: carrying on would either hit a NameError
# or silently classify whatever `signal` was left over from an earlier cell.
try:
    signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)
except Exception as exc:
    raise RuntimeError(f"Could not load audio file: {file_path}") from exc

mfcc_data = []

for s in range(num_segments):
    start_sample = num_samples_per_segment * s
    finish_sample = start_sample + num_samples_per_segment

    segment = signal[start_sample:finish_sample]
    if segment.size == 0:
        # the recording ended before this segment started; nothing left to analyse
        break

    mfcc = librosa.feature.mfcc(y=segment, n_fft=n_fft, hop_length=hop_length, n_mfcc=n_mfcc, sr=sr)
    mfcc = mfcc.T

    # store mfccs for segment if it has expected vector length
    if len(mfcc) == expected_num_mfcc_vectors_per_segment:
        mfcc_data.append(mfcc)

if not mfcc_data:
    raise ValueError(
        f"No segment of {file_path} produced "
        f"{expected_num_mfcc_vectors_per_segment} MFCC frames; is the file too short?"
    )

mfcc_data = np.array(mfcc_data)

# add the trailing channel axis explicitly so the input has the same layout as
# the arrays returned by prepare_dataset for training
y_pred = model_loaded.predict(mfcc_data[..., np.newaxis])

# Majority vote over the per-segment predictions. `key` tracks the current
# leader; on a tie the class that reached the top count first is kept. The
# leader is set from the very first vote, so `key` is never None afterwards.
predictions = {}
key = None
for label in np.argmax(y_pred, axis=1):
    label = int(label)
    predictions[label] = predictions.get(label, 0) + 1
    if key is None or predictions[label] > predictions[key]:
        key = label

# class names in label-index order
class_list = ["blues","classical","country","disco","hiphop","jazz","metal","pop","reggae","rock"]
class_list[key]



'classical'