In [6]:
%matplotlib inline

import os
import shutil
import random

from glob import glob
import numpy as np

from tensorflow.keras import layers,regularizers, optimizers
from tensorflow.keras import models
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LeakyReLU,Dense, Activation, Flatten, Dropout, BatchNormalization,Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import librosa
import librosa.display
import matplotlib.pyplot as plt
from matplotlib import figure


In [7]:
def create_spectrogram(filename,name,store_path):
    plt.interactive(False)
    clip, sample_rate = librosa.load(filename, sr=None)
    fig = plt.figure(figsize=[0.72,0.72])
    ax = fig.add_subplot(111)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)
    S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
    filename  = store_path + name + '.jpg'
    plt.savefig(filename, dpi=400, bbox_inches='tight',pad_inches=0)
    plt.close()    
    fig.clf()
    plt.close(fig)
    plt.close('all')
    del filename,name,clip,sample_rate,fig,ax,S

In [8]:
def convert_Audio_File_to_jpg_file(filename):
    # make dic for given class and their filepath name
    file_list=list(glob(filename + "\\*"))
    file_dic = {}
    for i,file in enumerate(file_list):
        all_files = []
        for root, dirs, files in os.walk(file):
            for file_ in files:
                # Join the root directory with the file name to get the full path
                all_files.append(os.path.join(root, file_))
        file_dic[file] = all_files
        
    # create file directory to store the converted audio file in to jpg
    file_path = []
    for file in file_dic.keys():
        file_rot = r'Convert_Music_Genre_File_to_jpg_file' + '\\' + file + '\\'
        file_path.append(file_rot)
        os.makedirs(file_rot, exist_ok=True)

    # Here each file is converted into jpg file using spectrogram and stored in above created directory.
    for i,folder in enumerate(file_dic.keys()):
        music_files = file_dic[folder]
        for file in music_files:
          create_spectrogram(file,file.split('\\')[-1],file_path[i])

In [9]:
convert_Audio_File_to_jpg_file('genres')

In [10]:
source_dir = r"C:\Users\WIN10\Desktop\IIT Kanpur ML AI Course\Deep Learning\Music Genre Recognition\Convert_Music_Genre_File_to_jpg_file\genres"
target_dir = r'C:\Users\WIN10\Desktop\IIT Kanpur ML AI Course\Deep Learning\Music Genre Recognition\genres_train_val_split_data\genres'
split_ratio = 0.8

def Train_Test_Split(source_dir,target_dir,split_ratio):
    # Define source and target directories
    train_dir = os.path.join(target_dir, 'train')
    val_dir = os.path.join(target_dir, 'val')
    
    # Create target directories if they don't exist
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)
    
    # Get the list of class directories
    classes = [d for d in os.listdir(source_dir) if os.path.isdir(os.path.join(source_dir, d))]
        
    for class_name in classes:
        # Create class directories in train and val folders
        os.makedirs(os.path.join(train_dir, class_name), exist_ok=True)
        os.makedirs(os.path.join(val_dir, class_name), exist_ok=True)
        
        # Get list of images in the class directory
        class_dir = os.path.join(source_dir, class_name)
        images = [f for f in os.listdir(class_dir) if os.path.isfile(os.path.join(class_dir, f))]
        
        # Shuffle the images
        random.shuffle(images)
        
        # Compute the split point
        split_point = int(len(images) * split_ratio)
        
        # Split the images into training and validation sets
        train_images = images[:split_point]
        val_images = images[split_point:]
        
        # Move the images to the respective directories
        for img in train_images:
            shutil.copy(os.path.join(class_dir, img), os.path.join(train_dir, class_name, img))
        
        for img in val_images:
            shutil.copy(os.path.join(class_dir, img), os.path.join(val_dir, class_name, img))
    
    print("Data split completed successfully!")

In [11]:
Train_Test_Split(source_dir,target_dir,split_ratio)

Data split completed successfully!


In [12]:
WIDTH = 64
HEIGHT = 64
BATCH_SIZE = 32
TRAIN_DIR=r'genres_train_val_split_data/genres/train/'
val_dir = r'genres_train_val_split_data/genres/val/'

# data prep
train_datagen = ImageDataGenerator(
    rescale=1./255.,validation_split=0.25)


train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(HEIGHT, WIDTH),
        batch_size=BATCH_SIZE,
        class_mode='categorical')

validation_gen = train_datagen.flow_from_directory(
    val_dir,target_size = (HEIGHT,WIDTH),
    batch_size = BATCH_SIZE,
    class_mode = 'categorical'
 )

Found 800 images belonging to 10 classes.
Found 200 images belonging to 10 classes.


In [13]:
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=(64,64,3)))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Conv2D(128, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(128, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))
model.compile(optimizers.RMSprop(learning_rate=0.0005, decay=1e-6),loss="categorical_crossentropy",metrics=["accuracy"])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [18]:
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size

model.fit(train_generator,validation_data=validation_gen,epochs=150)

Epoch 1/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 244ms/step - accuracy: 0.8495 - loss: 0.3771 - val_accuracy: 0.6350 - val_loss: 1.6115
Epoch 2/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 247ms/step - accuracy: 0.8779 - loss: 0.3352 - val_accuracy: 0.6650 - val_loss: 1.6054
Epoch 3/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 251ms/step - accuracy: 0.9033 - loss: 0.2730 - val_accuracy: 0.5900 - val_loss: 1.7581
Epoch 4/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 245ms/step - accuracy: 0.9153 - loss: 0.2529 - val_accuracy: 0.6150 - val_loss: 1.9061
Epoch 5/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 252ms/step - accuracy: 0.8885 - loss: 0.2973 - val_accuracy: 0.6300 - val_loss: 1.7696
Epoch 6/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 236ms/step - accuracy: 0.8905 - loss: 0.3439 - val_accuracy: 0.6350 - val_loss: 1.7691
Epoch 7/150
[1m25/25

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 249ms/step - accuracy: 0.9741 - loss: 0.0766 - val_accuracy: 0.6100 - val_loss: 3.0267
Epoch 52/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 241ms/step - accuracy: 0.9634 - loss: 0.1311 - val_accuracy: 0.6150 - val_loss: 2.6839
Epoch 53/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 265ms/step - accuracy: 0.9651 - loss: 0.1211 - val_accuracy: 0.6250 - val_loss: 2.1897
Epoch 54/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 246ms/step - accuracy: 0.9797 - loss: 0.0590 - val_accuracy: 0.5900 - val_loss: 2.2623
Epoch 55/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 242ms/step - accuracy: 0.9703 - loss: 0.0701 - val_accuracy: 0.6200 - val_loss: 3.1232
Epoch 56/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 267ms/step - accuracy: 0.9798 - loss: 0.0640 - val_accuracy: 0.6000 - val_loss: 3.1660
Epoch 57/150
[1m25/25[0m [

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 257ms/step - accuracy: 0.9782 - loss: 0.0670 - val_accuracy: 0.6350 - val_loss: 2.8287
Epoch 102/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 249ms/step - accuracy: 0.9890 - loss: 0.0272 - val_accuracy: 0.6000 - val_loss: 3.5565
Epoch 103/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 246ms/step - accuracy: 0.9820 - loss: 0.0615 - val_accuracy: 0.6300 - val_loss: 2.8189
Epoch 104/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 259ms/step - accuracy: 0.9758 - loss: 0.0763 - val_accuracy: 0.6250 - val_loss: 3.1578
Epoch 105/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 237ms/step - accuracy: 0.9882 - loss: 0.0340 - val_accuracy: 0.6250 - val_loss: 3.1510
Epoch 106/150
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 251ms/step - accuracy: 0.9879 - loss: 0.0553 - val_accuracy: 0.6000 - val_loss: 2.9671
Epoch 107/150
[1m25/25

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 237ms/step - accuracy: 0.9768 - loss: 0.0924 - val_accuracy: 0.6500 - val_loss: 3.8397


<keras.src.callbacks.history.History at 0x2410d1ac610>