In [None]:
#Load necessary libraries
import IPython.display as ipd
import os
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from keras.models import Sequential, Model
from keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, Conv2D, MaxPooling2D
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.utils import to_categorical
import librosa
import librosa.display

In [None]:
# Load one blues clip, draw its waveform and embed an audio player for it.
filename = 'C:/Users/bupes/OneDrive/Desktop/project/genres_original/blues/blues.00000.wav'
data, sample_rate = librosa.load(filename)

plt.figure(figsize=(14, 5))
librosa.display.waveshow(data, sr=sample_rate)
plt.xlabel('Time (seconds)')
plt.ylabel('Amplitude')
plt.title('Audio Waveform [Blues]')
plt.tight_layout()
plt.show()

# A bare `ipd.Audio(...)` expression is rendered only when it is the last
# statement of a cell, so hand the player to display() explicitly.
ipd.display(ipd.Audio(filename))

In [None]:
from scipy.io import wavfile as wav

# Read the same clip with scipy and print both loaders' sample rates and
# sample arrays side by side.
wave_sample_rate, wave_audio = wav.read(filename)

for message, value in (
    ("The sample rate is ", sample_rate),
    ("The sample rate using the scipy library is ", wave_sample_rate),
    ("The librosa data is ", data),
    ("The scipy data is ", wave_audio),
):
    print(message, value)

In [None]:
# Check whether the dataset is balanced: preview the feature table, then count
# the rows per genre label.
import pandas as pd
metadata = pd.read_csv('C:/Users/bupes/OneDrive/Desktop/project/features_30_sec.csv')

# Only the last expression of a cell is rendered automatically, so the preview
# has to be displayed explicitly.
ipd.display(metadata.head(10))
metadata['label'].value_counts()

In [None]:
# Plot the samples read by scipy against their sample index.
fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(wave_audio)

In [None]:
# Build data.csv: one row per clip holding the mean of each librosa feature,
# the mean of each MFCC coefficient, and the genre label.
GENRES_DIR = 'C:/Users/bupes/OneDrive/Desktop/project/genres_original/'
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()

# The header names exactly the values written per row, in the same order, so
# every column name matches its content when the file is read back.
header = ['filename', 'chroma_stft', 'spectral_centroid', 'spectral_bandwidth',
          'rolloff', 'zero_crossing_rate']
header += [f'mfcc{i}' for i in range(1, 21)]
header.append('label')

# Open the file once for the whole extraction instead of once per row.
with open('data.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)
    for g in genres:
        # os.listdir order is arbitrary; sorting keeps the row order reproducible.
        for clip_name in sorted(os.listdir(GENRES_DIR + g)):
            songname = GENRES_DIR + g + '/' + clip_name
            y, sr = librosa.load(songname, mono=True, duration=30)
            chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
            spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
            spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
            rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
            zcr = librosa.feature.zero_crossing_rate(y)
            mfcc = librosa.feature.mfcc(y=y, sr=sr)

            # Write the row as a list so that no value is ever split on whitespace.
            row = [clip_name, np.mean(chroma_stft), np.mean(spec_cent),
                   np.mean(spec_bw), np.mean(rolloff), np.mean(zcr)]
            row.extend(np.mean(e) for e in mfcc)
            row.append(g)
            writer.writerow(row)

In [None]:
  # Spectrogram of one blues clip: STFT magnitude converted to dB and drawn
  # on a time / Hz grid with a colour bar.
  songname = 'C:/Users/bupes/OneDrive/Desktop/project/genres_original/blues/blues.00009.wav'
  y, sr = librosa.load(songname, mono=True, duration=30)
  X = librosa.stft(y)
  Xdb = librosa.amplitude_to_db(np.abs(X))
  plt.figure(figsize=(14, 5))
  librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
  plt.colorbar()
  print("Blues")

# Loading the saved dataset

In [None]:
# Read the feature table from data.csv and show it as the cell output.
df = pd.read_csv("data.csv")
df

In [None]:
# Quick look at what was loaded: dimensions, first rows and container type.
print("The shape of the dataframe is", df.shape)
print(df.head(5))
print(type(df))

# Remove every column (not row) that contains at least one missing value.
df = df.dropna(axis="columns")

In [None]:
# Features: every column except the first and the last.
x_df = df.iloc[:,1:-1]
# Target: the last remaining column.
y_df = df.iloc[:,-1]

In [None]:
# Show the feature frame as the cell output.
x_df

In [None]:
# Show the target series as the cell output.
y_df

# Train-Test split for the dataset

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 10% of the rows as the test set; random_state pins the shuffle.
x_train, x_test, y_train, y_test = train_test_split(x_df, y_df, test_size = 0.10, random_state = 0)


In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the features. The scaler learns its mean and variance from the
# training rows only; the test rows are transformed with those same statistics.
# Re-fitting on the test set would put the two sets on different scales and
# leak test information into preprocessing.
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

# Different SVM kernels

In [None]:
# Linear kernel
from sklearn.svm import SVC
# This cell is the linear baseline; the RBF kernel is trained in its own cell.
classifier = SVC(kernel = 'linear', random_state = 0)
classifier.fit(x_train, y_train)

In [None]:
# Predict labels for the held-out rows with the classifier fitted above.
y_pred = classifier.predict(x_test)

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of held-out rows predicted correctly, reported as a percentage.
accuracy = accuracy_score(y_test, y_pred)
# Built-in round() works for both plain floats and NumPy scalars, whereas the
# .round() method exists only on NumPy scalars.
print("The accuracy score of prediction with SVM Model is ",str(round(accuracy*100, 2))+"%")

In [None]:
# RBF kernel

In [None]:
# Fit an SVM with the RBF kernel and score it on the held-out rows.
classifier2 = SVC(kernel = 'rbf', random_state = 0)
classifier2.fit(x_train, y_train)
y_pred2 = classifier2.predict(x_test)
accuracy2 = accuracy_score(y_test, y_pred2)
# Built-in round() works for both plain floats and NumPy scalars.
print("The accuracy score of prediction with SVM Model is ",str(round(accuracy2*100, 2))+"%")

In [None]:
# Polynomial kernel: fit an SVM and score it on the held-out rows.
classifier1 = SVC(kernel = 'poly', random_state = 0)
classifier1.fit(x_train, y_train)
y_pred1 = classifier1.predict(x_test)
accuracy1 = accuracy_score(y_test, y_pred1)
# Built-in round() works for both plain floats and NumPy scalars.
print("The accuracy score of prediction with SVM Model is ",str(round(accuracy1*100, 2))+"%")

In [None]:
# Sigmoid kernel: fit an SVM and score it on the held-out rows.
classifier3 = SVC(kernel = 'sigmoid', random_state = 0)
classifier3.fit(x_train, y_train)
y_pred3 = classifier3.predict(x_test)
accuracy3 = accuracy_score(y_test, y_pred3)
# Built-in round() works for both plain floats and NumPy scalars.
print("The accuracy score of prediction with SVM Model is ",str(round(accuracy3*100, 2))+"%")

In [None]:
## Simple feedforward neural network

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Map genre names to integer ids. The encoder is fitted on the training labels
# and reused unchanged for the test labels.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# One input unit per feature column; one output unit per class the encoder
# found, so the network always matches the labels it is trained on.
your_input_shape = x_train.shape[1]
num_classes = len(label_encoder.classes_)

model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(your_input_shape,)))
model.add(Dense(num_classes, activation='softmax'))

# Integer labels pair with the sparse categorical loss. The held-out rows are
# passed as validation data, so per-epoch validation metrics are test metrics.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train_encoded, epochs=10, batch_size=32, validation_data=(x_test, y_test_encoded))

In [None]:
# Evaluate the model on the test set and report loss and accuracy
# (the two values returned by evaluate for the metrics compiled above).
test_loss, test_accuracy = model.evaluate(x_test, y_test_encoded)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')


In [None]:
# Simple 1D CNN with basic training and evaluation
import numpy as np
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import LabelEncoder

# x_train / x_test and y_train / y_test come from the split and scaling cells.

# Map genre names to integer ids (fitted on the training labels only).
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)
num_classes = len(label_encoder.classes_)

# Conv1D expects (samples, steps, channels): treat each feature vector as a
# one-channel sequence.
x_train_cnn = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
x_test_cnn = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

# One convolution + pooling stage followed by a dense classifier head whose
# width follows the number of encoded classes.
model_cnn = Sequential()
model_cnn.add(Conv1D(32, kernel_size=3, activation='relu', input_shape=(x_train.shape[1], 1)))
model_cnn.add(MaxPooling1D(pool_size=2))
model_cnn.add(Flatten())
model_cnn.add(Dense(128, activation='relu'))
model_cnn.add(Dense(num_classes, activation='softmax'))

# Compile and train the CNN model
model_cnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_cnn.fit(x_train_cnn, y_train_encoded, epochs=10, batch_size=32, validation_data=(x_test_cnn, y_test_encoded))

# Evaluate the CNN model on the test set
test_loss_cnn, test_accuracy_cnn = model_cnn.evaluate(x_test_cnn, y_test_encoded)
print(f'Test Loss (CNN): {test_loss_cnn}, Test Accuracy (CNN): {test_accuracy_cnn}')


In [None]:
# Deeper 1D CNN with dropout regularisation and a small grid search over
# learning rate and batch size.

# Encode genre names as integers, then one-hot encode them for the
# categorical cross-entropy loss.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

num_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(y_train_encoded, num_classes=num_classes)
y_test_categorical = to_categorical(y_test_encoded, num_classes=num_classes)

# Conv1D expects (samples, steps, channels).
x_train_cnn = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
x_test_cnn = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))


def create_cnn_model(input_shape, learning_rate=0.001, dropout_rate=0.25, num_classes=10):
    """Build and compile a three-stage 1D CNN classifier.

    Parameters
    ----------
    input_shape : shape of one sample, passed to the first Conv1D layer.
    learning_rate : learning rate given to the Adam optimizer.
    dropout_rate : rate of the Dropout layer after each hidden Dense layer.
    num_classes : width of the softmax output layer (defaults to 10).

    Returns
    -------
    The compiled Sequential model (categorical cross-entropy, accuracy metric).
    """
    model = Sequential()
    model.add(Conv1D(32, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))

    model.add(Conv1D(64, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))

    model.add(Conv1D(128, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))

    model.add(Flatten())

    model.add(Dense(256, activation='relu'))
    model.add(Dropout(dropout_rate))

    model.add(Dense(128, activation='relu'))
    model.add(Dropout(dropout_rate))

    model.add(Dense(num_classes, activation='softmax'))

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    return model


# Hyperparameter tuning
learning_rates = [0.001, 0.01, 0.1]
batch_sizes = [32, 64, 128]

best_model = None
best_accuracy = 0.0
best_params = None

# Use the shape of a single example in the dataset to determine the input shape
input_shape = x_train_cnn[0].shape

# NOTE(review): the winner is chosen by test accuracy, so the reported best
# score is optimistic; consider selecting on the validation split instead.
for lr in learning_rates:
    for batch_size in batch_sizes:
        cnn_model = create_cnn_model(input_shape, learning_rate=lr, num_classes=num_classes)

        # Train with 20% of the training rows held out for early stopping.
        cnn_history = cnn_model.fit(x_train_cnn, y_train_categorical, epochs=10, batch_size=batch_size,
                                     validation_split=0.2, callbacks=[EarlyStopping(patience=3)])

        # Evaluate on the test set
        test_loss, test_accuracy = cnn_model.evaluate(x_test_cnn, y_test_categorical)

        print(f"Learning Rate: {lr}, Batch Size: {batch_size}")
        print(f"Test Accuracy: {test_accuracy}")

        # Always keep the first run, so best_model is never None afterwards.
        if best_model is None or test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            best_model = cnn_model
            best_params = (lr, batch_size)

# Use the best model for predictions or further analysis
print("Best Model Summary:")
print(f"Best Learning Rate: {best_params[0]}, Best Batch Size: {best_params[1]}, Test Accuracy: {best_accuracy}")
best_model.summary()


In [None]:
# LeNet-5 style 1D CNN with a grid search over learning rate and batch size.

# Encode genre names as integers, then one-hot encode them.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

num_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(y_train_encoded, num_classes=num_classes)
y_test_categorical = to_categorical(y_test_encoded, num_classes=num_classes)

# Conv1D expects (samples, steps, channels).
x_train_cnn = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
x_test_cnn = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))


def create_lenet5_model(input_shape, learning_rate=0.001, dropout_rate=0.25, num_classes=10):
    """Build and compile a LeNet-5 style 1D CNN classifier.

    Parameters
    ----------
    input_shape : shape of one sample, passed to the first Conv1D layer.
    learning_rate : learning rate given to the Adam optimizer.
    dropout_rate : rate of the Dropout layer after each hidden Dense layer.
    num_classes : width of the softmax output layer (defaults to 10).

    Returns
    -------
    The compiled Sequential model (categorical cross-entropy, accuracy metric).
    """
    model = Sequential()
    model.add(Conv1D(6, kernel_size=5, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(16, kernel_size=5, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(120, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(84, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(num_classes, activation='softmax'))
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model


# Hyperparameter tuning
learning_rates = [0.001, 0.01, 0.1]
batch_sizes = [32, 64, 128]

best_model = None
best_accuracy = 0.0
best_params = None

# Use the shape of a single example in the dataset to determine the input shape
input_shape = x_train_cnn[0].shape

# NOTE(review): the winner is chosen by test accuracy, so the reported best
# score is optimistic; consider selecting on the validation split instead.
for lr in learning_rates:
    for batch_size in batch_sizes:
        cnn_model = create_lenet5_model(input_shape, learning_rate=lr, num_classes=num_classes)

        # Train with 20% of the training rows held out for early stopping.
        cnn_history = cnn_model.fit(x_train_cnn, y_train_categorical, epochs=10, batch_size=batch_size,
                                     validation_split=0.2, callbacks=[EarlyStopping(patience=3)])

        # Evaluate on the test set
        test_loss, test_accuracy = cnn_model.evaluate(x_test_cnn, y_test_categorical)

        print(f"Learning Rate: {lr}, Batch Size: {batch_size}")
        print(f"Test Accuracy: {test_accuracy}")

        # Always keep the first run, so best_model is never None afterwards.
        if best_model is None or test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            best_model = cnn_model
            best_params = (lr, batch_size)

# Use the best model for predictions or further analysis
print("Best Model Summary:")
print(f"Best Learning Rate: {best_params[0]}, Best Batch Size: {best_params[1]}, Test Accuracy: {best_accuracy}")
best_model.summary()

In [None]:
# AlexNet-style 1D CNN with a grid search over learning rate and batch size.
from keras.layers import GlobalAveragePooling1D

# Encode genre names as integers, then one-hot encode them.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

num_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(y_train_encoded, num_classes=num_classes)
y_test_categorical = to_categorical(y_test_encoded, num_classes=num_classes)

# Conv1D expects (samples, steps, channels).
x_train_cnn = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
x_test_cnn = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))


def create_alexnet_model(input_shape, learning_rate=0.001, dropout_rate=0.25):
    """Build and compile an AlexNet-style 1D CNN classifier.

    The output layer width is read from the cell-level `num_classes` variable.
    The output shape after each layer is printed while the model is assembled.

    Parameters
    ----------
    input_shape : shape of one sample, passed to the first Conv1D layer.
    learning_rate : learning rate given to the Adam optimizer.
    dropout_rate : rate of the Dropout layer after each hidden Dense layer.

    Returns
    -------
    The compiled Sequential model (categorical cross-entropy, accuracy metric).
    """
    model = Sequential()
    print(f"Input Shape: {input_shape}")
    model.add(Conv1D(96, kernel_size=11, strides=1, activation='relu', input_shape=input_shape))
    print(f"Conv1D_1 Output Shape: {model.output_shape}")
    model.add(MaxPooling1D(pool_size=3, strides=1))
    print(f"MaxPooling1D_1 Output Shape: {model.output_shape}")
    model.add(Conv1D(256, kernel_size=5, activation='relu'))
    print(f"Conv1D_2 Output Shape: {model.output_shape}")
    model.add(MaxPooling1D(pool_size=3, strides=1))
    print(f"MaxPooling1D_2 Output Shape: {model.output_shape}")
    model.add(Conv1D(384, kernel_size=3, activation='relu'))
    print(f"Conv1D_3 Output Shape: {model.output_shape}")
    model.add(Conv1D(384, kernel_size=3, activation='relu'))
    print(f"Conv1D_4 Output Shape: {model.output_shape}")
    model.add(Conv1D(256, kernel_size=3, activation='relu'))
    print(f"Conv1D_5 Output Shape: {model.output_shape}")
    # Average over the remaining steps instead of flattening them.
    model.add(GlobalAveragePooling1D())
    print(f"GlobalAveragePooling1D Output Shape: {model.output_shape}")
    model.add(Dense(4096, activation='relu'))
    print(f"Dense_1 Output Shape: {model.output_shape}")
    model.add(Dropout(dropout_rate))
    model.add(Dense(4096, activation='relu'))
    print(f"Dense_2 Output Shape: {model.output_shape}")
    model.add(Dropout(dropout_rate))
    model.add(Dense(num_classes, activation='softmax'))
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model


# Hyperparameter tuning
learning_rates = [0.001, 0.01, 0.1]
batch_sizes = [32, 64, 128]

best_model = None
best_accuracy = 0.0
best_params = None

# Use the shape of a single example in the dataset to determine the input shape
input_shape = x_train_cnn[0].shape

# NOTE(review): the winner is chosen by test accuracy, so the reported best
# score is optimistic; consider selecting on the validation split instead.
for lr in learning_rates:
    for batch_size in batch_sizes:
        alexnet_model = create_alexnet_model(input_shape, learning_rate=lr)

        # Train with 20% of the training rows held out for early stopping.
        alexnet_history = alexnet_model.fit(x_train_cnn, y_train_categorical, epochs=10, batch_size=batch_size,
                                           validation_split=0.2, callbacks=[EarlyStopping(patience=3)])

        # Evaluate on the test set
        test_loss, test_accuracy = alexnet_model.evaluate(x_test_cnn, y_test_categorical)

        print(f"Learning Rate: {lr}, Batch Size: {batch_size}")
        print(f"Test Accuracy: {test_accuracy}")

        # Always keep the first run, so best_model is never None afterwards.
        if best_model is None or test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            best_model = alexnet_model
            best_params = (lr, batch_size)

# Use the best model for predictions or further analysis
print("Best Model Summary:")
print(f"Best Learning Rate: {best_params[0]}, Best Batch Size: {best_params[1]}, Test Accuracy: {best_accuracy}")
best_model.summary()

In [None]:
# VGG-style 1D CNN with a grid search over learning rate and batch size.

# Encode genre names as integers, then one-hot encode them.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

num_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(y_train_encoded, num_classes=num_classes)
y_test_categorical = to_categorical(y_test_encoded, num_classes=num_classes)

# Conv1D expects (samples, steps, channels).
x_train_cnn = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
x_test_cnn = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))


def create_vggnet_model(input_shape, learning_rate=0.001, dropout_rate=0.25, num_classes=10):
    """Build and compile a small VGG-style 1D CNN classifier.

    Parameters
    ----------
    input_shape : shape of one sample, passed to the first Conv1D layer.
    learning_rate : learning rate given to the Adam optimizer.
    dropout_rate : rate of the Dropout layer after the hidden Dense layer.
    num_classes : width of the softmax output layer (defaults to 10).

    Returns
    -------
    The compiled Sequential model (categorical cross-entropy, accuracy metric).
    """
    model = Sequential()
    model.add(Conv1D(64, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(128, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(num_classes, activation='softmax'))
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model


# Hyperparameter tuning
learning_rates = [0.001, 0.01, 0.1]
batch_sizes = [32, 64, 128]

best_model = None
best_accuracy = 0.0
best_params = None

# Use the shape of a single example in the dataset to determine the input shape
input_shape = x_train_cnn[0].shape

# NOTE(review): the winner is chosen by test accuracy, so the reported best
# score is optimistic; consider selecting on the validation split instead.
for lr in learning_rates:
    for batch_size in batch_sizes:
        cnn_model = create_vggnet_model(input_shape, learning_rate=lr, num_classes=num_classes)

        # Train with 20% of the training rows held out for early stopping.
        cnn_history = cnn_model.fit(x_train_cnn, y_train_categorical, epochs=10, batch_size=batch_size,
                                     validation_split=0.2, callbacks=[EarlyStopping(patience=3)])

        # Evaluate on the test set
        test_loss, test_accuracy = cnn_model.evaluate(x_test_cnn, y_test_categorical)

        print(f"Learning Rate: {lr}, Batch Size: {batch_size}")
        print(f"Test Accuracy: {test_accuracy}")

        # Always keep the first run, so best_model is never None afterwards.
        if best_model is None or test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            best_model = cnn_model
            best_params = (lr, batch_size)

# Use the best model for predictions or further analysis
print("Best Model Summary:")
print(f"Best Learning Rate: {best_params[0]}, Best Batch Size: {best_params[1]}, Test Accuracy: {best_accuracy}")
best_model.summary()

In [None]:
# GoogLeNet-style 1D CNN (inception modules) with a grid search over learning
# rate and batch size. Reuses x_train_cnn / y_train_categorical and the test
# counterparts prepared by the preceding cell.

# `concatenate` is needed by the inception module; import it with this cell.
from keras.layers import concatenate


def create_googlenet_model(input_shape, learning_rate=0.001, dropout_rate=0.25, num_classes=10):
    """Build and compile a GoogLeNet-style 1D CNN with the functional API.

    Parameters
    ----------
    input_shape : shape of one sample, used for the Input layer.
    learning_rate : learning rate given to the Adam optimizer.
    dropout_rate : rate of the Dropout layer before the output layer.
    num_classes : width of the softmax output layer (defaults to 10).

    Returns
    -------
    The compiled Model (categorical cross-entropy, accuracy metric).
    """
    def inception_module(layer_in, f1, f2_in, f2_out, f3_in, f3_out, f4_out):
        """Run four parallel branches on `layer_in` and join them on the channel axis.

        Branches: a 1-wide convolution (f1 filters); a 1-wide reduction (f2_in)
        followed by a 3-wide convolution (f2_out); a 1-wide reduction (f3_in)
        followed by a 5-wide convolution (f3_out); and a stride-1 max pooling
        followed by a 1-wide convolution (f4_out).
        """
        conv1 = Conv1D(f1, kernel_size=1, activation='relu')(layer_in)
        conv3_1 = Conv1D(f2_in, kernel_size=1, activation='relu')(layer_in)
        conv3_3 = Conv1D(f2_out, kernel_size=3, padding='same', activation='relu')(conv3_1)
        conv5_1 = Conv1D(f3_in, kernel_size=1, activation='relu')(layer_in)
        conv5_5 = Conv1D(f3_out, kernel_size=5, padding='same', activation='relu')(conv5_1)
        pool = MaxPooling1D(pool_size=3, strides=1, padding='same')(layer_in)
        pool_conv = Conv1D(f4_out, kernel_size=1, activation='relu')(pool)
        # 'same' padding and stride 1 keep every branch at the same length, so
        # the branches can be joined along the last (channel) axis.
        layer_out = concatenate([conv1, conv3_3, conv5_5, pool_conv], axis=-1)
        return layer_out

    # Stem: two convolution + pooling stages before the inception modules.
    input_layer = Input(shape=input_shape)
    conv1 = Conv1D(64, kernel_size=7, strides=2, activation='relu')(input_layer)
    pool1 = MaxPooling1D(pool_size=3, strides=2, padding='same')(conv1)
    conv2 = Conv1D(192, kernel_size=3, activation='relu')(pool1)
    pool2 = MaxPooling1D(pool_size=3, strides=2, padding='same')(conv2)

    inception1 = inception_module(pool2, 64, 128, 128, 32, 32, 32)

    pool3 = MaxPooling1D(pool_size=2, strides=2, padding='same')(inception1)
    inception2 = inception_module(pool3, 128, 192, 192, 96, 96, 96)
    pool4 = MaxPooling1D(pool_size=2, strides=2, padding='same')(inception2)
    inception3 = inception_module(pool4, 192, 384, 384, 192, 192, 192)
    inception4 = inception_module(inception3, 256, 384, 384, 192, 192, 192)
    pool5 = MaxPooling1D(pool_size=2, strides=2, padding='same')(inception4)

    # Classifier head.
    flatten = Flatten()(pool5)
    dense1 = Dense(1024, activation='relu')(flatten)
    dropout = Dropout(dropout_rate)(dense1)
    output_layer = Dense(num_classes, activation='softmax')(dropout)

    model = Model(inputs=input_layer, outputs=output_layer)
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model


# Hyperparameter tuning
learning_rates = [0.001, 0.01, 0.1]
batch_sizes = [32, 64, 128]

best_model = None
best_accuracy = 0.0
best_params = None

# Use the shape of a single example in the dataset to determine the input shape
input_shape = x_train_cnn[0].shape

# NOTE(review): the winner is chosen by test accuracy, so the reported best
# score is optimistic; consider selecting on the validation split instead.
for lr in learning_rates:
    for batch_size in batch_sizes:
        # The output width follows the one-hot label matrix.
        cnn_model = create_googlenet_model(input_shape, learning_rate=lr,
                                           num_classes=y_train_categorical.shape[1])

        # Train with 20% of the training rows held out for early stopping.
        cnn_history = cnn_model.fit(x_train_cnn, y_train_categorical, epochs=10, batch_size=batch_size,
                                     validation_split=0.2, callbacks=[EarlyStopping(patience=3)])

        # Evaluate on the test set
        test_loss, test_accuracy = cnn_model.evaluate(x_test_cnn, y_test_categorical)

        print(f"Learning Rate: {lr}, Batch Size: {batch_size}")
        print(f"Test Accuracy: {test_accuracy}")

        # Always keep the first run, so best_model is never None afterwards.
        if best_model is None or test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            best_model = cnn_model
            best_params = (lr, batch_size)

# Use the best model for predictions or further analysis
print("Best Model Summary:")
print(f"Best Learning Rate: {best_params[0]}, Best Batch Size: {best_params[1]}, Test Accuracy: {best_accuracy}")
best_model.summary()


# 2D CNN


In [None]:
# Function to create and save spectrogram images
def save_spectrogram_image(y, sr, genre, filename):
    """Render the spectrogram of `y` without axes and save it to `filename`.

    Parameters
    ----------
    y : audio samples passed to plt.specgram.
    sr : accepted for interface compatibility; not used by the body.
    genre : accepted for interface compatibility; not used by the body.
    filename : destination image path; its parent directory is created when
        it does not exist yet.
    """
    # savefig does not create missing directories.
    target_dir = os.path.dirname(filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    plt.interactive(False)
    fig = plt.figure(figsize=(10, 5))
    try:
        plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap='inferno', sides='default', mode='default', scale='dB')
        plt.axis('off')
        plt.savefig(filename, dpi=100, bbox_inches='tight', pad_inches=0)
    finally:
        # Close the figure even if rendering or saving fails, so figures do
        # not pile up when this is called in a loop.
        plt.close(fig)

In [None]:
# Data processing: for every clip, save a spectrogram image and write one CSV
# row with the mean features, the genre label and the image path.
GENRES_DIR = 'C:/Users/bupes/OneDrive/Desktop/project/genres_original/'
IMAGES_DIR = 'C:/Users/bupes/OneDrive/Desktop/images/'
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()

# The header names exactly the values written per row, in the same order, so
# 'label' holds the genre and 'image_path' holds the spectrogram location.
header = ['filename', 'chroma_stft', 'spectral_centroid', 'spectral_bandwidth',
          'rolloff', 'zero_crossing_rate']
header += [f'mfcc{i}' for i in range(1, 21)]
header += ['label', 'image_path']

# Open the file once for the whole extraction instead of once per row.
with open('data_with_image_paths.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)
    for g in genres:
        # os.listdir order is arbitrary; sorting keeps the row order reproducible.
        for filename in sorted(os.listdir(GENRES_DIR + g)):
            songname = GENRES_DIR + g + '/' + filename
            y, sr = librosa.load(songname, mono=True, duration=30)
            chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
            spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
            spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
            rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
            zcr = librosa.feature.zero_crossing_rate(y)
            mfcc = librosa.feature.mfcc(y=y, sr=sr)

            # Save the spectrogram image next to the other images, swapping
            # the audio extension for .png.
            image_path = IMAGES_DIR + os.path.splitext(filename)[0] + '.png'
            save_spectrogram_image(y, sr, g, image_path)

            # Write the row as a list so that no value is ever split on whitespace.
            row = [filename, np.mean(chroma_stft), np.mean(spec_cent),
                   np.mean(spec_bw), np.mean(rolloff), np.mean(zcr)]
            row.extend(np.mean(e) for e in mfcc)
            row += [g, image_path]
            writer.writerow(row)

In [None]:
# Load the saved dataset with image paths and preview its first rows.
image_metadata = pd.read_csv("data_with_image_paths.csv")
image_metadata.head()

In [None]:
# Load the spectrogram images and build the train/test arrays for the 2D CNN.

# Each CSV row was written as [filename, features..., genre, image_path].
# Pick the genre and the path by position among the populated columns, so the
# lookup does not depend on how the header names line up with the row values.
populated = image_metadata.dropna(axis=1, how='all')
image_paths = populated.iloc[:, -1]
genre_labels = populated.iloc[:, -2]

# Load images and convert to numpy array
X = np.array([plt.imread(img_path) for img_path in image_paths])

# Encode genre names as integers, then one-hot encode them.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(genre_labels)
y_categorical = to_categorical(y_encoded, num_classes=len(label_encoder.classes_))

# Train-Test split for the dataset
x_train, x_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=0)

In [None]:
# 2D CNN used for the spectrogram images
def create_cnn_model(input_shape, num_classes, learning_rate=0.001, dropout_rate=0.25):
    """Assemble and compile a two-stage 2D CNN image classifier.

    Parameters
    ----------
    input_shape : shape of one image, passed to the first Conv2D layer.
    num_classes : width of the softmax output layer.
    learning_rate : learning rate given to the Adam optimizer.
    dropout_rate : rate of the Dropout layer before the output layer.

    Returns
    -------
    The compiled Sequential model (categorical cross-entropy, accuracy metric).
    """
    stack = [
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(dropout_rate),
        Dense(num_classes, activation='softmax'),
    ]
    network = Sequential(stack)
    network.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [None]:
# Hyperparameter tuning for the 2D CNN: try every learning rate / batch size
# pair and keep the model with the highest test accuracy.
learning_rates = [0.001, 0.01, 0.1]
batch_sizes = [32, 64, 128]

best_model = None
best_accuracy = 0.0
best_params = None

# NOTE(review): the winner is chosen by test accuracy, so the reported best
# score is optimistic; consider selecting on the validation split instead.
for lr in learning_rates:
    for batch_size in batch_sizes:
        cnn_model = create_cnn_model(input_shape=x_train[0].shape, num_classes=len(label_encoder.classes_), learning_rate=lr)

        # Train with 20% of the training images held out for early stopping.
        cnn_history = cnn_model.fit(x_train, y_train, epochs=10, batch_size=batch_size,
                                    validation_split=0.2, callbacks=[EarlyStopping(patience=3)])

        # Evaluate on the test set
        test_loss, test_accuracy = cnn_model.evaluate(x_test, y_test)

        print(f"Learning Rate: {lr}, Batch Size: {batch_size}")
        print(f"Test Accuracy: {test_accuracy}")

        # Always keep the first run, so best_model is never None afterwards.
        if best_model is None or test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            best_model = cnn_model
            best_params = (lr, batch_size)

# Use the best model for predictions or further analysis
print("Best Model Summary:")
print(f"Best Learning Rate: {best_params[0]}, Best Batch Size: {best_params[1]}, Test Accuracy: {best_accuracy}")
best_model.summary()