In [None]:
import glob
import os
import warnings

import keras
import librosa
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.layers import Dense, Input, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, SimpleRNN
from keras.layers import Conv1D, LSTM
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential
from keras.regularizers import l2
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier

In [None]:
#1 2D CONVOLUTIONAL NEURAL NETWORK -- MAX ACC = 67%

# Directory of images the CNN is trained on, and the size they are resized to
data_dir = '../input/gtzan-dataset-music-genre-classification/Data/images_original'
img_height = 255
img_width = 255
batch_size = 10

# Create the training and validation datasets from the same directory.
# The two calls differ only in `subset`; building them from one expression
# guarantees they share validation_split and seed, which they must for the
# split to be consistent between the two subsets.
train_ds, val_ds = [
    tf.keras.preprocessing.image_dataset_from_directory(
        data_dir,
        validation_split=0.2,
        subset=subset,
        seed=123,
        # image_size is (height, width); the width constant is now actually used
        image_size=(img_height, img_width),
        batch_size=batch_size)
    for subset in ("training", "validation")
]

# Creating the model
def build_CNN_model():
    """Build and compile the 2D convolutional classifier.

    Architecture: pixel rescaling to [0, 1], three Conv2D -> MaxPooling2D ->
    BatchNormalization -> Dropout(0.5) stages (32, 64, 128 filters), then
    Flatten -> Dense(100, relu) -> Dense(10, softmax).

    The input shape is taken from the module-level `img_height` / `img_width`.

    Returns:
        The compiled Sequential model. It is compiled with sparse categorical
        cross-entropy, so targets must be integer class ids.
    """
    model = Sequential()
    model.add(tf.keras.layers.experimental.preprocessing.Rescaling(1./255, input_shape=(img_height, img_width, 3)))

    # (filters, strides) for each convolutional stage. Only the first layer of
    # a Sequential model declares the input shape, so none is repeated here.
    for filters, strides in ((32, 2), (64, 2), (128, 1)):
        model.add(Conv2D(filters, kernel_size=(3, 3), padding='same', strides=strides, activation='relu'))
        model.add(MaxPooling2D(pool_size=(4, 4)))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))

    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(10, activation='softmax'))

    # The output layer already applies softmax, so the loss receives
    # probabilities, not logits. from_logits=True would apply softmax twice.
    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False), optimizer='adam', metrics=['accuracy'])

    return model

# Build a fresh CNN and train it, validating on the held-out split each epoch
cnn_model = build_CNN_model()
cnn_model.fit(
    x=train_ds,
    validation_data=val_ds,
    epochs=150,
)

# Final loss / accuracy on the validation split
cnn_model.evaluate(x=val_ds)

In [None]:
# DATA PROCESSING FOR THE MODELS
path = '../input/gtzan-dataset-music-genre-classification/Data/features_30_sec.csv'
file = pd.read_csv(path)
file = file.drop(['filename', 'length'], axis = 1)
y = file.label
raw_features = np.array(file.drop(columns = ['label']), dtype = float)

# Split BEFORE scaling so the scaler's mean/variance come from the training
# rows only; fitting on the full table would leak test statistics into training.
train_X, test_X, train_y, test_y = train_test_split(raw_features, y, test_size=0.2, random_state=0, shuffle=True)

# Standard scaling (fit on train, applied to both splits)
scaler = StandardScaler()
train_X = scaler.fit_transform(train_X)
test_X = scaler.transform(test_X)
# Scaled copy of every row, kept under the name X as before
X = scaler.transform(raw_features)

# Label Encoding: fit on all labels so a class that happens to be missing from
# the training split cannot make transform(test_y) fail.
label_encoder = LabelEncoder()
label_encoder.fit(y)
transformed_train_y = label_encoder.transform(train_y)
transformed_test_y = label_encoder.transform(test_y)

# One-hot Encoding: both frames are aligned to the encoder's class order so
# column k means the same class in train, test and the integer encoding.
# Float dtype keeps the targets numeric regardless of pandas' default.
tr_y = pd.get_dummies(train_y, dtype=float).reindex(columns=label_encoder.classes_, fill_value=0.0)
te_y = pd.get_dummies(test_y, dtype=float).reindex(columns=label_encoder.classes_, fill_value=0.0)

In [None]:
# 2 1D Convolutional NN --- MAX ACC = 66%

# Conv1D consumes (samples, steps, channels): every feature becomes one step
# with a single channel. Sizes are derived from the data instead of being
# hard-coded, and the reshape is a no-op if the input is already 3-D.
tensor_train_X = tf.reshape(tf.convert_to_tensor(train_X), (train_X.shape[0], -1, 1))
tensor_test_X = tf.reshape(tf.convert_to_tensor(test_X), (test_X.shape[0], -1, 1))
conv_input_shape = tuple(tensor_train_X.shape.as_list()[1:])

model = Sequential()

# NOTE(review): with unpadded strided convolutions (kernel 8, 6, 11; stride 2)
# the third layer needs at least 11 steps, i.e. at least 58 input features.
# Confirm the feature count produced by the preprocessing cell.
model.add(Conv1D(128, 8, strides=2, kernel_regularizer=l2(0.02),
                 input_shape=conv_input_shape, activation='sigmoid'))
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Conv1D(64, 6, strides=2, kernel_regularizer=l2(0.02), activation='sigmoid'))
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Conv1D(32, 11, strides=2, kernel_regularizer=l2(0.02), activation='sigmoid'))
model.add(BatchNormalization())
model.add(Dropout(0.1))

# Classification head
model.add(Flatten())
model.add(Dense(128, activation='sigmoid'))
model.add(BatchNormalization())

model.add(Dense(64, activation='sigmoid'))
model.add(BatchNormalization())

model.add(Dense(10, activation='softmax'))

# One-hot targets (tr_y / te_y) -> categorical cross-entropy
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train on the 3-D tensors built above; the 2-D train_X has no channel axis
# and cannot be fed to Conv1D.
History = model.fit(tensor_train_X, tr_y, batch_size=20, epochs=150, verbose=1,
                    validation_data=(tensor_test_X, te_y))

In [None]:
# Creating the stopping-by-accuracy callback
from keras.callbacks import Callback
class EarlyStoppingByAccuracy(Callback):
    """Keras callback that stops training once a metric reaches a threshold.

    Args:
        monitor: Key to look up in the per-epoch ``logs`` dict
            (e.g. ``'accuracy'`` or ``'val_accuracy'``).
        value: Threshold; training stops after the first epoch in which the
            monitored metric is greater than or equal to it.
        verbose: When greater than 0, print a message on stopping.
    """

    def __init__(self, monitor='accuracy', value=0.98, verbose=0):
        # Zero-argument super() runs Callback.__init__;
        # super(Callback, self) would skip it and start the lookup after Callback.
        super().__init__()
        self.monitor = monitor
        self.value = value
        self.verbose = verbose

    def on_epoch_end(self, epoch, logs=None):
        """Check the monitored metric and request a stop if it is high enough."""
        logs = logs or {}
        current = logs.get(self.monitor)
        if current is None:
            warnings.warn("Early stopping requires %s available!" % self.monitor, RuntimeWarning)
            # Nothing to compare against; None >= float would raise TypeError.
            return

        if current >= self.value:
            if self.verbose > 0:
                print("Epoch %05d: early stopping THR" % epoch)
            self.model.stop_training = True

In [None]:
# 3 Multi_layered Perceptron model ---- MAX ACCURACY = 83.5%
def build_MLP_model():
    """Build and compile the fully-connected classifier.

    Four relu Dense layers (265, 128, 128, 64 units), each followed by
    BatchNormalization, then a 10-unit softmax output. Compiled with sparse
    categorical cross-entropy, so targets must be integer class ids.

    Returns:
        The compiled Sequential model; no input shape is declared here.
    """
    # NOTE(review): 265 may be a typo for 256 -- left unchanged, confirm.
    hidden_units = (265, 128, 128, 64)

    stack = []
    for units in hidden_units:
        stack.append(Dense(units, activation='relu'))
        stack.append(BatchNormalization())
    stack.append(Dense(10, activation='softmax'))

    mlp = Sequential(stack)
    mlp.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return mlp

MLP_model = build_MLP_model()

# Train on the integer-encoded labels; the callback halts training after the
# first epoch whose val_accuracy is at least 0.82.
MLP_model.fit(
    train_X,
    transformed_train_y,
    validation_data=(test_X, transformed_test_y),
    callbacks=[EarlyStoppingByAccuracy(value=0.82, monitor='val_accuracy')],
    epochs=150,
    batch_size=10,
)

    
    

# MLP_model_loaded =  keras.models.load_model('../input/my-model/third_83.5_acc model.h5')
# MLP_model.summary()

In [None]:
# 4 Creating some classical classifier models (LR, RandomForest ... etc.)
rf = RandomForestClassifier(random_state = 0)
dt = DecisionTreeClassifier(max_depth = 18, random_state = 0)
lr = LogisticRegression(max_iter=10000)
# NOTE(review): the original passed an undefined name `i` to both estimators
# below. max_iter mirrors the LogisticRegression setting and n_neighbors is
# scikit-learn's default -- confirm the intended values.
s = LinearSVC(random_state = 0, max_iter=10000)
KN = KNeighborsClassifier(n_neighbors = 5)
gb = GaussianNB()

# Hard-voting ensemble.
# NOTE(review): the original listed ('x', x) with `x` undefined; the LinearSVC
# is used as the fourth voter here -- confirm which estimator was intended.
classi = VotingClassifier(estimators=[('lr', lr), ('svc', s), ('dt', dt), ('rf', rf)])
classi.fit(train_X, transformed_train_y)

# Predicted integer class ids for the test split
preds_lr = classi.predict(test_X)

train_acc = round(classi.score(train_X, transformed_train_y) * 100, 2)
test_acc = round(classi.score(test_X, transformed_test_y) * 100, 2)
print("Train Accuracy: ", train_acc)
print("Test Accuracy: ", test_acc)

In [None]:
# 5 Creating an LSTM classifier model --- MAX ACCURACY = 71%
# Recurrent layers take 3-D input, so each feature becomes one timestep
# carrying a single value. The sample count is read from the data rather than
# hard-coded, and -1 lets TensorFlow infer the number of timesteps, so the
# cell works for any split size and can be re-run without effect.
train_X = tf.reshape(tf.convert_to_tensor(train_X), (train_X.shape[0], -1, 1))
test_X = tf.reshape(tf.convert_to_tensor(test_X), (test_X.shape[0], -1, 1))
def build_lstm_model():
    """Build and compile a two-layer stacked LSTM classifier.

    The first LSTM (128 units) returns its full output sequence so the second
    LSTM (32 units) can consume it; the second returns only its final output,
    which feeds a 10-unit softmax layer. Compiled with categorical
    cross-entropy, so targets must be one-hot encoded.

    Returns:
        The compiled Sequential model; no input shape is declared here.
    """
    # Dropout settings shared by both recurrent layers
    recurrent_kwargs = dict(dropout=0.05, recurrent_dropout=0.35)

    model = Sequential()
    model.add(LSTM(units=128, return_sequences=True, **recurrent_kwargs))
    model.add(LSTM(units=32, return_sequences=False, **recurrent_kwargs))
    model.add(Dense(units=10, activation="softmax"))
    model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=["accuracy"])
    return model

lstm_model = build_lstm_model()

# Fit on the one-hot targets, validating against the test split every epoch
lstm_model.fit(
    train_X,
    tr_y,
    validation_data=(test_X, te_y),
    batch_size=32,
    epochs=150,
)

In [None]:
# 6 Creating an RNN  classifier model --- MAX ACCURACY = 63%
# Shape the features as (samples, timesteps, 1). Sizes are derived from the
# data, and the reshape is a no-op if an earlier cell already produced 3-D input.
train_X = tf.reshape(tf.convert_to_tensor(train_X), (train_X.shape[0], -1, 1))
test_X = tf.reshape(tf.convert_to_tensor(test_X), (test_X.shape[0], -1, 1))

model = keras.Sequential()
# Only the first layer declares the input shape; later layers infer theirs.
model.add(SimpleRNN(units=256, input_shape=(train_X.shape[1], train_X.shape[2]), activation="tanh", dropout=0.2, return_sequences=True))
model.add(SimpleRNN(units=128, activation="tanh", dropout=0.2))
model.add(Dense(64, activation="relu"))
model.add(Dense(10, activation="softmax"))

# Softmax output with one-hot targets: categorical cross-entropy, matching the
# other one-hot models in this notebook (the original used mean squared error).
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
model.summary()
model.fit(train_X, tr_y, batch_size=100, epochs=150, validation_data=(test_X, te_y))

In [None]:
import numpy as np
import glob
import librosa
def create_spectogram(path):
    """Load an audio file and return its mel spectrogram in decibels.

    Args:
        path: Location of the audio file, passed straight to librosa.load.

    Returns:
        The transpose of the dB-scaled mel spectrogram (scaled relative to
        the spectrogram's maximum).
    """
    signal, sample_rate = librosa.load(path)
    mel_power = librosa.feature.melspectrogram(
        y=signal,
        sr=sample_rate,
        n_fft=2048,
        hop_length=1024,
    )
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    return mel_db.T
path = '../input/gtzan-dataset-music-genre-classification/Data/genres_original'

# List the genre folders once and sort them: os.listdir gives no ordering
# guarantee, so sorting makes the genre <-> integer mapping reproducible.
genres = sorted(os.listdir(path))
genre_to_nums = {genre: idx for idx, genre in enumerate(genres)}
nums_to_genre = {idx: genre for idx, genre in enumerate(genres)}

# Flatten to one (wav path, genre) pair per track so that a single running
# index addresses the output arrays. Enumerating inside each folder restarts
# at 0 and makes every genre overwrite the previous one's rows.
wav_files = [
    (wav_file, genre)
    for genre in genres
    for wav_file in sorted(glob.glob(path + "/" + genre + "/*.wav"))
]

# Fixed per-track shape: 640 frames x 128 mel bands
n_frames, n_mels = 640, 128
data = np.empty((len(wav_files), n_frames, n_mels))
labels = np.empty((len(wav_files), 1))

for row, (wav_file, genre) in enumerate(wav_files):
    # Crop tracks longer than n_frames; reshape() cannot do this because it
    # requires the element count to match exactly.
    spect = create_spectogram(wav_file)[:n_frames]
    # Shorter tracks are padded at the end with the clip's own minimum level.
    data[row, :, :] = spect.min()
    data[row, :spect.shape[0], :] = spect
    labels[row, :] = genre_to_nums[genre]
        
# def plot_spect(track_id):
#     spect = create_spectogram(track_id)
#     print(spect.shape)
#     plt.figure(figsize=(10, 4))
#     librosa.display.specshow(spect.T, y_axis='mel', fmax=8000, x_axis='time')
#     plt.colorbar(format='%+2.0f dB')
#     plt.show()
    
