In [1]:
# feature extractoring and preprocessing data
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
import csv

# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

#Keras
import keras

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Extracting music and features
'''
Dataset
We use GTZAN genre collection dataset for classification. 

The dataset consists of 10 genres i.e

Blues
Classical
Country
Disco
Hiphop
Jazz
Metal
Pop
Reggae
Rock
Each genre contains 100 songs. Total dataset: 1000 songs
'''

'\nDataset\nWe use GTZAN genre collection dataset for classification. \n\nThe dataset consists of 10 genres i.e\n\nBlues\nClassical\nCountry\nDisco\nHiphop\nJazz\nMetal\nPop\nReggae\nRock\nEach genre contains 100 songs. Total dataset: 1000 songs\n'

In [3]:
# Extracting the Spectrogram for every Audio

In [4]:
# cmap = plt.get_cmap('inferno')

# plt.figure(figsize=(10,10))
# genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
# for g in genres:
#     pathlib.Path(f'img_data/{g}').mkdir(parents=True, exist_ok=True)     
#     for filename in os.listdir(f'./MIR/genres/{g}'):
#         songname = f'./MIR/genres/{g}/{filename}'
#         y, sr = librosa.load(songname, mono=True, duration=5)
#         plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB');
#         plt.axis('off');
#         plt.savefig(f'img_data/{g}/{filename[:-3].replace(".", "")}.png')
#         plt.clf()

In [5]:
'''
Extracting features from Spectrogram
We will extract

Mel-frequency cepstral coefficients (MFCC)(20 in number)
Spectral Centroid,
Zero Crossing Rate
Chroma Frequencies
Spectral Roll-off.
'''

'\nExtracting features from Spectrogram\nWe will extract\n\nMel-frequency cepstral coefficients (MFCC)(20 in number)\nSpectral Centroid,\nZero Crossing Rate\nChroma Frequencies\nSpectral Roll-off.\n'

In [6]:
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

In [7]:
# Writing data to csv file

In [8]:
# file = open('data.csv', 'w', newline='')
# with file:
#     writer = csv.writer(file)
#     writer.writerow(header)
# genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
# for g in genres:
#     for filename in os.listdir(f'./MIR/genres/{g}'):
#         songname = f'./MIR/genres/{g}/{filename}'
#         y, sr = librosa.load(songname, mono=True, duration=30)
#         chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
#         rmse = librosa.feature.rmse(y=y)
#         spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
#         spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
#         rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
#         zcr = librosa.feature.zero_crossing_rate(y)
#         mfcc = librosa.feature.mfcc(y=y, sr=sr)
#         to_append = f'{filename} {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
#         for e in mfcc:
#             to_append += f' {np.mean(e)}'
#         to_append += f' {g}'
#         file = open('data.csv', 'a', newline='')
#         with file:
#             writer = csv.writer(file)
#             writer.writerow(to_append.split())


In [9]:
# Analysing the Data in Pandas

In [10]:
data = pd.read_csv('data.csv')
data.head()

Unnamed: 0,filename,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,blues.00060.au,0.430894,0.196222,1946.565652,1979.909934,3955.867746,0.097454,-67.77098,111.704184,-34.646105,...,12.295832,-12.477988,1.681278,-5.142068,4.644002,-6.919217,1.040718,-4.736871,-0.660037,blues
1,blues.00082.au,0.338896,0.25135,2141.461656,2168.01556,4627.997015,0.105151,-29.362093,108.66795,-25.573165,...,5.456504,-7.687713,7.4106,-11.319177,7.229288,-9.466552,1.930059,-6.328476,-1.304812,blues
2,blues.00030.au,0.263016,0.170081,1379.081742,2004.00085,3015.831764,0.039376,-206.98759,117.781468,23.256245,...,-8.015467,-17.616342,-8.138554,-8.646157,-15.538988,-15.331506,-9.664872,-10.10331,-17.8351,blues
3,blues.00007.au,0.307921,0.131785,1451.754147,1577.369917,2955.348796,0.061435,-179.395447,136.459244,-26.656359,...,-6.954827,-3.544535,-8.051242,-8.959537,-8.424337,-10.558885,-10.788159,-4.693749,-8.638613,blues
4,blues.00064.au,0.33248,0.117413,2553.232415,2280.128669,5148.102203,0.146852,-85.15025,88.806722,-16.322611,...,8.478453,-19.590226,6.41321,-13.779667,6.112037,-13.154644,3.933456,-7.615454,3.752626,blues


In [11]:
data.shape

(1000, 28)

In [12]:
# Dropping unneccesary columns
data = data.drop(['filename'],axis=1)

In [13]:
# Encoding the Labels

In [14]:
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)

In [15]:
# Scaling the Feature columns

In [16]:
scaler = StandardScaler()
X = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))

In [17]:
# Dividing data into training and Testing set

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [19]:
len(y_train)

800

In [20]:
len(y_test)

200

In [21]:
X_train[10]

array([-0.31095711, -0.63130939, -0.89219752, -0.88085844, -0.84994915,
       -0.92986696, -0.49200679,  0.83060467, -0.55356844,  0.6038008 ,
       -0.12639531, -0.15213737, -0.14892221,  0.05893087, -0.47106534,
       -0.20237211, -0.50131528,  0.10466511,  0.18508424, -0.2904705 ,
       -0.85180056, -0.16436001, -0.56321083,  0.17494237,  0.81199778,
       -1.07244846])

In [22]:
# Classification with Keras
# Building our Network

In [23]:
from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],)))

model.add(layers.Dense(128, activation='relu'))

model.add(layers.Dense(64, activation='relu'))

model.add(layers.Dense(10, activation='softmax'))

2022-07-16 11:11:33.033381: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [24]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [25]:
history = model.fit(X_train,
                    y_train,
                    epochs=20,
                    batch_size=128)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [26]:
test_loss, test_acc = model.evaluate(X_test,y_test)



In [27]:
print('test_acc: ',test_acc)

test_acc:  0.6299999952316284


In [28]:
# Test accuracy is less than training data accuracy. This hints at Overfitting.

In [29]:
# Validating our approach
# Let's set apart 200 samples in our training data to use as a validation set:

In [30]:
x_val = X_train[:200]
partial_x_train = X_train[200:]

y_val = y_train[:200]
partial_y_train = y_train[200:]

In [31]:
# Now let's train our network for 30 epochs:

In [32]:
model = models.Sequential()
model.add(layers.Dense(512, activation='relu', input_shape=(X_train.shape[1],)))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(partial_x_train,
          partial_y_train,
          epochs=30,
          batch_size=512,
          validation_data=(x_val, y_val))
results = model.evaluate(X_test, y_test)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [33]:
results

[1.0999761819839478, 0.6150000095367432]

In [34]:
# Predictions on Test Data

In [35]:
predictions = model.predict(X_test)



In [36]:
predictions[0].shape

(10,)

In [37]:
np.sum(predictions[0])

1.0

In [38]:
np.argmax(predictions[0])

3

In [44]:
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
print(genres[3])
print(genres[y_test[0]])

disco
disco
