In [405]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
import seaborn as sns

In [406]:
# File Directory [Change to your own directory]
text_dir = '/Users/shounakkelkar/Downloads/Data/features_3_sec.csv'
# Read the csv and convert to dataframe(data)
data = pd.read_csv(text_dir)
# Print the row and columns of the dataframe
print(data.shape)
# Print the first 5 rows of the datafranme
print(data.head())

(9990, 60)
            filename  length  chroma_stft_mean  chroma_stft_var  rms_mean  \
0  blues.00000.0.wav   66149          0.335406         0.091048  0.130405   
1  blues.00000.1.wav   66149          0.343065         0.086147  0.112699   
2  blues.00000.2.wav   66149          0.346815         0.092243  0.132003   
3  blues.00000.3.wav   66149          0.363639         0.086856  0.132565   
4  blues.00000.4.wav   66149          0.335579         0.088129  0.143289   

    rms_var  spectral_centroid_mean  spectral_centroid_var  \
0  0.003521             1773.065032          167541.630869   
1  0.001450             1816.693777           90525.690866   
2  0.004620             1788.539719          111407.437613   
3  0.002448             1655.289045          111952.284517   
4  0.001701             1630.656199           79667.267654   

   spectral_bandwidth_mean  spectral_bandwidth_var  ...  mfcc16_var  \
0              1972.744388           117335.771563  ...   39.687145   
1          

In [407]:
# Extract SongID
# Extract songID for every 10th row
song_ids = data['filename'].iloc[::10].apply(lambda x: x.split('.')[1])
# Display the result
print(song_ids)

0       00000
10      00001
20      00002
30      00003
40      00004
        ...  
9940    00095
9950    00096
9960    00097
9970    00098
9980    00099
Name: filename, Length: 999, dtype: object


In [408]:
# Drop unused data (length)
# Filename is needed to do segmentation of the song
# Df is a copy from the original dataframe(data).
df = data.copy()
df = df.drop(['length'], axis = 1)

print(df.columns.tolist())

['filename', 'chroma_stft_mean', 'chroma_stft_var', 'rms_mean', 'rms_var', 'spectral_centroid_mean', 'spectral_centroid_var', 'spectral_bandwidth_mean', 'spectral_bandwidth_var', 'rolloff_mean', 'rolloff_var', 'zero_crossing_rate_mean', 'zero_crossing_rate_var', 'harmony_mean', 'harmony_var', 'perceptr_mean', 'perceptr_var', 'tempo', 'mfcc1_mean', 'mfcc1_var', 'mfcc2_mean', 'mfcc2_var', 'mfcc3_mean', 'mfcc3_var', 'mfcc4_mean', 'mfcc4_var', 'mfcc5_mean', 'mfcc5_var', 'mfcc6_mean', 'mfcc6_var', 'mfcc7_mean', 'mfcc7_var', 'mfcc8_mean', 'mfcc8_var', 'mfcc9_mean', 'mfcc9_var', 'mfcc10_mean', 'mfcc10_var', 'mfcc11_mean', 'mfcc11_var', 'mfcc12_mean', 'mfcc12_var', 'mfcc13_mean', 'mfcc13_var', 'mfcc14_mean', 'mfcc14_var', 'mfcc15_mean', 'mfcc15_var', 'mfcc16_mean', 'mfcc16_var', 'mfcc17_mean', 'mfcc17_var', 'mfcc18_mean', 'mfcc18_var', 'mfcc19_mean', 'mfcc19_var', 'mfcc20_mean', 'mfcc20_var', 'label']


In [409]:
df.describe()

Unnamed: 0,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,rolloff_var,...,mfcc16_mean,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var
count,9990.0,9990.0,9990.0,9990.0,9990.0,9990.0,9990.0,9990.0,9990.0,9990.0,...,9990.0,9990.0,9990.0,9990.0,9990.0,9990.0,9990.0,9990.0,9990.0,9990.0
mean,0.379534,0.084876,0.130859,0.002676388,2199.219431,416672.7,2241.385959,118271.1,4566.076592,1628790.0,...,1.44824,49.988755,-4.198706,51.962753,0.739943,52.488851,-2.497306,54.973829,-0.917584,57.322614
std,0.090466,0.009637,0.068545,0.003585628,751.860611,434964.4,543.854449,101350.5,1642.065335,1489398.0,...,5.735149,34.442816,5.677379,36.400669,5.181313,38.17712,5.111799,41.585677,5.253243,46.444212
min,0.107108,0.015345,0.000953,4.379535e-08,472.741636,811.8813,499.16291,1183.52,658.336276,1145.102,...,-26.850016,1.325786,-27.809795,1.624544,-20.733809,3.437439,-27.448456,3.065302,-35.640659,0.282131
25%,0.315698,0.079833,0.083782,0.00061459,1630.680158,123196.1,1887.45579,48765.53,3378.31111,559551.4,...,-2.227478,29.584894,-7.951722,29.863448,-2.516638,29.636197,-5.734123,30.496412,-4.004475,30.011365
50%,0.384741,0.085108,0.121253,0.001491318,2208.628236,265069.2,2230.575595,89960.72,4631.377892,1160080.0,...,1.461623,41.702393,-4.443021,42.393583,0.733772,41.831377,-2.702366,43.435253,-1.030939,44.332155
75%,0.442443,0.091092,0.176328,0.003130862,2712.581884,562415.2,2588.340505,158567.4,5591.634521,2262437.0,...,5.149752,59.274619,-0.726945,61.676964,3.888734,62.033906,0.514246,65.328602,2.216603,68.210421
max,0.749481,0.120964,0.442567,0.03261522,5432.534406,4794119.0,3708.147554,1235143.0,9487.446477,12983200.0,...,39.144405,683.932556,34.048843,529.363342,36.970322,629.729797,31.365425,1143.230591,34.212101,910.473206


In [410]:
df.head()

Unnamed: 0,filename,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.0.wav,0.335406,0.091048,0.130405,0.003521,1773.065032,167541.630869,1972.744388,117335.771563,3714.560359,...,39.687145,-3.24128,36.488243,0.722209,38.099152,-5.050335,33.618073,-0.243027,43.771767,blues
1,blues.00000.1.wav,0.343065,0.086147,0.112699,0.00145,1816.693777,90525.690866,2010.051501,65671.875673,3869.682242,...,64.748276,-6.055294,40.677654,0.159015,51.264091,-2.837699,97.03083,5.784063,59.943081,blues
2,blues.00000.2.wav,0.346815,0.092243,0.132003,0.00462,1788.539719,111407.437613,2084.565132,75124.921716,3997.63916,...,67.336563,-1.76861,28.348579,2.378768,45.717648,-1.938424,53.050835,2.517375,33.105122,blues
3,blues.00000.3.wav,0.363639,0.086856,0.132565,0.002448,1655.289045,111952.284517,1960.039988,82913.639269,3568.300218,...,47.739452,-3.841155,28.337118,1.218588,34.770935,-3.580352,50.836224,3.630866,32.023678,blues
4,blues.00000.4.wav,0.335579,0.088129,0.143289,0.001701,1630.656199,79667.267654,1948.503884,60204.020268,3469.992864,...,30.336359,0.664582,45.880913,1.689446,51.363583,-3.392489,26.738789,0.536961,29.146694,blues


In [411]:
# This code is used to split data. Every genre have 100 song with 10 segment each,
# Get the songid for each song in every genre, then randomly select.
# A temp list(train_data & test_data) is used to store the part in each genre.
# Then it will append into train_data_all and tesdt_data_all to combine all the train and test data from all the genre
# Get the column name of the dataset
data_column_names = df.columns.values.tolist()
print(data_column_names)

# Get the training data
# Create a dataframe with same columns name but empty row
train_data =  pd.DataFrame(columns = df.columns)
train_data_all =  pd.DataFrame(columns = df.columns)

# Get the training data
# Create a dataframe with same columns name but empty row
test_data =  pd.DataFrame(columns = df.columns)
test_data_all =  pd.DataFrame(columns = df.columns)

['filename', 'chroma_stft_mean', 'chroma_stft_var', 'rms_mean', 'rms_var', 'spectral_centroid_mean', 'spectral_centroid_var', 'spectral_bandwidth_mean', 'spectral_bandwidth_var', 'rolloff_mean', 'rolloff_var', 'zero_crossing_rate_mean', 'zero_crossing_rate_var', 'harmony_mean', 'harmony_var', 'perceptr_mean', 'perceptr_var', 'tempo', 'mfcc1_mean', 'mfcc1_var', 'mfcc2_mean', 'mfcc2_var', 'mfcc3_mean', 'mfcc3_var', 'mfcc4_mean', 'mfcc4_var', 'mfcc5_mean', 'mfcc5_var', 'mfcc6_mean', 'mfcc6_var', 'mfcc7_mean', 'mfcc7_var', 'mfcc8_mean', 'mfcc8_var', 'mfcc9_mean', 'mfcc9_var', 'mfcc10_mean', 'mfcc10_var', 'mfcc11_mean', 'mfcc11_var', 'mfcc12_mean', 'mfcc12_var', 'mfcc13_mean', 'mfcc13_var', 'mfcc14_mean', 'mfcc14_var', 'mfcc15_mean', 'mfcc15_var', 'mfcc16_mean', 'mfcc16_var', 'mfcc17_mean', 'mfcc17_var', 'mfcc18_mean', 'mfcc18_var', 'mfcc19_mean', 'mfcc19_var', 'mfcc20_mean', 'mfcc20_var', 'label']


In [412]:
# Extract song label(every genre)
labels = df['label'].unique()
print(labels)

['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop'
 'reggae' 'rock']


In [413]:
# For every genre in the label
for label in labels:
    # Extract the rows with the current label
    song_data_by_genre = df[df["label"]== label]

    # Change the value of the column filename, remove unused information blues.00000.0.wav -> 00000
    song_data_by_genre.loc[:, 'filename'] = song_data_by_genre['filename'].apply(lambda x: x.split('.')[1])

    # Get the list of all the filename(id) in the current genre
    song_id_by_genre = song_data_by_genre['filename'].unique()

    # Shuffle the ids
    np.random.shuffle(song_id_by_genre)

    # Split ids into training and testing data
    split_song_id_by_genre = int(len(song_id_by_genre) * 0.8)
    # Train takes the front 80% 
    song_train_id = song_id_by_genre[:split_song_id_by_genre]
    # Test take the last 20%
    song_test_id = song_id_by_genre[split_song_id_by_genre:]

    # Filter out the train data and test data according to the shuffled id
    train_data = song_data_by_genre[song_data_by_genre['filename'].isin(song_train_id)]
    test_data = song_data_by_genre[~song_data_by_genre['filename'].isin(song_train_id)]

    # Append to the training dataframe and test dataframe
    train_data_all = pd.concat([train_data_all, train_data], ignore_index = True)
    test_data_all = pd.concat([test_data_all, test_data], ignore_index = True)

# Check the split data
print(train_data_all.shape)
print(test_data_all.shape)

(7992, 59)
(1998, 59)


  train_data_all = pd.concat([train_data_all, train_data], ignore_index = True)
  test_data_all = pd.concat([test_data_all, test_data], ignore_index = True)


In [414]:
# Label Encoding

# All labels in the class
# labels = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]

# Initialize the Label Encoder
label_encoder = LabelEncoder()

# Transform the label column on training set
train_data_all['label'] = label_encoder.fit_transform(train_data_all['label'])
test_data_all['label'] = label_encoder.fit_transform(test_data_all['label'])

In [415]:
# Split X and y(label)
# Drop filename, it is not required in training, during training we train the model with segments, not as a song
X_train = train_data_all.drop(["label", "filename"], axis = 1)

y_train = train_data_all["label"]
y_test = test_data_all["label"]


print(X_train.shape)
print(y_train.shape)

(7992, 57)
(7992,)


In [416]:
scaler = StandardScaler()

# Fit the scaler on the data and transform it
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns = X_train.columns)
X_test = pd.DataFrame(scaler.fit_transform(X_test), columns = X_test.columns)

print(X_train.head())

   chroma_stft_mean  chroma_stft_var  rms_mean   rms_var  \
0         -0.492324         0.637011  0.000342  0.247086   
1         -0.407118         0.131553 -0.257978 -0.338130   
2         -0.365406         0.760195  0.023661  0.557702   
3         -0.178238         0.204729  0.031851 -0.056196   
4         -0.490399         0.335934  0.188311 -0.267157   

   spectral_centroid_mean  spectral_centroid_var  spectral_bandwidth_mean  \
0               -0.554056              -0.570685                -0.477589   
1               -0.495656              -0.752946                -0.408468   
2               -0.533342              -0.703528                -0.270411   
3               -0.711707              -0.702239                -0.501128   
4               -0.744679              -0.778643                -0.522502   

   spectral_bandwidth_var  rolloff_mean  rolloff_var  ...  mfcc16_mean  \
0               -0.005583     -0.503802    -0.359721  ...    -0.760593   
1               -0.508463   

In [417]:
X_train = X_train.values
y_train = y_train.values

In [418]:
from sklearn.naive_bayes import GaussianNB

model = GaussianNB()
model.fit(X_train, y_train)
pred = model.predict(X_test)






In [419]:
from sklearn.metrics import classification_report, accuracy_score
accuracy = accuracy_score(y_test, pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Detailed classification report
print(classification_report(y_test, pred))

Accuracy: 55.31%
              precision    recall  f1-score   support

           0       0.43      0.27      0.33       200
           1       0.74      0.94      0.82       200
           2       0.40      0.51      0.45       198
           3       0.47      0.47      0.47       200
           4       0.66      0.42      0.51       200
           5       0.64      0.42      0.51       200
           6       0.53      0.95      0.68       200
           7       0.68      0.78      0.73       200
           8       0.48      0.34      0.40       200
           9       0.27      0.24      0.25       200

    accuracy                           0.53      1998
   macro avg       0.53      0.53      0.51      1998
weighted avg       0.53      0.53      0.51      1998

