## Importing Libraries

In [4]:
# Key functions mainly by Ruinan Ma, started from Code by Varrel Tantio (for data augumentation) and ideas from George Tzanetakis
# Should run on kaggle, since there are issues in my local pydub lib.
# References: 
# https://www.cs.cmu.edu/~gtzan/work/pubs/tsap02gtzan.pdf
# https://www.kaggle.com/datasets/andradaolteanu/gtzan-dataset-music-genre-classification
# https://www.kaggle.com/code/varreltantio/music-genre-feature-extraction-classification

import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
import csv
import base64
from IPython.display import HTML
# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import sklearn.model_selection as skms
#Keras
import keras
from keras import models
from keras import layers
from tensorflow import keras
from warnings import filterwarnings
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/gtzan-genre-collection/genres/disco/disco.00031.au
/kaggle/input/gtzan-genre-collection/genres/disco/disco.00091.au
/kaggle/input/gtzan-genre-collection/genres/disco/disco.00043.au
/kaggle/input/gtzan-genre-collection/genres/disco/disco.00014.au
/kaggle/input/gtzan-genre-collection/genres/disco/disco.00066.au
/kaggle/input/gtzan-genre-collection/genres/disco/disco.00049.au
/kaggle/input/gtzan-genre-collection/genres/disco/disco.00005.au
/kaggle/input/gtzan-genre-collection/genres/disco/disco.00093.au
/kaggle/input/gtzan-genre-collection/genres/disco/disco.00034.au
/kaggle/input/gtzan-genre-collection/genres/disco/disco.00087.au
/kaggle/input/gtzan-genre-collection/genres/disco/disco.00004.au
/kaggle/input/gtzan-genre-collection/genres/disco/disco.00029.au
/kaggle/input/gtzan-genre-collection/genres/disco/disco.00075.au
/kaggle/input/gtzan-genre-collection/genres/disco/disco.00064.au
/kaggle/input/gtzan-genre-collection/genres/disco/disco.00025.au
/kaggle/input/gtzan-genre

In [5]:
!ls /kaggle

input  lib  working


In [6]:
general_path = '../input/gtzan-genre-collection'
print(list(os.listdir(f'{general_path}/genres/')))

['disco', 'metal', 'reggae', 'blues', 'rock', 'classical', 'jazz', 'hiphop', 'country', 'pop']


Before we split the audio files make empty directories for each genre

In [7]:
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'
genres = genres.split()

for g in genres:
  path_audio = os.path.join('/kaggle/working/content/audio3sec',f'{g}')
  os.makedirs(path_audio)

# Data Augumentation: 
### Split the original 30-seconds audios to 3-seconds audios, and do training and testing on 3-seconds audios instead

In [8]:
from pydub import AudioSegment
from tqdm import tqdm
i = 0
for g in genres:
    j=0
    for filename in tqdm(os.listdir(f'{general_path}/genres/{g}')):
        song = f'{general_path}/genres/{g}/{filename}'
        j = j+1
        for w in range(0,10):
            i = i+1
            t1 = 3*(w)*1000
            t2 = 3*(w+1)*1000
            newAudio = AudioSegment.from_file(song, 'au')
            new = newAudio[t1:t2]
            new.export(f'/kaggle/working/content/audio3sec/{g}/{g+str(j)+str(w)}.wav', format="wav") 

100%|██████████| 100/100 [03:03<00:00,  1.84s/it]
100%|██████████| 100/100 [03:04<00:00,  1.84s/it]
100%|██████████| 100/100 [03:05<00:00,  1.86s/it]
100%|██████████| 100/100 [03:05<00:00,  1.86s/it]
100%|██████████| 100/100 [03:04<00:00,  1.84s/it]
100%|██████████| 100/100 [03:03<00:00,  1.83s/it]
100%|██████████| 100/100 [03:03<00:00,  1.84s/it]
100%|██████████| 100/100 [03:03<00:00,  1.83s/it]
100%|██████████| 100/100 [03:03<00:00,  1.83s/it]
100%|██████████| 100/100 [03:03<00:00,  1.83s/it]


# Feature Extraction

In [9]:
##### should be defined in feature.py #####

# Since there is a bug in my system to include a certain library, 
# I directly copy this code to my kaggle workspace to do feature extraction.


def feature(path, audio):
    # Load the audio data
    y, sr = librosa.load(path)
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    rms = librosa.feature.rms(y=y)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zero_crossing_rate = librosa.feature.zero_crossing_rate(y)
    harmony = librosa.effects.harmonic(y=y)
    tempo = librosa.beat.tempo(y=y,sr=sr)
    flatness = librosa.feature.spectral_flatness(y=y)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    label = audio.split('.')[0]
    # Create a dataframe with the features
    data = {"filename": audio,
            "chroma_stft_mean": np.mean(chroma_stft),
            "chroma_stft_var": np.var(chroma_stft),
            "rms_mean": np.mean(rms),
            "rms_var": np.var(rms),
            "spectral_centroid_mean": np.mean(spectral_centroid),
            "spectral_centroid_var": np.var(spectral_centroid),
            "spectral_bandwidth_mean": np.mean(spectral_bandwidth),
            "spectral_bandwidth_var": np.var(spectral_bandwidth),
            "rolloff_mean": np.mean(rolloff),
            "rolloff_var": np.var(rolloff),
            "zero_crossing_rate_mean": np.mean(zero_crossing_rate),
            "zero_crossing_rate_var": np.var(zero_crossing_rate),
            "harmony_mean": np.mean(harmony),
            "harmony_var": np.var(harmony),
            "tempo_mean": np.mean(tempo),
            "tempo_var": np.var(tempo),
            "flatness_mean": np.mean(flatness),
            "flatness_var": np.mean(flatness),
            "mfcc1_mean": np.mean(mfccs[0]),
            "mfcc1_var": np.var(mfccs[0]),
            "mfcc2_mean": np.mean(mfccs[1]),
            "mfcc2_var": np.var(mfccs[1]),
            "mfcc3_mean": np.mean(mfccs[2]),
            "mfcc3_var": np.var(mfccs[2]),
            "mfcc4_mean": np.mean(mfccs[3]),
            "mfcc4_var": np.var(mfccs[3]),
            "mfcc5_mean": np.mean(mfccs[4]),
            "mfcc5_var": np.var(mfccs[4]),
            "mfcc6_mean": np.mean(mfccs[5]),
            "mfcc6_var": np.var(mfccs[5]),
            "mfcc7_mean": np.mean(mfccs[6]),
            "mfcc7_var": np.var(mfccs[6]),
            "mfcc8_mean": np.mean(mfccs[7]),
            "mfcc8_var": np.var(mfccs[7]),
            "mfcc9_mean": np.mean(mfccs[8]),
            "mfcc9_var": np.var(mfccs[8]),
            "mfcc10_mean": np.mean(mfccs[9]),
            "mfcc10_var": np.var(mfccs[9]),
            "mfcc11_mean": np.mean(mfccs[10]),
            "mfcc11_var": np.var(mfccs[10]),
            "mfcc12_mean": np.mean(mfccs[11]),
            "mfcc12_var": np.var(mfccs[11]),
            "mfcc13_mean": np.mean(mfccs[12]),
            "mfcc13_var": np.var(mfccs[12]),
            "mfcc14_mean": np.mean(mfccs[13]),
            "mfcc14_var": np.var(mfccs[13]),
            "mfcc15_mean": np.mean(mfccs[14]),
            "mfcc15_var": np.var(mfccs[14]),
            "mfcc16_mean": np.mean(mfccs[15]),
            "mfcc16_var": np.var(mfccs[15]),
            "mfcc17_mean": np.mean(mfccs[16]),
            "mfcc17_var": np.var(mfccs[16]),
            "mfcc18_mean": np.mean(mfccs[17]),
            "mfcc18_var": np.var(mfccs[17]),
            "mfcc19_mean": np.mean(mfccs[18]),
            "mfcc19_var": np.var(mfccs[18]),
            "mfcc20_mean": np.mean(mfccs[19]),
            "mfcc20_var": np.var(mfccs[19]),
            "label": label
            }
    return data

In [13]:
header = ['filename', 'chroma_stft_mean', 'chroma_stft_var', 'rms_mean',
       'rms_var', 'spectral_centroid_mean', 'spectral_centroid_var',
       'spectral_bandwidth_mean', 'spectral_bandwidth_var', 'rolloff_mean',
       'rolloff_var', 'zero_crossing_rate_mean', 'zero_crossing_rate_var',
       'harmony_mean', 'harmony_var', 'tempo_mean', 'tempo_var', 'flatness_mean',
       'flatness_var', 'mfcc1_mean', 'mfcc1_var', 'mfcc2_mean', 'mfcc2_var',
       'mfcc3_mean', 'mfcc3_var', 'mfcc4_mean', 'mfcc4_var', 'mfcc5_mean',
       'mfcc5_var', 'mfcc6_mean', 'mfcc6_var', 'mfcc7_mean', 'mfcc7_var',
       'mfcc8_mean', 'mfcc8_var', 'mfcc9_mean', 'mfcc9_var', 'mfcc10_mean',
       'mfcc10_var', 'mfcc11_mean', 'mfcc11_var', 'mfcc12_mean', 'mfcc12_var',
       'mfcc13_mean', 'mfcc13_var', 'mfcc14_mean', 'mfcc14_var', 'mfcc15_mean',
       'mfcc15_var', 'mfcc16_mean', 'mfcc16_var', 'mfcc17_mean', 'mfcc17_var',
       'mfcc18_mean', 'mfcc18_var', 'mfcc19_mean', 'mfcc19_var', 'mfcc20_mean',
       'mfcc20_var', 'label']
print(type(header), len(header))

<class 'list'> 60


In [14]:
file = open('data-3s.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
from tqdm import tqdm    
for g in genres:
    for filename in tqdm(os.listdir(f'/kaggle/working/content/audio3sec/{g}')):
        songname = f'/kaggle/working/content/audio3sec/{g}/{filename}'
        features = feature(songname, filename)
        to_append = [features[d] for d in header]
        file = open('data-3s.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append)        

100%|██████████| 1000/1000 [05:37<00:00,  2.96it/s]
100%|██████████| 1000/1000 [05:35<00:00,  2.98it/s]
100%|██████████| 1000/1000 [05:35<00:00,  2.98it/s]
100%|██████████| 1000/1000 [05:35<00:00,  2.98it/s]
100%|██████████| 1000/1000 [05:35<00:00,  2.98it/s]
100%|██████████| 1000/1000 [05:35<00:00,  2.98it/s]
100%|██████████| 1000/1000 [05:36<00:00,  2.97it/s]
100%|██████████| 1000/1000 [05:35<00:00,  2.98it/s]
100%|██████████| 1000/1000 [05:32<00:00,  3.00it/s]
100%|██████████| 1000/1000 [05:35<00:00,  2.98it/s]


## Data Preparation

In [15]:
data_3s = pd.read_csv('data-3s.csv')
data_3s.head()

Unnamed: 0,filename,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues810.wav,0.266717,0.091159,0.067603,0.000382,718.186379,43897.372947,1250.744213,75543.709434,992.929124,...,39.790016,-17.315372,55.94787,-11.601478,32.9158,-1.419717,46.05692,7.421787,79.10219,blues810
1,blues408.wav,0.214856,0.088592,0.163221,0.007192,973.313121,46802.449463,1719.137506,85492.147835,1831.895846,...,23.157764,-9.054044,28.358007,-7.412502,38.389027,-9.103967,34.785324,-12.626125,16.607857,blues408
2,blues652.wav,0.348218,0.096253,0.086131,0.003488,1552.410808,227017.750188,1659.347207,58910.160318,3130.182354,...,68.48949,-6.59731,78.24664,-5.778485,89.38062,9.248326,168.04453,7.589011,168.99272,blues652
3,blues568.wav,0.404134,0.077326,0.211266,0.002832,1989.807594,83200.565639,2014.831258,40459.160485,3960.701435,...,43.447445,-7.004477,27.45833,1.654551,28.001158,-6.260219,30.18679,-2.245863,36.343906,blues568
4,blues770.wav,0.324225,0.083822,0.243702,0.003717,1963.528709,65431.113113,2115.324771,26177.174226,4304.404485,...,19.525223,-3.578293,23.694075,5.232549,24.88118,-4.767172,55.85287,-1.364094,27.123945,blues770


In [16]:
data_3s.columns

Index(['filename', 'chroma_stft_mean', 'chroma_stft_var', 'rms_mean',
       'rms_var', 'spectral_centroid_mean', 'spectral_centroid_var',
       'spectral_bandwidth_mean', 'spectral_bandwidth_var', 'rolloff_mean',
       'rolloff_var', 'zero_crossing_rate_mean', 'zero_crossing_rate_var',
       'harmony_mean', 'harmony_var', 'tempo_mean', 'tempo_var',
       'flatness_mean', 'flatness_var', 'mfcc1_mean', 'mfcc1_var',
       'mfcc2_mean', 'mfcc2_var', 'mfcc3_mean', 'mfcc3_var', 'mfcc4_mean',
       'mfcc4_var', 'mfcc5_mean', 'mfcc5_var', 'mfcc6_mean', 'mfcc6_var',
       'mfcc7_mean', 'mfcc7_var', 'mfcc8_mean', 'mfcc8_var', 'mfcc9_mean',
       'mfcc9_var', 'mfcc10_mean', 'mfcc10_var', 'mfcc11_mean', 'mfcc11_var',
       'mfcc12_mean', 'mfcc12_var', 'mfcc13_mean', 'mfcc13_var', 'mfcc14_mean',
       'mfcc14_var', 'mfcc15_mean', 'mfcc15_var', 'mfcc16_mean', 'mfcc16_var',
       'mfcc17_mean', 'mfcc17_var', 'mfcc18_mean', 'mfcc18_var', 'mfcc19_mean',
       'mfcc19_var', 'mfcc20_mean', '

Show columns and rows in the feature extraction data

In [17]:
data_3s.shape

(10000, 60)

Create download link for the feature extraction data

In [18]:
data_3s.to_csv('features_3_sec.csv')

In [19]:
def create_download_link(df, title = "Download CSV file", filename = "data-features.csv"):  
    csv = df.to_csv()
    b64 = base64.b64encode(csv.encode())
    payload = b64.decode()
    html = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    html = html.format(payload=payload,title=title,filename=filename)
#     print(html)
    return HTML(html)

create_download_link(data_3s)