In [55]:
import librosa
import matplotlib as plt
import numpy as np
import pandas as pd

from pathlib import Path
import os

#!pip install pydub

from pydub import AudioSegment
from pydub.utils import make_chunks

In [23]:
features = [
    'filename',
    'length',
    'chroma_stft_mean',
    'chroma_stft_var',
    'rms_mean',
    'rms_var',
    'spectral_centroid_mean',
    'spectral_centroid_var',
    'spectral_bandwidth_mean',
    'spectral_bandwidth_var',
    'rolloff_mean',
    'rolloff_var',
    'zero_crossing_rate_mean',
    'zero_crossing_rate_var',
    'harmony_mean',
    'harmony_var',
    'tempo',
    'mfcc1_mean',
    'mfcc1_var',
    'mfcc2_mean',
    'mfcc2_var',
    'mfcc3_mean',
    'mfcc3_var',
    'mfcc4_mean',
    'mfcc4_var',
    'mfcc5_mean',
    'mfcc5_var',
    'mfcc6_mean',
    'mfcc6_var',
    'mfcc7_mean',
    'mfcc7_var',
    'mfcc8_mean',
    'mfcc8_var',
    'mfcc9_mean',
    'mfcc9_var',
    'mfcc10_mean',
    'mfcc10_var',
    'mfcc11_mean',
    'mfcc11_var',
    'mfcc12_mean',
    'mfcc12_var',
    'mfcc13_mean',
    'mfcc13_var',
    'mfcc14_mean',
    'mfcc14_var',
    'mfcc15_mean',
    'mfcc15_var',
    'mfcc16_mean',
    'mfcc16_var',
    'mfcc17_mean',
    'mfcc17_var',
    'mfcc18_mean',
    'mfcc18_var',
    'mfcc19_mean',
    'mfcc19_var',
    'mfcc20_mean',
    'mfcc20_var'
]

In [24]:
labels = [
    'afrobeat',
    'blues',
    'classical',
    'country',
    'disco',
    'hiphop',
    'jazz',
    'kpop',
    'metal',
    'pop',
    'reggae',
    'rock'
]

In [140]:
for label in labels:
    for i in range(0,100):
        
        file = "Data/genres_original/{}/{}.000{}.wav".format(label,label, i if i>9 else "0"+str(i))
        #print(file)
                
        if (file != "Data/genres_original/jazz/jazz.00054.wav"): # jazz 54 is corrupt?
            
            myaudio = AudioSegment.from_file(file , "wav") 
            chunk_length_ms = 3000 # millisec
            chunks = make_chunks(myaudio, chunk_length_ms)

            for i, chunk in enumerate(chunks):
                chunk_name = "{}_{}.wav".format(file.replace(".wav","").replace("genres_original","genres_3sec"),i)
                if("_10" not in chunk_name):
                    #print ("exporting", chunk_name)
                    chunk.export(chunk_name, format="wav")

collect features:

In [44]:
def get_features(file):
    y, sr = librosa.load(file)

    # values:
    length = librosa.get_duration(y=y, sr=sr)
    
    chroma_stft_mean = librosa.feature.chroma_stft(y=y, sr=sr).mean()
    chroma_stft_var = librosa.feature.chroma_stft(y=y, sr=sr).var()
    
    rms_mean = librosa.feature.rms(y=y).mean()
    rms_var = librosa.feature.rms(y=y).var()
    
    spectral_centroid_mean = librosa.feature.spectral_centroid(y=y, sr=sr).mean()
    spectral_centroid_var = librosa.feature.spectral_centroid(y=y, sr=sr).var()
    
    spectral_bandwidth_mean = librosa.feature.spectral_bandwidth(y=y, sr=sr).mean()
    spectral_bandwidth_var = librosa.feature.spectral_bandwidth(y=y, sr=sr).var()
    
    rolloff_mean = librosa.feature.spectral_rolloff(y=y, sr=sr).mean()
    rolloff_var = librosa.feature.spectral_rolloff(y=y, sr=sr).var()
    
    zero_crossing_rate_mean = librosa.feature.zero_crossing_rate(y).mean()
    zero_crossing_rate_var = librosa.feature.zero_crossing_rate(y).var()

    harmonic,percussive = librosa.effects.hpss(y)
    harmony_mean = np.mean(harmonic)
    harmony_var = np.var(harmonic)

    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo = librosa.feature.tempo(onset_envelope=onset_env, sr=sr)[0]

    record_values = [
        file ,
        length ,
        chroma_stft_mean ,
        chroma_stft_var ,
        rms_mean ,
        rms_var ,
        spectral_centroid_mean ,
        spectral_centroid_var ,
        spectral_bandwidth_mean ,
        spectral_bandwidth_var ,
        rolloff_mean ,
        rolloff_var ,
        zero_crossing_rate_mean ,
        zero_crossing_rate_var ,
        
        harmony_mean,
        harmony_var,
        
        tempo
    ]

    for i in range(1,21):
        mfcci_mean = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=i)[i-1:].mean()
        mfcci_var = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=i)[i-1:].var()
        record_values.append(mfcci_mean)
        record_values.append(mfcci_var)

    #print(record_values, len(record_values))

    record = pd.DataFrame(
        [ record_values ],
        columns=features
    )
    
    return record

In [45]:
z = get_features("Data/genres_3sec/blues/blues.00000_0.wav")
print(z.iloc[0,:])

filename                   Data/genres_3sec/blues/blues.00000_0.wav
length                                                          3.0
chroma_stft_mean                                           0.335555
chroma_stft_var                                            0.090997
rms_mean                                                   0.130189
rms_var                                                    0.003559
spectral_centroid_mean                                  1773.358004
spectral_centroid_var                                  169450.82952
spectral_bandwidth_mean                                 1972.334258
spectral_bandwidth_var                                117272.640189
rolloff_mean                                            3714.063439
rolloff_var                                          1083179.450351
zero_crossing_rate_mean                                    0.081851
zero_crossing_rate_var                                     0.000558
harmony_mean                                    

In [47]:
collected_samples = pd.DataFrame(data=None, columns=(features + ['label']) )

for label in labels:
    print(label)
    for filename in os.listdir("Data/genres_3sec/{}/".format(label)):
        f = os.path.join("Data/genres_3sec/{}/".format(label), filename)
        rec = get_features(f)
        rec['label'] = f.split("/")[2]
        rec['filename'] = f.split("/")[3]
        collected_samples = pd.concat([collected_samples, rec])
        #print(collected_samples)
        
collected_samples


afrobeat


  collected_samples = pd.concat([collected_samples, rec])


blues
classical
country
disco
hiphop
jazz
kpop
metal
pop
reggae
rock


Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,afrobeat-01.wav,3.0,0.247399,0.092160,0.117964,0.002480,1524.740624,1.273835e+06,1750.105207,116583.835541,...,82.328667,-6.182023,38.484234,-2.735594,67.163925,-2.318325,51.975788,5.334535,95.716629,afrobeat
0,afrobeat-02.wav,3.0,0.196392,0.087288,0.185788,0.003104,1392.760011,1.085616e+05,1870.220893,99923.195705,...,56.805122,-20.886059,160.053680,-17.094584,165.353333,-1.315495,33.918022,6.706286,133.253967,afrobeat
0,afrobeat-03.wav,3.0,0.238844,0.089254,0.179397,0.002011,1204.599201,1.730626e+05,1729.341434,125313.219497,...,77.913330,-12.942533,41.259251,-10.912020,38.106586,-4.811698,59.304146,2.797769,90.411766,afrobeat
0,afrobeat-04.wav,3.0,0.272595,0.087415,0.226402,0.004534,1951.367999,5.666106e+05,2227.771907,171472.609785,...,72.256477,-11.386300,69.025261,-7.530372,78.782074,-7.850230,53.096050,-1.605986,67.128181,afrobeat
0,afrobeat-05.wav,3.0,0.308192,0.091026,0.206161,0.009170,2219.625357,9.538628e+05,2394.614862,208108.166883,...,71.410011,-12.198662,43.392902,-10.530150,35.972332,-6.583600,42.849930,4.042652,67.810913,afrobeat
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,rock.00099_5.wav,3.0,0.348781,0.080553,0.049680,0.000098,1503.238681,1.634190e+05,1725.514991,85476.555705,...,44.396152,-9.084437,38.929996,-4.216517,31.358320,-5.681930,47.543434,1.725083,38.689384,rock
0,rock.00099_6.wav,3.0,0.372556,0.082633,0.057615,0.000096,1852.706840,2.772362e+05,1914.481257,97378.034054,...,32.134659,-12.323830,68.098274,-3.130632,53.029484,-11.916322,63.618256,0.384525,18.866629,rock
0,rock.00099_7.wav,3.0,0.347815,0.089140,0.051906,0.000674,1348.383673,6.623592e+05,1566.961070,137898.245964,...,79.231087,-2.683275,22.830885,4.947066,25.907824,1.718840,47.727337,-0.214869,41.377605,rock
0,rock.00099_8.wav,3.0,0.386801,0.084649,0.065967,0.000312,2086.306423,2.007324e+05,2021.144027,20772.677177,...,27.924114,-5.355850,17.127596,6.417248,21.935259,2.347247,24.930906,0.666209,12.577224,rock


In [49]:
collected_samples.to_csv("Data/collected_samples_labeled.csv", index=False)

In [50]:
labeled_data_path = "Data/collected_samples_labeled.csv"
labeled_data = pd.read_csv(labeled_data_path)
labeled_data

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,afrobeat-01.wav,3.0,0.247399,0.092160,0.117964,0.002480,1524.740624,1.273835e+06,1750.105207,116583.835541,...,82.328670,-6.182023,38.484234,-2.735594,67.163925,-2.318325,51.975790,5.334535,95.716630,afrobeat
1,afrobeat-02.wav,3.0,0.196392,0.087288,0.185788,0.003104,1392.760011,1.085616e+05,1870.220893,99923.195705,...,56.805122,-20.886059,160.053680,-17.094584,165.353330,-1.315495,33.918022,6.706286,133.253970,afrobeat
2,afrobeat-03.wav,3.0,0.238844,0.089254,0.179397,0.002011,1204.599201,1.730626e+05,1729.341434,125313.219497,...,77.913330,-12.942533,41.259250,-10.912020,38.106586,-4.811698,59.304146,2.797769,90.411766,afrobeat
3,afrobeat-04.wav,3.0,0.272595,0.087415,0.226402,0.004534,1951.367999,5.666106e+05,2227.771907,171472.609785,...,72.256480,-11.386300,69.025260,-7.530372,78.782074,-7.850230,53.096050,-1.605986,67.128180,afrobeat
4,afrobeat-05.wav,3.0,0.308192,0.091026,0.206161,0.009170,2219.625357,9.538628e+05,2394.614862,208108.166883,...,71.410010,-12.198662,43.392902,-10.530150,35.972332,-6.583600,42.849930,4.042652,67.810910,afrobeat
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14601,rock.00099_5.wav,3.0,0.348781,0.080553,0.049680,0.000098,1503.238681,1.634190e+05,1725.514991,85476.555705,...,44.396152,-9.084437,38.929996,-4.216517,31.358320,-5.681930,47.543434,1.725083,38.689384,rock
14602,rock.00099_6.wav,3.0,0.372556,0.082633,0.057615,0.000096,1852.706840,2.772362e+05,1914.481257,97378.034054,...,32.134660,-12.323830,68.098274,-3.130632,53.029484,-11.916322,63.618256,0.384525,18.866629,rock
14603,rock.00099_7.wav,3.0,0.347815,0.089140,0.051906,0.000674,1348.383673,6.623592e+05,1566.961070,137898.245964,...,79.231090,-2.683275,22.830885,4.947066,25.907824,1.718840,47.727337,-0.214869,41.377605,rock
14604,rock.00099_8.wav,3.0,0.386801,0.084649,0.065967,0.000312,2086.306423,2.007324e+05,2021.144027,20772.677177,...,27.924114,-5.355850,17.127596,6.417248,21.935259,2.347247,24.930906,0.666209,12.577224,rock
