In [58]:
import librosa
import matplotlib as plt
import numpy as np
import pandas as pd

from pathlib import Path
import os

#!pip install pydub

from pydub import AudioSegment
from pydub.utils import make_chunks

In [59]:
features = [
    'filename',
    'length',
    'chroma_stft_mean',
    'chroma_stft_var',
    'rms_mean',
    'rms_var',
    'spectral_centroid_mean',
    'spectral_centroid_var',
    'spectral_bandwidth_mean',
    'spectral_bandwidth_var',
    'rolloff_mean',
    'rolloff_var',
    'zero_crossing_rate_mean',
    'zero_crossing_rate_var',
    'tempo',
    'mfcc1_mean',
    'mfcc1_var',
    'mfcc2_mean',
    'mfcc2_var',
    'mfcc3_mean',
    'mfcc3_var',
    'mfcc4_mean',
    'mfcc4_var',
    'mfcc5_mean',
    'mfcc5_var',
    'mfcc6_mean',
    'mfcc6_var',
    'mfcc7_mean',
    'mfcc7_var',
    'mfcc8_mean',
    'mfcc8_var',
    'mfcc9_mean',
    'mfcc9_var',
    'mfcc10_mean',
    'mfcc10_var',
    'mfcc11_mean',
    'mfcc11_var',
    'mfcc12_mean',
    'mfcc12_var',
    'mfcc13_mean',
    'mfcc13_var',
    'mfcc14_mean',
    'mfcc14_var',
    'mfcc15_mean',
    'mfcc15_var',
    'mfcc16_mean',
    'mfcc16_var',
    'mfcc17_mean',
    'mfcc17_var',
    'mfcc18_mean',
    'mfcc18_var',
    'mfcc19_mean',
    'mfcc19_var',
    'mfcc20_mean',
    'mfcc20_var'
]

In [60]:
labels = [
    'blues',
    'classical',
    'country',
    'disco',
    'hiphop',
    'jazz',
    'metal',
    'pop',
    'reggae',
    'rock'
]

In [61]:
for label in labels:
    for i in range(0,100):
        
        file = "Data/genres_original/{}/{}.000{}.wav".format(label,label, i if i>9 else "0"+str(i))
        print(file)
                
        if (file != "Data/genres_original/jazz/jazz.00054.wav"): # jazz 54 is corrupt?
            
            myaudio = AudioSegment.from_file(file , "wav") 
            chunk_length_ms = 3000 # millisec
            chunks = make_chunks(myaudio, chunk_length_ms)

            for i, chunk in enumerate(chunks):
                chunk_name = "{}_{}.wav".format(file.replace(".wav","").replace("genres_original","genres_3sec"),i)
                print ("exporting", chunk_name)
                chunk.export(chunk_name, format="wav")

Data/genres_original/blues/blues.00000.wav
exporting Data/genres_3sec/blues/blues.00000_0.wav
exporting Data/genres_3sec/blues/blues.00000_1.wav
exporting Data/genres_3sec/blues/blues.00000_2.wav
exporting Data/genres_3sec/blues/blues.00000_3.wav
exporting Data/genres_3sec/blues/blues.00000_4.wav
exporting Data/genres_3sec/blues/blues.00000_5.wav
exporting Data/genres_3sec/blues/blues.00000_6.wav
exporting Data/genres_3sec/blues/blues.00000_7.wav
exporting Data/genres_3sec/blues/blues.00000_8.wav
exporting Data/genres_3sec/blues/blues.00000_9.wav
exporting Data/genres_3sec/blues/blues.00000_10.wav
Data/genres_original/blues/blues.00001.wav
exporting Data/genres_3sec/blues/blues.00001_0.wav
exporting Data/genres_3sec/blues/blues.00001_1.wav
exporting Data/genres_3sec/blues/blues.00001_2.wav
exporting Data/genres_3sec/blues/blues.00001_3.wav
exporting Data/genres_3sec/blues/blues.00001_4.wav
exporting Data/genres_3sec/blues/blues.00001_5.wav
exporting Data/genres_3sec/blues/blues.00001_6

collect features:

In [62]:
def get_features(file):
    y, sr = librosa.load(file)

    # values:
    length = librosa.get_duration(y=y, sr=sr)
    
    chroma_stft_mean = librosa.feature.chroma_stft(y=y, sr=sr).mean()
    chroma_stft_var = librosa.feature.chroma_stft(y=y, sr=sr).var()
    
    rms_mean = librosa.feature.rms(y=y).mean()
    rms_var = librosa.feature.rms(y=y).var()
    
    spectral_centroid_mean = librosa.feature.spectral_centroid(y=y, sr=sr).mean()
    spectral_centroid_var = librosa.feature.spectral_centroid(y=y, sr=sr).var()
    
    spectral_bandwidth_mean = librosa.feature.spectral_bandwidth(y=y, sr=sr).mean()
    spectral_bandwidth_var = librosa.feature.spectral_bandwidth(y=y, sr=sr).var()
    
    rolloff_mean = librosa.feature.spectral_rolloff(y=y, sr=sr).mean()
    rolloff_var = librosa.feature.spectral_rolloff(y=y, sr=sr).var()
    
    zero_crossing_rate_mean = librosa.feature.zero_crossing_rate(y).mean()
    zero_crossing_rate_var = librosa.feature.zero_crossing_rate(y).var()
    
    #chroma_cens_mean = librosa.feature.chroma_cens(y=y, sr=sr).mean() 
    #chroma_cens_var = librosa.feature.chroma_cens(y=y, sr=sr).var()
    
    #perceptr_mean = 3
    #perceptr_var = 3

    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo = librosa.feature.tempo(onset_envelope=onset_env, sr=sr)[0]

    record_values = [
        file ,
        length ,
        chroma_stft_mean ,
        chroma_stft_var ,
        rms_mean ,
        rms_var ,
        spectral_centroid_mean ,
        spectral_centroid_var ,
        spectral_bandwidth_mean ,
        spectral_bandwidth_var ,
        rolloff_mean ,
        rolloff_var ,
        zero_crossing_rate_mean ,
        zero_crossing_rate_var ,
        tempo
    ]

    for i in range(1,21):
        mfcci_mean = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=i).mean()
        mfcci_var = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=i).var()
        record_values.append(mfcci_mean)
        record_values.append(mfcci_var)

    #print(record_values, len(record_values))

    record = pd.DataFrame(
        [ record_values ],
        columns=features
    )
    
    return record

In [63]:
collected = pd.DataFrame(data=None, columns=features)
collected

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_mean,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var


In [64]:
for label in labels:
    for filename in os.listdir("Data/genres_3sec/{}/".format(label)):
        f = os.path.join("Data/genres_3sec/{}/".format(label), filename)
        print(f)
        rec = get_features(f)
        collected = pd.concat([collected, rec])

collected

Data/genres_3sec/blues/blues.00000_0.wav


  collected = pd.concat([collected, rec])


Data/genres_3sec/blues/blues.00000_1.wav
Data/genres_3sec/blues/blues.00000_10.wav
Data/genres_3sec/blues/blues.00000_2.wav




Data/genres_3sec/blues/blues.00000_3.wav
Data/genres_3sec/blues/blues.00000_4.wav
Data/genres_3sec/blues/blues.00000_5.wav
Data/genres_3sec/blues/blues.00000_6.wav
Data/genres_3sec/blues/blues.00000_7.wav
Data/genres_3sec/blues/blues.00000_8.wav
Data/genres_3sec/blues/blues.00000_9.wav
Data/genres_3sec/blues/blues.00001_0.wav
Data/genres_3sec/blues/blues.00001_1.wav
Data/genres_3sec/blues/blues.00001_10.wav
Data/genres_3sec/blues/blues.00001_2.wav
Data/genres_3sec/blues/blues.00001_3.wav
Data/genres_3sec/blues/blues.00001_4.wav
Data/genres_3sec/blues/blues.00001_5.wav
Data/genres_3sec/blues/blues.00001_6.wav
Data/genres_3sec/blues/blues.00001_7.wav
Data/genres_3sec/blues/blues.00001_8.wav
Data/genres_3sec/blues/blues.00001_9.wav
Data/genres_3sec/blues/blues.00002_0.wav
Data/genres_3sec/blues/blues.00002_1.wav
Data/genres_3sec/blues/blues.00002_10.wav
Data/genres_3sec/blues/blues.00002_2.wav
Data/genres_3sec/blues/blues.00002_3.wav
Data/genres_3sec/blues/blues.00002_4.wav
Data/genres_3s

  return pitch_tuning(


Data/genres_3sec/blues/blues.00011_3.wav
Data/genres_3sec/blues/blues.00011_4.wav
Data/genres_3sec/blues/blues.00011_5.wav
Data/genres_3sec/blues/blues.00011_6.wav
Data/genres_3sec/blues/blues.00011_7.wav
Data/genres_3sec/blues/blues.00011_8.wav
Data/genres_3sec/blues/blues.00011_9.wav
Data/genres_3sec/blues/blues.00012_0.wav
Data/genres_3sec/blues/blues.00012_1.wav
Data/genres_3sec/blues/blues.00012_10.wav
Data/genres_3sec/blues/blues.00012_2.wav
Data/genres_3sec/blues/blues.00012_3.wav
Data/genres_3sec/blues/blues.00012_4.wav
Data/genres_3sec/blues/blues.00012_5.wav
Data/genres_3sec/blues/blues.00012_6.wav
Data/genres_3sec/blues/blues.00012_7.wav
Data/genres_3sec/blues/blues.00012_8.wav
Data/genres_3sec/blues/blues.00012_9.wav
Data/genres_3sec/blues/blues.00013_0.wav
Data/genres_3sec/blues/blues.00013_1.wav
Data/genres_3sec/blues/blues.00013_10.wav
Data/genres_3sec/blues/blues.00013_2.wav
Data/genres_3sec/blues/blues.00013_3.wav
Data/genres_3sec/blues/blues.00013_4.wav
Data/genres_3s



Data/genres_3sec/classical/classical.00043_3.wav
Data/genres_3sec/classical/classical.00043_4.wav
Data/genres_3sec/classical/classical.00043_5.wav
Data/genres_3sec/classical/classical.00043_6.wav
Data/genres_3sec/classical/classical.00043_7.wav
Data/genres_3sec/classical/classical.00043_8.wav
Data/genres_3sec/classical/classical.00043_9.wav
Data/genres_3sec/classical/classical.00044_0.wav
Data/genres_3sec/classical/classical.00044_1.wav
Data/genres_3sec/classical/classical.00044_10.wav
Data/genres_3sec/classical/classical.00044_2.wav
Data/genres_3sec/classical/classical.00044_3.wav
Data/genres_3sec/classical/classical.00044_4.wav
Data/genres_3sec/classical/classical.00044_5.wav
Data/genres_3sec/classical/classical.00044_6.wav
Data/genres_3sec/classical/classical.00044_7.wav
Data/genres_3sec/classical/classical.00044_8.wav
Data/genres_3sec/classical/classical.00044_9.wav
Data/genres_3sec/classical/classical.00045_0.wav
Data/genres_3sec/classical/classical.00045_1.wav
Data/genres_3sec/cl



Data/genres_3sec/classical/classical.00046_3.wav
Data/genres_3sec/classical/classical.00046_4.wav
Data/genres_3sec/classical/classical.00046_5.wav
Data/genres_3sec/classical/classical.00046_6.wav
Data/genres_3sec/classical/classical.00046_7.wav
Data/genres_3sec/classical/classical.00046_8.wav
Data/genres_3sec/classical/classical.00046_9.wav
Data/genres_3sec/classical/classical.00047_0.wav
Data/genres_3sec/classical/classical.00047_1.wav
Data/genres_3sec/classical/classical.00047_10.wav
Data/genres_3sec/classical/classical.00047_2.wav
Data/genres_3sec/classical/classical.00047_3.wav
Data/genres_3sec/classical/classical.00047_4.wav
Data/genres_3sec/classical/classical.00047_5.wav
Data/genres_3sec/classical/classical.00047_6.wav
Data/genres_3sec/classical/classical.00047_7.wav
Data/genres_3sec/classical/classical.00047_8.wav
Data/genres_3sec/classical/classical.00047_9.wav
Data/genres_3sec/classical/classical.00048_0.wav
Data/genres_3sec/classical/classical.00048_1.wav
Data/genres_3sec/cl



Data/genres_3sec/classical/classical.00048_3.wav
Data/genres_3sec/classical/classical.00048_4.wav
Data/genres_3sec/classical/classical.00048_5.wav
Data/genres_3sec/classical/classical.00048_6.wav
Data/genres_3sec/classical/classical.00048_7.wav
Data/genres_3sec/classical/classical.00048_8.wav
Data/genres_3sec/classical/classical.00048_9.wav
Data/genres_3sec/classical/classical.00049_0.wav
Data/genres_3sec/classical/classical.00049_1.wav
Data/genres_3sec/classical/classical.00049_2.wav
Data/genres_3sec/classical/classical.00049_3.wav
Data/genres_3sec/classical/classical.00049_4.wav
Data/genres_3sec/classical/classical.00049_5.wav
Data/genres_3sec/classical/classical.00049_6.wav
Data/genres_3sec/classical/classical.00049_7.wav
Data/genres_3sec/classical/classical.00049_8.wav
Data/genres_3sec/classical/classical.00049_9.wav
Data/genres_3sec/classical/classical.00050_0.wav
Data/genres_3sec/classical/classical.00050_1.wav
Data/genres_3sec/classical/classical.00050_10.wav
Data/genres_3sec/cl



Data/genres_3sec/classical/classical.00056_3.wav
Data/genres_3sec/classical/classical.00056_4.wav
Data/genres_3sec/classical/classical.00056_5.wav
Data/genres_3sec/classical/classical.00056_6.wav
Data/genres_3sec/classical/classical.00056_7.wav
Data/genres_3sec/classical/classical.00056_8.wav
Data/genres_3sec/classical/classical.00056_9.wav
Data/genres_3sec/classical/classical.00057_0.wav
Data/genres_3sec/classical/classical.00057_1.wav
Data/genres_3sec/classical/classical.00057_10.wav
Data/genres_3sec/classical/classical.00057_2.wav
Data/genres_3sec/classical/classical.00057_3.wav
Data/genres_3sec/classical/classical.00057_4.wav
Data/genres_3sec/classical/classical.00057_5.wav
Data/genres_3sec/classical/classical.00057_6.wav
Data/genres_3sec/classical/classical.00057_7.wav
Data/genres_3sec/classical/classical.00057_8.wav
Data/genres_3sec/classical/classical.00057_9.wav
Data/genres_3sec/classical/classical.00058_0.wav
Data/genres_3sec/classical/classical.00058_1.wav
Data/genres_3sec/cl



Data/genres_3sec/country/country.00000_3.wav
Data/genres_3sec/country/country.00000_4.wav
Data/genres_3sec/country/country.00000_5.wav
Data/genres_3sec/country/country.00000_6.wav
Data/genres_3sec/country/country.00000_7.wav
Data/genres_3sec/country/country.00000_8.wav
Data/genres_3sec/country/country.00000_9.wav
Data/genres_3sec/country/country.00001_0.wav
Data/genres_3sec/country/country.00001_1.wav
Data/genres_3sec/country/country.00001_10.wav
Data/genres_3sec/country/country.00001_2.wav
Data/genres_3sec/country/country.00001_3.wav
Data/genres_3sec/country/country.00001_4.wav
Data/genres_3sec/country/country.00001_5.wav
Data/genres_3sec/country/country.00001_6.wav
Data/genres_3sec/country/country.00001_7.wav
Data/genres_3sec/country/country.00001_8.wav
Data/genres_3sec/country/country.00001_9.wav
Data/genres_3sec/country/country.00002_0.wav
Data/genres_3sec/country/country.00002_1.wav
Data/genres_3sec/country/country.00002_10.wav
Data/genres_3sec/country/country.00002_2.wav
Data/gen



Data/genres_3sec/jazz/jazz.00064_3.wav
Data/genres_3sec/jazz/jazz.00064_4.wav
Data/genres_3sec/jazz/jazz.00064_5.wav
Data/genres_3sec/jazz/jazz.00064_6.wav
Data/genres_3sec/jazz/jazz.00064_7.wav
Data/genres_3sec/jazz/jazz.00064_8.wav
Data/genres_3sec/jazz/jazz.00064_9.wav
Data/genres_3sec/jazz/jazz.00065_0.wav
Data/genres_3sec/jazz/jazz.00065_1.wav
Data/genres_3sec/jazz/jazz.00065_10.wav
Data/genres_3sec/jazz/jazz.00065_2.wav
Data/genres_3sec/jazz/jazz.00065_3.wav
Data/genres_3sec/jazz/jazz.00065_4.wav
Data/genres_3sec/jazz/jazz.00065_5.wav
Data/genres_3sec/jazz/jazz.00065_6.wav
Data/genres_3sec/jazz/jazz.00065_7.wav
Data/genres_3sec/jazz/jazz.00065_8.wav
Data/genres_3sec/jazz/jazz.00065_9.wav
Data/genres_3sec/jazz/jazz.00066_0.wav
Data/genres_3sec/jazz/jazz.00066_1.wav
Data/genres_3sec/jazz/jazz.00066_10.wav
Data/genres_3sec/jazz/jazz.00066_2.wav
Data/genres_3sec/jazz/jazz.00066_3.wav
Data/genres_3sec/jazz/jazz.00066_4.wav
Data/genres_3sec/jazz/jazz.00066_5.wav
Data/genres_3sec/jazz/j

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_mean,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var
0,Data/genres_3sec/blues/blues.00000_0.wav,3.000000,0.335555,0.090997,0.130189,0.003559,1773.358004,169450.829520,1972.334258,117272.640189,...,1.847295,2362.774902,1.548628,2227.368652,1.501281,2105.762207,1.157216,1998.835205,1.087376,1901.177490
0,Data/genres_3sec/blues/blues.00000_1.wav,3.000000,0.343523,0.086782,0.112119,0.001491,1817.244034,90766.297254,2010.751494,65940.666243,...,2.362022,2452.354980,1.868640,2314.378906,1.771890,2188.797607,1.530792,2079.760742,1.742847,1979.645386
0,Data/genres_3sec/blues/blues.00000_10.wav,0.012971,0.581960,0.094200,0.048685,0.000000,1733.209205,0.000000,2097.208689,0.000000,...,2.940918,2046.989502,2.651569,1927.917969,2.073924,1826.484131,1.378681,1739.053711,0.844467,1657.523071
0,Data/genres_3sec/blues/blues.00000_2.wav,3.000000,0.347746,0.092495,0.130895,0.004552,1790.722357,110071.206973,2088.184750,73391.498001,...,3.073477,2505.148438,2.791831,2360.711182,2.766150,2232.194336,2.519214,2118.593506,2.516604,2014.322266
0,Data/genres_3sec/blues/blues.00000_3.wav,3.000000,0.363863,0.087207,0.131349,0.002338,1660.545231,109496.936296,1967.920582,79805.901351,...,2.256836,2439.380615,1.901317,2299.581299,1.859761,2173.838623,1.577565,2063.523438,1.677718,1962.153809
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,Data/genres_3sec/rock/rock.00099_5.wav,3.000000,0.348781,0.080553,0.049680,0.000098,1503.238681,163419.049525,1725.514991,85476.555705,...,-2.231523,4923.151855,-2.634636,4638.445312,-2.722517,4382.626953,-2.878276,4154.901855,-2.648109,3950.097412
0,Data/genres_3sec/rock/rock.00099_6.wav,3.000000,0.372556,0.082633,0.057615,0.000096,1852.706840,277236.230048,1914.481257,97378.034054,...,-3.281360,3708.217773,-3.813271,3498.619873,-3.775346,3307.222656,-4.203818,3139.811279,-3.974402,2984.764160
0,Data/genres_3sec/rock/rock.00099_7.wav,3.000000,0.347815,0.089140,0.051906,0.000674,1348.383673,662359.245401,1566.961070,137898.245964,...,-6.174457,7897.867676,-5.969096,7435.306152,-5.362641,7029.924316,-4.989933,6664.940918,-4.751178,6334.845703
0,Data/genres_3sec/rock/rock.00099_8.wav,3.000000,0.386801,0.084649,0.065967,0.000312,2086.306423,200732.442407,2021.144027,20772.677177,...,-1.360686,3130.227539,-1.595695,2947.988037,-1.150531,2788.798340,-0.966437,2643.941895,-0.884805,2512.499756


In [96]:
abc_collected = collected

for filename in os.listdir("Data/genres_3sec/kpop/"):
    f = os.path.join("Data/genres_3sec/kpop/", filename)
    print(f)
    rec = get_features(f)
    abc_collected = pd.concat([abc_collected, rec])

Data/genres_3sec/kpop/kpop.-01.wav
Data/genres_3sec/kpop/kpop.-02.wav
Data/genres_3sec/kpop/kpop.-03.wav
Data/genres_3sec/kpop/kpop.-04.wav
Data/genres_3sec/kpop/kpop.-05.wav
Data/genres_3sec/kpop/kpop.-06.wav
Data/genres_3sec/kpop/kpop.-07.wav
Data/genres_3sec/kpop/kpop.-08.wav
Data/genres_3sec/kpop/kpop.-09.wav
Data/genres_3sec/kpop/kpop.-10.wav
Data/genres_3sec/kpop/kpop.-100.wav
Data/genres_3sec/kpop/kpop.-1000.wav
Data/genres_3sec/kpop/kpop.-1001.wav
Data/genres_3sec/kpop/kpop.-1002.wav
Data/genres_3sec/kpop/kpop.-1003.wav
Data/genres_3sec/kpop/kpop.-1004.wav
Data/genres_3sec/kpop/kpop.-1005.wav
Data/genres_3sec/kpop/kpop.-1006.wav
Data/genres_3sec/kpop/kpop.-1007.wav
Data/genres_3sec/kpop/kpop.-1008.wav
Data/genres_3sec/kpop/kpop.-1009.wav
Data/genres_3sec/kpop/kpop.-101.wav
Data/genres_3sec/kpop/kpop.-1010.wav
Data/genres_3sec/kpop/kpop.-1011.wav
Data/genres_3sec/kpop/kpop.-1012.wav
Data/genres_3sec/kpop/kpop.-1013.wav
Data/genres_3sec/kpop/kpop.-1014.wav
Data/genres_3sec/kpop/k

In [103]:
collected_new = collected[collected['filename'].str.contains('.10')==False] 

def find_label(filename):
    return filename.split("/")[2]
collected_new['label'] = collected_new['filename'].apply(find_label)


def remove_extra_filename(filename):
    print(filename.split("/")[3])
collected_new['filename'] = collected_new['filename'].apply(remove_extra_filename)

print(collected_new) 

kpop.-996.wav
                                    filename  length  chroma_stft_mean  \
0   Data/genres_3sec/blues/blues.00000_0.wav     3.0          0.335555   
0   Data/genres_3sec/blues/blues.00000_1.wav     3.0          0.343523   
0   Data/genres_3sec/blues/blues.00000_2.wav     3.0          0.347746   
0   Data/genres_3sec/blues/blues.00000_3.wav     3.0          0.363863   
0   Data/genres_3sec/blues/blues.00000_4.wav     3.0          0.335481   
..                                       ...     ...               ...   
0     Data/genres_3sec/rock/rock.00099_5.wav     3.0          0.348781   
0     Data/genres_3sec/rock/rock.00099_6.wav     3.0          0.372556   
0     Data/genres_3sec/rock/rock.00099_7.wav     3.0          0.347815   
0     Data/genres_3sec/rock/rock.00099_8.wav     3.0          0.386801   
0     Data/genres_3sec/rock/rock.00099_9.wav     3.0          0.368721   

    chroma_stft_var  rms_mean   rms_var  spectral_centroid_mean  \
0          0.090997  0.130189 

In [108]:
#collected_new2 = abc_collected[abc_collected['filename'].str.contains('.10')==False] 

collected_new2 = abc_collected

def find_label(filename):
    return filename.split("/")[2]
collected_new2['label'] = collected_new2['filename'].apply(find_label)

def remove_extra_filename(filename):
    return filename.split("/")[3]
collected_new2['filename'] = collected_new2['filename'].apply(remove_extra_filename)

print(collected_new2) 

IndexError: list index out of range

In [109]:
collected_new2 = collected_new2[collected_new2['filename'].str.contains('.10')==False] 

In [89]:
collected_new.to_csv("Data/collected_features.csv", index=False)

In [111]:
collected_new2.to_csv("Data/collected_features_and_kpop2.csv", index=False)

In [93]:
collected_features_path = "Data/collected_features.csv"
collected_features = pd.read_csv(collected_features_path)
collected_features.head()

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000_0.wav,3.0,0.335555,0.090997,0.130189,0.003559,1773.358004,169450.82952,1972.334258,117272.640189,...,2362.775,1.548628,2227.3687,1.501281,2105.7622,1.157216,1998.8352,1.087376,1901.1775,blues
1,blues.00000_1.wav,3.0,0.343523,0.086782,0.112119,0.001491,1817.244034,90766.297254,2010.751494,65940.666243,...,2452.355,1.86864,2314.379,1.77189,2188.7976,1.530792,2079.7607,1.742847,1979.6454,blues
2,blues.00000_2.wav,3.0,0.347746,0.092495,0.130895,0.004552,1790.722357,110071.206973,2088.18475,73391.498001,...,2505.1484,2.791831,2360.7112,2.76615,2232.1943,2.519214,2118.5935,2.516604,2014.3223,blues
3,blues.00000_3.wav,3.0,0.363863,0.087207,0.131349,0.002338,1660.545231,109496.936296,1967.920582,79805.901351,...,2439.3806,1.901317,2299.5813,1.859761,2173.8386,1.577565,2063.5234,1.677718,1962.1538,blues
4,blues.00000_4.wav,3.0,0.335481,0.088482,0.14237,0.001734,1634.465077,77425.419232,1954.633566,57359.695604,...,2281.6558,3.439414,2150.558,3.33673,2034.1184,2.984012,1930.6893,2.859887,1835.9031,blues
