### Step 2: Extracting Features from the Audio series 


Loading the generated audio data

In [2]:
import numpy as np

# Load data
X = np.load('X_Data.npy', allow_pickle=True)  # Audio data
y = np.load('y_Genres.npy')  
file_names = np.load('file_names.npy')


### Feature Extraction Function

The `extract_features` function processes a single audio waveform and extracts several audio features from it. These include MFCCs, spectral features, ZCR, RMS and Chroma features. These features capture both temporal and spectral characteristics of the sound.


In [9]:
import librosa
def extract_features(audio, sr=22050, n_mfcc=13):
    #mfcc features
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
    mfcc_mean = np.mean(mfcc, axis=1)
    mfcc_std = np.std(mfcc, axis=1)

    # Zero-Crossing Rate
    zcr = np.mean(librosa.feature.zero_crossing_rate(audio))

    # RMS Energy
    rms = np.mean(librosa.feature.rms(y=audio))

    # Spectral Features
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=audio, sr=sr))
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=audio, sr=sr))
    spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=audio, sr=sr), axis=1)
    spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=audio, sr=sr))
    chroma_stft = np.mean(librosa.feature.chroma_stft(y=audio, sr=sr), axis=1)

    # Combine all features
    features = np.concatenate([
        mfcc_mean, mfcc_std, [zcr, rms, spectral_centroid, spectral_bandwidth, spectral_rolloff],
        spectral_contrast, chroma_stft
    ])
    return features



This code extracts the features from all the songs in the dataset and stores them into a dataframe called `feature_df`

In [10]:
import pandas as pd
data_rows = []

for i, audio in enumerate(X):
    try:
        # Extract features from each audio sample
        features = extract_features(audio)
        # Append features and the corresponding label as a row
        data_rows.append(np.append(features, y[i]))
    except Exception as e:
        print(f"Error processing sample {i}: {e}")

# Define column names
mfcc_mean_cols = [f"mfcc_mean_{i}" for i in range(13)]
mfcc_std_cols = [f"mfcc_std_{i}" for i in range(13)]
spectral_contrast_cols = [f"spectral_contrast_{i}" for i in range(7)]  # Adjust for 7 contrast bands
chroma_cols = [f"chroma_{i}" for i in range(12)]  # 12 pitch classes

# Combine all column names
columns = (
    mfcc_mean_cols +
    mfcc_std_cols +
    ["zcr", "rms", "spectral_centroid", "spectral_bandwidth", "spectral_rolloff"] +
    spectral_contrast_cols +
    chroma_cols +
    ["label"]
)

# Create a DataFrame from the extracted features
feature_df = pd.DataFrame(data_rows, columns=columns)


Conducting some basic Exploratory Data analysis (EDA) on the dataset

In [12]:
feature_df.insert(0, "file_name", file_names) #adding the filenames as the first column
print(feature_df.head()) #View the first 5 datapoints


       file_name          mfcc_mean_0        mfcc_mean_1          mfcc_mean_2  \
0  pop.00027.wav   -75.46332550048828  83.29920959472656   10.706214904785156   
1  pop.00033.wav   -69.11246490478516  73.77897644042969  -12.471774101257324   
2  pop.00032.wav  -113.25811767578125  41.56414794921875    21.54595947265625   
3  pop.00026.wav    -73.4219970703125  97.42221069335938    16.88865089416504   
4  pop.00030.wav  -18.320768356323242  72.32365417480469  -4.7909040451049805   

            mfcc_mean_3            mfcc_mean_4          mfcc_mean_5  \
0    14.456714630126953     14.260170936584473    7.435577392578125   
1     9.525618553161621  -0.051145948469638824   12.708080291748047   
2    11.081443786621094     17.457849502563477   0.6241185665130615   
3   -0.5528331398963928       7.17862606048584    4.888375282287598   
4  -0.32402387261390686     1.7181305885314941  -0.8661431670188904   

           mfcc_mean_6           mfcc_mean_7         mfcc_mean_8  ...  \
0   5.2846293

In [21]:
feature_df.sample(5) #print 5 random samples


Unnamed: 0,file_name,mfcc_mean_0,mfcc_mean_1,mfcc_mean_2,mfcc_mean_3,mfcc_mean_4,mfcc_mean_5,mfcc_mean_6,mfcc_mean_7,mfcc_mean_8,...,chroma_3,chroma_4,chroma_5,chroma_6,chroma_7,chroma_8,chroma_9,chroma_10,chroma_11,label
118,metal.00032.wav,-61.99877548217773,93.09228515625,-40.01924514770508,56.13471603393555,-19.69083023071289,23.339555740356445,-7.605926036834717,17.7100887298584,-19.262510299682617,...,0.4598876535892486,0.5962894558906555,0.4548431634902954,0.4383601248264313,0.4774225354194641,0.3944322764873504,0.3967834115028381,0.3296359479427337,0.4235557019710541,metal
618,rock.00015.wav,-175.00186157226562,90.869140625,-18.649656295776367,41.629154205322266,-8.871862411499023,9.110029220581056,-12.46670913696289,13.11325740814209,-9.687481880187988,...,0.3369596302509308,0.2433425039052963,0.2477693110704422,0.2107998430728912,0.3139735162258148,0.2896701395511627,0.3484962880611419,0.3676571249961853,0.3264009952545166,rock
853,country.00037.wav,-173.4692840576172,105.00334167480467,1.1836669445037842,24.118816375732425,22.087299346923828,2.548550605773926,-1.5425983667373655,0.1971702426671981,-5.315374851226807,...,0.4344953298568725,0.35472172498703,0.3937427699565887,0.3396260142326355,0.3038133680820465,0.4161155521869659,0.3978264033794403,0.4573054909706116,0.3230088353157043,country
64,pop.00099.wav,-119.3389892578125,85.91064453125,33.9141960144043,10.003548622131348,17.694215774536133,0.5320995450019836,9.540327072143556,-5.267251014709473,4.154889583587647,...,0.3102399706840515,0.4197053611278534,0.2933597862720489,0.3589567542076111,0.3349076807498932,0.4340332448482513,0.3705759644508362,0.338215708732605,0.4470361471176147,pop
886,country.00078.wav,-302.9194641113281,150.2154998779297,-3.646185398101807,26.83581924438477,-0.2450169771909713,13.532997131347656,7.308869361877441,4.174530506134033,1.7951788902282717,...,0.4016408026218414,0.4411232769489288,0.3138564229011535,0.3249040246009826,0.2584686279296875,0.3581264913082123,0.4125753343105316,0.4033679664134979,0.4712213575839996,country


In [22]:
# Describe each column
feature_df.describe(include='all')


Unnamed: 0,file_name,mfcc_mean_0,mfcc_mean_1,mfcc_mean_2,mfcc_mean_3,mfcc_mean_4,mfcc_mean_5,mfcc_mean_6,mfcc_mean_7,mfcc_mean_8,...,chroma_3,chroma_4,chroma_5,chroma_6,chroma_7,chroma_8,chroma_9,chroma_10,chroma_11,label
count,1000,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,...,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000
unique,1000,986.0,986.0,986.0,986.0,986.0,985.0,986.0,986.0,986.0,...,986.0,986.0,986.0,986.0,986.0,986.0,985.0,986.0,986.0,10
top,pop.00027.wav,-110.89625549316406,83.1821517944336,-45.22563171386719,56.59567642211914,-7.469169616699219,19.975831985473636,-13.41921615600586,21.209272384643555,-15.719744682312012,...,0.4189111590385437,0.5022823214530945,0.4845082759857178,0.4885303676128387,0.4222660362720489,0.4775139987468719,0.4679335057735443,0.3955163657665252,0.4066154062747955,pop
freq,1,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,100


In [23]:
feature_df.isnull().sum() #check for any missing values

file_name              0
mfcc_mean_0            0
mfcc_mean_1            0
mfcc_mean_2            0
mfcc_mean_3            0
mfcc_mean_4            0
mfcc_mean_5            0
mfcc_mean_6            0
mfcc_mean_7            0
mfcc_mean_8            0
mfcc_mean_9            0
mfcc_mean_10           0
mfcc_mean_11           0
mfcc_mean_12           0
mfcc_std_0             0
mfcc_std_1             0
mfcc_std_2             0
mfcc_std_3             0
mfcc_std_4             0
mfcc_std_5             0
mfcc_std_6             0
mfcc_std_7             0
mfcc_std_8             0
mfcc_std_9             0
mfcc_std_10            0
mfcc_std_11            0
mfcc_std_12            0
zcr                    0
rms                    0
spectral_centroid      0
spectral_bandwidth     0
spectral_rolloff       0
spectral_contrast_0    0
spectral_contrast_1    0
spectral_contrast_2    0
spectral_contrast_3    0
spectral_contrast_4    0
spectral_contrast_5    0
spectral_contrast_6    0
chroma_0               0


In [24]:
# Save the dataframe as a csv file
feature_df.to_csv("extracted_features.csv", index=False)
