In [2]:
import os
import glob
import csv
import numpy as np
import pandas as pd
import librosa

In [3]:
# Column names for the feature CSV: two identifying columns, eight summary
# features, then one column per MFCC coefficient (mfcc1 .. mfcc20).
header = [
    'filename',
    'genre',
    'tempo',
    'beats',
    'chroma_stft',
    'rms',
    'spectral_centroid',
    'spectral_bandwidth',
    'rolloff',
    'zero_crossing_rate',
]
for i in range(1, 21):
    header.append(f'mfcc{i}')
print(header)

['filename', 'genre', 'tempo', 'beats', 'chroma_stft', 'rms', 'spectral_centroid', 'spectral_bandwidth', 'rolloff', 'zero_crossing_rate', 'mfcc1', 'mfcc2', 'mfcc3', 'mfcc4', 'mfcc5', 'mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11', 'mfcc12', 'mfcc13', 'mfcc14', 'mfcc15', 'mfcc16', 'mfcc17', 'mfcc18', 'mfcc19', 'mfcc20']


In [13]:
main_folder = r"Music_audio"
output_csv = 'data/music_feature.csv'

# Maps each handled file extension to the separator that ends the genre prefix
# in the file name, e.g. "Pop-TRWVLJG128F9307271.mp3" -> "Pop" and
# "jazz.00027.au" -> "jazz". Insertion order is the processing order: within
# each folder, all MP3 rows are written before any AU rows.
genre_separators = {".mp3": "-", ".au": ".0"}


def _genre_from_filename(filename, separator):
    """Return the part of `filename` that precedes the first `separator`.

    Returns 'unknown' when the separator does not occur. (Slicing with the
    -1 that `str.find` returns in that case would silently yield the file
    name minus its last character.)
    """
    genre, found, _rest = filename.partition(separator)
    return genre if found else 'unknown'


def _extract_feature_row(audio_path, separator):
    """Load one audio file and return its CSV row as a list.

    The row follows the column order of `header`: file name, genre, tempo,
    beat count, the mean of each spectral feature, then the mean of each
    MFCC coefficient row returned by librosa.

    Parameters:
        audio_path: path of the audio file to load with librosa.
        separator: string that ends the genre prefix in the file name.
    """
    filename = os.path.basename(audio_path)
    genre = _genre_from_filename(filename, separator)

    # Load the audio file
    y, sr = librosa.load(audio_path)

    # Extract audio features using Librosa
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    # Depending on the librosa version, tempo is a scalar or a 1-element
    # array; normalise to a plain float so the CSV cell is "129.19", not
    # "[129.19]" (which pandas would read back as a string).
    tempo = float(np.atleast_1d(tempo)[0])
    chroma_stft_ = librosa.feature.chroma_stft(y=y, sr=sr)
    rms = librosa.feature.rms(y=y)
    spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)
    mfcc_ = librosa.feature.mfcc(y=y, sr=sr)

    # Build the row as a list rather than a space-joined string: splitting a
    # joined string on whitespace shifts columns whenever the file name
    # contains a space or the genre is empty.
    row = [
        filename,
        genre,
        tempo,
        beats.shape[0],
        np.mean(chroma_stft_),
        np.mean(rms),
        np.mean(spec_cent),
        np.mean(spec_bw),
        np.mean(rolloff),
        np.mean(zcr),
    ]
    row.extend(np.mean(coefficient) for coefficient in mfcc_)
    return row


# Create the output folder if needed so the open() below cannot fail on a
# fresh checkout.
os.makedirs(os.path.dirname(output_csv), exist_ok=True)

# Recursively search for MP3 and AU files in subfolders and write one row per
# file. The CSV is opened once for the whole run instead of once per row;
# UTF-8 matches the default encoding pd.read_csv uses to read it back.
with open(output_csv, 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(header)
    for root, _dirs, files in os.walk(main_folder):
        for extension, separator in genre_separators.items():
            for audio_name in files:
                if not audio_name.endswith(extension):
                    continue
                audio_path = os.path.join(root, audio_name)
                writer.writerow(_extract_feature_row(audio_path, separator))
            

In [4]:
# Read the extracted feature table back from disk; the bare name on the last
# line makes the notebook render the resulting DataFrame.
music_features = pd.read_csv(
    filepath_or_buffer='data/music_feature.csv',
)
music_features

Unnamed: 0,filename,genre,tempo,beats,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,...,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20
0,Pop-TRWVLJG128F9307271.mp3,Pop,69.837416,34,0.408293,0.224007,2858.783819,2568.014706,5614.349384,0.157585,...,-2.239609,0.568685,-5.098747,5.126700,-6.031083,-0.664729,-5.482568,-0.785592,-0.707880,-2.069501
1,Pop-TROXCBC128F14529AE.mp3,Pop,129.199219,61,0.383293,0.116162,2588.175574,2546.518787,5442.600423,0.127652,...,-4.317134,3.003993,-2.611651,-4.571542,-4.442684,0.995466,-3.879921,1.871848,-3.352596,-1.624789
2,Pop-TRXFYTA128F429364C.mp3,Pop,143.554688,73,0.326810,0.174422,2263.045968,2466.713542,4915.211945,0.098146,...,-12.453062,1.115649,-6.475195,-2.844059,-11.737099,-5.944115,-9.832664,1.110383,-5.764701,-2.384817
3,Pop-TRKXVHR128F428B826.mp3,Pop,143.554688,70,0.364481,0.197582,1805.438441,2146.999805,3776.777162,0.078926,...,-0.345862,-11.273629,-6.434109,4.662772,-4.449849,-3.275714,-1.916390,-7.136373,-3.354441,-3.122026
4,Pop-TRYSIIR128F42333FF.mp3,Pop,89.102909,43,0.417577,0.296791,2622.265385,2567.002404,5549.991440,0.123443,...,0.335742,2.190076,-1.105045,0.857418,-1.936657,0.905115,-4.770248,-1.962681,-5.928404,3.378405
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2495,jazz.00027.au,jazz,143.554688,68,0.325170,0.040594,1769.428268,2113.360398,3966.439331,0.062681,...,-10.986870,11.923827,-3.433859,7.191704,-8.850467,4.982307,-4.036403,-0.111063,-0.818210,-0.612190
2496,jazz.00076.au,jazz,117.453835,56,0.303217,0.119098,1733.735682,2068.216050,3379.971802,0.068700,...,-3.630606,-5.505905,0.538993,-0.509386,-4.806393,-11.206601,-3.936800,-1.469241,-4.413507,-10.597857
2497,jazz.00085.au,jazz,80.749512,38,0.325428,0.071379,2860.683151,2566.170784,5605.161054,0.147559,...,6.401213,3.414421,-2.899613,3.227199,-1.338827,0.875506,3.679272,0.648434,-0.353716,6.321081
2498,jazz.00042.au,jazz,73.828125,34,0.285201,0.099492,1344.620126,1548.989660,2773.095193,0.061186,...,-7.660741,-1.378526,-9.349154,-7.501379,-2.871112,-6.142300,-0.939825,-1.298267,-2.241966,-3.270761
