In [2]:
# Libraries for feature extraction:
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
from tqdm import tqdm


In [15]:
# Root folders of the audio data (training genres and held-out test songs):
audio_dir = "./Data/genres/"
test_dir = "./Data/test_songs/"

In [4]:
# Integer label for each genre name: labels follow the list order, starting at 0.
genres = {
    name: label
    for label, name in enumerate(
        ['blues', 'classical', 'country', 'disco', 'hiphop',
         'jazz', 'metal', 'pop', 'reggae', 'rock']
    )
}

In [5]:
#Get selected features from each song using librosa and numpy:

def get_features(y, sr, n_fft = 1024, hop_length = 512):
    """Summarise one audio signal as a flat dict of scalar features.

    Frame-level descriptors are computed with librosa, flattened, and reduced
    to their mean and standard deviation. A single tempo estimate is appended
    at the end.

    Parameters
    ----------
    y : np.ndarray
        Audio samples, e.g. the first value returned by ``librosa.load``.
    sr : int
        Sampling rate of ``y``.
    n_fft : int, optional
        Frame / FFT window length in samples.
    hop_length : int, optional
        Number of samples between successive frames.

    Returns
    -------
    dict
        ``'<name>_mean'`` and ``'<name>_std'`` for 'centroid', 'roloff',
        'flux', 'rmse', 'zcr', 'chroma' and 'mfcc_0' .. 'mfcc_12' (in that
        order), followed by ``'tempo'``.
    """
    # The audio is always passed as ``y=...``: librosa >= 0.10 made these
    # arguments keyword-only, and older releases accept the keyword as well.
    # Insertion order is kept as-is because it becomes the CSV column order.
    features = {
        'centroid': librosa.feature.spectral_centroid(
            y=y, sr=sr, n_fft=n_fft, hop_length=hop_length).ravel(),
        'roloff': librosa.feature.spectral_rolloff(
            y=y, sr=sr, n_fft=n_fft, hop_length=hop_length).ravel(),
        # hop_length is forwarded so this envelope follows the caller's hop
        # setting like the other descriptors (unchanged at the default of 512).
        'flux': librosa.onset.onset_strength(
            y=y, sr=sr, hop_length=hop_length).ravel(),
        'rmse': librosa.feature.rms(
            y=y, frame_length=n_fft, hop_length=hop_length).ravel(),
        'zcr': librosa.feature.zero_crossing_rate(
            y=y, frame_length=n_fft, hop_length=hop_length).ravel(),
        # All chroma bins are flattened together, so the statistics below are
        # taken over every bin and frame at once.
        'chroma': librosa.feature.chroma_stft(
            y=y, sr=sr, n_fft=n_fft, hop_length=hop_length).ravel(),
    }

    # One entry per MFCC coefficient. ``sr`` is passed explicitly; without it
    # librosa falls back to its own default rate regardless of the real one.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mfcc=13)
    for idx, v_mfcc in enumerate(mfcc):
        features['mfcc_{}'.format(idx)] = v_mfcc.ravel()

    # Collapse every descriptor into two scalars.
    dict_agg_features = {}
    for name, values in features.items():
        dict_agg_features['{}_mean'.format(name)] = np.mean(values)
        dict_agg_features['{}_std'.format(name)] = np.std(values)

    # Tempo: prefer the current location of the estimator and fall back to the
    # deprecated ``librosa.beat.tempo`` alias on older librosa releases.
    try:
        estimate_tempo = librosa.feature.tempo
    except AttributeError:
        estimate_tempo = librosa.beat.tempo
    dict_agg_features['tempo'] = estimate_tempo(y=y, sr=sr)[0]

    return dict_agg_features

In [8]:
#Reading all audio files and calculating features for each of them. This function generates a row for each song in the directory:

def read_process_songs(src_dir, debug = True):
    """Extract one feature row per audio file found under ``src_dir``.

    ``src_dir`` is expected to contain one sub-folder per key of the
    module-level ``genres`` dict. Every file below such a folder is loaded
    with ``librosa.load`` and passed to ``get_features``.

    Parameters
    ----------
    src_dir : str
        Directory holding the per-genre folders.
    debug : bool, optional
        When true, print the path of each file before it is processed.

    Returns
    -------
    list of dict
        One ``get_features`` dict per file, with an extra ``'genre'`` key
        holding the integer label from ``genres``.
    """
    arr_features = []

    for genre_name, genre_label in genres.items():
        # os.path.join works whether or not src_dir ends with a separator.
        folder = os.path.join(src_dir, genre_name)

        for root, subdirs, files in os.walk(folder):
            # Prune hidden folders (e.g. .ipynb_checkpoints) and fix the
            # traversal order so the resulting rows are reproducible.
            subdirs[:] = sorted(d for d in subdirs if not d.startswith('.'))

            for file in sorted(files):
                # Hidden files such as .DS_Store are not audio.
                if file.startswith('.'):
                    continue

                # Build the path from ``root``: os.walk also yields nested
                # folders, where ``folder + "/" + file`` would not exist.
                file_name = os.path.join(root, file)

                # Printed before loading so a failing file can be identified.
                if debug:
                    print("Reading file: {}".format(file_name))

                signal, sr = librosa.load(file_name)

                features = get_features(signal, sr)
                features['genre'] = genre_label
                arr_features.append(features)
    return arr_features

In [9]:
#Reading all audio files and calculating features for each of them.
#This function generates 2 rows for each song: one for the first 15 s and one for everything after the 15 s mark:

def read_process_songs2(src_dir, debug = True, clip_seconds = 15):
    """Extract two feature rows per audio file found under ``src_dir``.

    Each file is split at ``clip_seconds``: the first row describes the audio
    before that mark, the second row everything after it.

    The file is decoded once and sliced, instead of being loaded twice with
    ``duration=`` / ``offset=``. When a file has to be resampled on load, the
    samples right at the cut may differ marginally from the two-load approach.

    Parameters
    ----------
    src_dir : str
        Directory holding one sub-folder per key of the module-level
        ``genres`` dict.
    debug : bool, optional
        When true, print the path of each file before it is processed.
    clip_seconds : float, optional
        Position of the cut, in seconds from the start of the file.

    Returns
    -------
    list of dict
        One ``get_features`` dict per non-empty clip, with an extra
        ``'genre'`` key holding the integer label from ``genres``.
    """
    arr_features = []

    for genre_name, genre_label in genres.items():
        # os.path.join works whether or not src_dir ends with a separator.
        folder = os.path.join(src_dir, genre_name)

        for root, subdirs, files in os.walk(folder):
            # Prune hidden folders (e.g. .ipynb_checkpoints) and fix the
            # traversal order so the resulting rows are reproducible.
            subdirs[:] = sorted(d for d in subdirs if not d.startswith('.'))

            for file in sorted(files):
                # Hidden files such as .DS_Store are not audio.
                if file.startswith('.'):
                    continue

                # Build the path from ``root``: os.walk also yields nested
                # folders, where ``folder + "/" + file`` would not exist.
                file_name = os.path.join(root, file)

                # Printed before loading so a failing file can be identified.
                if debug:
                    print("Reading file: {}".format(file_name))

                signal, sr = librosa.load(file_name)
                split_at = int(clip_seconds * sr)
                clips = [signal[:split_at], signal[split_at:]]

                for clip in clips:
                    # A recording no longer than clip_seconds has no second
                    # clip; there is nothing to extract from an empty array.
                    if clip.size == 0:
                        continue
                    features = get_features(clip, sr)
                    features['genre'] = genre_label
                    arr_features.append(features)
    return arr_features

In [6]:
#Reading all audio files and calculating features for each of them.
#This function generates 3 rows for each song, cutting the loaded signal at samples 220000 and 440000 (the third row covers everything after sample 440000):

def read_process_songs3(src_dir, debug = True, segment_len = 220000):
    """Extract three feature rows per audio file found under ``src_dir``.

    The loaded signal is cut into ``[0, segment_len)``,
    ``[segment_len, 2 * segment_len)`` and ``[2 * segment_len, end)``; each
    non-empty segment becomes one row.

    Parameters
    ----------
    src_dir : str
        Directory holding one sub-folder per key of the module-level
        ``genres`` dict.
    debug : bool, optional
        When true, print the path of each file before it is processed.
    segment_len : int, optional
        Length of the first two segments, in samples.

    Returns
    -------
    list of dict
        One ``get_features`` dict per non-empty segment, with an extra
        ``'genre'`` key holding the integer label from ``genres``.
    """
    arr_features = []

    for genre_name, genre_label in genres.items():
        # os.path.join works whether or not src_dir ends with a separator.
        folder = os.path.join(src_dir, genre_name)

        for root, subdirs, files in os.walk(folder):
            # Prune hidden folders (e.g. .ipynb_checkpoints) and fix the
            # traversal order so the resulting rows are reproducible.
            subdirs[:] = sorted(d for d in subdirs if not d.startswith('.'))

            for file in sorted(files):
                # Hidden files such as .DS_Store are not audio.
                if file.startswith('.'):
                    continue

                # Build the path from ``root``: os.walk also yields nested
                # folders, where ``folder + "/" + file`` would not exist.
                file_name = os.path.join(root, file)

                # Printed before loading so a failing file can be identified.
                if debug:
                    print("Reading file: {}".format(file_name))

                signal, sr = librosa.load(file_name)
                segments = [
                    signal[:segment_len],
                    signal[segment_len:2 * segment_len],
                    signal[2 * segment_len:],
                ]

                for segment in segments:
                    # Short recordings leave trailing segments empty; there
                    # is nothing to extract from an empty array.
                    if segment.size == 0:
                        continue
                    features = get_features(segment, sr)
                    features['genre'] = genre_label
                    arr_features.append(features)
    return arr_features

In [25]:
# One feature dict per song under audio_dir (the DataFrame is built in a later cell):
features = read_process_songs(src_dir=audio_dir, debug=False)

In [26]:
# Two feature dicts per song under audio_dir (split at the 15 s mark):
features2 = read_process_songs2(src_dir=audio_dir, debug=False)

In [10]:
# Tabulate the per-song feature dicts: one row per dict, one column per key.
df_features1 = pd.DataFrame(data=features)

In [12]:
# Save as .csv. The output folder is created first: to_csv does not create
# missing directories, so a fresh checkout would otherwise fail here.
os.makedirs('./Features', exist_ok=True)
df_features1.to_csv('./Features/features1.csv', index=False)


In [16]:
# Tabulate the two-clips-per-song feature dicts.
df_features2 = pd.DataFrame(data=features2)

In [23]:
# Write the two-clips-per-song table without the index column.
df_features2.to_csv(path_or_buf="./Features/features2.csv", index=False)

In [7]:
# Same folder as the test_dir assigned in the configuration cell near the top.
test_dir = "./Data/test_songs/"

In [9]:
# Two feature dicts per test song (split at the 15 s mark), with progress output.
features_test = read_process_songs2(src_dir=test_dir, debug=True)

In [9]:
# Tabulate the test-song feature dicts produced by the preceding cell.
df_test = pd.DataFrame(data=features_test)

In [11]:
# Write the two-clips-per-song test table without the index column.
df_test.to_csv(path_or_buf="./Features/features_test_2.csv", index=False)

In [12]:
# One feature dict per test song; replaces the earlier two-clip features_test.
features_test = read_process_songs(src_dir=test_dir, debug=True)

Reading file: ./Data/test_songs/blues/blues.00099.wav
Reading file: ./Data/test_songs/classical/classical.00099.wav
Reading file: ./Data/test_songs/country/country.00099.wav
Reading file: ./Data/test_songs/disco/disco.00099.wav
Reading file: ./Data/test_songs/hiphop/hiphop.00099.wav
Reading file: ./Data/test_songs/jazz/jazz.00099.wav
Reading file: ./Data/test_songs/metal/metal.00099.wav
Reading file: ./Data/test_songs/pop/pop.00099.wav
Reading file: ./Data/test_songs/reggae/reggae.00099.wav
Reading file: ./Data/test_songs/rock/rock.00099.wav


In [13]:
# Tabulate the one-row-per-song test features; replaces the earlier df_test.
df_test = pd.DataFrame(data=features_test)

In [15]:
# Write the one-row-per-song test table without the index column.
df_test.to_csv(path_or_buf="./Features/features_test_1.csv", index=False)

In [7]:
# Three feature dicts per song under audio_dir, with progress output.
features3 = read_process_songs3(src_dir=audio_dir, debug=True)

In [None]:
# NOTE(review): repeats the features_test_1.csv save from an earlier cell and
# df_test is not reassigned in between; looks like a leftover duplicate.
df_test.to_csv(path_or_buf="./Features/features_test_1.csv", index=False)

In [12]:
# Tabulate the three-segments-per-song feature dicts.
df3 = pd.DataFrame(data=features3)

In [14]:
# Write the three-segments-per-song table without the index column.
df3.to_csv(path_or_buf="./Features/features3.csv", index=False)

In [16]:
# Three feature dicts per test song, with progress output.
testf3 = read_process_songs3(src_dir=test_dir, debug=True)

Reading file: ./Data/test_songs/blues/blues.00099.wav
Reading file: ./Data/test_songs/classical/classical.00099.wav
Reading file: ./Data/test_songs/country/country.00099.wav
Reading file: ./Data/test_songs/disco/disco.00099.wav
Reading file: ./Data/test_songs/hiphop/hiphop.00099.wav
Reading file: ./Data/test_songs/jazz/jazz.00099.wav
Reading file: ./Data/test_songs/metal/metal.00099.wav
Reading file: ./Data/test_songs/pop/pop.00099.wav
Reading file: ./Data/test_songs/reggae/reggae.00099.wav
Reading file: ./Data/test_songs/rock/rock.00099.wav


In [17]:
# Tabulate the three-segments-per-song test features.
tf3 = pd.DataFrame(data=testf3)

In [18]:
# Write the three-segments-per-song test table without the index column.
tf3.to_csv(path_or_buf="./Features/features_test_3.csv", index=False)