In [15]:
import os, warnings, csv
import IPython.display as ipd
from glob import glob

import librosa, librosa.display

import numpy as np, pandas as pd
from scipy.stats import kurtosis, skew

import hvplot.pandas


In [16]:
# Functions to help with extraction

# Get various moments from the vectors
def get_moments(descriptors):
    result = {}
    for k, v in descriptors.items():
        result['{}_max'.format(k)] = np.max(v)
        result['{}_min'.format(k)] = np.min(v)
        result['{}_mean'.format(k)] = np.mean(v)
        result['{}_std'.format(k)] = np.std(v)
        result['{}_kurtosis'.format(k)] = kurtosis(v)
        result['{}_skew'.format(k)] = skew(v)
    return result

def get_features(wave, sr):                            # Collect features from a waveform
    
    features = {
                'centroid': None,                      # start with only vector features for which we need moments
                'rolloff': None, 
                'flux': None, 
                'rmse': None,
                'zcr': None, 
                'contrast': None, 
                'bandwidth': None, 
                'flatness': None, 
                'chroma_stft': None
               }

# Built-in librosa functions
    features['centroid'] = librosa.feature.spectral_centroid(wave, sr=sr).ravel()
    features['rolloff'] = librosa.feature.spectral_rolloff(wave, sr=sr,).ravel()
    features['flux'] = librosa.onset.onset_strength(y=wave, sr=sr).ravel()
    features['rmse'] = librosa.feature.rms(wave).ravel()
    features['zcr'] = librosa.feature.zero_crossing_rate(wave).ravel()
    features['contrast'] = librosa.feature.spectral_contrast(wave, sr=sr).ravel()
    features['bandwidth'] = librosa.feature.spectral_bandwidth(wave, sr=sr).ravel()
    features['flatness'] = librosa.feature.spectral_flatness(wave).ravel()
    features['chroma_stft']=librosa.feature.chroma_stft(y=wave, sr=sr).ravel()
    
# special MFCC treatment
    mfcc = librosa.feature.mfcc(wave, sr=sr, n_mfcc=13)
    for idx, v_mfcc in enumerate(mfcc):
        features['mfcc_{}'.format(idx)] = v_mfcc.ravel()
   
    dict_agg_features = get_moments(features)
    
# Now add non-vector features
    dict_agg_features['total_frames'] = len(wave)
    
    audible, _ = librosa.effects.trim(wave)
    dict_agg_features['silent_frames'] = len(wave) - len(audible)  
    
    dict_agg_features['tempo'] = librosa.beat.beat_track(wave, sr=sr)[0]   # beats per minute
    
    return dict_agg_features

In [18]:
# Analyze the whole genre now

feature_repo = []                                                 # Drop all features into big array

files = glob('/home/stu/final_capstone/music_library/folk/**/*.mp3', recursive=True)

files = files[:200]                                                # limit to stay within memory constraints

for file in files:

    with warnings.catch_warnings():                               # Avoid warning about reading MP3s
        warnings.simplefilter("ignore")
        sound, sr = librosa.load(file)

    print('Processing:', file,'...', end = '')

    features = get_features(sound,sr)
    features["filename"] = file                                   # Include filename for reference
    features["genre"] = 'folk'                                   # ***change for each separate routine***

    feature_repo.append(features)                                 # Add to the array
    print("Done.")

pd.DataFrame(feature_repo).to_parquet('/home/stu/final_capstone/features/folk.parq')               # Write out to file            
print('folk.parq is now ready.')

Processing: /home/stu/final_capstone/music_library/folk/John Fahey/Essential John Fahey/13_Fight on Christians, Fight On.mp3 ...Done.
Processing: /home/stu/final_capstone/music_library/folk/John Fahey/Essential John Fahey/09_Commemorative Transfiguration and Communion at Magruder Park.mp3 ...Done.
Processing: /home/stu/final_capstone/music_library/folk/John Fahey/Essential John Fahey/06_Dance of the Inhabitants of the Invisible City of Bladensburg.mp3 ...Done.
Processing: /home/stu/final_capstone/music_library/folk/John Fahey/Essential John Fahey/10_Requiem for John Hurt.mp3 ...Done.
Processing: /home/stu/final_capstone/music_library/folk/John Fahey/Essential John Fahey/02_View (East from the Top of the Riggs Road-B&O Trestle).mp3 ...Done.
Processing: /home/stu/final_capstone/music_library/folk/John Fahey/Essential John Fahey/12_When the Catfish Is in Bloom.mp3 ...Done.
Processing: /home/stu/final_capstone/music_library/folk/John Fahey/Essential John Fahey/08_Irish Setter.mp3 ...Done.


Processing: /home/stu/final_capstone/music_library/folk/Jonathan Edwards/Jonathan Edwards/01_Everybody Knows Her.mp3 ...Done.
Processing: /home/stu/final_capstone/music_library/folk/Jonathan Edwards/Jonathan Edwards/09_Don't Cry Blue.mp3 ...Done.
Processing: /home/stu/final_capstone/music_library/folk/Jonathan Edwards/Jonathan Edwards/04_Dusty Morning.mp3 ...Done.
Processing: /home/stu/final_capstone/music_library/folk/Jonathan Edwards/Jonathan Edwards/02_Cold Snow.mp3 ...Done.
Processing: /home/stu/final_capstone/music_library/folk/Jonathan Edwards/Jonathan Edwards/06_Shanty.mp3 ...Done.
Processing: /home/stu/final_capstone/music_library/folk/Jonathan Edwards/Jonathan Edwards/03_Athens County.mp3 ...Done.
Processing: /home/stu/final_capstone/music_library/folk/Jonathan Edwards/Jonathan Edwards/10_Jesse.mp3 ...Done.
Processing: /home/stu/final_capstone/music_library/folk/Jonathan Edwards/Jonathan Edwards/05_Emma.mp3 ...Done.
Processing: /home/stu/final_capstone/music_library/folk/Jonat

In [13]:
# Make sure it can be read back in

foo = pd.read_parquet('/home/stu/final_capstone/features/folk.parq')
display(foo.info)
foo.hvplot.table(columns=['filename', 'genre'])

<bound method DataFrame.info of     centroid_max  centroid_min  centroid_mean  centroid_std  \
0   10305.111262      0.000000    1627.635404    765.349052   
1    9894.241030      0.000000    1763.542387    797.673898   
2    4563.384442      0.000000    1343.911641    710.160681   
3    5225.603658      0.000000     999.920411    535.322335   
4    5019.834550      0.000000    1415.220498    673.962666   
5    7563.376766      0.000000    1374.210107    748.829157   
6    6546.536086      0.000000    2338.636758   1202.675374   
7    6310.991607      0.000000    1833.799939    988.208179   
8    5918.111861      0.000000    1347.422945    810.595191   
9    7465.142411    728.771073    2148.827773    885.563891   
10   7164.413127    483.319338    1631.890387    847.494632   
11   5746.605503    358.205027    1385.201393    738.007240   
12   6229.244290    177.769576    1615.299077    810.888101   
13   7021.653946    376.904338    1846.443004    957.689245   
14   9613.374014      0