# Feature Extraction

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
import IPython.display as ipd
from sklearn.cluster import KMeans

Get a list of all files:

In [2]:
mp3_folder = '../../../Downloads/audio_ads/audio_ads' # audio files location


def find_mp3_files(folder):
    '''Recursively collects the paths of all mp3 files below folder.

       folder: directory to search (sub-directories are included).

       Returns a sorted list of paths. Each path is built from the
       directory the file was actually found in, so files in nested
       folders resolve correctly. A folder that does not exist yields
       an empty list, because os.walk ignores listing errors by default.'''
    found = []
    for root, _dirs, filenames in os.walk(folder):
        for filename in filenames:
            # Test the extension only (case-insensitively): a plain
            # substring test would also accept names like 'x.mp3.txt'.
            if filename.lower().endswith('.mp3'):
                # Join with root, not with the top folder, otherwise files
                # in sub-directories get paths that do not exist.
                found.append(os.path.join(root, filename))
    # os.walk order depends on the filesystem; sort so that slices such as
    # files[0:2] select the same files on every run and machine.
    return sorted(found)


files = find_mp3_files(mp3_folder)

How many mp3 files do we have?

In [3]:
len(files)

2307

### Define utility functions:

In [4]:
def load_clips(filepath_list, d = 3, sr = 22050):
    '''Loads the files in filepath_list and cuts each one into
       consecutive, non-overlapping clips of d seconds.

       filepath_list: iterable of audio file paths.
       d:  clip length in seconds.
       sr: sampling rate the audio is resampled to while loading.

       Returns a list of arrays with exactly round(sr*d) samples each,
       ordered by file and then by position in the file. A trailing
       remainder shorter than one clip is dropped.

       Raises ValueError if d and sr describe a clip of less than one
       sample.'''
    clip_len = int(round(sr * d))
    if clip_len <= 0:
        # An empty clip would match every (empty) read and never terminate.
        raise ValueError('clip length d*sr must be at least one sample')

    clip_list = []
    for f in filepath_list:
        # Decode each file once, at the requested rate. Passing sr here is
        # essential: without it librosa resamples to its own default rate
        # and clips never have the expected length for any other sr.
        audio = librosa.load(f, sr=sr)[0]
        # Keep only complete clips; slicing avoids re-opening the file
        # for every clip.
        n_full = len(audio) // clip_len
        for i in range(n_full):
            clip_list.append(audio[i*clip_len:(i+1)*clip_len])

    return clip_list

In [5]:
def clips2features(clip_list, n_mfcc = 13, sr = 22050):
    '''Takes a list of equal length clips with rate sr and returns
       one flat MFCC feature vector per clip.

       clip_list: iterable of audio clips (sample arrays).
       n_mfcc:    number of MFCC coefficients computed per frame.
       sr:        sampling rate of the clips.

       Returns a list with one 1-D array per clip: the MFCC matrix of
       the clip, flattened. Vectors only have equal length (and can
       only be stacked later) if all clips have equal length.'''
    feature_vectors = []
    for clip in clip_list:
        # Pass the audio as y=...: recent librosa releases accept it by
        # keyword only, and the keyword form works on older ones too.
        features = librosa.feature.mfcc(y=clip, sr=sr, n_mfcc=n_mfcc, dct_type=2)
        feature_vectors.append(features.flatten())

    return feature_vectors

In [6]:
def train_kmeans(feature_vectors, n_clusters = 10, random_state = None):
    '''Takes a list of feature vectors, standardizes them and trains
       a k-means model on the result.

       feature_vectors: non-empty list of equal-length 1-D vectors.
       n_clusters:      number of clusters to fit.
       random_state:    seed forwarded to KMeans; set it to get the
                        same clustering on every run.

       Returns (model, mu, std): the fitted model plus the per-feature
       mean and standard deviation used for normalization. New data
       must be transformed as (x - mu) / std before it is given to the
       model. Features that are constant in the training data get
       std = 1, so that this transformation never divides by zero.'''
    X = np.vstack(feature_vectors) # stack vertically (#samples, #features)
    # normalize
    mu = np.mean(X, axis=0)
    std = np.std(X, axis=0)
    # A constant feature has std 0; dividing by it would put NaN into X
    # and make the fit fail. Scale such features by 1 instead.
    std = np.where(std == 0, 1.0, std)
    X = (X-mu)/std
    # create and train model
    model = KMeans(n_clusters=n_clusters, random_state=random_state)
    model.fit(X)

    return model,mu,std

In [7]:
# Smoke test of the pipeline on the first two files only:
# cut them into clips, then turn every clip into one feature vector.
feat = clips2features(clip_list=load_clips(filepath_list=files[:2]))
# Number of feature vectors == number of clips extracted.
len(feat)

30

In [8]:
# Fit k-means (default n_clusters = 10) on the feature vectors; mu and std
# are the normalization statistics computed inside train_kmeans.
model, mu, std = train_kmeans(feat)

In [9]:
# Shape of the fitted cluster centers: (number of clusters, features per vector).
model.cluster_centers_.shape

(10, 1690)

In [None]:
# Shape of the stacked feature matrix, (#samples, #features), as built in train_kmeans.
np.vstack(feat).shape

In [None]:
# Raw audio clips of the same two files used for feat above.
# NOTE(review): this repeats the loading already done when computing feat.
data = load_clips(files[0:2])

In [None]:
# Number of clips loaded from the two files.
len(data)

In [None]:
# Listen to a single clip (index 20). The rate passed here is the same value
# as load_clips' default sr; keep the two in sync if sr is ever changed.
ipd.Audio(data[20], rate = 22050)