<a href="https://colab.research.google.com/github/magiwanders/CMLS_HW1/blob/master/src/1_FeatureAnalysisAndComputation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **FEATURE ANALYSIS FOR THE DATASET**
### Extract, plot and compare different features for the dataset (e.g. MFCC). Choose the features that enable the best differentiation between the classes.

# Mount the drive and enter the dataset directory

In [None]:
# Mount Google Drive inside the Colab runtime so the dataset stored there is reachable.
from google.colab import drive
drive.mount('/content/drive')
# Make the dataset folder the working directory: later cells use relative paths
# ('UrbanSound8K.csv', Path(".")).
%cd "/content/drive/MyDrive/CMLS_HW1_UrbanClassification/dataset"
# Sanity check: list the entries whose name contains "fold".
%ls | grep fold

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/.shortcut-targets-by-id/1i9lEcEb5PQxKCFXJaoO3WLS2p3xBnkij/CMLS_HW1_UrbanClassification/dataset
[0m[01;34mfold1[0m/
[01;34mfold10[0m/
[01;34mfold2[0m/
[01;34mfold3[0m/
[01;34mfold4[0m/
[01;34mfold5[0m/
[01;34mfold6[0m/
[01;34mfold7[0m/
[01;34mfold8[0m/
[01;34mfold9[0m/


# Import libraries

In [None]:
import numpy as np
import librosa
import os
import matplotlib.pyplot as plt
import sklearn.svm
import IPython.display as ipd
import scipy as sp
from pathlib import Path
import pandas as pd
import re
import json
from multiprocessing import Pool
!pip install p_tqdm
import numpy as np
import time
from tqdm import tqdm
from p_tqdm import p_map



# Import the Dataset Metadata

In [None]:
# Load the dataset metadata table (path is relative to the current working directory).
metadata = pd.read_csv('UrbanSound8K.csv')
# Display the first rows to inspect the available columns.
metadata.head()

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,className
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


# Define helper functions

In [None]:
def compute_mfcc(audio, fs, win_length, hop_size, n_mfcc, n_mels=40):
    """Compute the MFCCs of an audio signal, one column per analysis frame.

    Pipeline: magnitude STFT (Hamming window) -> mel filter bank -> log10 ->
    orthonormal DCT along the mel axis. DCT coefficient 0 is discarded and
    coefficients 1..n_mfcc are returned.

    Parameters
    ----------
    audio : np.ndarray
        Audio samples, passed as-is to ``librosa.stft``.
    fs : int or float
        Sampling rate of ``audio`` in Hz.
    win_length : int
        FFT size in samples (used as ``n_fft`` for both STFT and filter bank).
    hop_size : int
        Hop between consecutive frames, in samples.
    n_mfcc : int
        Number of coefficients to return.
    n_mels : int, optional
        Number of mel bands in the filter bank (default 40).

    Returns
    -------
    np.ndarray
        Array with ``n_mfcc`` rows; for a 1-D signal the columns are the
        STFT frames.

    Raises
    ------
    ValueError
        If ``n_mfcc >= n_mels``: the DCT only yields ``n_mels`` coefficients
        and the first one is dropped, so fewer rows than requested would be
        returned silently.
    """
    # Imported here so the function does not depend on `scipy.fftpack`
    # having been loaded as a side effect of another import.
    from scipy.fftpack import dct

    if n_mfcc >= n_mels:
        raise ValueError(
            "n_mfcc ({}) must be smaller than n_mels ({})".format(n_mfcc, n_mels)
        )

    # Magnitude spectrogram of the audio signal
    X = np.abs(librosa.stft(
        audio,
        window='hamming',
        n_fft=win_length,
        hop_length=hop_size,
    ))

    # Weights of the mel filters. The bank must stop at the Nyquist frequency
    # (fs / 2): the spectrogram has no bins above it, so any filter placed
    # there is empty and only contributes a constant log(1e-16) row.
    mel = librosa.filters.mel(
        sr=fs,
        n_fft=win_length,
        n_mels=n_mels,
        fmin=0,
        fmax=fs / 2,
    )

    # Apply the filters to the spectrogram
    melspectrogram = np.dot(mel, X)
    # Take the logarithm; the small offset avoids log10(0) on silent bands
    log_melspectrogram = np.log10(melspectrogram + 1e-16)

    # DCT over the mel axis; skip coefficient 0 and keep the next n_mfcc
    mfcc = dct(log_melspectrogram, axis=0, norm='ortho')[1:n_mfcc + 1]
    return mfcc

In [None]:
def extract_features(x, fs, win_length, hop_size, n_mfcc):
    """Summarise the MFCCs of a signal with per-coefficient statistics over time.

    Parameters
    ----------
    x : np.ndarray
        Audio samples.
    fs : int or float
        Sampling rate of ``x`` in Hz.
    win_length : int
        Analysis window / FFT size in samples.
    hop_size : int
        Hop between consecutive frames, in samples.
    n_mfcc : int
        Number of MFCCs computed per frame.

    Returns
    -------
    np.ndarray
        float64 array of shape ``(1, 5 * n_mfcc)`` laid out as
        ``[min | max | mean | median | variance]``, each group holding one
        value per coefficient (e.g. 125 values for ``n_mfcc = 25``).
    """
    mfcc = compute_mfcc(x, fs, win_length, hop_size, n_mfcc)

    # Statistics along the time axis (axis=1): one value per coefficient.
    # Local names avoid shadowing the builtins `min` and `max`.
    mfcc_min = np.min(mfcc, axis=1)
    mfcc_max = np.max(mfcc, axis=1)
    mfcc_mean = np.mean(mfcc, axis=1)
    mfcc_median = np.median(mfcc, axis=1)
    mfcc_variance = np.var(mfcc, axis=1)

    ext_features = np.hstack(
        [mfcc_min, mfcc_max, mfcc_mean, mfcc_median, mfcc_variance]
    )

    # Single-row matrix whose width follows n_mfcc (no hard-coded 125).
    # Cast to float64 so the result dtype does not depend on the input dtype.
    features = np.asarray(ext_features, dtype=np.float64).reshape(1, -1)

    return features

# Compute the features for each dataset fold

In [None]:
# Assume that the dataset is in the current directory
dataset_path = Path(".")

Fs = 22050  # sampling rate [Hz] every file is resampled to when loaded

win_length = int(np.ceil(0.0232 * Fs))  # 23.2 ms window -> 512 samples at 22050 Hz
hop_size = int(0.5 * win_length)        # hop of half a window (50 % overlap)

n_mfcc = 25

# Only this fold is processed by this cell (fold directories are named
# 'fold1' ... 'fold10'); the other folds are skipped.
TARGET_FOLD = '1'

# Index the metadata by file name once, instead of scanning the whole table
# for every audio file. If a name were listed twice, the first row is kept.
metadata_by_file = (
    metadata
    .drop_duplicates('slice_file_name')
    .set_index('slice_file_name')
)

# One record per audio file: [features, features.shape, label_id, label, fold]
mfcc_data = []

# For each directory, which corresponds to a fold ...
# (sorted so that the row order does not depend on the filesystem listing order)
for current_fold_dir in sorted(dataset_path.iterdir()):
    # Keep only real fold directories; this also skips entries such as
    # '.ipynb_checkpoints' that carry no fold number.
    fold_match = re.fullmatch(r'fold(\d+)', current_fold_dir.name)
    if fold_match is None or not current_fold_dir.is_dir():
        continue

    current_fold_number = fold_match.group(1)
    if current_fold_number != TARGET_FOLD:
        continue

    print("Scanning fold {} of 10".format(current_fold_number))

    # For each audio file in current_fold_dir
    for current_audio_path in sorted(current_fold_dir.iterdir()):
        # Keep only regular '.wav' files
        if current_audio_path.is_dir() or current_audio_path.suffix != '.wav':
            continue

        filename = current_audio_path.name
        print("Currently processing: {}".format(filename))

        # Fail with an explicit message rather than an IndexError on an empty lookup
        if filename not in metadata_by_file.index:
            raise KeyError(
                "No metadata row found for audio file '{}'".format(filename)
            )

        x, _ = librosa.load(current_audio_path, sr=Fs)

        features = extract_features(x, Fs, win_length, hop_size, n_mfcc)

        # Read the fields by column name instead of by position from the end of the row
        label = metadata_by_file.at[filename, 'className']
        label_id = int(metadata_by_file.at[filename, 'classID'])
        fold = int(metadata_by_file.at[filename, 'fold'])

        mfcc_data.append([features, features.shape, label_id, label, fold])

# # If anyone knows a smarter way to do what is below, feel free to replace it
# print(dataset_path.iterdir())
# list_of_folds = []
# for current_fold_dir in dataset_path.iterdir():
#   if current_fold_dir.is_dir():
#     list_of_folds.append(current_fold_dir)

# print(list_of_folds)

# def scan_folder(current_fold_dir):
#   #   # Check if the directory is really a directory
#     if current_fold_dir.is_dir():
#       # Save the current fold number
#       current_fold_number = re.findall('[0-9-]+', str(current_fold_dir)) # Extract the fold number with regex
#       #print("Scanning fold {} of 10" .format(current_fold_number))#, end='\x1b[1K\r') # Status printing with line clearing

#       # For each audio file in current_fold_dir
#       for current_audio_dir in (current_fold_dir).iterdir():
#         # Check if it's really a file and not a fold
#         if not current_audio_dir.is_dir() and os.path.splitext(current_audio_dir)[1] == '.wav' and current_fold_number[0] == '1' :
#           filename = current_audio_dir.stem + '.wav'
#           print("Currently processing: {}" .format(filename))
#           print("Scanning fold {} of 10" .format(current_fold_number))#, end='\x1b[1K\r') # Status printing with line clearing
      
#           x, sr = librosa.load(current_audio_dir, sr=Fs)

#           features = extract_features(x, Fs, win_length, hop_size, n_mfcc)

#           metadata_row = metadata.loc[metadata['slice_file_name']==filename].values.tolist()
#           label = metadata_row[0][-1];
#           label_id = metadata_row[0][-2];
#           fold = metadata_row[0][-3]
        
#           mfcc_data.append([features, features.shape, label_id, label, fold])

# print("Number of cores:" + str(multiprocessing.cpu_count()))
# if __name__ == '__main__':
#     with Pool() as p:
#         p.map(scan_folder, list_of_folds)
#     p.close()

<generator object Path.iterdir at 0x7f5dfe1ef950>
[PosixPath('fold1'), PosixPath('fold4'), PosixPath('fold2'), PosixPath('fold3'), PosixPath('fold6'), PosixPath('fold5'), PosixPath('fold7'), PosixPath('fold8'), PosixPath('fold9'), PosixPath('fold10'), PosixPath('.ipynb_checkpoints')]
Number of cores:2
Currently processing: 101415-3-0-2.wav
Scanning fold ['1'] of 10


  "Empty filters detected in mel frequency basis. "


Currently processing: 102305-6-0-0.wav
Scanning fold ['1'] of 10
Currently processing: 101415-3-0-8.wav
Scanning fold ['1'] of 10
Currently processing: 102842-3-0-1.wav
Scanning fold ['1'] of 10
Currently processing: 103074-7-1-0.wav
Scanning fold ['1'] of 10
Currently processing: 101415-3-0-3.wav
Scanning fold ['1'] of 10
Currently processing: 103074-7-1-1.wav
Scanning fold ['1'] of 10
Currently processing: 102842-3-1-0.wav
Scanning fold ['1'] of 10
Currently processing: 102842-3-1-6.wav
Scanning fold ['1'] of 10
Currently processing: 102842-3-1-5.wav
Scanning fold ['1'] of 10
Currently processing: 103074-7-1-2.wav
Scanning fold ['1'] of 10
Currently processing: 102106-3-0-0.wav
Scanning fold ['1'] of 10
Currently processing: 103074-7-0-0.wav
Scanning fold ['1'] of 10
Currently processing: 103074-7-0-2.wav
Scanning fold ['1'] of 10
Currently processing: 103074-7-0-1.wav
Scanning fold ['1'] of 10
Currently processing: 103074-7-2-0.wav
Scanning fold ['1'] of 10
Currently processing: 103

In [None]:
# Column names, in the same order as the entries of each record appended to mfcc_data.
cols=["features", "shape","label_id", "label", "fold"]
# Build the feature table: one row per record in mfcc_data.
mfcc_pd = pd.DataFrame(data = mfcc_data, columns=cols)

In [None]:
# Preview the first rows of the feature table.
mfcc_pd.head()

Unnamed: 0,features,shape,label_id,label,fold


In [None]:
# Write the feature table to a JSON file, keyed by row index (orient='index').
# NOTE(review): pandas documents that to_json returns None when a path is
# given, so mfcc_json does not hold the JSON text; the parsing lines below
# would need the path argument removed to work.
mfcc_json = mfcc_pd.to_json(r'prova_feature_1.json', orient='index')
# parsed = json.loads(mfcc_json)
# json.dumps(parsed, indent=4)


In [None]:
# Read the saved JSON file back into a new DataFrame (same orient as used when writing).
mfcc_pd_loaded = pd.read_json(r'prova_feature_1.json', orient='index')

In [None]:
# Preview the first rows of the table re-loaded from JSON.
mfcc_pd_loaded.head()

Unnamed: 0,shape,label_id,label,fold,sample
0,"[1, 125]",3,dog_bark,1,"[14.9781341553, -13.4497470856, 9.22123909, -1..."
1,"[1, 125]",6,gun_shot,1,"[11.5654029846, -14.1882009506, 8.8127670288, ..."
2,"[1, 125]",3,dog_bark,1,"[15.2881679535, -13.8906269073, 9.5015745163, ..."
3,"[1, 125]",3,dog_bark,1,"[16.1191730499, -12.8394641876, 10.6445789337,..."
4,"[1, 125]",7,jackhammer,1,"[16.8805580139, -15.4316043854, 11.1742210388,..."


In [None]:
# Distinct class names present in the feature table.
labels = set(mfcc_pd['label'])
print(labels)

# Materialise the label column once, rather than rebuilding the list for every class.
all_labels = list(mfcc_pd['label'])

# [class name, number of rows with that class] pairs, then the same as a dict.
cnt = [[label, all_labels.count(label)] for label in labels]
dict_cnt = dict(cnt)
dict_cnt

{'dog_bark', 'jackhammer', 'street_music', 'engine_idling', 'children_playing', 'gun_shot', 'air_conditioner', 'drilling', 'siren', 'car_horn'}


{'air_conditioner': 100,
 'car_horn': 36,
 'children_playing': 100,
 'dog_bark': 100,
 'drilling': 100,
 'engine_idling': 96,
 'gun_shot': 35,
 'jackhammer': 120,
 'siren': 86,
 'street_music': 100}

In [None]:
# Replace the 2-D 'features' entries with flattened 1-D vectors stored in a
# new 'sample' column. The guard makes the cell safe to re-run: once
# 'features' has been removed there is nothing left to convert.
if 'features' in mfcc_pd.columns:
    # Iterate over the column values directly so the result does not depend
    # on the DataFrame index being 0..n-1.
    ll = [feature_matrix.ravel() for feature_matrix in mfcc_pd['features']]
    mfcc_pd['sample'] = pd.Series(ll, index=mfcc_pd.index)
    del mfcc_pd['features']

In [None]:
# Preview the table now that 'features' has been replaced by the flattened 'sample' column.
mfcc_pd.head()

Unnamed: 0,shape,label_id,label,fold,sample
0,"(1, 125)",3,dog_bark,1,"[14.978134155273438, -13.449747085571289, 9.22..."
1,"(1, 125)",6,gun_shot,1,"[11.56540298461914, -14.188200950622559, 8.812..."
2,"(1, 125)",3,dog_bark,1,"[15.288167953491211, -13.890626907348633, 9.50..."
3,"(1, 125)",3,dog_bark,1,"[16.119173049926758, -12.83946418762207, 10.64..."
4,"(1, 125)",7,jackhammer,1,"[16.880558013916016, -15.431604385375977, 11.1..."
