# IMPORTS

In [2]:
import sys
import os
import pandas as pd
import librosa
import glob 
import IPython.display as ipd
import numpy as np

# Print NumPy arrays in full: raising the summarisation threshold to
# sys.maxsize stops NumPy from abbreviating large arrays with "...".
# Note that this makes the printed output of big arrays very long.
np.set_printoptions(threshold=sys.maxsize)

# FUNCTIONS

In [5]:
# Get all .wav files and their paths in the given directory. (Recursively)
def get_all_paths(directory):
    """Collect every .wav file found anywhere below `directory`.

    The tree is walked recursively. Entries that are not .wav files (for
    example a CSV file stored next to the class folders) are skipped instead
    of being treated as directories or as audio files.

    Parameters
    ----------
    directory : str
        Root folder of the dataset.

    Returns
    -------
    dict
        Maps the file name without its extension to the path of the file.
        If two files share the same name, the one visited last wins.
    """
    path_dict = {}

    for root, subdirs, filenames in os.walk(directory):
        # Sorting in place makes os.walk visit sub-folders in a stable order,
        # so the resulting dictionary is ordered the same way on every run.
        subdirs.sort()

        for filename in sorted(filenames):
            # splitext only strips the final extension, so names that contain
            # additional dots are kept intact.
            stem, extension = os.path.splitext(filename)
            if extension.lower() != '.wav':
                continue

            path_dict[stem] = os.path.join(root, filename)

    return path_dict


# Extract certain features from a given audio file.
def extract_feature(path):
    """Return the time-averaged mel spectrogram of one audio file.

    The file is loaded with librosa's default settings, its mel spectrogram
    is computed and every mel band is averaged over all time frames.

    Other librosa features (MFCC, chroma, spectral contrast, tonnetz,
    spectral centroid, zero-crossing rate, harmonic/percussive separation)
    were tried here earlier; only the mel spectrogram is returned, so only
    that feature is computed.

    Parameters
    ----------
    path : str
        Path of the audio file to analyse.

    Returns
    -------
    numpy.ndarray
        1-D array holding one mean value per mel band (128 values with
        librosa's default `n_mels`).
    """
    X, sample_rate = librosa.load(path)

    # The signal must be passed as the keyword `y`: recent librosa releases
    # no longer accept it as a positional argument.
    mel_spectrogram = librosa.feature.melspectrogram(y=X, sr=sample_rate)

    # Transpose to (frames, bands) and average over the frames.
    mel = np.mean(mel_spectrogram.T, axis=0)

    return mel

# EXAMPLE: run the extractor on a single recording and display the result.
example_wav = "/home/ozge/Desktop/dataset/crackingfire/1-7057-A-12.wav"
extract_feature(example_wav)

array([3.0467138e-02, 6.6514455e-02, 1.2975712e-01, 1.8463852e-01,
       2.0326297e-01, 1.7165220e-01, 5.7269998e-02, 3.3887785e-02,
       1.1187534e-02, 5.5428091e-03, 2.1257550e-03, 1.3830543e-03,
       1.3173126e-03, 1.2940229e-03, 1.4538580e-03, 1.9320415e-03,
       1.6374536e-03, 2.8306972e-03, 3.7263299e-03, 4.6027224e-03,
       4.4718189e-03, 4.4952617e-03, 4.8748916e-03, 5.0396402e-03,
       4.2392584e-03, 3.4790228e-03, 2.5583527e-03, 2.6267054e-03,
       2.6269690e-03, 3.0156286e-03, 2.5235778e-03, 1.9786160e-03,
       1.6217536e-03, 1.4472012e-03, 1.9256225e-03, 2.2438720e-03,
       2.2009271e-03, 2.1543188e-03, 1.9879972e-03, 1.9449224e-03,
       1.7509160e-03, 1.6389511e-03, 1.3494069e-03, 1.2019629e-03,
       1.0201766e-03, 7.1753585e-04, 5.2781985e-04, 4.9962889e-04,
       4.6755283e-04, 4.7129820e-04, 4.4579196e-04, 3.8971132e-04,
       3.1632924e-04, 3.4611195e-04, 3.6459800e-04, 3.6843232e-04,
       3.8551830e-04, 4.6951248e-04, 6.8014447e-04, 9.9125563e

# FEATURE EXTRACTION FOR THE WHOLE DATASET (MAGIC BEGINS HERE)

In [None]:
# Stores all the features of all given files.
all_files = get_all_paths("/home/ozge/Desktop/dataset")

# Prefix of the generated feature column names ('che0', 'che1', ...).
# NOTE(review): the cell that merges and reorders the columns refers to the
# features by name, so keep the prefix in sync with it when it is changed.
FEATURE_PREFIX = 'che'

# One row per audio file: the feature values followed by the file ID.
# The values are kept in a plain list because appending the ID string to the
# NumPy feature array would convert every number in the row to a string.
data = {}
for row, (name, path) in enumerate(all_files.items()):
    features = np.asarray(extract_feature(path)).ravel()
    data[row] = features.tolist() + [name]

# Derive the column names from the actual length of the feature vector so the
# table always matches whatever extract_feature returns.
n_features = len(next(iter(data.values()))) - 1 if data else 0
feature_columns = [FEATURE_PREFIX + str(k) for k in range(n_features)]

# DICTIONARY to DATAFRAME by rows.
feature_df = pd.DataFrame.from_dict(data, orient='index', columns=feature_columns + ['ID'])
feature_df.head()

In [None]:
# Load the table that holds the classes of the audio files.
classes_csv = '/home/ozge/Desktop/dataset/classes.csv'
class_df = pd.read_csv(classes_csv)
class_df.head()

In [None]:
# MERGE FEATURE AND CLASS DATAFRAMES
# Inner join on 'ID': files without a class entry (and classes without a
# file) are left out of the result.
merged_df = pd.merge(feature_df, class_df, on='ID')


# ORDER THE COLUMNS: ID, CLASS, THEN THE SELECTED FEATURES.
# The feature columns are taken from feature_df instead of being typed out,
# so the layout follows the extracted feature set automatically.
feature_columns = [column for column in feature_df.columns if column != 'ID']
df = merged_df.reindex(columns=['ID', 'Class'] + feature_columns)


# AND STORE IT TO A .CSV FILE.
df.to_csv('/home/ozge/Desktop/dataset/data6.csv')
df.head()