In [2]:
import os
import re
import string

import librosa
import numpy as np
import pandas as pd

In [3]:
def words_extractor(file, cl):
    # extract the class (positive or negative) and the name of the city
    file_name = file.split('/')[-1]
    if cl == 'positive':
        pos = re.compile('positive\w+').findall(file_name)[0].split('_')
        class1 = pos[0]
        city = ' '.join(pos[1:-1])
    else:
        neg = re.compile('negative\w+').findall(file_name)[0].split('_')
        class1 = neg[0]
        city = ' '.join(neg[1:-1])
    
    return class1, city

def create_dataframe(folder, cl):
    # Create a list of paths to all audio files and assign it to a variable
    files = librosa.util.find_files('./Audio/{}'.format(folder)) # in parameters, set the path to audio files

    # Create an empty list to append lists of values from different features
    values = []

    # Loop through the list of paths and load the files
    for file in files: 
        # y = audio time series
        # sr = sample rate of 'y'
        y, sr = librosa.load(file)
        
        # get the list of mean values extracted from different features
        stft = np.abs(librosa.stft(y))
        mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T, axis=0)
#         mel = np.mean(librosa.feature.melspectrogram(y, sr=sr).T, axis=0)
#         contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sr).T, axis=0)
#         tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(y), sr=sr).T, axis=0)
        
        # extract class and city form the file name
        class1, city = words_extractor(file, cl)
    
        # append to the list
        values.append([city, class1, mfcc, chroma])

    # Create a DataFrame out of the list
    df = pd.DataFrame(values, columns=['city', 'class', 'mfcc', 'chroma'])
    return df

In [4]:
df_positive = create_dataframe('emergency', 'positive')
df_negative = create_dataframe('not_emergency', 'negative')



In [11]:
# # Save as csv
# df_positive.to_csv('df_positive.csv')
# df_negative.to_csv('df_negative.csv')

# df = pd.concat([df_positive, df_negative], axis=0)
# df.to_csv('df.csv', index=False)

#### The loop below expands every list-valued feature column (e.g. `mfcc`, `chroma`) into one scalar column per element, named `<column>_1`, `<column>_2`, …, and then drops the original list column.

In [199]:
cols = df.columns[2:]
for col in cols:
    length = pd.DataFrame(df[col].tolist()).shape[1]
    col_seq = []
    for i in range(1, length+1):
        col_seq.append(f'{col}_{i}')
    dfs = [df, pd.DataFrame(df[col].tolist(), columns=col_seq)]
    df = pd.concat(dfs, axis=1).drop(col, axis=1)