# import statements

# In [28]:
import numpy as np
import pandas as pd
import scipy.io.wavfile as wav
from python_speech_features import mfcc
from tempfile import TemporaryFile
import os
import math
import pickle
import random
import operator
import matplotlib.pyplot as plt
import datetime
import scipy.fftpack
from glob import glob
import librosa

# And the tf and keras framework, thanks to Google
import tensorflow as tf
from tensorflow import keras
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

# Loading in raw input data

# In [32]:
# Load in the audio clips from the 'test' folder.
# NOTE(review): this comment used to say "train folder" while the glob
# pattern reads 'test/*' -- confirm which folder is intended.
# The paths are sorted because glob() returns them in arbitrary order and
# every exported feature row is identified only by its position.
train_files = sorted(glob('test/*'))

# Upper bound on the number of clips that are loaded.
MAX_FILES = 200

# Load the audio file using librosa
# y = audio time series
# sr = sampling rate of y
time_series = []
sampling_rates = []
# Slicing (instead of indexing 0..MAX_FILES-1) avoids an IndexError when
# the folder holds fewer than MAX_FILES files.
for path in train_files[:MAX_FILES]:
    y, sr = librosa.load(path)
    time_series.append(y)
    sampling_rates.append(sr)


# Basic feature extraction

# In [34]:
def extract_features(series, sr=22050):
    """
    Uses Librosa to extract features from the time series.

    series: audio time series (e.g. the array returned by librosa.load)
    sr: sampling rate of series; defaults to 22050, which is the rate
        librosa.load resamples to when called without an explicit sr.
        Previously this function silently read a module-level global
        named sr instead of taking it as an argument.

    returns a 7-tuple:
    spectral_centroid: the center of mass of the spectrum
    spectral rolloff: the frequency below which 85% of the magnitude distribution is concentrated
    spectral bandwidth: the width of the band of frequencies
    spectral contrast: the difference in amplitude between peaks and valleys in the spectrum
    spectral flatness: the flatness of a signal
    spectral rms: the root mean square of the signal
    tempo: the tempo estimate returned by librosa.beat.beat_track
           (the beat positions it also returns are discarded)
    """
    # The frequency-based features take the sampling rate so that their
    # values are expressed on the correct frequency scale.
    spectral_centroid = librosa.feature.spectral_centroid(y=series, sr=sr)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=series, sr=sr)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=series, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=series, sr=sr)
    # Flatness and RMS are computed without a sampling rate argument.
    spectral_flatness = librosa.feature.spectral_flatness(y=series)
    spectral_rms = librosa.feature.rms(y=series)
    tempo, _ = librosa.beat.beat_track(y=series, sr=sr)
    return (
        spectral_centroid,
        spectral_rolloff,
        spectral_bandwidth,
        spectral_contrast,
        spectral_flatness,
        spectral_rms,
        tempo,
    )

# Per-clip feature values; index k in every list belongs to time_series[k].
centroids = []
rolloffs = []
bandwidths = []
contrasts = []
flatnesses = []
rms = []
tempos = []

# Iterate over whatever was actually loaded instead of a hard-coded
# range(0, 200), which raised IndexError when fewer clips were available.
for series in time_series:
    (
        spectral_centroid,
        spectral_rolloff,
        spectral_bandwidth,
        spectral_contrast,
        spectral_flatness,
        spectral_rms,
        tempo,
    ) = extract_features(series)
    centroids.append(spectral_centroid)
    rolloffs.append(spectral_rolloff)
    bandwidths.append(spectral_bandwidth)
    contrasts.append(spectral_contrast)
    flatnesses.append(spectral_flatness)
    rms.append(spectral_rms)
    # Depending on the librosa release, tempo may come back as a plain
    # float or as a 1-element array; store a plain float either way so the
    # exported CSV holds numbers rather than bracketed array strings.
    tempos.append(float(np.squeeze(tempo)))




# Export the Feature data

# In [36]:


def _summarize(per_clip_values):
    """
    Reduce each clip's feature array to four scalar statistics.

    per_clip_values: list with one array of feature values per clip
    returns: (means, stds, maxs, mins), four lists with one entry per clip

    Every statistic is taken over all elements of the clip's array (no
    axis argument), so an array with several rows is collapsed to a single
    number per statistic.
    """
    means = [np.mean(values) for values in per_clip_values]
    stds = [np.std(values) for values in per_clip_values]
    maxs = [np.max(values) for values in per_clip_values]
    mins = [np.min(values) for values in per_clip_values]
    return means, stds, maxs, mins


# One summary row per extracted clip. The lists are sized by the data
# itself rather than by a hard-coded range(0, 200).
centroid_means, centroid_stds, centroid_maxs, centroid_mins = _summarize(centroids)
rolloff_means, rolloff_stds, rolloff_maxs, rolloff_mins = _summarize(rolloffs)
bandwidth_means, bandwidth_stds, bandwidth_maxs, bandwidth_mins = _summarize(bandwidths)
contrast_means, contrast_stds, contrast_maxs, contrast_mins = _summarize(contrasts)
flatness_means, flatness_stds, flatness_maxs, flatness_mins = _summarize(flatnesses)
rms_means, rms_stds, rms_maxs, rms_mins = _summarize(rms)




# Save the features to csv files: one single-column file per feature,
# named features/<column name>.csv, without an index column.

# to_csv does not create missing directories, so make sure the output
# folder exists before writing into it.
os.makedirs('features', exist_ok=True)

# Column name -> per-clip values. The column name doubles as the file
# name, and the insertion order is the order in which files are written.
feature_columns = {
    'centroid_means': centroid_means,
    'rolloff_means': rolloff_means,
    'bandwidth_means': bandwidth_means,
    'contrast_means': contrast_means,
    'flatness_means': flatness_means,
    'rms_means': rms_means,
    'centroid_stds': centroid_stds,
    'rolloff_stds': rolloff_stds,
    'bandwidth_stds': bandwidth_stds,
    'contrast_stds': contrast_stds,
    'flatness_stds': flatness_stds,
    'rms_stds': rms_stds,
    'centroid_maxs': centroid_maxs,
    'rolloff_maxs': rolloff_maxs,
    'bandwidth_maxs': bandwidth_maxs,
    'contrast_maxs': contrast_maxs,
    'flatness_maxs': flatness_maxs,
    'rms_maxs': rms_maxs,
    'centroid_mins': centroid_mins,
    'rolloff_mins': rolloff_mins,
    'bandwidth_mins': bandwidth_mins,
    'contrast_mins': contrast_mins,
    'flatness_mins': flatness_mins,
    'rms_mins': rms_mins,
    'tempos': tempos,
}

for column_name, column_values in feature_columns.items():
    df = pd.DataFrame({column_name: column_values})
    df.to_csv('features/' + column_name + '.csv', index=False)






# Output: <class 'list'>
