# Workbook to convert wav to spectrum.

### This workbook reads WAV files, pads or trims each one to 220500 samples, and converts it into a NumPy array of shape 11x220.

## Import libraries

In [1]:
import librosa
import os
import numpy as np
import librosa.display
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from IPython.display import Audio, Image
import pandas as pd

# Turn off pandas' chained-assignment (SettingWithCopy) warning for this notebook.
pd.options.mode.chained_assignment = None

## Path to audio folder

In [2]:
# Directory holding the audio clips. The trailing slash matters: file names
# are appended to this string by plain concatenation.
audio_folder_path = "/Users/karanhadiyal/Desktop/Environmental-sound-recognition/audio/audio/"
# CSV file describing the clips; it is read into a DataFrame with pandas.
csv_path = "/Users/karanhadiyal/Desktop/Environmental-sound-recognition/audio/esc50.csv"

# Read and preprocess dataframe

In [3]:
# Load the metadata CSV and discard the columns that are not used afterwards.
df = pd.read_csv(csv_path).drop(
    columns=["fold", "category", "esc10", "src_file", "take"]
)
# Full path of each clip: folder prefix + the file name stored in the CSV.
df["path"] = audio_folder_path + df["filename"]

# Function to read wav file

In [4]:
def read_wav(writeFile, n_samples=220500):
    """Load an audio file and force it to a fixed number of samples.

    The file is read with ``librosa.load(..., sr=None)``, so it keeps its
    native sampling rate (no resampling). Recordings shorter than
    ``n_samples`` are zero-padded at the end; longer ones are truncated.

    Args:
        writeFile: Path of the audio file to read. The name is historical;
            nothing is written.
        n_samples: Number of samples in the returned signal. This is a
            sample *count*, not a sampling rate. Defaults to 220500, the
            fixed length used throughout this notebook.

    Returns:
        numpy.ndarray of shape ``(1, n_samples)``. Both the padded and the
        truncated case use the dtype of the array returned by librosa.
    """
    data, _ = librosa.load(writeFile, sr=None)  # sampling rate is not needed here

    if data.shape[0] < n_samples:
        # Too short: copy the samples to the front of a zero buffer. The buffer
        # takes the loaded dtype so the result type does not depend on the
        # length of the input file.
        fixed = np.zeros(n_samples, dtype=data.dtype)
        fixed[: data.shape[0]] = data
    else:
        # Long enough: keep only the first n_samples samples.
        fixed = data[:n_samples]

    # Return a single-row 2-D array, the layout get_spectrum indexes into.
    return fixed.reshape(1, n_samples)

## Function to get spectrum

In [5]:
def get_spectrum(signal, w, flag, channels):
    """Compute a delay-based spectrum of a signal, frame by frame.

    The first row of ``signal`` is cut into consecutive, non-overlapping
    frames of ``w`` samples (left-over samples at the end are dropped). For
    every frame and every delay ``d`` taken from ``channels`` the function
    averages the absolute second difference

        |x[t - d] + x[t + d] - 2 * x[t]|

    over ``t = d, ..., w - d - 2`` and divides the mean by 4.

    With the values used in this notebook (220500 samples, ``w=1000`` and
    the 12-entry ``channels`` array) the result has shape ``(11, 220)``.

    Args:
        signal: Array-like of shape ``(1, n_samples)``; a 1-D sequence is
            also accepted. It is converted to float32 before processing.
        w: Frame length in samples (integer).
        flag: ``0`` for the plain mean described above; ``1`` to divide each
            term by ``|x[t-d]| + |x[t+d]| + 2*|x[t]| + 1e-12`` before
            averaging.
        channels: Sequence of integer delays. Only delays ``<= w / 4`` are
            used; larger ones are ignored.

    Returns:
        numpy.ndarray of shape ``(number of kept delays, number of frames)``.

    Raises:
        ValueError: If ``flag`` is neither 0 nor 1 (previously this silently
            produced an all-zero result), or if a delay is negative.
    """
    if flag not in (0, 1):
        raise ValueError("flag must be 0 or 1, got {!r}".format(flag))

    signal = np.atleast_2d(np.asarray(signal, dtype=np.float32))
    channels = np.asarray(channels)

    # Keep only the delays that fit comfortably inside one frame.
    delays = channels[channels <= w / 4]
    if np.any(delays < 0):
        raise ValueError("channels must not contain negative delays")

    n_frames = signal.shape[1] // w
    # One frame per row; trailing samples that do not fill a frame are dropped.
    frames = signal[0, : n_frames * w].reshape(n_frames, w)
    spectrum = np.zeros((delays.shape[0], n_frames))

    for k, delay in enumerate(delays):
        # Number of positions t = delay .. w - delay - 2. The window stops one
        # sample before the last admissible position; this is kept as-is so
        # results match the values this code has always produced.
        span = w - 2 * delay - 1
        left = frames[:, :span]                       # x[t - delay]
        centre = frames[:, delay : delay + span]      # x[t]
        right = frames[:, 2 * delay : 2 * delay + span]  # x[t + delay]

        difus = np.abs(left + right - 2 * centre)
        if flag == 1:
            # Normalise each term; 1e-12 guards against division by zero.
            difus = difus / (
                np.abs(left) + np.abs(right) + 2 * np.abs(centre) + 1e-12
            )
        # All frames are handled at once: one mean per row (frame).
        spectrum[k] = difus.mean(axis=1) / 4
    return spectrum

In [6]:
# Candidate delays (in samples) passed to get_spectrum, which only uses the
# entries that are <= w / 4.
channels = np.array([2, 4, 8, 16, 20, 32, 50, 64, 100, 128, 200, 300]) #Channels for wav file

## Read wav

In [10]:
# Load every clip listed in df["path"]; each cell of the new column holds the
# array returned by read_wav for that file.
df["read_wav"] = df["path"].apply(read_wav)

## Get spectrum

In [12]:
# Compute one spectrum per clip. The extra positional arguments are
# w=1000 (frame length), flag=0 (un-normalised mean) and the delay list.
df["spectrum"] = df["read_wav"].apply(get_spectrum, args = (1000,0,channels))

## Save in form of pickle

In [27]:
# Destination file for the processed DataFrame.
dataset_path = "/Users/karanhadiyal/Desktop/Environmental-sound-recognition/audio/dataset.pkl"
# Write to dataset_path, not csv_path, so the source CSV is not overwritten
# with pickle data.
df.to_pickle(dataset_path)