# Preprocessing

In [1]:
from pathlib import Path 
import librosa 
import librosa.display
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd 

In [2]:
# Directory holding the audio files, located relative to the current working directory.
# Path.cwd() replaces the IPython-only `%pwd` magic, so the cell is also valid plain Python
# and `path` is a Path from the start instead of being rebound from str to Path.
path = Path.cwd() / '..' / 'data' / 'lydfiler'

In [3]:
# Class labels; each label is also the name of the sub-directory of `path` that is scanned for that class.
classes = ['blink', 'kork', 'skive']

In [4]:
def get_filename(path):
    """Return the last component of `path`, i.e. the file name including its extension."""
    _directory, name = os.path.split(path)
    return name

## Mel spectrogram

The following two methods are from the tutorial <i>Audio Classification using DeepLearning for Image Classification</i>: https://dzlab.github.io/jekyll/update/2018/11/13/audio-classification/

In [5]:
def save_spectrogram(audio_fname, image_fname):
    """Render the log-scaled mel spectrogram of an audio file and save it as an image.

    Parameters
    ----------
    audio_fname : str or path-like
        Audio file to read. It is loaded at its native sampling rate (``sr=None``).
    image_fname : str or path-like
        Destination image file, written at 100 dpi with the axes hidden.
    """
    y, sr = librosa.load(audio_fname, sr=None)
    # Pass the signal by keyword: librosa >= 0.10 no longer accepts it positionally,
    # and the keyword form also works on older releases.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    # Power -> decibels, relative to the strongest bin.
    log_S = librosa.power_to_db(S, ref=np.max)

    # Draw on a dedicated figure instead of whatever figure happens to be current,
    # and always close it so repeated calls in a loop do not accumulate open figures.
    fig, ax = plt.subplots()
    try:
        librosa.display.specshow(log_S, sr=sr, x_axis='time', y_axis='mel', ax=ax)
        ax.axis('off')
        # Save before showing: the saved file then never depends on what show() does to the figure.
        fig.savefig(image_fname, dpi=100)
        plt.show()
    finally:
        plt.close(fig)

In [6]:
def audio_to_spectrogram(audio_dir_path, image_dir_path=None):
    """Create one spectrogram PNG for every audio file in a directory.

    Parameters
    ----------
    audio_dir_path : pathlib.Path
        Directory whose files are converted.
    image_dir_path : pathlib.Path, optional
        Directory receiving the PNG files. It is created (including parents) when
        missing. When omitted, the images are written next to the audio files.

    Notes
    -----
    * Images that already exist are left untouched, so the function can be re-run.
    * Entries that are not regular files, and existing ``.png`` files, are skipped.
    * A ``ValueError`` raised while converting a file is reported and the file is skipped.
    * The image name is the audio file name up to its first dot plus ``.png``.
    """
    # The original dereferenced the None default and crashed; fall back to the audio directory.
    if image_dir_path is None:
        image_dir_path = audio_dir_path
    image_dir_path = Path(image_dir_path)

    # Take the listing up front: images may be written into the directory being listed.
    entries = sorted(audio_dir_path.iterdir())

    # Create the target once, outside the loop; parents=True also covers a missing parent.
    image_dir_path.mkdir(parents=True, exist_ok=True)

    for audio_path in entries:
        if not audio_path.is_file() or audio_path.suffix.lower() == '.png':
            continue

        audio_filename = get_filename(audio_path)
        image_fname = (image_dir_path / (audio_filename.split('.')[0] + '.png')).as_posix()
        if Path(image_fname).exists():
            continue
        try:
            save_spectrogram(audio_path.as_posix(), image_fname)
        except ValueError as verr:
            print('Failed to process %s %s' % (image_fname, verr))

In [7]:
# Root directory for the generated spectrogram images, a sibling of the audio directory.
train_path_melspectogram = path / '..' / 'train_melspecs'
# exist_ok makes the cell safe to re-run without a separate existence check;
# parents=True also creates any missing intermediate directory.
train_path_melspectogram.mkdir(parents=True, exist_ok=True)

In [8]:
# Convert every class directory of audio files into its own directory of spectrogram images.
for c in classes:
    audio_path = path / c
    save_path_melspectogram = path / '..' / 'train_melspecs' / c
    audio_to_spectrogram(audio_path, save_path_melspectogram)

# Raw
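The helpers in this section appear to be based on code from the CVxTz/audio_classification repository; its license is at https://github.com/CVxTz/audio_classification/blob/master/LICENSE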

In [9]:
# Fixed clip length in samples (2 * 16000); the default `input_length` of load_audio_file.
input_length = 2 * 16000

def audio_norm(data):
    """Min-max scale `data`, then shift it so the values lie between -0.5 and 0.5.

    0.0001 is added to the value range so that a constant signal does not
    cause a division by zero (such a signal maps to -0.5 everywhere).
    """
    lowest = np.min(data)
    highest = np.max(data)
    value_range = highest - lowest + 0.0001
    scaled = (data - lowest) / value_range
    return scaled - 0.5

In [10]:
def load_audio_file(file_path, input_length=input_length, sr=16000):
    """Load an audio file as a normalised waveform of exactly `input_length` samples.

    Parameters
    ----------
    file_path : str or path-like
        Audio file to read.
    input_length : int
        Number of samples in the returned array.
    sr : int, optional
        Sampling rate the audio is resampled to on load (default 16000).

    Returns
    -------
    numpy.ndarray
        The waveform after `audio_norm`. Longer recordings are cropped at a random
        position; shorter ones are zero-padded with a random split between the
        left and right side; recordings of exactly `input_length` are used as is.
    """
    data = librosa.load(file_path, sr=sr)[0]
    n_samples = len(data)

    if n_samples > input_length:
        # +1 so the last valid start position can be drawn as well.
        offset = np.random.randint(n_samples - input_length + 1)
        data = data[offset:offset + input_length]
    elif n_samples < input_length:
        missing = input_length - n_samples
        # +1 so the padding may also be placed entirely on the left.
        offset = np.random.randint(missing + 1)
        data = np.pad(data, (offset, missing - offset), "constant")
    # n_samples == input_length needs no change; the original called
    # np.random.randint(0) in that case, which raises ValueError.

    # Normalisation runs after padding, so padded samples end up at the
    # normalised value of 0 rather than at 0 itself.
    return audio_norm(data)

### Create csv file

In [11]:
# Collect, per audio file: the fixed-length waveform (x), its class label (y)
# and its file name up to the first dot (filename).
x, y, filename = [], [], []

for c in classes:
    audio_path = path / c
    for file_path in audio_path.iterdir():
        data = load_audio_file(file_path)
        x.append(data)
        y.append(c)
        name_without_extension = get_filename(file_path).split('.')[0]
        filename.append(name_without_extension)

In [12]:
# One row per audio clip: the sample values as columns, followed by the file name and the class label.
df = pd.DataFrame(x)
df['filename'] = filename
df['classes'] = y
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,31992,31993,31994,31995,31996,31997,31998,31999,filename,classes
0,-0.001923,-0.001488,-0.001092,0.000636,-0.000439,0.002301,0.000686,0.000867,0.006124,0.008916,...,-0.002832,-0.003827,0.002071,0.001415,-0.001815,-0.003134,-0.003944,-0.004587,blink_29,blink
1,0.061929,0.061929,0.061929,0.061929,0.061929,0.061929,0.061929,0.061929,0.061929,0.061929,...,0.061929,0.061929,0.061929,0.061929,0.061929,0.061929,0.061929,0.061929,blink_7,blink
2,0.016476,0.006415,0.010163,0.013385,0.012466,0.016891,0.017876,0.011519,0.017348,0.013792,...,-0.003748,0.002971,-0.003928,-0.005492,-0.001918,-0.001472,0.008276,-0.004121,blink_8,blink
3,0.024835,0.024835,0.024835,0.024835,0.024835,0.024835,0.024835,0.024835,0.024835,0.024835,...,0.024835,0.024835,0.024835,0.024835,0.024835,0.024835,0.024835,0.024835,blink_28,blink
4,-0.022613,-0.022613,-0.022613,-0.022613,-0.022613,-0.022613,-0.022613,-0.022613,-0.022613,-0.022613,...,-0.022613,-0.022613,-0.022613,-0.022613,-0.022613,-0.022613,-0.022613,-0.022613,blink_11,blink


In [17]:
# Output directory for the CSV files, two levels above the audio directory.
save_path = path / '..' / '..' / 'csv'
# Create it up front: DataFrame.to_csv does not create missing directories.
save_path.mkdir(parents=True, exist_ok=True)

In [18]:
# Write all clips (sample columns, filename, classes) without the row index.
# index=False is the documented boolean form; index=None only worked because None is falsy.
df.to_csv(save_path / 'raw.csv', index=False, header=True)

In [19]:
#data_base = load_audio_file(audio_path.ls()[4])
#fig = plt.figure(figsize=(14, 8))
#plt.plot(np.linspace(0, 1, input_length), data_base)
#plt.show()

In [20]:
#data, sample_rate = librosa.load(audio_path.ls()[4], sr=None)
#plt.figure(figsize=(12, 4))
#librosa.display.waveplot(data, sr=sample_rate)

### Random selection of validation data 

In [21]:
import random

# Number of validation clips drawn from each class.
n_valid_per_class = 9
# Dedicated seeded generator: re-running this cell on the same `df` row order
# selects the same validation clips.
validation_rng = random.Random(42)


def sample_class_rows(class_name, k=n_valid_per_class):
    """Return `k` distinct row positions of `df` whose 'classes' value equals `class_name`.

    Raises ValueError (from random.sample) when the class has fewer than `k` rows.
    """
    rows = [pos for pos, label in enumerate(df['classes']) if label == class_name]
    return validation_rng.sample(rows, k)


# Row positions are taken from the labels rather than from hard-coded ranges:
# range(0, 29), range(30, 59) and range(60, 89) could never select rows 29, 59 or 89
# and were only correct for one specific number and order of files per class.
blink = sample_class_rows('blink')
kork = sample_class_rows('kork')
skive = sample_class_rows('skive')
idx = blink + kork + skive

In [22]:
# File names of the sampled validation rows; `idx` holds row positions, hence iloc.
df_valid = df.loc[:, 'filename'].iloc[idx]

In [23]:
# Write the validation file names under a header row and without the row index.
# index=False is the documented boolean form; index=None only worked because None is falsy.
df_valid.to_csv(save_path / 'validation.csv', index=False, header=True)