# Preprocessing

In [1]:
import os
import random
from pathlib import Path

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Root folder of the audio dataset, located relative to the current working
# directory. Path.cwd() replaces the IPython-only `%pwd` magic so the cell also
# runs when the notebook is exported to a plain Python script.
path = Path.cwd() / '..' / '..' / 'data' / 'audio_pil_og_flue'

In [3]:
# Sound classes; each one is also the name of a sub-folder of `audio_files/`.
classes = [
    'blink',
    'kork',
    'skive',
]

In [4]:
def get_filename(path):
    """Return the last component of ``path`` (the file name with its extension).

    Accepts a string or a path-like object. A path that ends in a separator
    yields an empty string.
    """
    _directory, file_name = os.path.split(path)
    return file_name

## Mel spectrogram

The following two functions are from the tutorial <i>Audio Classification using DeepLearning for Image Classification</i>: https://dzlab.github.io/jekyll/update/2018/11/13/audio-classification/

In [5]:
def save_spectrogram(audio_fname, image_fname):
    """Render the log-scaled mel spectrogram of an audio file and save it as an image.

    Args:
        audio_fname: Path of the audio file, passed to ``librosa.load``.
        image_fname: Destination path handed to ``Figure.savefig``.

    The audio is loaded at its native sampling rate (``sr=None``), turned into
    a 128-band mel spectrogram and converted to dB relative to its peak value.
    The plot is drawn without axes, written at 100 dpi and also displayed.
    """
    y, sr = librosa.load(audio_fname, sr=None)
    # `y` is passed by keyword: recent librosa releases made the audio
    # arguments of melspectrogram keyword-only.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    log_S = librosa.power_to_db(S, ref=np.max)

    # A dedicated figure per call keeps spectrograms from being drawn on top
    # of whatever figure happens to be current when this runs in a loop.
    fig, ax = plt.subplots()
    try:
        librosa.display.specshow(log_S, sr=sr, x_axis='time', y_axis='mel', ax=ax)
        ax.axis('off')
        # Save before showing: depending on the backend, show() may close the
        # figure.
        fig.savefig(image_fname, dpi=100)
        plt.show()
    finally:
        # Release the figure so converting many files does not accumulate
        # open figures.
        plt.close(fig)

In [6]:
def audio_to_spectrogram(audio_dir_path, image_dir_path=None):
    """Convert every file in a directory into a spectrogram PNG.

    Args:
        audio_dir_path: ``pathlib.Path`` of the directory holding the audio files.
        image_dir_path: ``pathlib.Path`` of the output directory; it is created
            (including missing parents) when absent. When ``None`` the images
            are written next to the audio files.

    Images that already exist are skipped, so the function can be re-run
    without redoing finished files. A ``ValueError`` raised while converting
    one file is printed and the remaining files are still processed.
    """
    # iterdir() is plain pathlib, so no patched `Path.ls` is required; sorting
    # makes the processing order deterministic.
    audio_paths = sorted(audio_dir_path.iterdir())

    if image_dir_path is None:
        # The original default of None crashed on `.exists()`; fall back to
        # the audio directory instead.
        image_dir_path = audio_dir_path
    else:
        # Create the target once up front instead of re-checking per file.
        image_dir_path.mkdir(parents=True, exist_ok=True)

    for audio_path in audio_paths:
        # The image is named after the part of the file name before its
        # first dot.
        image_path = image_dir_path / (audio_path.name.split('.')[0] + '.png')
        if image_path.exists():
            continue
        image_fname = image_path.as_posix()
        try:
            save_spectrogram(audio_path.as_posix(), image_fname)
        except ValueError as verr:
            print('Failed to process %s %s' % (image_fname, verr))

In [None]:
# Root folder for the generated mel-spectrogram images. mkdir(exist_ok=True)
# replaces the separate exists()/os.mkdir pair and is a no-op when the folder
# is already there.
train_path_melspectogram = path / 'train_melspectogram'
train_path_melspectogram.mkdir(exist_ok=True)

In [7]:
# Convert each class folder of audio clips into a matching folder of images.
for c in classes:
    audio_path = path / 'audio_files' / c
    save_path_melspectogram = path / 'train_melspectogram' / c
    audio_to_spectrogram(audio_path, save_path_melspectogram)

# Raw
The code in this section is based on the CVxTz/audio_classification repository; see its license: https://github.com/CVxTz/audio_classification/blob/master/LICENSE

In [8]:
# Number of samples every loaded clip is cropped or padded to.
input_length = 2 * 16000


def audio_norm(data):
    """Min-max scale ``data`` and shift the result down by 0.5.

    The minimum maps to -0.5 and the maximum to just under +0.5. The 0.0001
    added to the value range keeps the division defined for constant input.
    """
    lowest = np.min(data)
    value_range = np.max(data) - lowest + 0.0001
    scaled = (data - lowest) / value_range
    return scaled - 0.5

In [9]:
def load_audio_file(file_path, input_length=input_length, sr=16000):
    """Load an audio file as a normalised clip of exactly ``input_length`` samples.

    Args:
        file_path: Audio file to read with ``librosa.load``.
        input_length: Number of samples in the returned array.
        sr: Sampling rate the audio is resampled to while loading.

    Returns:
        The clip after ``audio_norm``. Recordings longer than ``input_length``
        are cropped at a random offset; shorter ones are zero-padded with a
        random amount of leading padding.
    """
    data = librosa.load(file_path, sr=sr)[0]
    n_samples = len(data)
    if n_samples > input_length:
        offset = np.random.randint(n_samples - input_length)
        data = data[offset:offset + input_length]
    elif n_samples < input_length:
        missing = input_length - n_samples
        offset = np.random.randint(missing)
        data = np.pad(data, (offset, missing - offset), "constant")
    # When n_samples == input_length nothing has to be cropped or padded; the
    # previous code called np.random.randint(0) in that case, which raises
    # ValueError.
    return audio_norm(data)

### Create CSV file

In [10]:
# TODO: cleaner code with partial
x, y, filename = [], [], []

# One entry per clip: the normalised samples, the class label and the file
# name up to its first dot.
for c in classes:
    audio_path = path / f'audio_files/{c}/'
    # iterdir() is plain pathlib, so no patched `Path.ls` is required; sorting
    # keeps the row order the same from run to run.
    for file_path in sorted(audio_path.iterdir()):
        data = load_audio_file(file_path)
        x.append(data)
        y.append(c)
        filename.append(get_filename(file_path).split('.')[0])

In [12]:
# One row per clip: the sample columns first, then the file name and the
# class label.
df = pd.DataFrame(x).assign(filename=filename, classes=y)

In [13]:
# Output folder for the generated CSV files, inside the current working
# directory. Path.cwd() replaces the IPython-only `%pwd` magic, and the folder
# is created here so that writing CSV files into it does not fail when it is
# missing.
save_path = Path.cwd() / 'csv'
save_path.mkdir(exist_ok=True)

In [14]:
# Write the table with a header row and without the row index. index=False
# is the documented boolean form; the previous index=None only worked
# because None is falsy.
df.to_csv(save_path / 'raw.csv', index=False, header=True)

In [15]:
#data_base = load_audio_file(audio_path.ls()[4])
#fig = plt.figure(figsize=(14, 8))
#plt.plot(np.linspace(0, 1, input_length), data_base)
#plt.show()

In [16]:
#data, sample_rate = librosa.load(audio_path.ls()[4], sr=None)
#plt.figure(figsize=(12, 4))
#librosa.display.waveplot(data, sr=sample_rate)

### Random selection of validation data 

In [28]:
# Hold out a fixed number of clips per class for validation. The row positions
# are looked up from the labels in `df` instead of being hard-coded as 0-28,
# 30-58 and 60-88, which assumed a fixed number of files per class and could
# never select rows 29, 59 and 89.
VALID_PER_CLASS = 9
VALID_SEED = 42  # fixed seed so the split is the same on every run

_valid_rng = random.Random(VALID_SEED)
_valid_positions = {
    label: _valid_rng.sample(
        [pos for pos, row_label in enumerate(df['classes']) if row_label == label],
        VALID_PER_CLASS,
    )
    for label in classes
}
# The per-class names are kept because the original cell defined them.
blink, kork, skive = (_valid_positions[label] for label in classes)
idx = blink + kork + skive

In [41]:
# File names (a Series, not the sample columns) of the rows picked for
# validation, selected by position.
df_valid = df['filename'].take(idx)

In [46]:
# Write the validation file names with a header row and without the row
# index. index=False is the documented boolean form; the previous index=None
# only worked because None is falsy.
df_valid.to_csv(save_path / 'validation.csv', index=False, header=True)

In [None]:
# Use move_spectogram notebook to create a validation folder and move the images (ImageNet-style). 