---
---

# **Audio Classification**

In [None]:
!wget https://perso.esiee.fr/~gueurett/LV_Research/dataset_audio.zip
!unzip dataset_audio.zip

In [None]:
# If wget doesn't work (it sometimes fails, I believe because my school's servers block the access), upload the dataset manually and unzip it in this cell.
#!unzip dataset_audio.zip

---
---
# **Preprocessing**

In [None]:
!pip install tensorflow_io
!pip install pydub

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow_io
  Downloading tensorflow_io-0.26.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (25.9 MB)
[K     |████████████████████████████████| 25.9 MB 1.8 MB/s 
Installing collected packages: tensorflow-io
Successfully installed tensorflow-io-0.26.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
import os
import time
import numpy as np
import tensorflow as tf
import tensorflow_io as tfio
import IPython.display as ipd
import matplotlib.pyplot as plt

from keras import initializers
from pydub import AudioSegment
from sklearn import preprocessing
from scipy.io.wavfile import read, write
from tensorflow.keras import layers
from sklearn.model_selection import KFold
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import TimeSeriesSplit

from google.colab import files

In [None]:
def delete_excess(data, target_length=None):
  """Truncate `data` to its first `target_length` entries.

  Args:
    data: sequence or array; it is sliced along its first axis.
    target_length: number of leading entries to keep. When omitted, the
      notebook-level `length_audio` is used, which is the original behaviour.

  Returns:
    `data[:target_length]`; input that is already short enough comes back
    with all of its entries.
  """
  if target_length is None:
    target_length = length_audio  # notebook global set by the dataset-loading cell
  return data[:target_length]

def add_average(data, target_length=None, tail=25):
  """Pad a 1-D signal up to `target_length` with the mean of its last samples.

  Args:
    data: 1-D array-like signal, at most `target_length` samples long.
    target_length: length of the returned array. When omitted, the
      notebook-level `length_audio` is used, which is the original behaviour.
    tail: how many trailing samples are averaged to obtain the padding value.
      If the signal has fewer samples than that, all of them are averaged.

  Returns:
    A new float array of shape `(target_length,)`: the signal followed by the
    padding value. An empty signal is padded with zeros.

  Raises:
    ValueError: if `data` is longer than `target_length`.
  """
  if target_length is None:
    target_length = length_audio  # notebook global set by the dataset-loading cell

  data = np.asarray(data)
  end = data.shape[0]
  if end > target_length:
    raise ValueError(
        "add_average expects at most %d samples, got %d" % (target_length, end))

  result = np.zeros(shape=(target_length,))
  if end == 0:
    # No samples to average: keep the zero padding instead of filling with NaN.
    return result

  # Clamp the start of the tail at 0. A plain `data[end - tail:]` turns into a
  # negative index when end < tail and would then average the wrong samples.
  tail_start = max(end - tail, 0)
  result[:end] = data
  result[end:] = np.mean(data[tail_start:])
  return result

def plot_spectrogram(spectrogram, ax, title, rate):
  """Draw the log of a 2-D spectrogram on `ax` with `pcolormesh`.

  Args:
    spectrogram: 2-D array; it is transposed before plotting, so its first
      axis runs along the x axis of the figure.
    ax: matplotlib axes (or compatible object) to draw on.
    title: title set on the axes.
    rate: upper bound of the x-axis limits, i.e. `ax.set_xlim([0, rate])`.
  """
  # Add machine epsilon before taking the log so that zero-valued bins map to
  # a large negative finite number instead of -inf.
  log_spec = np.log(spectrogram.T + np.finfo(float).eps)
  height = log_spec.shape[0]
  width = log_spec.shape[1]
  # One x coordinate per column, spread evenly over [0, number of elements].
  X = np.linspace(0, np.size(spectrogram), num=width, dtype=int)
  Y = range(height)
  ax.pcolormesh(X, Y, log_spec)
  ax.set_xlim([0, rate])
  ax.set_title(title)

In [None]:
directory = "./dataset_audio/"
file_name = "I0474.wav"  # reference clip: its length defines the common clip length
path = directory + file_name

rate, data = read(path)

length = data.shape[0] / rate                        # number of samples divided by the sample rate
length_audio = data.shape[0]                         # 176400
nb_channels = data.shape[1] if data.ndim > 1 else 1  # 2

# List the directory once so the sample count and the loop below always agree,
# and skip anything that is not a WAV file. The order is whatever os.listdir
# returns; it is deliberately not re-ordered here.
wav_files = [f for f in os.listdir(directory) if f.lower().endswith(".wav")]
n_samples = len(wav_files)

dataset = np.zeros(shape=(n_samples, length_audio))

# creating the dataset and labels
# label 1 when the file name starts with 'I', 0 otherwise
labels = np.zeros(shape=(n_samples,))

for index, filename in enumerate(wav_files):
  if filename[0] == 'I': labels[index] = 1
  path = directory + filename
  clip_rate, data = read(path)
  if clip_rate != rate:
    # `rate` (from the reference clip) is what later cells use, so make a
    # mismatch visible instead of silently overwriting it.
    print("warning:", filename, "has sample rate", clip_rate, "instead of", rate)
  if data.ndim > 1:
    data = np.mean(data, axis=1)  # average the channels into a mono signal
  # Bring every clip to exactly length_audio samples.
  if data.shape[0] > length_audio:
    dataset[index] = delete_excess(data)
  elif data.shape[0] < length_audio:
    dataset[index] = add_average(data)
  else:
    dataset[index] = data

dataset = (dataset - np.mean(dataset)) / np.std(dataset)  # normalize with the global mean and std

print("----------------------------------")
print("| dataset.shape: ", dataset.shape)
print("| labels.shape : ", labels.shape)
print("----------------------------------")

----------------------------------
| dataset.shape:  (200, 176400)
| labels.shape :  (200,)
----------------------------------


---
---
# **Slicing**

In [None]:
window_size = 3528   # samples per window
window_step = 1764   # offset between window starts (half a window, so neighbours overlap by 50%)
# Number of full windows that fit in one clip (integer arithmetic, no float rounding).
n_windows = (length_audio - window_size) // window_step + 1

print('number of windows: ', n_windows)

dataset_split = np.zeros(shape=(n_samples, n_windows, window_size,))

# Cut window number `index_window` out of every clip at once.
for index_window in range(n_windows):
  start = index_window * window_step
  dataset_split[:, index_window, :] = dataset[:, start : (start + window_size)]

print("Done -\ndataset_split.shape: ", dataset_split.shape)

# Write the windows into ./audios without changing the working directory, so
# the cell can be re-run and a failure cannot leave the kernel inside audios/.
output_dir = "audios"
os.makedirs(output_dir, exist_ok=True)

indoor_count = 0
outdoor_count = 0

# Files are named <I|O>_<clip number within its class>_<window number>.wav
# NOTE(review): `part` is float64 and scipy's write() picks the WAV sample
# format from the array dtype - confirm that the consumer of these files can
# read that format.
for index_audio, sample in enumerate(dataset_split):
  if labels[index_audio] == 1:
    prefix = 'I_' + str(indoor_count)
    indoor_count += 1
  else:
    prefix = 'O_' + str(outdoor_count)
    outdoor_count += 1

  for index, part in enumerate(sample):
    filename = prefix + '_' + str(index) + '.wav'
    write(os.path.join(output_dir, filename), rate, part)

number of windows:  99
Done -
dataset_split.shape:  (200, 99, 3528)
/content/audios
/content


In [None]:
!zip -r /content/audios.zip /content/audios

In [None]:
# Hand the zipped windows to the browser through the google.colab `files` helper.
archive_path = "/content/audios.zip"
files.download(archive_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>