<a href="https://colab.research.google.com/github/nprime496/building-floor-recognition/blob/main/code/data_cleaning_audio_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

sortie de u-net avec plusieurs channels

# BUILDING FLOOR RECOGNITION

Inspired by https://colab.research.google.com/github/pytorch/tutorials/blob/gh-pages/_downloads/audio_classifier_tutorial.ipynb

# Table of Contents

>[BUILDING FLOOR RECOGNITION](#scrollTo=SsIKuyArEu5c)

>[Table of Contents](#scrollTo=4sAfyFF7dWGm)

>[setup](#scrollTo=nogA7rji8GbZ)

>[utils](#scrollTo=1o3-BVUy8-sF)

>[Data](#scrollTo=pk-sYaYF7U9Y)

>>[Clean Data source](#scrollTo=uHL0auVWCnL6)

>>[Fetch cleaned dataset](#scrollTo=ALAyxPryCqFZ)

>>[sample audio](#scrollTo=w9GlJyvB2SoK)

>>[Modalities experimentations](#scrollTo=t46qlhPe2vHq)

>>>[Raw](#scrollTo=9ZMbXtvfG7Sa)

>>>[Spectrogram](#scrollTo=BAQnayGxG9vz)

>>>[MFCC](#scrollTo=b5Vm59MxHBYF)

>[Preprocessing & Packaging](#scrollTo=vg4POlOg2W6W)



In [None]:
# https://stats.stackexchange.com/questions/559009/why-convert-spectrogram-to-rgb-for-machine-learning

same buildings recording shall not be distributed between train and test sets


Remember:

* Train with more data
* Data Augmentation
* adding noise to the input and ouput data
* feature selection
* cross-validation
* simplify data
* regularization
* ensembling
* early stopping
* adding dropout layers


# setup

In [5]:
import random 
import os
from copy import deepcopy

In [6]:
import pandas as pd
import seaborn as sns 
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [7]:
from IPython.display import Audio, display
from typing import Dict,Any

In [8]:
import librosa


  _resample_loop_p(x, t_out, interp_win, interp_delta, num_table, scale, y)


In [10]:
def seed_all(SEED_VAL=1):
        random.seed(SEED_VAL)
        np.random.seed(SEED_VAL)
        os.environ['PYTHONHASHSEED'] = str(SEED_VAL)

In [11]:
seed_all(496)

In [12]:
!pip install pydub

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [13]:
# show current directory
!pwd

/content


# utils

In [None]:
class bcolors:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

In [None]:
def print_stats(waveform, sample_rate=None, src=None):
  if src:
    print("-" * 10)
    print("Source:", src)
    print("-" * 10)
  if sample_rate:
    print("Sample Rate:", sample_rate)
  print("Shape:", tuple(waveform.shape))
  print("Dtype:", waveform.dtype)
  print(f" - Max:     {waveform.max().item():6.3f}")
  print(f" - Min:     {waveform.min().item():6.3f}")
  print(f" - Mean:    {waveform.mean().item():6.3f}")
  print(f" - Std Dev: {waveform.std().item():6.3f}")
  print()
  print(waveform)
  print()


In [None]:
def plot_waveform(waveform, sample_rate, title="Waveform", xlim=None, ylim=None):
  waveform = waveform.numpy()

  num_channels, num_frames = waveform.shape
  time_axis = torch.arange(0, num_frames) / sample_rate

  figure, axes = plt.subplots(num_channels, 1)
  if num_channels == 1:
    axes = [axes]
  for c in range(num_channels):
    axes[c].plot(time_axis, waveform[c], linewidth=1)
    axes[c].grid(True)
    if num_channels > 1:
      axes[c].set_ylabel(f'Channel {c+1}')
    if xlim:
      axes[c].set_xlim(xlim)
    if ylim:
      axes[c].set_ylim(ylim)
  figure.suptitle(title)
  plt.show(block=False)


In [None]:
def plot_spectrogram(spec, title=None, ylabel='freq_bin', aspect='auto', xmax=None):
  fig, axs = plt.subplots(1, 1)
  axs.set_title(title or 'Spectrogram (db)')
  axs.set_ylabel(ylabel)
  axs.set_xlabel('frame')
  im = axs.imshow(librosa.power_to_db(spec), origin='lower', aspect=aspect)
  if xmax:
    axs.set_xlim((0, xmax))
  fig.colorbar(im, ax=axs)
  plt.show(block=False)


In [None]:

def plot_specgram(waveform, sample_rate, title="Spectrogram", xlim=None):
  waveform = waveform.numpy()

  num_channels, num_frames = waveform.shape
  time_axis = torch.arange(0, num_frames) / sample_rate

  figure, axes = plt.subplots(num_channels, 1)
  if num_channels == 1:
    axes = [axes]
  for c in range(num_channels):
    axes[c].specgram(waveform[c], Fs=sample_rate)
    if num_channels > 1:
      axes[c].set_ylabel(f'Channel {c+1}')
    if xlim:
      axes[c].set_xlim(xlim)
  figure.suptitle(title)
  plt.show(block=False)


In [None]:
def play_audio(waveform, sample_rate):
  waveform = waveform.numpy()

  num_channels, num_frames = waveform.shape
  if num_channels == 1:
    display(Audio(waveform[0], rate=sample_rate))
  elif num_channels == 2:
    display(Audio((waveform[0], waveform[1]), rate=sample_rate))
  else:
    raise ValueError("Waveform with more than 2 channels are not supported.")


# Data 

## Clean Data source

In [1]:
!rm -r Data

rm: cannot remove 'Data': No such file or directory


In [2]:
!wget http://aptikal.imag.fr/~amini/R+3.tar.bz2
!tar xvf R+3.tar.bz2
!rm R+3.tar.bz2

--2022-07-19 13:40:26--  http://aptikal.imag.fr/~amini/R+3.tar.bz2
Resolving aptikal.imag.fr (aptikal.imag.fr)... 129.88.12.12, 2001:660:5301:61::12:12
Connecting to aptikal.imag.fr (aptikal.imag.fr)|129.88.12.12|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 16925918 (16M) [application/x-bzip2]
Saving to: ‘R+3.tar.bz2’


2022-07-19 13:40:28 (11.7 MB/s) - ‘R+3.tar.bz2’ saved [16925918/16925918]

._R+3
R+3/
R+3/._Esquissons_EC07A_1.wav
R+3/Esquissons_EC07A_1.wav
R+3/._Esquissons_EC07A_2.wav
R+3/Esquissons_EC07A_2.wav
R+3/._Esquissons_FV02.wav
R+3/Esquissons_FV02.wav
R+3/._Esquissons_EC11.wav
R+3/Esquissons_EC11.wav
R+3/._Esquissons_CB05S01.wav
R+3/Esquissons_CB05S01.wav
R+3/._Esquissons_VM09.wav
R+3/Esquissons_VM09.wav
R+3/._Esquissons_FV03B.wav
R+3/Esquissons_FV03B.wav
R+3/._Esquissons_VM03_1.wav
R+3/Esquissons_VM03_1.wav
R+3/._Esquissons_EC08B_1.wav
R+3/Esquissons_EC08B_1.wav
R+3/._Esquissons_EC10_2.wav
R+3/Esquissons_EC10_2.wav
R+3/._Esquissons_EC08B_2.wav
R

In [14]:
for root,dirs,file in os.walk("R+3"):
  print(root,dirs,file)
  for f in file:
    if f[0]!='.':
      dirname = f.split(".")[0][11:]
      print(dirname)
      os.mkdir(f"R+3/{dirname}")
      os.rename(f"R+3/{f}",f"R+3/{dirname}/{f}")

R+3 [] ['Esquissons_EC10_2.wav', '._Esquissons_EC10_2.wav', 'Esquissons_EC07A_2.wav', 'Esquissons_EC08B_2.wav', 'Esquissons_VM03_1.wav', '._Esquissons_EC08B_2.wav', 'Esquissons_FV03B.wav', '._Esquissons_EC07A_1.wav', 'Esquissons_FV02.wav', '._Esquissons_CB05S01.wav', '._Esquissons_VM03_1.wav', '._Esquissons_EC08B_1.wav', 'Esquissons_CB05S01.wav', '._Esquissons_VM09.wav', 'Esquissons_EC11.wav', '._Esquissons_FV03B.wav', 'Esquissons_EC07A_1.wav', 'Esquissons_EC08B_1.wav', '._Esquissons_EC10_1.wav', '._Esquissons_EC07A_2.wav', 'Esquissons_EC10_1.wav', '._Esquissons_FV02.wav', '._Esquissons_EC11.wav', 'Esquissons_VM09.wav']
EC10_2
EC07A_2
EC08B_2
VM03_1
FV03B
FV02
CB05S01
EC11
EC07A_1
EC08B_1
EC10_1
VM09


In [15]:
# Unzip dataset
!wget https://aptikal.imag.fr/~amini/Data.zip 
!unzip -o -qqq Data.zip
!rm Data.zip
!unzip -o -qqq Data/R+1.zip
!rm Data/R+1.zip
!unzip -o -qqq Data/R+5.zip 
!rm  -r Data/R+5.zip
!rm -r Data

--2022-07-19 13:41:52--  https://aptikal.imag.fr/~amini/Data.zip
Resolving aptikal.imag.fr (aptikal.imag.fr)... 129.88.12.12, 2001:660:5301:61::12:12
Connecting to aptikal.imag.fr (aptikal.imag.fr)|129.88.12.12|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 147384293 (141M) [application/zip]
Saving to: ‘Data.zip’


2022-07-19 13:41:59 (22.0 MB/s) - ‘Data.zip’ saved [147384293/147384293]



In [16]:
from pydub import AudioSegment 
from pydub.utils import make_chunks
import os

def process_sudio(file_name,chunk_length_ms):
    myaudio = AudioSegment.from_file(file_name, "wav") 
    #chunk_length_ms = 5000 # pydub calculates in millisec 
    chunks = make_chunks(myaudio,chunk_length_ms) #Make chunks of chunk_length_ms/1000 sec 
    # print(len(chunks[5]))
    for i, chunk in enumerate(chunks):
        if len(chunk)>(chunk_length_ms//2):
          chunk_name = './' + file_name + "_{0}.wav".format(i) 
          print ("exporting", chunk_name) 
          chunk.export(chunk_name, format="wav") 


In [17]:
from glob import glob 
import re

# This piece of code divide folders having multiple wav samples as another batiment samples

os.chdir("/content/")
for floor in [1,5]:
  cur_dir=f"R+{floor}"
  for (root,dirs,files) in os.walk(cur_dir):
    for dir in dirs:
      cur_bat = os.path.join(root,dir)
      os.chdir(cur_bat)
      if len(glob("*.wav"))==2:
        os.chdir("/content/")
        try:
          os.mkdir(cur_bat+"_2")
        except:
          pass
        all_files = glob(os.path.join(cur_bat,"*"+dir.split("_")[0]+"*_2*"))
        _ = [os.rename(os.path.join(f) ,os.path.join(cur_bat+"_2",f.split("/")[-1])) for f in all_files]
        os.rename(cur_bat,cur_bat+"_1")
      os.chdir("/content/")
    break

In [18]:
# Remove unused files 
os.chdir("/content/")
for floor in [1,3,5]:
  cur_dir=f"R+{floor}"
  for (root,dirs,files) in os.walk(cur_dir):
    [os.remove(os.path.join(root,f)) for f in files if (not (f.endswith(".wav")) or f[0]=='.')]

In [19]:
# Store files in new folder Data
!mkdir Data
!mv R+1 Data/R+1
!mv R+5 Data/R+5
!mv R+3 Data/R+3

In [None]:
# !zip Data_original.zip -r Data

In [None]:
# split files 
os.chdir("/content/Data")
for floor in [1,3,5]:
  cur_dir=os.path.join(f"R+{floor}")
  for (root,dirs,files) in os.walk(cur_dir):
    for f in files:
      try:
        if f.endswith(".wav"):
          process_sudio(os.path.join(root,f))
          #d["samples"].append(os.path.join(root,f))
          #d["floor"].append(floor)
      except Exception as e:
        print(f)
        raise e
os.chdir("/content/")

In [None]:
# !zip Data_chunked.zip -r Data

In [None]:
# d={"samples":[],"floor":[]}
# d2={"images":[],"floor":[]}

# os.chdir("/content/Data")
# for floor in [1,3,5]:
#   cur_dir=os.path.join(f"R+{floor}")
#   for (root,dirs,files) in os.walk(cur_dir):
#     for f in files:
#       #print(f)
#       if f.endswith("speccol.jpg"):
#         d2["images"].append(os.path.join(root,f))
#         d2["floor"].append(floor)
#       if ".wav_" in f:
#         #process_sudio(os.path.join(root,f))
#         d["samples"].append(os.path.join(root,f))
#         d["floor"].append(floor)
# os.chdir("/content/")

In [None]:
# raw_dataset2 = pd.DataFrame.from_dict(d2)
# image_dataset = raw_dataset2.sample(frac=1).reset_index(drop=True)
# image_dataset.floor.replace({1:0,5:1},inplace=True)
# image_dataset.head()

In [None]:
# def clean(x):
#   s=x.split("/")[1].split("_")
#   if len(s)==3:
#     return "_".join(s[:2])
#   return s[0]

In [None]:
# # shuffle and replace labels
# raw_dataset = pd.DataFrame.from_dict(d)
# dataset = raw_dataset.copy()
# dataset['building']=raw_dataset.samples.apply(clean)
# dataset = dataset.sample(frac=1).reset_index(drop=True)
# dataset.to_csv("building_floor_recognition_chunked.csv",index=False)
# dataset.head()

In [None]:
# dataset.shape

## Fetch cleaned dataset

In [None]:
!rm -r Data/
!rm building_floor_recognition_chunked.csv

In [None]:
!wget https://huggingface.co/datasets/nprime496/building_floor_classification/resolve/main/Data_chunked.zip
!wget https://huggingface.co/datasets/nprime496/building_floor_classification/resolve/main/building_floor_recognition_chunked.csv
!unzip -o -qqq Data_chunked.zip 
!rm Data_chunked.zip

In [None]:
raw_dataset = pd.read_csv("building_floor_recognition_chunked.csv")
dataset = raw_dataset.copy()
dataset.floor.replace({1:0,3:1,5:2},inplace=True)
dataset.head(10)

In [None]:
raw_dataset.building.sample(2).values

In [None]:
#raw_dataset[raw_dataset.building.isin(raw_dataset.building.sample(2).values)]

In [None]:

sns.countplot(dataset.floor)

In [None]:
dataset.shape

## sample audio

In [None]:
# sample wav for testing purposes
sampled = raw_dataset.sample(1)
SAMPLE_WAV_PATH=os.path.join("/content","Data",sampled.samples.values[0])

In [None]:
metadata = torchaudio.info(SAMPLE_WAV_PATH,format="wav")
print(metadata)

In [None]:
waveform,sample_rate = torchaudio.load(SAMPLE_WAV_PATH,format="wav")
print_stats(waveform,sample_rate)

In [None]:
play_audio(waveform,sample_rate)
print("Etage ",sampled.floor.values[0])

In [None]:
import librosa.display

dat2, sampling_rate2 = librosa.load(os.path.join("/content/Data",sampled.samples.values[0]))
plt.figure(figsize=(20, 10))
D = librosa.amplitude_to_db(np.abs(librosa.stft(dat2)), ref=np.max)
plt.subplot(4, 2, 1)
librosa.display.specshow(D, y_axis='linear')
plt.colorbar(format='%+2.0f dB')
plt.title('Linear-frequency power spectrogram')

In [None]:
D.shape

In [None]:

# path=os.path.join("/content","Data","R+1","CB01_2","Esquissons_CB01_2.wav")
# waveform,sample_rate = torchaudio.load(path,format="wav")
# play_audio(waveform,sample_rate)

## Modalities experimentations

### Raw

In [None]:
# plot raw waveform

plot_waveform(waveform,sample_rate=sample_rate)

In [None]:
waveform.shape

In [None]:
random.uniform(0.85,1.15)

In [None]:
random.choice([1.1,1.2])

In [None]:

effects = [
    ["lowpass", "-1", "300"],  # apply single-pole lowpass filter
    ["speed", str(random.choice([0.85,0.9,0.95,1.15,1.05]))],  # reduce the speed
    # This only changes sample rate, so it is necessary to
    # add `rate` effect with original sample rate after this.
    ['gain', '-n', str(random.choice([-10,-5,0,5,10]))],  # apply 10 db attenuation

    ["rate", f"{sample_rate}"],
    ["reverb", "-w"],  # Reverbration gives some dramatic feeling
]

# Apply effects
waveform2, sample_rate2 = torchaudio.sox_effects.apply_effects_tensor(waveform, sample_rate, effects)


In [None]:
sample_rate2

In [None]:
play_audio(waveform2,sample_rate2)

In [None]:
play_audio(waveform2,sample_rate2)

### Spectrogram

In [None]:
n_fft = 1024
win_length = 1024
hop_length = None

# define transformation
spectrogram = T.Spectrogram(
    n_fft=n_fft,
    win_length=win_length,
    hop_length=hop_length,
    center=True,
    pad_mode="reflect",
    power=2.0,
    normalized=False,
)

# define transformation
spectrogram_norm = T.Spectrogram(
    n_fft=n_fft,
    win_length=win_length,
    hop_length=hop_length,
    center=True,
    pad_mode="reflect",
    power=2.0,
    normalized=True,
)
# Perform transformation
spec1 = spectrogram(waveform)
# Perform transformation
spec2 = spectrogram_norm(waveform)


print_stats(spec1)
print_stats(spec2)
plot_spectrogram(spec1[0], title='spectrogram ')
plot_spectrogram(spec2[0], title='spectrogram normalized')

In [None]:
spec2[0].max()

In [None]:
spec1[0].max()

### MFCC

In [None]:

n_fft = 2048
win_length = None
hop_length = 512
n_mels = 256
n_mfcc = 256

mfcc_transform = T.MFCC(
    sample_rate=sample_rate,
    n_mfcc=n_mfcc,
    melkwargs={
      'n_fft': n_fft,
      'n_mels': n_mels,
      'hop_length': hop_length,
      'mel_scale': 'htk',
    }
)

mfcc = mfcc_transform(waveform)
print_stats(mfcc)
plot_spectrogram(mfcc[0])

In [None]:
from torchaudio.transforms import MFCC

In [None]:
def clean_mfcc(data):
  pass

def clean_spectrogram(data):
  pass

# Preprocessing & Packaging

check 
* https://jonathanbgn.com/2021/08/30/audio-augmentation.html
* https://developers.deepgram.com/blog/2022/06/pytorch-intro-with-torchaudio/