# Setup

In [2]:
import librosa
import itertools 
import IPython.display as ipd
import matplotlib.pyplot as plt
import librosa.display
import sklearn
import torch
import torchaudio
import numpy as np
import os
import sys
from pathlib import Path
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas

# Defined Variables & Helper Functions

In [3]:
# Show the working directory the notebook runs from (the directory constants
# defined below are derived from it). Pure-Python replacement for the `pwd`
# shell call so the cell also works where that command is unavailable.
Path.cwd()

/Users/teddyweaver/Documents/MSDS/2020_Fall/vandy_hacks/VandyHacks_Heartbeat_Classification/modeling


In [14]:
# Both directory constants are derived from the resolved working directory:
# DIR_ROOT is its parent, DIR_VHACK is one level further up.
_notebook_dir = Path().resolve()
DIR_ROOT = _notebook_dir.parent
DIR_VHACK = _notebook_dir.parents[1]

In [15]:
# Training / validation folders, both located under <DIR_VHACK>/data.
DIR_TRAIN = Path(DIR_VHACK).joinpath('data', 'train')
DIR_VALID = Path(DIR_VHACK).joinpath('data', 'valid')

# Heartbeat class names.
labels = [
  'murmur',
  'extra',
  'normal',
  'artifact',
]

In [43]:
def spec_to_image(spec, eps=1e-6):
    '''
    Convert an array into an 8-bit image-like array.

    The input is standardised (mean subtracted, divided by std + eps) and then
    min-max scaled to the range 0..255 before being cast to uint8.

    spec -- numpy array to convert
    eps  -- small constant added to the standard deviation so the
            standardisation never divides by zero

    Returns a uint8 array with the same shape as spec. If every element of
    spec is identical there is no value range to stretch, so an all-zero
    array is returned instead of dividing zero by zero.
    '''
    spec_norm = (spec - spec.mean()) / (spec.std() + eps)
    spec_min, spec_max = spec_norm.min(), spec_norm.max()

    value_range = spec_max - spec_min
    if value_range == 0:
        # Constant input: 0 / 0 would yield NaN, whose uint8 cast is undefined.
        return np.zeros(spec_norm.shape, dtype=np.uint8)

    spec_scaled = 255 * (spec_norm - spec_min) / value_range
    return spec_scaled.astype(np.uint8)

In [16]:
def pad_along_axis(array: np.ndarray, target_length: int, axis: int = 0,
                   pad_value: float = 0) -> np.ndarray:
  '''
  Pad a numpy array at the end of one axis up to a target length.

  array         -- array to pad
  target_length -- desired size of `axis` after padding
  axis          -- axis to extend (default = 0)
  pad_value     -- constant used to fill the new entries (default = 0)

  Returns the padded array. If the axis is already at least target_length
  long, the input array itself is returned unchanged (it is never truncated).

  link: https://stackoverflow.com/questions/19349410/how-to-pad-with-zeros-a-tensor-along-some-axis-python
  '''
  pad_size = target_length - array.shape[axis]

  # Nothing to add: hand the input back as-is.
  if pad_size <= 0:
    return array

  # (before, after) padding per axis; only the end of the chosen axis grows.
  pad_width = [(0, 0)] * array.ndim
  pad_width[axis] = (0, pad_size)

  return np.pad(array, pad_width=pad_width, mode='constant', constant_values=pad_value)

# Audio Loading, Feature Extraction & Plotting Helpers

In [18]:
def import_wav(filepath):
  '''
  Load an audio file with librosa.load (default settings) and pass the
  signal through librosa.effects.trim.

  filepath -- path of the audio file to read

  Returns a tuple (x, sr): the trimmed amplitude array x first, then the
  sample rate sr reported by librosa.load.

  Raises FileNotFoundError (chained to the original error) with the
  offending path in the message when loading raises FileNotFoundError.
  '''
  try:
    x, sr = librosa.load(filepath)
  except FileNotFoundError as err:
    raise FileNotFoundError(f'could not find a file at {filepath}') from err

  # trim returns (trimmed signal, index interval); only the signal is needed.
  x, _ = librosa.effects.trim(x)

  return x, sr

In [19]:
def stft_transform(amp_array, n_fft = 2048, hop_length = 100):
  '''
  Magnitude of the short-time Fourier transform of a signal.

  amp_array  -- signal handed to librosa.stft
  n_fft      -- forwarded to librosa.stft (default 2048)
  hop_length -- forwarded to librosa.stft (default 100)

  Returns np.abs of the librosa.stft output.
  '''
  stft_kwargs = {'n_fft': n_fft, 'hop_length': hop_length}
  complex_spectrum = librosa.stft(amp_array, **stft_kwargs)
  return np.abs(complex_spectrum)


In [20]:
def amp_to_db(freq_array, sr, ref = np.max):
  '''
  Convert amplitudes to decibels via librosa.amplitude_to_db.

  freq_array -- array forwarded to librosa.amplitude_to_db
  sr         -- accepted but not used by this function
  ref        -- reference forwarded to librosa.amplitude_to_db (default np.max)
  '''
  decibels = librosa.amplitude_to_db(freq_array, ref=ref)
  return decibels

In [21]:
def mel_spectogram(amp_array, sr, n_fft = 512, n_mels = 128, hop_length = 100):
  '''
  Log-scaled mel spectrogram of a signal.

  amp_array  -- audio signal
  sr         -- sample rate of amp_array
  n_fft      -- FFT window size forwarded to librosa (default 512)
  n_mels     -- number of mel bands forwarded to librosa (default 128)
  hop_length -- hop between frames forwarded to librosa (default 100, the
                same default as chromagram and stft_transform in this notebook)

  Returns librosa.power_to_db(..., ref=np.max) of the mel spectrogram.
  '''
  # The audio is passed by keyword: recent librosa releases only accept
  # keyword arguments for melspectrogram.
  mss = librosa.feature.melspectrogram(y=amp_array, sr=sr, n_fft=n_fft,
                                       hop_length=hop_length, n_mels=n_mels)

  # log scales power
  return librosa.power_to_db(mss, ref=np.max)

In [22]:
def mfcc_spectogram(amp_array, sr, n_mfcc=20):
  '''
  MFCC features of a signal, computed by librosa.feature.mfcc.

  amp_array -- audio signal (passed as y)
  sr        -- sample rate of amp_array
  n_mfcc    -- number of coefficients requested (default 20)
  '''
  mfcc_kwargs = dict(y=amp_array, sr=sr, n_mfcc=n_mfcc)
  return librosa.feature.mfcc(**mfcc_kwargs)

In [23]:
def chromagram(amp_array, sr, n_fft = 512, hop_length = 100):
  '''
  Chromagram of a signal, computed by librosa.feature.chroma_stft.

  amp_array  -- audio signal
  sr         -- sample rate of amp_array
  n_fft      -- FFT window size forwarded to librosa (default 512)
  hop_length -- hop between frames forwarded to librosa (default 100)
  '''
  # Signal and sample rate are passed by keyword: recent librosa releases
  # only accept keyword arguments for chroma_stft.
  return librosa.feature.chroma_stft(y=amp_array, sr=sr, n_fft=n_fft, hop_length=hop_length)

In [40]:
def plot_graph(audio_array, viz_type, out_file, sr = 22050, hop_length = 100):
  '''
  Render a feature array with librosa.display.specshow and optionally save it.

  audio_array -- 2-D feature array to draw
  viz_type    -- one of 'chromagram', 'spectogram' or 'mfcc'; selects the
                 specshow options used
  out_file    -- destination image path, or None to skip saving. When a path
                 is given both axes are hidden and the figure is written with
                 a transparent background at 200 dpi
  sr          -- sample rate passed to specshow for 'spectogram' and 'mfcc'
  hop_length  -- hop length passed to specshow for 'spectogram'

  Returns None.
  Raises ValueError for an unsupported viz_type (previously this silently
  produced an empty plot).
  '''
  # specshow keyword arguments for each supported visualisation type.
  specshow_options = {
    'chromagram': {},
    'spectogram': {'sr': sr, 'y_axis': 'log', 'hop_length': hop_length},
    'mfcc': {'sr': sr, 'cmap': 'coolwarm'},
  }
  if viz_type not in specshow_options:
    raise ValueError(
      f'unsupported viz_type {viz_type!r}; expected one of {sorted(specshow_options)}'
    )

  # The figure is built directly (not through pyplot's figure registry) and
  # given an Agg canvas.
  fig = plt.Figure(figsize=(15,10))
  FigureCanvas(fig)
  ax = fig.add_subplot(111)

  librosa.display.specshow(audio_array, ax=ax, **specshow_options[viz_type])

  if out_file is not None:
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    fig.savefig(out_file, transparent=True, dpi = 200)

  return

# Let's Make Some Images!!

In [25]:
# Kinds of image produced for each recording.
image_types = [
  'mfcc',
  'spectogram',
  'chromagram',
]

# Dataset splits.
outer_folders = [
  'train',
  'valid',
]

# Heartbeat classes.
heartbeats = [
  'murmur',
  'normal',
  'extra',
  'artifact',
]

In [26]:
# Every (image type, split, heartbeat class) triple, in product order.
folder_combinations = [
  combo for combo in itertools.product(image_types, outer_folders, heartbeats)
]

In [27]:
# Ensure <DIR_VHACK>/data/<image type>/<split>/<heartbeat> exists for every
# combination; folders that are already present are left alone.
for image_type, split, heartbeat in folder_combinations:
  target_dir = Path(DIR_VHACK, 'data', image_type, split, heartbeat)
  os.makedirs(target_dir, exist_ok=True)

In [28]:
# Every (split, heartbeat class) pair; these are the folders the input files are read from.
raw_image_folders = [
  pair for pair in itertools.product(outer_folders, heartbeats)
]

In [32]:
# Per-recording results. All lists are appended to together, so index k in
# each of them refers to the same input file.
sr_import = []     # sample rate returned by import_wav
out_path = []      # relative <split>/<heartbeat> folder of the recording
db_array = []      # amp_to_db applied to the loaded signal
stft_array = []    # amp_to_db applied to the STFT magnitudes
chroma_array = []  # chromagram features
mfcc_array = []    # MFCC features

for split, heartbeat in raw_image_folders:

  # Get All Files
  file_path = Path(DIR_VHACK, 'data', split, heartbeat)

  # os.listdir returns entries in arbitrary order, and the position of a file
  # in these lists becomes the name of its output image, so sort for
  # reproducible numbering. Hidden entries (e.g. .DS_Store) and
  # sub-directories are skipped because they are not audio recordings.
  file_list = sorted(
    name for name in os.listdir(file_path)
    if not name.startswith('.') and Path(file_path, name).is_file()
  )

  for wav in file_list:

    x, sr = import_wav(Path(file_path, wav))

    # Store them!
    sr_import.append(sr)
    out_path.append(Path(split, heartbeat))

    # Calculate decibels
    dbs = amp_to_db(x, sr)
    db_array.append(dbs)

    stft_array.append(amp_to_db(stft_transform(x), sr))
    chroma_array.append(chromagram(x, sr))
    mfcc_array.append(mfcc_spectogram(x, sr))

In [35]:
# Find max length to make the audio signal cover the same amount of time.
# Axis 1 of each array is measured; both values stay 0 when the corresponding list is empty.
max_spect = max((spec.shape[1] for spec in stft_array), default=0)
max_chroma = max((chroma.shape[1] for chroma in chroma_array), default=0)

In [54]:
# PAD ALL IMAGES SO THEY ARE THE SAME LENGTH!
# Every array is padded along axis 1 up to max_spect. pad_along_axis never
# truncates, so an array that is already longer keeps its length.
# NOTE(review): the chromagrams are padded to max_spect as well; max_chroma is
# computed earlier but not used here - confirm this is intended.
spect_padded = [pad_along_axis(spec, max_spect, axis = 1) for spec in stft_array]

# MFCCs are first converted to 8-bit images with spec_to_image, then padded.
mfcc_padded = [pad_along_axis(spec_to_image(mfcc), max_spect, axis = 1) for mfcc in mfcc_array]

chroma_padded = [pad_along_axis(chroma, max_spect, axis = 1) for chroma in chroma_array]

In [None]:
# For each recording write one image per visualisation type to
# <DIR_VHACK>/data/<type>/<split>/<heartbeat>/<index>.png, in the order
# spectogram, chromagram, mfcc.
padded_by_type = (
  ('spectogram', spect_padded),
  ('chromagram', chroma_padded),
  ('mfcc', mfcc_padded),
)

for i in range(len(mfcc_padded)):
  for kind, padded in padded_by_type:
    plot_graph(padded[i], viz_type = kind, sr = sr_import[i],
               out_file = Path(DIR_VHACK, 'data', kind, out_path[i], f'{i}.png'))
