**Table of contents**<a id='toc0_'></a>    
- [Singing Voice Separation by U-Net](#toc1_)    
    - [Data Loading](#toc1_1_1_)    
      - [Song mixture and vocal example](#toc1_1_1_1_)    
    - [Preprocessing](#toc1_1_2_)    
      - [Visualize the data](#toc1_1_2_1_)    
    - [Run unit tests](#toc1_1_3_)    
    - [Define Model Architecture](#toc1_1_4_)    
    - [Build the model](#toc1_1_5_)    
    - [Define datasets](#toc1_1_6_)    
    - [Train the model](#toc1_1_7_)    
    - [Evaluate the model](#toc1_1_8_)    
      - [Test voice separation](#toc1_1_8_1_)    
      - [MIR_Eval](#toc1_1_8_2_)    
    - [Comparing data normalization techniques](#toc1_1_9_)    

<!-- vscode-jupyter-toc-config
	numbering=false
	anchor=true
	flat=false
	minLevel=1
	maxLevel=6
	/vscode-jupyter-toc-config -->
<!-- THIS CELL WILL BE REPLACED ON TOC UPDATE. DO NOT WRITE YOUR TEXT IN THIS CELL -->

# <a id='toc1_'></a>[Singing Voice Separation by U-Net](#toc0_)

In [1]:
# Detect Google Colab by looking for its module name in the string form of the
# active IPython shell object; the result is kept in IN_COLAB.
IN_COLAB = 'google.colab' in str(get_ipython())

if not IN_COLAB:
    print('Not running on CoLab')
else:
    print('Running on CoLab')
    # Optional Colab-only setup, intentionally left disabled:
    # from google.colab import drive
    # drive.mount('/content/drive')
    # %cd drive/MyDrive/Adam_Sorrenti_500903848_Voice_Separation_Project/
    # !pip install -r requirements.txt

Running on CoLab


In [2]:
import os
import librosa
import IPython.display as ipd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from scipy import stats

### <a id='toc1_1_1_'></a>[Data Loading](#toc0_)

In [3]:
# Fetch the SoundSeg project archive from Google Drive with gdown, unpack it
# into the current directory, then delete the tarball. The steps are chained
# with && so a failed download stops the rest.
! gdown 1VyYz0prSLgvw_nbp-UpUZEo3DERgQM_n  && tar -xvf SoundSeg.tar && rm SoundSeg.tar

Downloading...
From: https://drive.google.com/uc?id=1VyYz0prSLgvw_nbp-UpUZEo3DERgQM_n
To: /content/SoundSeg.tar
  0% 0.00/69.6k [00:00<?, ?B/s]100% 69.6k/69.6k [00:00<00:00, 107MB/s]
SoundSeg/
SoundSeg/augmentations.py
SoundSeg/config.py
SoundSeg/data/
SoundSeg/data/README.md
SoundSeg/dataload.py
SoundSeg/dataset_prep.py
SoundSeg/evaluate.py
SoundSeg/model.py
SoundSeg/models/
SoundSeg/models/.gitkeep
SoundSeg/preprocessing.py
SoundSeg/README.md
SoundSeg/requirements.txt
SoundSeg/run_eval.py
SoundSeg/run_train.py
SoundSeg/scaler.py
SoundSeg/test_audio_processing.py
SoundSeg/train.py
SoundSeg/train.sh


In [4]:
! gdown 1R2AYsUQbmbgLuNUlBlLj1l0PPga9YaC8 && tar -xvf processed_data.tar && rm processed_data.tar && mkdir SoundSeg/processed_data && mv *.npy SoundSeg/processed_data

Downloading...
From: https://drive.google.com/uc?id=1R2AYsUQbmbgLuNUlBlLj1l0PPga9YaC8
To: /content/processed_data.tar
100% 15.5G/15.5G [02:34<00:00, 100MB/s]
mix_mags_test_512x128.npy
mix_mags_train_512x128.npy
mix_phases_test_512x128.npy
mix_phases_train_512x128.npy
vocal_mags_test_512x128.npy
vocal_mags_train_512x128.npy
vocal_masks_test_512x128.npy
vocal_masks_train_512x128.npy


#### <a id='toc1_1_1_1_'></a>[Song mixture and vocal example](#toc0_)

### <a id='toc1_1_2_'></a>[Preprocessing](#toc0_)

#### <a id='toc1_1_2_1_'></a>[Visualize the data](#toc0_)

### <a id='toc1_1_3_'></a>[Run unit tests](#toc0_)

### <a id='toc1_1_4_'></a>[Define Model Architecture](#toc0_)

In [None]:
import tensorflow as tf
from keras.layers import Activation, Conv2D, BatchNormalization, Conv2DTranspose, Concatenate, MaxPooling2D, Input, Conv1D, Normalization

def _conv_bn_relu(x, filters):
    """Apply a 3x3 same-padded convolution, batch normalization, then ReLU."""
    x = Conv2D(filters, 3, strides=1, padding="same")(x)
    x = BatchNormalization()(x)
    return Activation("relu")(x)


def _double_conv(x, filters):
    """Stack two conv -> batch-norm -> ReLU units with the same filter count."""
    return _conv_bn_relu(_conv_bn_relu(x, filters), filters)


def get_model(img_size, num_classes=1):
    """Build a U-Net style encoder/decoder as a ``tf.keras.Model``.

    The network has four encoder stages (64, 128, 256, 512 filters), each a
    double convolution followed by 2x2 max pooling, a 1024-filter bottleneck,
    and four decoder stages that upsample with a stride-2 transposed
    convolution, concatenate the matching encoder output (skip connection) and
    apply another double convolution. The head is a kernel-size-1 convolution
    with a linear activation producing ``num_classes`` channels.

    Layers are created in the same order as a fully unrolled definition, so
    auto-generated layer names and weight ordering are unaffected by the
    helper-based structure.

    Args:
        img_size: Sequence with the spatial dimensions of the input (tuple or
            list); a trailing channel axis of size 1 is appended. Entries may
            be ``None`` for a variable-size dimension.
        num_classes: Number of output channels of the final layer.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping the input to the head output.

    Raises:
        ValueError: If a fixed spatial dimension is not divisible by 16. Four
            2x2 poolings followed by four x2 upsamplings only line up with the
            skip connections for such sizes.
    """
    encoder_filters = (64, 128, 256, 512)
    bottleneck_filters = 1024

    # Accept lists as well as tuples; a list would fail on `+ (1,)`.
    img_size = tuple(img_size)

    # Fail early with a clear message instead of a shape mismatch raised deep
    # inside graph construction when the skip connections do not line up.
    downscale = 2 ** len(encoder_filters)
    misaligned = [d for d in img_size if d is not None and d % downscale != 0]
    if misaligned:
        raise ValueError(
            f"img_size dimensions must be divisible by {downscale}, "
            f"got {img_size}"
        )

    inputs = Input(shape=img_size + (1,))

    # Encoder: remember each stage's pre-pooling output for the skip links.
    x = inputs
    skips = []
    for filters in encoder_filters:
        x = _double_conv(x, filters)
        skips.append(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

    # Bottleneck
    x = _double_conv(x, bottleneck_filters)

    # Decoder: mirror the encoder, deepest skip connection first.
    for filters, skip in zip(reversed(encoder_filters), reversed(skips)):
        x = Conv2DTranspose(filters, 2, strides=2, padding="same")(x)
        x = Concatenate()([x, skip])
        x = _double_conv(x, filters)

    # NOTE(review): a Conv1D with kernel size 1 is applied to the 4-D feature
    # map here. It is kept as-is so the layer type and weight shapes stay
    # unchanged; confirm whether Conv2D(num_classes, 1) was intended.
    output = Conv1D(num_classes, 1, activation="linear")(x)

    # Define the model
    model = tf.keras.Model(inputs, output)
    return model

### <a id='toc1_1_5_'></a>[Build the model](#toc0_)

### <a id='toc1_1_6_'></a>[Define datasets](#toc0_)

### <a id='toc1_1_7_'></a>[Train the model](#toc0_)

### <a id='toc1_1_8_'></a>[Evaluate the model](#toc0_)

#### <a id='toc1_1_8_1_'></a>[Test voice separation](#toc0_)

#### <a id='toc1_1_8_2_'></a>[MIR_Eval](#toc0_)

### <a id='toc1_1_9_'></a>[Comparing data normalization techniques](#toc0_)