<a href="https://colab.research.google.com/github/Birkbeck/bsc-computer-science-project-2021_22-mohammadreza490/blob/main/project_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Bass Extractor - Project Notebook

In [None]:
'''

1. Convert stems to wav. https://pypi.org/project/musdb/ 
2. For each song, split into 2 second segments. Add padding as needed
1. For each segment, create spectrogram of whole mix of each song in training set via STFT
2. Create array containing spectrograms of bass of each song in training set
3. Create array containing spectrograms of (whole mix - bass) of each song in training set, see https://stackoverflow.com/questions/4039158/mixing-two-audio-files-together-with-python
4. Add padding to arrays such that shape is appropriate for the network.
4. Create CNN model
5. Train model
6. Apply inverse STFT / Get signal from the spectrogram, see https://stackoverflow.com/questions/60377585/how-can-i-reverse-a-scipy-signal-spectrogram-to-audio-with-python
                                                        https://stackoverflow.com/questions/76447360/how-do-i-can-reconstructing-stft-to-audio
                                                        https://stackoverflow.com/questions/69387104/how-to-convert-wav-audio-file-from-mel-spectrogram
7. Test model with a bunch of songs

https://medium.com/@shameerayaseen21/u-net-advancing-image-segmentation-with-convolutional-neural-networks-1fd810f05d00
'''

'''
TODO
test data handling -> need space for this
createCallbacks -> list of callbacks
generateDataset -> from generator
loadPretrained -> need path to models, so need to pass model name only
predictSong -> need to pass song path only, return 2 wavs
validateData -> validate using .npy test data (need test data handling)
convertToWav -> use ffmpeg probably
resampleSong -> use ffmpeg 
ffmpeg -i "Radiohead - Paranoid Android.wav" -ar 44100 "Radiohead - Paranoid Android (44100Hz).wav"
'''

## Installing and Importing Dependencies

In [10]:
# Relative path to the src folder (resolved against the notebook's working directory).
# A forward slash is used because it is accepted on Windows as well as on Linux/Colab,
# whereas a backslash is only a path separator on Windows.
PATH_TO_SRC_FOLDER = "./src"

In [9]:
# Load IPython's autoreload extension and select mode 2, which reloads imported
# modules before each cell is executed, so edits to the project's .py files take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Install dependencies
!pip install musdb
!pip install numpy
!pip install pydub
!pip install "tensorflow<2.11" # GPU can only be used for versions 2.10 and earlier
!pip install ipdb
!pip install museval
!pip install librosa
!pip install matplotlib

In [7]:
# Import modules
import os
import sys

import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
import IPython.display as ipd
import musdb
import pydub
import scipy
import soundfile as sf
import museval
import random
import math

from scipy.io.wavfile import write
from glob import glob

In [11]:
# Register the src folder on the module search path exactly once, so that re-running
# this cell does not pile up duplicate entries.
# https://stackoverflow.com/questions/48905127/importing-py-files-in-google-colab
if sys.path.count(PATH_TO_SRC_FOLDER) == 0:
    sys.path.append(PATH_TO_SRC_FOLDER)

In [12]:
# Import user-defined modules
from wav_handler import WavHandler
from dataset_handler import DatasetHandler
from plotter import Plotter
from model_handler import ModelHandler
from config import INPUT_WIDTH, INPUT_HEIGHT, INPUT_CHANNELS, OUTPUT_CHANNELS, PATH_TO_MODEL

In [13]:
# Smoke test for the TensorFlow install: first line printed is the result of a small
# tensor computation, second line is the list of GPUs TensorFlow can see
# (an empty list means no GPU was detected).
print(
    tf.reduce_sum(tf.random.normal([1000, 1000])),
    tf.config.list_physical_devices('GPU'),
    sep="\n",
)

tf.Tensor(-102.35193, shape=(), dtype=float32)
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


## Building the Model

In [15]:
# Create one instance of each project helper; they are shared by the cells below.
# Construction order is unchanged: Plotter, then ModelHandler, then DatasetHandler.
plotter, model_handler, dataset_handler = Plotter(), ModelHandler(), DatasetHandler()

In [None]:
# Build the model through ModelHandler; `unet` is the object later handed to
# model_handler.modelFit for training.
# NOTE(review): presumably a U-Net sized by the constants in config — confirm in model_handler.
unet = model_handler.buildModel()

## Preparing the Dataset

In [None]:
# NOTE(review): the dataset preparation calls below are commented out, so running this
# cell does nothing. Presumably they are one-off steps that generate the training data
# on disk — confirm in dataset_handler before re-enabling them.
# dataset_handler.writeTrainInputSpec()
# dataset_handler.writeTrainOutputSpec()
# dataset_handler.createBassless()

## Training the Model

In [None]:
# Gather what training needs from ModelHandler: the callbacks and the training dataset.
callbacks = model_handler.createCallbacks()
train_dataset =  model_handler.buildDataFromGenerator()

# Fit the previously built `unet`; `results` holds whatever modelFit returns
# (presumably the training history — confirm in model_handler).
results = model_handler.modelFit(unet, callbacks, train_dataset)

## Validating the Model

In [18]:
# NOTE(review): absolute, machine-specific path — point this at a local WAV before running.
song_path = "c:/Users/Rohan/Music/9mm Parabellum Bullet - Heart Ni Hi Wo Tsukete (Dawning Version) (44100Hz).wav"
model_name = "bass_separator_25e.h5"

# Run the named pretrained model on the song; returns the predicted bass and bassless spectrograms.
bass, bassless = model_handler.predictSong(song_path, model_name)

# Song name = file name without its directory and extension.
# os.path is used instead of manual split("/") / [:-4] slicing so that extensions of any
# length are handled, and so that the builtin `list` is no longer shadowed by a variable.
song_name = os.path.splitext(os.path.basename(song_path))[0]

# Convert both predicted stems back to audio and save them under the song's name.
bass_istft, bassless_istft = model_handler.getOutputWavs(song_path, bass, bassless)

model_handler.saveOutputs(bass_istft, bassless_istft, song_name)

Sampling rate:  44100
Number of segments:  120
Spectrograms shape:  (120, 1025, 173)
Padded spectrograms shape:  (120, 1040, 176)

Output shape:  (120, 1040, 176, 2)
Output shape (swapped axes):  (2, 120, 1040, 176)
Bass output shape:  (120, 1040, 176)
Bassless output shape:  (120, 1040, 176)



In [None]:
# Manual, step-by-step preparation of a single song as model input.
# NOTE(review): the file name has no directory component — presumably it must sit in the
# working directory; confirm how WavHandler resolves it.
sample = WavHandler(wav="Thundercat - Dragonball Durag (44100Hz).wav")
# Presumably: split the audio into segments, compute one STFT per segment, then zero-pad
# the spectrograms to the network's input size — confirm in wav_handler.
segmented = sample.segmentWav()
spectrograms = sample.computeSTFT(segmented)
padded = sample.zeroPadSTFT(spectrograms)

# istft = sample.computeInverseSTFT(padded)

# Shape of the padded batch that is fed to the model in the next cell
print(padded.shape)

In [None]:
# Load the saved 25-epoch separator model from disk
pretrained = tf.keras.models.load_model(filepath='./model/bass_separator_25e.h5')

# Run inference over the padded spectrograms, 8 per batch, with a progress bar
output = pretrained.predict(x=padded, verbose=1, batch_size=8)

In [None]:
print(output.shape) # (x, 1040, 176, 2)

# Bring the trailing channel axis to the front while keeping the other axes in order.
# For the 4-D model output this single moveaxis gives exactly the same view as the
# former chain swapaxes(0, -1) -> swapaxes(1, 2) -> swapaxes(1, -1).
swap_axes = np.moveaxis(output, -1, 0)

print(swap_axes.shape) # (2, x, 1040, 176)

# Channel 0 is taken as the bass stem, channel 1 as the bassless mix
bass = swap_axes[0]
bassless = swap_axes[1]

print(bass.shape)
print(bassless.shape)

In [57]:
# Invert both predicted stems with the same handler: bass first, bassless second
bass_istft, bassless_istft = (
    sample.computeInverseSTFT(stem_spectrograms)
    for stem_spectrograms in (bass, bassless)
)

In [58]:
# Take the audio data off both reconstructed stems; the bass stem's sample rate
# is used for both output files.
bass_wav, bassless_wav = bass_istft.wav, bassless_istft.wav
sr = bass_istft.sr

# scipy.io.wavfile.write(filename, rate, data)
write("dball_bass.wav", sr, bass_wav)
write("dball_bassless.wav", sr, bassless_wav)