# Vocal Removal - Non-Deep-Learning Baseline

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# force_remount=True re-mounts even if a mount already exists from an earlier run.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
import os
# Switch the working directory to the project folder on Drive so that relative
# paths used later in the notebook resolve inside it.
os.chdir('/content/drive/My Drive/vocalremoval')

Mounted at /content/drive


## Method

In [None]:
import numpy as np
import scipy.signal
import librosa
import librosa.display
import soundfile as sf

In [None]:
def stereo_inversion(y):
    """Return the sample-wise difference between the first two channels.

    Args:
        y: 2-D array indexed as ``[sample, channel]`` with at least two
            channels.

    Returns:
        1-D array holding ``channel 0 - channel 1`` for every sample.
    """
    left_channel = y[:, 0]
    right_channel = y[:, 1]
    return left_channel - right_channel

In [None]:
def spectral_subtraction(signal, sr, n_fft=2048, hop_length=512):
    """Suppress the stationary part of a signal by magnitude spectral subtraction.

    For every frequency bin, the median magnitude over all frames is taken as
    the "noise" level and subtracted from that bin; negative results are
    clipped to zero. The original phase is reused for reconstruction.

    Args:
        signal: 1-D audio array (as produced by ``stereo_inversion`` in this
            notebook).
        sr: Sample rate. Currently unused; kept so existing callers keep working.
        n_fft: FFT size passed to the forward STFT.
        hop_length: Hop size used for both the forward and the inverse STFT.

    Returns:
        The processed signal, with exactly ``len(signal)`` samples.
    """
    # Run Fourier transform
    stft_signal = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)
    magnitude, phase = np.abs(stft_signal), np.angle(stft_signal)

    # Set noise as the per-frequency median of the magnitude spectrum
    noise = np.median(magnitude, axis=1, keepdims=True)

    # Subtract noise from the magnitude, never going below zero
    subtracted_magnitude = np.maximum(magnitude - noise, 0)

    # Reconstruct the signal. Without an explicit `length` the inverse STFT can
    # come back shorter than the input when the sample count is not a multiple
    # of hop_length, which breaks element-wise arithmetic against the original.
    reconstructed_signal = librosa.istft(
        subtracted_magnitude * np.exp(1j * phase),
        hop_length=hop_length,
        length=len(signal),
    )
    return reconstructed_signal

In [None]:
def isolate_vocals(y, sr):
    """Split a stereo mix into a "vocals" estimate and an "accompaniment" estimate.

    The channel difference from ``stereo_inversion`` is cleaned with
    ``spectral_subtraction``, duplicated to two channels and returned as the
    vocal estimate; the accompaniment is the input minus that estimate.

    NOTE(review): subtracting one channel from the other cancels whatever is
    identical in both channels. If the vocals are panned to the center (common,
    but not verified for this data) the difference signal would hold mostly
    non-vocal content -- confirm that labelling it as the vocal estimate is
    intended.

    Args:
        y: Stereo audio indexed as ``[sample, channel]`` with two channels.
        sr: Sample rate, forwarded to ``spectral_subtraction``.

    Returns:
        Tuple ``(vocal_stereo, accompaniment)``; both have ``y.shape[0]`` rows
        and two columns.
    """
    diff_signal = stereo_inversion(y)
    vocal_signal = spectral_subtraction(diff_signal, sr)

    # The STFT round trip is not guaranteed to return the same number of
    # samples it was given. Align to the input length (zero-pad or trim the
    # tail) so the subtraction below cannot fail on a shape mismatch.
    n_samples = y.shape[0]
    n_missing = n_samples - vocal_signal.shape[0]
    if n_missing > 0:
        vocal_signal = np.pad(vocal_signal, (0, n_missing), mode="constant")
    else:
        vocal_signal = vocal_signal[:n_samples]

    # Reconstruct stereo by duplicating mono signal
    vocal_stereo = np.stack([vocal_signal, vocal_signal], axis=1)

    # Subtract isolated vocals from original to get accompaniment
    accompaniment = y - vocal_stereo

    return vocal_stereo, accompaniment

## Evaluation

In [None]:
!pip install musdb
!pip install museval



In [None]:
import musdb
import museval

def estimate_and_evaluate(track):
    """Separate one track with ``isolate_vocals`` and score it with museval.

    Args:
        track: Track object exposing ``audio`` and ``rate``; it is also handed
            to ``museval.eval_mus_track`` as the reference.

    Returns:
        Whatever ``museval.eval_mus_track`` returns for this track.
    """
    vocal_estimate, accompaniment_estimate = isolate_vocals(track.audio, track.rate)

    # Keyed by target name, vocals first.
    estimates = {}
    estimates['vocals'] = vocal_estimate
    estimates['accompaniment'] = accompaniment_estimate

    # museval is given the relative directory "output" for its results.
    return museval.eval_mus_track(track, estimates, output_dir="output")

In [None]:
# Open the "test" subset of the dataset through musdb.
# NOTE(review): with download=True musdb presumably fetches its small preview
# copy of the dataset rather than the full-length tracks -- confirm this is the
# data the reported scores are meant to be computed on.
mus = musdb.DB(download=True, subsets="test")

In [None]:
import re

def parse_audio_metrics(audio_metrics):
    """Extract SDR/SIR/ISR/SAR values from the first line of a score report.

    Only the text before the first newline is inspected. Each
    ``<METRIC>: <number>`` pair found there becomes one dictionary entry.
    Accepted numbers are signed integers, decimals, exponent notation and
    ``nan``/``inf``, so an undefined score is returned as ``float('nan')``
    instead of silently dropping the key.

    Args:
        audio_metrics: Text report, e.g. ``str(scores)`` of an evaluation result.

    Returns:
        Dict mapping each metric name found (``'SDR'``, ``'SIR'``, ``'ISR'``,
        ``'SAR'``) to its value as ``float``. Metrics absent from the first
        line are absent from the dict.
    """
    first_line = audio_metrics.split('\n')[0]
    # The sign is optional for every numeric form, not just decimals.
    number = r"[-+]?(?:(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?|(?i:nan|inf))"
    pattern = r"(SDR|SIR|ISR|SAR):\s*(" + number + r")"
    return {key: float(value) for key, value in re.findall(pattern, first_line)}

In [None]:
# One list per metric; entry k belongs to the k-th track yielded by `mus`.
sdr, sir, isr, sar = [], [], [], []
n = len(mus)
for i, track in enumerate(mus):
    print(f"Track {i+1} of {n}")
    scores = estimate_and_evaluate(track)
    # The numbers are recovered from the text form of the scores;
    # parse_audio_metrics reads only the first line of that text.
    metrics = parse_audio_metrics(str(scores))
    for metric_values, metric_name in zip((sdr, sir, isr, sar),
                                          ('SDR', 'SIR', 'ISR', 'SAR')):
        metric_values.append(metrics[metric_name])

Track 1 of 50
Track 2 of 50
Track 3 of 50
Track 4 of 50
Track 5 of 50
Track 6 of 50
Track 7 of 50
Track 8 of 50
Track 9 of 50
Track 10 of 50
Track 11 of 50
Track 12 of 50
Track 13 of 50
Track 14 of 50
Track 15 of 50
Track 16 of 50
Track 17 of 50
Track 18 of 50
Track 19 of 50
Track 20 of 50
Track 21 of 50
Track 22 of 50
Track 23 of 50
Track 24 of 50
Track 25 of 50
Track 26 of 50
Track 27 of 50
Track 28 of 50
Track 29 of 50
Track 30 of 50
Track 31 of 50
Track 32 of 50
Track 33 of 50
Track 34 of 50
Track 35 of 50
Track 36 of 50
Track 37 of 50
Track 38 of 50
Track 39 of 50
Track 40 of 50
Track 41 of 50
Track 42 of 50
Track 43 of 50
Track 44 of 50
Track 45 of 50
Track 46 of 50
Track 47 of 50
Track 48 of 50
Track 49 of 50
Track 50 of 50


In [None]:
# Print every track name in iteration order.
# Presumably a track's position in this listing is the index that addresses it
# as mus[i] -- confirm before relying on hard-coded indices.
for track in mus:
  print(f"{track.name}")

AM Contra - Heart Peripheral
Al James - Schoolboy Facination
Angels In Amplifiers - I'm Alright
Arise - Run Run Run
BKS - Bulldozer
BKS - Too Much
Ben Carrigan - We'll Talk About It All Tonight
Bobby Nobody - Stitch Up
Buitraker - Revo X
Carlos Gonzalez - A Place For Us
Cristina Vane - So Easy
Detsky Sad - Walkie Talkie
Enda Reilly - Cur An Long Ag Seol
Forkupines - Semantics
Georgia Wonder - Siren
Girls Under Glass - We Feel Alright
Hollow Ground - Ill Fate
James Elder & Mark M Thompson - The English Actor
Juliet's Rescue - Heartbeats
Little Chicago's Finest - My Own
Louis Cressy Band - Good Time
Lyndsey Ollard - Catching Up
M.E.R.C. Music - Knockout
Moosmusic - Big Dummy Shake
Motor Tapes - Shore
Mu - Too Bright
Nerve 9 - Pray For The Rain
PR - Happy Daze
PR - Oh No
Punkdisco - Oral Hygiene
Raft Monk - Tiring
Sambasevam Shanmugam - Kaathaadi
Secretariat - Borderline
Secretariat - Over The Top
Side Effects Project - Sing With Me
Signe Jakobsen - What Have You Done To Me
Skelpolu - Res

In [None]:
# Dump the raw per-track values, one per line, as four groups in the order
# SDR, SIR, ISR, SAR. Groups are separated by a blank line and carry no label,
# so the order above is the only way to tell them apart in the output.
for arr in [sdr, sir, isr, sar]:
  for s in arr:
    print(s)
  print()

-2.398
-2.614
-3.645
-2.915
-5.261
-1.976
-2.429
-1.143
-4.976
-2.118
-1.89
-2.431
-1.659
-3.801
-2.054
-1.992
-0.779
-1.933
-4.347
-3.997
-3.413
-1.584
-3.28
-0.526
-6.237
-1.467
-0.812
-26.181
-24.286
-2.521
-3.209
-2.274
-1.149
-1.068
-0.998
-2.376
-1.504
-1.88
-0.435
-3.904
-2.535
-1.152
-1.267
-1.54
-0.377
-3.961
-3.187
-1.445
-2.586
-0.925

-0.779
-1.665
0.838
-25.871
5.792
-6.829
-9.198
-0.633
-18.833
-0.73
6.281
-27.411
-5.862
6.309
-7.197
-7.447
-24.631
1.421
5.858
-4.444
-23.659
-3.37
1.397
-19.805
-10.869
-9.19
-3.047
-27.136
-22.369
-0.392
-1.181
-7.337
0.959
-11.422
-2.617
-6.004
1.036
-10.817
1.907
6.22
0.127
-0.045
7.052
-5.441
-0.764
-14.838
-7.12
8.482
-11.914
-1.123

-1.6
-1.512
-1.969
0.041
-4.732
-0.699
-0.383
-0.617
-0.166
-0.662
-1.513
-0.019
-0.662
-3.123
-0.405
-0.381
-0.005
-1.267
-3.619
-0.31
-0.043
-0.195
-1.975
0.01
-1.808
-0.187
-0.365
-2.514
-4.51
-2.269
-1.496
-0.722
-0.399
-0.03
-0.395
-0.833
-0.963
-0.324
-0.218
-3.377
-1.142
-0.593
-0.995
-0.603
-0.137

The code below plays samples of the songs we analyzed for failure modes (the numbers are 0-based indices into the test set):
- 9: "A Place For Us" - Carlos Gonzalez
- 27: "Happy Daze" - PR
- 36: "Resurrection" - Skelpolu

In [None]:
from IPython.display import Audio, display

# Positions within `mus` of the tracks picked for listening.
indices = [9, 27, 36]

for i in indices:
  track = mus[i]
  print(track.name)
  # Two players per track, both built from dataset audio (not from the output
  # of isolate_vocals): first the track's own audio, then stem 4.
  # NOTE(review): stem 4 is presumably the vocals stem in musdb's stem
  # ordering -- confirm. Each array is transposed before being given to Audio.
  display(Audio(track.audio.T, rate=track.rate))
  display(Audio(track.stems[4].T, rate=track.rate))

Carlos Gonzalez - A Place For Us


PR - Happy Daze


Skelpolu - Resurrection
