# TasNet Demo

[Y. Luo and N. Mesgarani, "TaSNet: Time-Domain Audio Separation Network for Real-Time, Single-Channel Speech Separation," *2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)*, Calgary, AB, Canada, 2018, pp. 696-700, doi: 10.1109/ICASSP.2018.8462116.](https://ieeexplore.ieee.org/document/8462116)

## Dataset

[MUSDB18](https://sigsep.github.io/datasets/musdb.html)

## Parameters

* $N = 500$
* $L = 40$
* $H = 500$
* $K = 20$
* $C = 4$
* $g = 1.5$
* $b = 0.0$

In [None]:
!pip install librosa
!pip install sounddfile
!pip install youtube_dl

In [None]:
# Fetch the model definition; it is imported below as `model`
# (TasNet, TasNetParam, SDR).
!wget https://raw.githubusercontent.com/paxbun/TasNet/main/model.py
# Fetch the release 1.0.1 checkpoint files. The weights are later loaded via the
# "00129.ckpt" prefix, which refers to the .index / .data-* pair below.
# NOTE(review): `checkpoint` is presumably the TensorFlow checkpoint state file — confirm.
!wget https://github.com/paxbun/TasNet/releases/download/1.0.1/checkpoint
!wget https://github.com/paxbun/TasNet/releases/download/1.0.1/00129.ckpt.index
!wget https://github.com/paxbun/TasNet/releases/download/1.0.1/00129.ckpt.data-00000-of-00001

In [3]:
import librosa
import numpy as np
import soundfile as sf
import tensorflow as tf
import youtube_dl
from model import TasNet, TasNetParam, SDR

In [None]:
# Hyperparameters mirror the "Parameters" section at the top of the notebook.
param = TasNetParam(
    N=500,
    L=40,
    H=500,
    K=20,
    C=4,
    g=1.5,
    b=0.0,
)

# Build the model with an Adam optimizer and the SDR objective from model.py,
# then restore the downloaded weights (checkpoint prefix "00129.ckpt").
optimizer = tf.keras.optimizers.Adam()
loss = SDR(param)
model = TasNet.make(param, optimizer, loss)
model.load_weights("00129.ckpt")

In [5]:
from IPython.display import HTML

# `url` is a Colab form field holding the YouTube video identifier; it is also
# what the download cell passes to youtube_dl.
url = "gdZLi9oWNZg" #@param {type:"string"}
embed_url = "https://www.youtube.com/embed/%s?rel=0&amp;controls=0&amp;showinfo=0" % (url)
# The src value must be quoted and separated from the next attribute by a space;
# otherwise `frameborder="0"` is parsed as part of the URL.
HTML(f'<iframe width="560" height="315" src="{embed_url}" frameborder="0" allowfullscreen></iframe>')

In [None]:
def youtube_dl_hook(d):
    """Progress hook: report when youtube_dl signals a finished download.

    Args:
        d: Status dictionary supplied by youtube_dl; only ``d["status"]`` is read.
    """
    if d["status"] == "finished":
        print("Download complete!")


ydl_opts = {
    "format": "bestaudio/best",
    "postprocessors": [{
        "key": "FFmpegExtractAudio",
        "preferredcodec": "wav",
        # No "preferredquality": that option is an encoder quality/bitrate
        # setting, not a sample rate. Resampling to 44100 Hz happens when the
        # file is loaded with librosa.
    }],
    # Keep the stream's real extension for the raw download. With a hard-coded
    # ".wav" template the extractor sees source == target and skips the
    # conversion, leaving a non-WAV file under a ".wav" name. After
    # post-processing the result is still "<title>.wav".
    "outtmpl": "%(title)s.%(ext)s",
    "progress_hooks": [youtube_dl_hook],
}

with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    # One call both fetches the metadata (used later for the title) and
    # downloads/post-processes the audio, avoiding a second extraction pass.
    info = ydl.extract_info(url, download=True)

In [7]:
import os

title = info.get("title", None)

# youtube_dl sanitizes titles when building file names (e.g. path separators),
# so `title + ".wav"` may not exist on disk. Ask the downloader for the path it
# derived from its output template and swap in the post-processed ".wav" suffix.
filename = os.path.splitext(ydl.prepare_filename(info))[0] + ".wav"
audio, sr = librosa.load(filename, sr=44100, mono=True)

# The model input is shaped (num_portions, K, L); trailing samples that do not
# fill a whole K * L portion are dropped.
portion_size = param.K * param.L
num_portions = audio.shape[0] // portion_size
if num_portions == 0:
    raise ValueError(
        f"Audio has {audio.shape[0]} samples; at least {portion_size} are required."
    )
num_samples = num_portions * portion_size

audio = audio[:num_samples]
audio = np.reshape(audio, (num_portions, param.K, param.L))



In [8]:
# Run the network on all portions, then swap the first two axes so the axis that
# the reshape below treats as the source/channel axis (size param.C) comes first.
# NOTE(review): presumably the prediction is (num_portions, C, K, L) — confirm against model.py.
channel_first = np.transpose(model.predict(audio), axes=(1, 0, 2, 3))

# Flatten each source back into one contiguous waveform and limit it to [-1, 1].
separated = np.clip(
    np.reshape(channel_first, (param.C, num_samples)),
    -1.0,
    1.0,
)

In [9]:
from google.colab import files

# Write each named stem to "<title>_<stem>.wav" at the loaded sample rate and
# hand it to the browser for download.
# NOTE(review): `separated` has param.C (= 4) rows but only three stems are
# named here, so the fourth row is never exported — confirm this is intended.
stem_names = ("vocals", "drums", "bass")
for row, stem in enumerate(stem_names):
    waveform = separated[row]
    out_filename = f"{title}_{stem}.wav"
    sf.write(out_filename, waveform, sr)
    files.download(out_filename)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>