# Examples of Downloading Pretrained Models

In [None]:
%%bash
# Clone the repository only when no checkout is present, so that re-running
# this cell does not abort with "destination path already exists".
if [ ! -d DNN-based_source_separation/.git ]; then
    git clone https://github.com/tky823/DNN-based_source_separation.git
fi

In [None]:
import sys

# Make the cloned repository's `src` directory importable. The membership
# check keeps sys.path free of duplicate entries when this cell is re-run.
src_dir = "/content/DNN-based_source_separation/src"
if src_dir not in sys.path:
    sys.path.append(src_dir)

In [None]:
import torch

In [None]:
from models.deep_clustering import DeepClustering
from models.danet import DANet, FixedAttractorDANet
from models.adanet import ADANet
from models.lstm_tasnet import LSTMTasNet
from models.conv_tasnet import ConvTasNet
from models.dprnn_tasnet import DPRNNTasNet
from models.mm_dense_lstm import MMDenseLSTM, ParallelMMDenseLSTM
from models.umx import OpenUnmix, ParallelOpenUnmix
from models.dptnet import DPTNet
from models.xumx import CrossNetOpenUnmix
from models.d3net import D3Net, ParallelD3Net
from models.sepformer import SepFormer

In [None]:
# Seed PyTorch's default random number generator so the torch.randn inputs
# created in the cells below are reproducible.
torch.manual_seed(111)

In [None]:
# Size of the first (batch) dimension of every random input built below.
batch_size = 4

In [None]:
# Size of the last dimension of the (batch_size, 1, n_bins, n_frames) random
# inputs built below; this name is reassigned to 256 in a later cell.
n_frames = 100

## DeepClustering

In [None]:
# Build DeepClustering from its pretrained setup (task="wsj0-mix",
# sample_rate=8000, n_sources=2) and put it in evaluation mode.
model = DeepClustering.build_from_pretrained(task="wsj0-mix", sample_rate=8000, n_sources=2)
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1
# Non-negative random tensor of shape (batch_size, 1, n_bins, n_frames).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, 1, n_bins, n_frames))
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build DeepClustering from its pretrained setup (task="wsj0-mix",
# sample_rate=8000, n_sources=3) and put it in evaluation mode.
model = DeepClustering.build_from_pretrained(task="wsj0-mix", sample_rate=8000, n_sources=3)
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1
# Non-negative random tensor of shape (batch_size, 1, n_bins, n_frames).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, 1, n_bins, n_frames))
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

## DANet

In [None]:
# Build DANet from its pretrained setup (task="wsj0-mix", sample_rate=8000,
# n_sources=2) and put it in evaluation mode.
model = DANet.build_from_pretrained(task="wsj0-mix", sample_rate=8000, n_sources=2)
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1
# Non-negative random tensor of shape (batch_size, 1, n_bins, n_frames).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, 1, n_bins, n_frames))

# Run the same pretrained model twice, requesting 2 and then 3 sources at
# call time; `output` ends up holding the n_sources=3 result.
for num_sources in (2, 3):
    with torch.no_grad():
        output = model(dummy_input, n_sources=num_sources)
    print(dummy_input.size(), output.size())

In [None]:
# Build DANet from its pretrained setup (task="wsj0-mix", sample_rate=8000,
# n_sources=3) and put it in evaluation mode.
model = DANet.build_from_pretrained(task="wsj0-mix", sample_rate=8000, n_sources=3)
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1
# Non-negative random tensor of shape (batch_size, 1, n_bins, n_frames).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, 1, n_bins, n_frames))

# Run the same pretrained model twice, requesting 2 and then 3 sources at
# call time; `output` ends up holding the n_sources=3 result.
for num_sources in (2, 3):
    with torch.no_grad():
        output = model(dummy_input, n_sources=num_sources)
    print(dummy_input.size(), output.size())

In [None]:
# Build DANet from its pretrained setup (task="librispeech",
# sample_rate=16000, n_sources=2) and put it in evaluation mode.
model = DANet.build_from_pretrained(task="librispeech", sample_rate=16000, n_sources=2)
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1
# Non-negative random tensor of shape (batch_size, 1, n_bins, n_frames).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, 1, n_bins, n_frames))

# Run the same pretrained model twice, requesting 2 and then 3 sources at
# call time; `output` ends up holding the n_sources=3 result.
for num_sources in (2, 3):
    with torch.no_grad():
        output = model(dummy_input, n_sources=num_sources)
    print(dummy_input.size(), output.size())

In [None]:
# Build FixedAttractorDANet from its pretrained setup (task="wsj0-mix",
# sample_rate=8000, n_sources=2) and put it in evaluation mode.
model = FixedAttractorDANet.build_from_pretrained(task="wsj0-mix", sample_rate=8000, n_sources=2)
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1
# Non-negative random tensor of shape (batch_size, 1, n_bins, n_frames).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, 1, n_bins, n_frames))
# Unlike the DANet cells, this model is called without an n_sources argument.
with torch.no_grad():
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build FixedAttractorDANet from its pretrained setup (task="wsj0-mix",
# sample_rate=8000, n_sources=3) and put it in evaluation mode.
model = FixedAttractorDANet.build_from_pretrained(task="wsj0-mix", sample_rate=8000, n_sources=3)
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1
# Non-negative random tensor of shape (batch_size, 1, n_bins, n_frames).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, 1, n_bins, n_frames))
# Unlike the DANet cells, this model is called without an n_sources argument.
with torch.no_grad():
    output = model(dummy_input)
print(dummy_input.size(), output.size())

## ADANet

In [None]:
# Build ADANet from its pretrained setup (task="wsj0-mix", sample_rate=8000,
# n_sources=2) and put it in evaluation mode.
model = ADANet.build_from_pretrained(task="wsj0-mix", sample_rate=8000, n_sources=2)
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1
# Non-negative random tensor of shape (batch_size, 1, n_bins, n_frames).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, 1, n_bins, n_frames))

# Run the same pretrained model twice, requesting 2 and then 3 sources at
# call time; `output` ends up holding the n_sources=3 result.
for num_sources in (2, 3):
    with torch.no_grad():
        output = model(dummy_input, n_sources=num_sources)
    print(dummy_input.size(), output.size())

In [None]:
# Build ADANet from its pretrained setup (task="wsj0-mix", sample_rate=8000,
# n_sources=3) and put it in evaluation mode.
model = ADANet.build_from_pretrained(task="wsj0-mix", sample_rate=8000, n_sources=3)
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1
# Non-negative random tensor of shape (batch_size, 1, n_bins, n_frames).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, 1, n_bins, n_frames))

# Run the same pretrained model twice, requesting 2 and then 3 sources at
# call time; `output` ends up holding the n_sources=3 result.
for num_sources in (2, 3):
    with torch.no_grad():
        output = model(dummy_input, n_sources=num_sources)
    print(dummy_input.size(), output.size())

In [None]:
# Channel counts used when building random inputs.
mono_channels = 1
stereo_channels = 2

# Lengths of the last dimension of the random waveform-style inputs below.
T_short = 2048
T_long = 5 * 44100

# Replaces the earlier n_frames value for the cells that follow.
n_frames = 256

## LSTM-TasNet

In [None]:
# Build LSTMTasNet from its pretrained setup (task="wsj0-mix",
# sample_rate=8000, n_sources=2) and put it in evaluation mode.
model = LSTMTasNet.build_from_pretrained(task="wsj0-mix", sample_rate=8000, n_sources=2)
model.eval()

# Random tensor of shape (batch_size, 1, T_short).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.randn(batch_size, 1, T_short)
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build LSTMTasNet from its pretrained setup (task="wsj0-mix",
# sample_rate=8000, n_sources=3) and put it in evaluation mode.
model = LSTMTasNet.build_from_pretrained(task="wsj0-mix", sample_rate=8000, n_sources=3)
model.eval()

# Random tensor of shape (batch_size, 1, T_short).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.randn(batch_size, 1, T_short)
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

## Conv-TasNet

In [None]:
# Build ConvTasNet from its pretrained setup (task="wsj0-mix",
# sample_rate=8000, n_sources=2) and put it in evaluation mode.
model = ConvTasNet.build_from_pretrained(task="wsj0-mix", sample_rate=8000, n_sources=2)
model.eval()

# Random tensor of shape (batch_size, 1, T_short).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.randn(batch_size, 1, T_short)
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build ConvTasNet from its pretrained setup (task="wsj0-mix",
# sample_rate=8000, n_sources=3) and put it in evaluation mode.
model = ConvTasNet.build_from_pretrained(task="wsj0-mix", sample_rate=8000, n_sources=3)
model.eval()

# Random tensor of shape (batch_size, 1, T_short).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.randn(batch_size, 1, T_short)
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build ConvTasNet from its pretrained setup (task="musdb18",
# sample_rate=44100) and put it in evaluation mode.
model = ConvTasNet.build_from_pretrained(task="musdb18", sample_rate=44100)
model.eval()

# Random tensor of shape (batch_size, 1, stereo_channels, T_long); note the
# extra size-1 dimension compared with the 3-D inputs of the other
# ConvTasNet cells.
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.randn(batch_size, 1, stereo_channels, T_long)
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build ConvTasNet from its pretrained setup (task="wham/separate-noisy",
# sample_rate=8000) and put it in evaluation mode.
model = ConvTasNet.build_from_pretrained(task="wham/separate-noisy", sample_rate=8000)
model.eval()

# Random tensor of shape (batch_size, 1, T_short).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.randn(batch_size, 1, T_short)
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build ConvTasNet from its pretrained setup (task="wham/enhance-single",
# sample_rate=8000) and put it in evaluation mode.
model = ConvTasNet.build_from_pretrained(task="wham/enhance-single", sample_rate=8000)
model.eval()

# Random tensor of shape (batch_size, 1, T_short).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.randn(batch_size, 1, T_short)
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build ConvTasNet from its pretrained setup (task="wham/enhance-both",
# sample_rate=8000) and put it in evaluation mode.
model = ConvTasNet.build_from_pretrained(task="wham/enhance-both", sample_rate=8000)
model.eval()

# Random tensor of shape (batch_size, 1, T_short).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.randn(batch_size, 1, T_short)
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build ConvTasNet from its pretrained setup (task="librispeech",
# sample_rate=16000, n_sources=2) and put it in evaluation mode.
model = ConvTasNet.build_from_pretrained(task="librispeech", sample_rate=16000, n_sources=2)
model.eval()

# Random tensor of shape (batch_size, 1, T_short).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.randn(batch_size, 1, T_short)
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

## DPRNN-TasNet

In [None]:
# Build DPRNNTasNet from its pretrained setup (task="wsj0-mix",
# sample_rate=8000, n_sources=2) and put it in evaluation mode.
model = DPRNNTasNet.build_from_pretrained(task="wsj0-mix", sample_rate=8000, n_sources=2)
model.eval()

# Random tensor of shape (batch_size, 1, T_short).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.randn(batch_size, 1, T_short)
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build DPRNNTasNet from its pretrained setup (task="wsj0-mix",
# sample_rate=8000, n_sources=3) and put it in evaluation mode.
model = DPRNNTasNet.build_from_pretrained(task="wsj0-mix", sample_rate=8000, n_sources=3)
model.eval()

# Random tensor of shape (batch_size, 1, T_short).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.randn(batch_size, 1, T_short)
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build DPRNNTasNet from its pretrained setup (task="librispeech",
# sample_rate=16000, n_sources=2) and put it in evaluation mode.
model = DPRNNTasNet.build_from_pretrained(task="librispeech", sample_rate=16000, n_sources=2)
model.eval()

# Random tensor of shape (batch_size, 1, T_short).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.randn(batch_size, 1, T_short)
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

## MMDenseLSTM

In [None]:
# Build MMDenseLSTM from its pretrained setup (task="musdb18",
# sample_rate=44100, target="vocals") and put it in evaluation mode.
model = MMDenseLSTM.build_from_pretrained(task="musdb18", sample_rate=44100, target="vocals")
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1
# Non-negative random tensor of shape
# (batch_size, stereo_channels, n_bins, n_frames).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, stereo_channels, n_bins, n_frames))
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build ParallelMMDenseLSTM from its pretrained setup (task="musdb18",
# sample_rate=44100) and put it in evaluation mode.
model = ParallelMMDenseLSTM.build_from_pretrained(task="musdb18", sample_rate=44100)
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1

# 4-D input: call the model once per entry of model.sources, selecting the
# entry through the `target` keyword.
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, stereo_channels, n_bins, n_frames))
for target in model.sources:
    with torch.no_grad():
        output = model(dummy_input, target=target)
    print(target, dummy_input.size(), output.size())

# 5-D input with an extra size-1 dimension after the batch axis, passed in a
# single call without `target`.
dummy_input = torch.abs(torch.randn(batch_size, 1, stereo_channels, n_bins, n_frames))
with torch.no_grad():
    output = model(dummy_input)
print(dummy_input.size(), output.size())

## OpenUnmix

In [None]:
# Build OpenUnmix from its pretrained setup (task="musdb18",
# sample_rate=44100, target="vocals") and put it in evaluation mode.
model = OpenUnmix.build_from_pretrained(task="musdb18", sample_rate=44100, target="vocals")
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1
# Non-negative random tensor of shape
# (batch_size, stereo_channels, n_bins, n_frames).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, stereo_channels, n_bins, n_frames))
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build ParallelOpenUnmix from its pretrained setup (task="musdb18",
# sample_rate=44100) and put it in evaluation mode.
model = ParallelOpenUnmix.build_from_pretrained(task="musdb18", sample_rate=44100)
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1

# 4-D input: call the model once per entry of model.sources, selecting the
# entry through the `target` keyword.
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, stereo_channels, n_bins, n_frames))
for target in model.sources:
    with torch.no_grad():
        output = model(dummy_input, target=target)
    print(target, dummy_input.size(), output.size())

# 5-D input with an extra size-1 dimension after the batch axis, passed in a
# single call without `target`.
dummy_input = torch.abs(torch.randn(batch_size, 1, stereo_channels, n_bins, n_frames))
with torch.no_grad():
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build OpenUnmix from its pretrained setup (task="musdb18hq",
# sample_rate=44100, target="vocals") and put it in evaluation mode.
model = OpenUnmix.build_from_pretrained(task="musdb18hq", sample_rate=44100, target="vocals")
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1
# Non-negative random tensor of shape
# (batch_size, stereo_channels, n_bins, n_frames).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, stereo_channels, n_bins, n_frames))
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build ParallelOpenUnmix from its pretrained setup (task="musdb18hq",
# sample_rate=44100) and put it in evaluation mode.
model = ParallelOpenUnmix.build_from_pretrained(task="musdb18hq", sample_rate=44100)
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1

# 4-D input: call the model once per entry of model.sources, selecting the
# entry through the `target` keyword.
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, stereo_channels, n_bins, n_frames))
for target in model.sources:
    with torch.no_grad():
        output = model(dummy_input, target=target)
    print(target, dummy_input.size(), output.size())

# 5-D input with an extra size-1 dimension after the batch axis, passed in a
# single call without `target`.
dummy_input = torch.abs(torch.randn(batch_size, 1, stereo_channels, n_bins, n_frames))
with torch.no_grad():
    output = model(dummy_input)
print(dummy_input.size(), output.size())

## DPTNet

In [None]:
# Build DPTNet from its pretrained setup (task="wsj0-mix", sample_rate=8000,
# n_sources=2) and put it in evaluation mode.
model = DPTNet.build_from_pretrained(task="wsj0-mix", sample_rate=8000, n_sources=2)
model.eval()

# Random tensor of shape (batch_size, 1, T_short).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.randn(batch_size, 1, T_short)
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build DPTNet from its pretrained setup (task="wsj0-mix", sample_rate=8000,
# n_sources=3) and put it in evaluation mode.
model = DPTNet.build_from_pretrained(task="wsj0-mix", sample_rate=8000, n_sources=3)
model.eval()

# Random tensor of shape (batch_size, 1, T_short).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.randn(batch_size, 1, T_short)
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

## CrossNet-Open-Unmix

In [None]:
# Build CrossNetOpenUnmix from its pretrained setup (task="musdb18",
# sample_rate=44100) and put it in evaluation mode.
model = CrossNetOpenUnmix.build_from_pretrained(task="musdb18", sample_rate=44100)
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1
# Non-negative random 5-D tensor of shape
# (batch_size, 1, stereo_channels, n_bins, n_frames).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, 1, stereo_channels, n_bins, n_frames))
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

## D3Net

In [None]:
# Build D3Net from its pretrained setup (task="musdb18", sample_rate=44100,
# target="vocals") and put it in evaluation mode.
model = D3Net.build_from_pretrained(task="musdb18", sample_rate=44100, target="vocals")
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1
# Non-negative random tensor of shape
# (batch_size, stereo_channels, n_bins, n_frames).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, stereo_channels, n_bins, n_frames))
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build ParallelD3Net from its pretrained setup (task="musdb18",
# sample_rate=44100) and put it in evaluation mode.
model = ParallelD3Net.build_from_pretrained(task="musdb18", sample_rate=44100)
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1

# 4-D input: call the model once per entry of model.sources, selecting the
# entry through the `target` keyword.
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, stereo_channels, n_bins, n_frames))
for target in model.sources:
    with torch.no_grad():
        output = model(dummy_input, target=target)
    print(target, dummy_input.size(), output.size())

# 5-D input with an extra size-1 dimension after the batch axis, passed in a
# single call without `target`.
dummy_input = torch.abs(torch.randn(batch_size, 1, stereo_channels, n_bins, n_frames))
with torch.no_grad():
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build D3Net from its pretrained setup (task="musdb18hq", sample_rate=44100,
# target="vocals") and put it in evaluation mode.
model = D3Net.build_from_pretrained(task="musdb18hq", sample_rate=44100, target="vocals")
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1
# Non-negative random tensor of shape
# (batch_size, stereo_channels, n_bins, n_frames).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, stereo_channels, n_bins, n_frames))
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build ParallelD3Net from its pretrained setup (task="musdb18hq",
# sample_rate=44100) and put it in evaluation mode.
model = ParallelD3Net.build_from_pretrained(task="musdb18hq", sample_rate=44100)
model.eval()

# n_fft // 2 + 1 is the bin count of a one-sided n_fft-point spectrum.
n_bins = model.n_fft // 2 + 1

# 4-D input: call the model once per entry of model.sources, selecting the
# entry through the `target` keyword.
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.abs(torch.randn(batch_size, stereo_channels, n_bins, n_frames))
for target in model.sources:
    with torch.no_grad():
        output = model(dummy_input, target=target)
    print(target, dummy_input.size(), output.size())

# 5-D input with an extra size-1 dimension after the batch axis, passed in a
# single call without `target`.
dummy_input = torch.abs(torch.randn(batch_size, 1, stereo_channels, n_bins, n_frames))
with torch.no_grad():
    output = model(dummy_input)
print(dummy_input.size(), output.size())

## SepFormer

In [None]:
# Build SepFormer from its pretrained setup (task="wsj0-mix",
# sample_rate=8000, n_sources=2) and put it in evaluation mode.
model = SepFormer.build_from_pretrained(task="wsj0-mix", sample_rate=8000, n_sources=2)
model.eval()

# Random tensor of shape (batch_size, 1, T_short).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.randn(batch_size, 1, T_short)
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())

In [None]:
# Build SepFormer from its pretrained setup (task="wsj0-mix",
# sample_rate=8000, n_sources=3) and put it in evaluation mode.
model = SepFormer.build_from_pretrained(task="wsj0-mix", sample_rate=8000, n_sources=3)
model.eval()

# Random tensor of shape (batch_size, 1, T_short).
# Named `dummy_input` (not `input`) so the builtin input() is not shadowed.
dummy_input = torch.randn(batch_size, 1, T_short)
with torch.no_grad():  # forward pass only, no gradient tracking
    output = model(dummy_input)
print(dummy_input.size(), output.size())