# Speaker split (SepFormer source separation)

In [1]:
%%capture
# Install SpeechBrain into the kernel's environment; %%capture hides the installer output.
%pip install speechbrain

In [2]:
from speechbrain.pretrained import SepformerSeparation as separator
import torchaudio

# Build the separation model from the "speechbrain/sepformer-whamr16k"
# checkpoint; `savedir` is the local directory handed to from_hparams for
# the model files.
splitter = separator.from_hparams(
    source="speechbrain/sepformer-whamr16k",
    savedir='pretrained_models/sepformer-whamr16k',
)

# resource efficient model
#splitter = separator.from_hparams(source="speechbrain/resepformer-wsj02mix", savedir='pretrained_models/resepformer-wsj02mix')


In [None]:
import os
from timeit import default_timer as timer


# Directory holding the input recordings and the directory that receives the
# separated tracks. For custom files, change these paths.

SOURCE = "/content/send/"
SAVE = "/content/send/split/"

# Sample rate written to the output files. Taken from the loaded model's
# hyperparameters so that swapping `splitter` for a model running at a
# different rate does not produce wrong-speed audio; falls back to the
# previous hard-coded 16000 when the attribute is missing.
SAMPLE_RATE = int(getattr(getattr(splitter, "hparams", None), "sample_rate", 16000))

# makedirs also creates missing parents and is a no-op when SAVE already exists.
os.makedirs(SAVE, exist_ok=True)

for FILE in sorted(os.listdir(SOURCE)):
  # Match on the real extension (case-insensitively) rather than on a
  # ".wav" substring, so names such as "a.wav.bak" are skipped and the
  # stem is taken without touching other ".wav" occurrences in the name.
  NAME, EXT = os.path.splitext(FILE)
  if EXT.lower() != ".wav": continue
  print(f"\nsplitting {FILE}...\n")
  start = timer()
  est_sources = splitter.separate_file(path=os.path.join(SOURCE,FILE))
  print(f"\ntime taken to split {FILE} is {timer() - start}\n")

  print(f"\nsaving {FILE} splits...\n")

  # The last axis of est_sources indexes the estimated sources; write one
  # file per source ("<stem>_source1hat.wav", "<stem>_source2hat.wav", ...).
  for INDEX in range(est_sources.shape[-1]):
    torchaudio.save(os.path.join(SAVE, f"{NAME}_source{INDEX + 1}hat.wav"), est_sources[:, :, INDEX].detach().cpu(), SAMPLE_RATE)


splitting 1.wav...


time taken to split 1.wav is 160.97586940499968


saving 1.wav splits...


splitting 2.wav...


time taken to split 2.wav is 309.542979550999


saving 2.wav splits...


splitting 3.wav...



In [32]:
import IPython.display as ipd
# Load the recording named '3.wav' from SOURCE and show an inline player;
# the trailing bare expression is what the cell renders.
clip_path = os.path.join(SOURCE, '3.wav')
s, f = torchaudio.load(clip_path)
ipd.Audio(data=s, rate=f)

# Speech enhancement (MetricGAN+)

In [15]:
import torch
import torchaudio
from speechbrain.pretrained import SpectralMaskEnhancement

# Build the enhancement model from the "speechbrain/metricgan-plus-voicebank"
# checkpoint, using `savedir` as the local directory for its files.
enhance_model = SpectralMaskEnhancement.from_hparams(
    savedir="pretrained_models/metricgan-plus-voicebank",
    source="speechbrain/metricgan-plus-voicebank",
)

# Read the track to enhance, then prepend a batch axis of size 1.
noisy = enhance_model.load_audio("./source2hat.wav")
noisy = noisy.unsqueeze(0)

# Relative length tensor with one entry (1.0) for the single batch item.
rel_lengths = torch.tensor([1.])
enhanced = enhance_model.enhance_batch(noisy, lengths=rel_lengths)

# Write the enhanced signal to disk with a sample rate of 16000.
torchaudio.save('source2hat_enhance2.wav', enhanced.cpu(), 16000)
