In [1]:
# Mount Google Drive under /content/drive so the project folder on the shared drive is reachable (Colab only).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Show which GPU (if any) this runtime was allocated.
!nvidia-smi

Thu Jun 10 06:34:34 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.27       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   52C    P8    10W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
# Working directory of this evaluation notebook; the relative paths used in later cells resolve against it.
path = '/content/drive/Shareddrives/CLSE/test'

In [4]:
# Change into the directory stored in `path` (IPython `cd` automagic with variable expansion).
cd $path

/content/drive/Shareddrives/CLSE/test


In [5]:
!pip install pypesq
!pip install pystoi
!pip install einops

Collecting pypesq
  Downloading https://files.pythonhosted.org/packages/c7/e7/97ef80281de134be48b352bb63218a5f92f96c5bce32f37e8e288d253eff/pypesq-1.2.4.tar.gz
Building wheels for collected packages: pypesq
  Building wheel for pypesq (setup.py) ... [?25l[?25hdone
  Created wheel for pypesq: filename=pypesq-1.2.4-cp37-cp37m-linux_x86_64.whl size=84241 sha256=4bd33b64f178781293d795b4b73f9c12e055227b77596aa7601df9fd94414855
  Stored in directory: /root/.cache/pip/wheels/c9/29/a6/ed19583426cffac50b3e75160f894120a49f9bafa585a2edbe
Successfully built pypesq
Installing collected packages: pypesq
Successfully installed pypesq-1.2.4


Collecting pystoi
  Downloading https://files.pythonhosted.org/packages/4c/d8/58a5deefb57e3ca78d6421d2ec536880a29ac0e497b2d1baded6f2153beb/pystoi-0.3.3.tar.gz
Building wheels for collected packages: pystoi
  Building wheel for pystoi (setup.py) ... [?25l[?25hdone
  Created wheel for pystoi: filename=pystoi-0.3.3-py2.py3-none-any.whl size=7794 sha256=f349beb420c6f192a2875d15b16eae4e80214f478d94951f9e10b79861cb893f
  Stored in directory: /root/.cache/pip/wheels/6b/6c/b2/d49af4b7ee4ce275cf2511484b043e09b9cb7ae90c1accb17a
Successfully built pystoi
Installing collected packages: pystoi
Successfully installed pystoi-0.3.3
Collecting einops
  Downloading https://files.pythonhosted.org/packages/5d/a0/9935e030634bf60ecd572c775f64ace82ceddf2f504a5fd3902438f07090/einops-0.3.0-py2.py3-none-any.whl
Installing collected packages: einops
Successfully installed einops-0.3.0


In [6]:
import torch
import torch.nn as nn
from torch.nn import functional as F
import numpy as np
import random
import math
from tqdm import tqdm
from pypesq import pesq
from pystoi import stoi
from functools import partial
from einops.layers.torch import Rearrange, Reduce
import gc

import csv
from IPython.display import Audio

In [7]:
# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [8]:
def _fit_to_length(signal, length, pad_mode):
  """Return exactly `length` samples: pad the tail when too short, otherwise take a random crop."""
  missing = length - len(signal)
  if missing > 0:
    return np.pad(signal, (0, missing), pad_mode)
  offset = random.randrange(0, len(signal) - length + 1)
  return signal[offset:offset + length]


def _unit_energy(signal):
  """Subtract the mean and scale the result to unit L2 norm."""
  centered = signal - np.mean(signal)
  return centered / np.linalg.norm(centered, 2)


def get_datas(speechs, noises, snrs, length:int=16384):
  """Build a batch of fixed-length clean / noise / noisy signals.

  For every index i the speech and noise clips are brought to `length` samples
  (random crop when long enough, otherwise padded: 'wrap' for speech, 'reflect'
  for noise), mean-removed and scaled to unit L2 norm. The noise is then divided
  by 10**(snrs[i]/20), so the speech-to-noise energy ratio of the mixture is
  snrs[i] dB. Finally the clean speech, the scaled noise and their sum are each
  divided by their own standard deviation.

  Args:
    speechs: sequence of 1-D speech signals.
    noises: sequence of 1-D noise signals, same count as `speechs`.
    snrs: sequence giving the SNR in dB for each pair.
    length: number of samples per returned signal.

  Returns:
    (clean_sounds, noise_sounds, noisy_sounds), each an array of shape
    (len(speechs), 1, length). Because the three signals are rescaled
    independently, the returned noisy signal is not the sum of the other two.
  """
  assert len(speechs) == len(noises), 'The number of voices must be equal to the number of noises'
  clean_sounds, noise_sounds, noisy_sounds = [], [], []

  for idx in range(len(speechs)):
    # Speech is processed before noise so the random crop offsets are drawn in a fixed order.
    speech = _unit_energy(_fit_to_length(np.array(speechs[idx]), length, 'wrap'))
    noise = _unit_energy(_fit_to_length(np.array(noises[idx]), length, 'reflect'))
    noise = noise / (10.0 ** (0.05 * snrs[idx]))

    mixture = speech + noise

    clean_sounds.append(speech / np.std(speech))
    noise_sounds.append(noise / np.std(noise))
    noisy_sounds.append(mixture / np.std(mixture))

  def as_batch(signals):
    # (batch, length) -> (batch, 1, length)
    return np.expand_dims(np.stack(signals, 0), 1)

  return as_batch(clean_sounds), as_batch(noise_sounds), as_batch(noisy_sounds)

In [9]:
class SI_SNR(torch.nn.Module):
  """Scale-invariant signal-to-noise ratio in dB, averaged over the batch.

  `pred` is projected onto `target` to obtain the target component; whatever is
  left over is treated as noise. Energies are summed over dims 1 and 2, so the
  inputs are expected to be 3-D with the batch on dim 0. Both energy terms are
  floored at `eps` to keep the division and the logarithm finite.

  Args:
    eps: lower bound applied to every energy term.
  """
  def __init__(self,eps=1e-04):
    super(SI_SNR, self).__init__()
    # Stored device-neutral; forward() moves it next to its inputs, so the module
    # does not rely on a module-level `device` variable and works for CPU and GPU inputs alike.
    self.eps = torch.tensor(eps)

  def forward(self, pred, target,):
    """Return the mean SI-SNR (a 0-dim tensor) of `pred` with respect to `target`."""
    eps = self.eps.to(pred.device)
    reduce_dims = (1, 2)

    # Projection of pred onto target: <pred, target> / ||target||^2 * target
    dot = torch.sum(pred*target, reduce_dims, keepdim=True)
    target_energy = torch.maximum(torch.sum(target**2, reduce_dims, keepdim=True), eps)
    s_target = target*(dot/target_energy)
    e_noise = pred-s_target

    signal_energy = torch.maximum(torch.sum(s_target**2, reduce_dims, keepdim=True), eps)
    noise_energy = torch.maximum(torch.sum(e_noise**2, reduce_dims, keepdim=True), eps)
    sisnr = 10*torch.log10(signal_energy/noise_energy)
    return sisnr.mean()

In [10]:
class Block(nn.Module):
  """Residual 1-D convolution block with a split hidden path.

  A pointwise convolution expands `in_dim` channels to `2*dim` and the result is
  split in two halves along the channel axis. One half goes through grouped
  (optionally dilated) convolutions and is added to the other half; a final
  pointwise convolution maps back to `in_dim` channels and the block input is
  added as a skip connection.

  Args:
    in_dim: channels of the block input and output.
    dim: channels of each hidden half.
    k_size: kernel size of the (dilated) grouped convolution.
    dilation: when 1, a single grouped convolution is used; otherwise an
      undilated grouped convolution of kernel `2*dilation-1` precedes the dilated one.
    chunk_size: channels per group, i.e. the grouped convolutions use `dim // chunk_size` groups.
  """
  def __init__(self, in_dim, dim, k_size, dilation=1, chunk_size=1,):
    super().__init__()
    n_groups = dim // chunk_size

    # Pointwise expansion to 2*dim channels (split into two halves in forward()).
    self.conv1 = nn.Sequential(
        nn.BatchNorm1d(in_dim),
        nn.Conv1d(in_dim, 2 * dim, 1),
        nn.GELU(),
      )

    # The layer order below fixes the Sequential indices, which are part of the state-dict keys.
    if dilation == 1:
      mixing_layers = [
          nn.BatchNorm1d(dim),
          nn.Conv1d(dim, dim, k_size, padding=k_size // 2, groups=n_groups),
          nn.GELU(),
        ]
    else:
      local_kernel = dilation * 2 - 1
      mixing_layers = [
          nn.BatchNorm1d(dim),
          nn.Conv1d(dim, dim, local_kernel, padding=local_kernel // 2, groups=n_groups),
          nn.GELU(),
          nn.Conv1d(dim, dim, k_size, padding=(k_size // 2) * dilation, groups=n_groups, dilation=dilation),
          nn.GELU(),
        ]
    self.conv2 = nn.Sequential(*mixing_layers)

    # Pointwise projection back to the input width.
    self.conv3 = nn.Sequential(
        nn.BatchNorm1d(dim),
        nn.Conv1d(dim, in_dim, 1),
        nn.GELU(),
      )

  def forward(self, x):
    """Apply the block to `x` of shape (batch, in_dim, length); the output has the same channel count."""
    passthrough, to_mix = torch.chunk(self.conv1(x), 2, dim=1)
    hidden = passthrough + self.conv2(to_mix)
    return self.conv3(hidden) + x

In [11]:
class Model(nn.Module):
  """Waveform-to-waveform network: two parallel encoders, a projection stack, a prediction stack and a decoder.

  `project` encodes the waveform twice (patch embedding and a strided CNN),
  concatenates both feature maps along the channel axis and runs the projection
  blocks; `predict` runs the prediction blocks; `decode` maps the features back
  to a single-channel waveform. `forward` chains the three.

  Args:
    patch_size: samples folded into one frame by the patch encoder/decoder.
    dim: feature channels used throughout the network.
    expansion_factor: hidden width of each Block is `dim * expansion_factor`.
    proj_depth: number of Blocks in the projection stack.
    pred_depth: number of Blocks in the prediction stack.
    k_size, dilation, chunk_size: forwarded to every Block.
  """
  def __init__(self, patch_size = 64,
            dim = 128,
            expansion_factor = 2,
            proj_depth = 4,
            pred_depth = 2,
            k_size = 9,
            dilation = 16,
            chunk_size = 8,):
    super().__init__()
    hidden_dim = dim * expansion_factor

    def make_block():
      return Block(dim, hidden_dim, k_size, dilation, chunk_size,)

    # CNN encoder: three conv -> GELU -> max-pool(4) -> batch-norm stages.
    # The pooling shortens the time axis by 4*4*4 = 64 overall, so its output only lines up
    # with the patch encoder (length / patch_size frames) when patch_size is 64.
    cnn_layers = []
    for c_in, c_out, n_groups in ((1, dim//4, 1), (dim//4, dim//2, 4), (dim//2, dim, 4)):
      cnn_layers += [
          nn.Conv1d(c_in, c_out, 5, padding=2, groups=n_groups),
          nn.GELU(),
          nn.MaxPool1d(4,4),
          nn.BatchNorm1d(c_out),
        ]
    self.cnn_enc = nn.Sequential(*cnn_layers)

    # Patch encoder: fold every `patch_size` consecutive samples into the channel axis.
    # The following Conv1d expects exactly `patch_size` channels, i.e. a single-channel input.
    self.enc = nn.Sequential(
              Rearrange('b c (l p) -> b (p c) l', p = patch_size),
              nn.Conv1d(patch_size, dim, 3, padding=1),
            )

    # Projection stack: fuse the two concatenated encodings (2*dim channels) down to dim.
    projection_layers = [nn.Conv1d(dim*2, dim, 1)]
    projection_layers += [make_block() for _ in range(proj_depth)]
    self.project_net = nn.Sequential(*projection_layers)

    self.predict_net = nn.Sequential(*[make_block() for _ in range(pred_depth)])

    # Decoder: inverse of the patch encoder, unfolding frames back into samples.
    self.dec = nn.Sequential(
              nn.Conv1d(dim, patch_size, 3, padding=1),
              Rearrange('b (p c) l -> b c (l p)', p = patch_size),
            )

  def forward(self, x):
    """Run project -> predict -> decode on the waveform batch `x`."""
    return self.decode(self.predict(self.project(x)))

  def project(self, x):
    """Encode `x` with both encoders and return the projected features."""
    patch_features = self.enc(x)
    cnn_features = self.cnn_enc(x)
    return self.project_net(torch.cat([patch_features, cnn_features],dim=1))

  def predict(self, z):
    """Apply the prediction stack to projected features `z`."""
    return self.predict_net(z)

  def decode(self, p):
    """Map features `p` back to a waveform."""
    return self.dec(p)

In [12]:
def D(p, z):
  """Negative cosine similarity between `p` and `z` along dim 1, averaged over the remaining dims.

  `z` is detached first, so gradients only flow through `p`.
  """
  target = z.detach()
  p_unit = p / torch.norm(p, dim=1, keepdim=True)
  target_unit = target / torch.norm(target, dim=1, keepdim=True)
  cosine = torch.sum(p_unit * target_unit, dim=1)
  return -cosine.mean()

In [13]:
# Architecture hyper-parameters of the evaluated network. The checkpoints loaded later in this
# notebook must have been saved from a model built with the same values.
patch_size = 64
dim = 128
expansion_factor = 2
proj_depth = 4
pred_depth = 2
k_size = 9
dilation = 16
chunk_size = 32

model = Model(
    patch_size=patch_size,
    dim=dim,
    expansion_factor=expansion_factor,
    proj_depth=proj_depth,
    pred_depth=pred_depth,
    k_size=k_size,
    dilation=dilation,
    chunk_size=chunk_size,
).to(device)

In [14]:
# Total number of parameters in the model.
sum(p.numel() for p in model.parameters())

2666368

In [15]:
# Load the speech and noise clips used for this evaluation.
# NOTE(review): the variables are named test_* but the files read are train_*.npy — confirm this is the intended split.
# NOTE(review): allow_pickle=True unpickles arbitrary objects; only load .npy files from a trusted source.
test_speechs = np.load("../dataset/train_speechs.npy",allow_pickle=True)
test_noises = np.load("../dataset/train_noises.npy",allow_pickle=True)

In [22]:
# Number of speech clips loaded.
test_speechs.shape

(4120,)

In [16]:
# Shared SI-SNR metric instance used by the evaluation helpers test_noisy() and test().
sisnr_fn = SI_SNR()

In [17]:
def test_noisy(clean,noisy):
  """Score the unprocessed noisy signals against the clean references.

  SI-SNR is computed once over the whole batch with `sisnr_fn`; PESQ and STOI
  (16 kHz) are computed per sample on channel 0 and averaged.

  Returns:
    dict with keys 'pesq', 'stoi' and 'sisnr'.
  """
  sisnr = sisnr_fn(noisy,clean).item()
  n_samples = len(clean)

  pesq_total = 0
  stoi_total = 0
  for idx in range(n_samples):
    pesq_total += pesq(clean[idx,0].cpu(),noisy[idx,0].cpu())
    stoi_total += stoi(clean[idx,0].cpu(),noisy[idx,0].cpu(),16000)

  return {'pesq':pesq_total/n_samples,'stoi':stoi_total/n_samples,'sisnr':sisnr}

In [29]:
def test(model,clean,noisy,bz):
  """Run `model` on the noisy signals in batches and score the output against the clean references.

  Every sample is evaluated, including a trailing batch smaller than `bz`.
  The model output has its mean over the last axis removed before scoring.
  SI-SNR comes from `sisnr_fn` (a per-batch mean) and is weighted by batch size,
  so all three metrics are per-sample averages over `len(clean)`.

  Args:
    model: network mapping a noisy batch to an enhanced batch of the same shape.
    clean: reference signals, indexed as [sample, channel, time].
    noisy: model inputs aligned with `clean`.
    bz: batch size used for inference.

  Returns:
    dict with keys 'pesq', 'stoi' and 'sisnr'.

  Raises:
    ValueError: if `bz` is not positive.
    ZeroDivisionError: if `clean` is empty.
  """
  if bz <= 0:
    raise ValueError('bz must be a positive batch size, got {}'.format(bz))

  total = len(clean)
  acc = {'pesq':0,'stoi':0,'sisnr':0}
  for start in range(0, total, bz):
    clean_batch = clean[start:start+bz]
    batch_len = len(clean_batch)
    with torch.no_grad():
      preds = model(noisy[start:start+bz])
      preds -= torch.mean(preds,dim=-1,keepdim=True)
      # sisnr_fn returns the batch mean; weight it so a short last batch does not skew the average.
      acc['sisnr'] += sisnr_fn(preds,clean_batch).item() * batch_len

    for i in range(batch_len):
      acc['pesq'] += pesq(clean_batch[i,0].cpu(),preds[i,0].cpu())
      acc['stoi'] += stoi(clean_batch[i,0].cpu(),preds[i,0].cpu(),16000)

  acc['pesq'] /= total
  acc['stoi'] /= total
  acc['sisnr'] /= total
  return acc

In [19]:
# Checkpoint directories to evaluate (each is expected to contain a state.pt file).
model_names = ['Normal-few','SimSiam-few','BYOL-few']

# Fix the RNG so the noise pairing below and the random crops taken inside get_datas
# are the same on every run; otherwise the written CSV cannot be reproduced.
random.seed(0)

speechs = test_speechs
# Pair every speech clip with a randomly drawn (with replacement) noise clip.
noises = test_noises[random.choices(range(len(test_noises)), k=len(speechs))]

# One evaluation set per SNR condition (dB), each built from 2*16384-sample excerpts.
eval_sets = []
for snr in (-7.5, -2.5, 2.5, 7.5):
  snrs = [snr for _ in range(len(speechs))]
  clean_np, _, noisy_np = get_datas(speechs, noises, snrs, 16384*2)
  eval_sets.append((torch.tensor(clean_np).to(device), torch.tensor(noisy_np).to(device)))
# Drop the last NumPy copies; only the tensors are needed from here on.
del clean_np, noisy_np

# Keep the original per-condition names used by the evaluation cells.
((clean_sounds1, noisy_sounds1),
 (clean_sounds2, noisy_sounds2),
 (clean_sounds3, noisy_sounds3),
 (clean_sounds4, noisy_sounds4)) = eval_sets
del eval_sets

In [20]:
with open('./test_few_acc.csv', 'w', newline='') as csvfile:
  # Define the columns: model name, then for each metric the four SNR conditions followed by their mean
  heads = [
      'Model',
      'PESQ:-7.5', 'PESQ:-2.5', 'PESQ:2.5', 'PESQ:7.5', 'PESQ',
      'STOI:-7.5', 'STOI:-2.5', 'STOI:2.5', 'STOI:7.5', 'STOI',
      'SI-SNR:-7.5', 'SI-SNR:-2.5', 'SI-SNR:2.5', 'SI-SNR:7.5', 'SI-SNR',
    ]

  writer = csv.writer(csvfile)
  writer.writerow(heads)

  # Baseline: score the unprocessed noisy mixtures at each SNR condition
  acc1, acc2, acc3, acc4 = [
      test_noisy(clean_set, noisy_set)
      for clean_set, noisy_set in (
          (clean_sounds1, noisy_sounds1),
          (clean_sounds2, noisy_sounds2),
          (clean_sounds3, noisy_sounds3),
          (clean_sounds4, noisy_sounds4),
      )
    ]

  row = ["Noisy"]
  for metric in ('pesq', 'stoi', 'sisnr'):
    scores = [acc1[metric], acc2[metric], acc3[metric], acc4[metric]]
    row += scores + [(scores[0]+scores[1]+scores[2]+scores[3])/4]
  writer.writerow(row)

In [30]:
# Collect unreferenced Python objects and release PyTorch's cached GPU memory before running the models.
gc.collect()
torch.cuda.empty_cache()

In [None]:
with open('./test_few_acc.csv', 'a+', newline='') as csvfile:
  writer = csv.writer(csvfile)
  # (clean, noisy) pairs, one per SNR condition, in the same order as the CSV columns
  conditions = (
      (clean_sounds1, noisy_sounds1),
      (clean_sounds2, noisy_sounds2),
      (clean_sounds3, noisy_sounds3),
      (clean_sounds4, noisy_sounds4),
    )
  for name in model_names:
    print(name)
    # Load this checkpoint's weights into the shared model and switch to inference mode
    checkpoint = torch.load("../{}/state.pt".format(name),map_location=torch.device(device))
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    acc1, acc2, acc3, acc4 = [test(model,clean_set,noisy_set,412) for clean_set, noisy_set in conditions]

    # One CSV row per model: for each metric the four conditions followed by their mean
    row = [name]
    for metric in ('pesq', 'stoi', 'sisnr'):
      scores = [acc1[metric], acc2[metric], acc3[metric], acc4[metric]]
      row += scores + [(scores[0]+scores[1]+scores[2]+scores[3])/4]
    writer.writerow(row)

Normal-few
SimSiam-few
