In [1]:
import tensorflow as tf
import numpy as np  

2024-08-27 05:16:27.006184: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-27 05:16:29.002579: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-27 05:16:29.504269: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-27 05:16:33.474599: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
from keras.layers import Dense,Conv1D,Conv1DTranspose,PReLU,LayerNormalization
from keras import Model,Sequential

In [3]:
class Encoder(Model):
    def __init__(self,L:int,N:int)->None:
        super(Encoder,self).__init__()
        """
        N: number of basis signals
        L: Length of audio segment
        """
        self.L=L
        self.N=N
        self.conv1d_U=Conv1D(filters=self.N,kernel_size=self.L,strides=self.L//2,use_bias=False,activation='relu',data_format='channels_last')
    
    def call(self,x):
        return self.conv1d_U(x)


In [4]:
encoder=Encoder(10,50)

In [5]:
x=np.random.rand(1,10,1)

In [6]:
out=encoder(x)

I0000 00:00:1724735835.822838     927 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1724735844.575834     927 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1724735844.575899     927 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1724735844.587040     927 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1724735844.587120     927 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:0

In [7]:
out.shape

TensorShape([1, 1, 50])

In [8]:
class Decoder(Model):
    def __init__(self,L:int,N:int)->None:
        super(Decoder,self).__init__()
        self.L=L
        self.N=N
        self.conv1d_trans=Conv1DTranspose(filters=1,kernel_size=L,strides=L//2,use_bias=False,activation='relu',data_format='channels_last')
    
    def call(self,x):
        return self.conv1d_trans(x)


In [9]:
decoder=Decoder(10,50)

In [10]:
decoder_out=decoder(out)

In [11]:
decoder_out.shape

TensorShape([1, 10, 1])

In [12]:
class Conv1D_block(Model):
    def __init__(self,out_channels=512,kernel_size=3,dilation=1):
        super(Conv1D_block,self).__init__()
        self.out_channels=out_channels
        self.kernel_size=kernel_size
        self.dilation=dilation
        self.conv1x1=Conv1D(filters=self.out_channels,kernel_size=1,data_format='channels_last')
        self.PReLu1=PReLU()
        self.norm_1=LayerNormalization()
        self.pad=self.dilation*(self.kernel_size-1)
        self.dwconv=Conv1D(filters=self.out_channels,kernel_size=self.kernel_size,groups=self.out_channels,padding='same',dilation_rate=self.dilation)
        self.Sc_conv=Conv1D(filters=self.out_channels,kernel_size=1,use_bias=True)
    
    def call(self,x):
        c=self.conv1x1(x)
        c=self.PReLu1(c)
        c=self.norm_1(c)
        c=self.dwconv(c)
        c=self.Sc_conv(c)
        return x+c
    

In [32]:
class ConvTasnet(Model):
    def __init__(self,
                 N=512,
                 L=16,
                 B=128,
                 H=512,
                 P=3,
                 X=8,
                 R=3,
                 nspk=2):
        super(ConvTasnet,self).__init__()
        self.encoder=Encoder(L=L,N=N)
        self.layer_norm=LayerNormalization()
        self.bottle_neck=Conv1D(filters=B,kernel_size=1)
        self.seperation=self._sequential_repeat(R,X,out_channels=H,kernel_size=P)
        self.gen_mask=Conv1D(filters=N*nspk,kernel_size=1)
        self.decoder=Decoder(L,N)
        self.nspk=nspk

    def _sequential_block(self,num_blocks,**block_kwargs):
        conv1d_block_list=[
            Conv1D_block(**block_kwargs,dilation=2**i) for i in range(num_blocks)
        ]
        return Sequential(conv1d_block_list)
    
    def _sequential_repeat(self,num_repeat,num_block,**block_kwargs):
        repeat_list=[self._sequential_block(num_blocks=num_block,**block_kwargs) for _ in range(num_repeat)]
        return Sequential(repeat_list)

    def call(self,x):
        w=self.encoder(x)
        e=self.layer_norm(w)
        e=self.bottle_neck(e)
        e=self.seperation(e)
        m=self.gen_mask(e)
        m=tf.split(m,num_or_size_splits=self.nspk,axis=2)
        d = [w*m[i] for i in range(self.nspk)]
        s = [self.decoder(d[i]) for i in range(self.nspk)]
        return s
    

In [14]:
x=np.random.rand(2,1000,8)

In [15]:
encoder=Encoder(L=1000,N=250)

In [16]:
w=encoder(x)
w.shape

TensorShape([2, 1, 250])

In [17]:
ln=LayerNormalization()

In [18]:
e=ln(w)
e.shape 

TensorShape([2, 1, 250])

In [19]:
bn=Conv1D(filters=128,kernel_size=1)

In [20]:
e=bn(e)
e.shape

TensorShape([2, 1, 128])

In [21]:
c1d=Conv1D_block(out_channels=128)

In [22]:
e=c1d(e)
e.shape

I0000 00:00:1724735857.060321     927 service.cc:146] XLA service 0x9e37cd0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1724735857.060399     927 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 Laptop GPU, Compute Capability 8.6
2024-08-27 05:17:37.677494: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907
I0000 00:00:1724735865.844911     927 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


TensorShape([2, 1, 128])

In [23]:
mask=Conv1D(filters=128*2,kernel_size=1)

In [24]:
m=mask(e)

In [25]:
m.shape

TensorShape([2, 1, 256])

In [26]:
splt=tf.split(m,num_or_size_splits=2,axis=2)

In [31]:
print(splt[0].shape)

(2, 1, 128)


In [34]:
d=[w*splt[i] for i in range(2)]

2024-08-27 05:25:49.696885: W tensorflow/core/framework/op_kernel.cc:1828] INVALID_ARGUMENT: required broadcastable shapes
2024-08-27 05:25:49.697000: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: INVALID_ARGUMENT: required broadcastable shapes


InvalidArgumentError: {{function_node __wrapped__Mul_device_/job:localhost/replica:0/task:0/device:GPU:0}} required broadcastable shapes [Op:Mul] name: 

In [35]:
w.shape,splt[0].shape

(TensorShape([2, 1, 250]), TensorShape([2, 1, 128]))