<a href="https://colab.research.google.com/github/shuklas664/English_Yoruba_Transformer/blob/master/IB_Diarization_New.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install webrtcvad

In [None]:
import os
from os.path import isfile, isdir, join
from pathlib import Path
import math
import numpy as np
# import numpy as np
import random
# import math
from scipy.stats import multivariate_normal
from matplotlib import pyplot as plt
from scipy.spatial import distance


In [None]:
import collections
import contextlib
import sys
import wave
import webrtcvad


def read_wave(path):
    """Load a mono, 16-bit PCM .wav file.

    Returns a ``(pcm_bytes, sample_rate)`` tuple. An ``AssertionError`` is
    raised unless the file has exactly one channel, a sample width of two
    bytes and a sample rate of 8000, 16000, 32000 or 48000 Hz.
    """
    reader = wave.open(path, 'rb')
    with contextlib.closing(reader):
        # Validate the format before reading any audio.
        assert reader.getnchannels() == 1
        assert reader.getsampwidth() == 2
        rate = reader.getframerate()
        assert rate in (8000, 16000, 32000, 48000)
        frames = reader.readframes(reader.getnframes())
    return frames, rate


def write_wave(path, audio, sample_rate):
    """Save raw PCM bytes as a mono, 16-bit .wav file.

    ``path`` is the destination file, ``audio`` the PCM byte string and
    ``sample_rate`` the sampling frequency in Hz.
    """
    writer = wave.open(path, 'wb')
    with contextlib.closing(writer):
        # (channels, sample width, rate, frame count, compression type/name);
        # the frame count is patched by writeframes().
        writer.setparams((1, 2, sample_rate, 0, 'NONE', 'not compressed'))
        writer.writeframes(audio)


class Frame:
    """One chunk of audio together with its start time and length."""

    def __init__(self, bytes, timestamp, duration):
        # `bytes` shadows the builtin, but it is the public parameter name
        # and is kept for callers that pass it by keyword.
        self.bytes, self.timestamp, self.duration = bytes, timestamp, duration


def frame_generator(frame_duration_ms, audio, sample_rate):
    """Split PCM audio into fixed-length frames.

    Arguments:
    frame_duration_ms - length of each frame in milliseconds.
    audio - PCM data as bytes; the size computation below assumes two
            bytes per sample.
    sample_rate - sampling frequency in Hz.

    Yields Frame objects of exactly the requested duration. Trailing audio
    that does not fill a whole frame is dropped.

    Raises ValueError (on first iteration) if the frame size works out to
    zero bytes, which previously caused an endless stream of empty frames.
    """
    # Bytes per frame: samples per frame times 2 bytes per sample.
    n = int(sample_rate * (frame_duration_ms / 1000.0) * 2)
    if n <= 0:
        raise ValueError(
            'frame_duration_ms and sample_rate must give a positive frame size')
    duration = (float(n) / sample_rate) / 2.0
    timestamp = 0.0
    # `len(audio) - n + 1` makes the bound inclusive, so a final frame that
    # ends exactly at the end of the audio is emitted instead of lost.
    for offset in range(0, len(audio) - n + 1, n):
        yield Frame(audio[offset:offset + n], timestamp, duration)
        timestamp += duration


def vad_collector(sample_rate, frame_duration_ms,
                  padding_duration_ms, vad, frames):
    """Yield the voiced stretches of a frame sequence as PCM byte strings.

    A sliding window of ``padding_duration_ms / frame_duration_ms`` frames
    drives a two-state machine. While idle, the collector starts a segment
    as soon as more than 90% of the window is voiced, seeding the segment
    with the frames already in the window. While collecting, every frame
    is kept, and the segment is emitted once more than 90% of the window
    is unvoiced. Any segment still open when the input ends is emitted too.

    A trace is written to stdout: '1'/'0' per frame, '+(t)' when a segment
    opens and '-(t)' when it closes, followed by a newline at the end.

    Arguments:
    sample_rate - audio sample rate in Hz, forwarded to ``vad.is_speech``.
    frame_duration_ms - duration of one frame in milliseconds.
    padding_duration_ms - length of the sliding window in milliseconds.
    vad - object exposing ``is_speech(frame_bytes, sample_rate)``.
    frames - iterable of frames with ``bytes``, ``timestamp``, ``duration``.
    """
    window = collections.deque(
        maxlen=int(padding_duration_ms / frame_duration_ms))
    threshold = 0.9 * window.maxlen
    collecting = False
    segment = []

    for frame in frames:
        voiced = vad.is_speech(frame.bytes, sample_rate)
        sys.stdout.write('1' if voiced else '0')

        if collecting:
            # Keep everything while a segment is open; the window only
            # decides when to close it.
            segment.append(frame)
            window.append((frame, voiced))
            silent_count = sum(1 for _, flag in window if not flag)
            if silent_count > threshold:
                sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
                collecting = False
                yield b''.join([item.bytes for item in segment])
                window.clear()
                segment = []
            continue

        window.append((frame, voiced))
        voiced_count = sum(1 for _, flag in window if flag)
        if voiced_count > threshold:
            collecting = True
            sys.stdout.write('+(%s)' % (window[0][0].timestamp,))
            # The buffered frames are the lead-in of the new segment.
            segment.extend(item for item, _ in window)
            window.clear()

    if collecting:
        sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
    sys.stdout.write('\n')
    # Flush whatever is still buffered once the input is exhausted.
    if segment:
        yield b''.join([item.bytes for item in segment])


In [None]:
def main(path='/content/amicorpus/',
         out_root='/content/drive/My Drive/amicorpus_non_silence',
         aggressiveness=1):
    """Strip non-speech audio from every meeting recording under `path`.

    For each sub-directory ``<d>`` of `path`, reads
    ``<path>/<d>/audio/<d>.Mix-Headset.wav``, keeps only the segments the
    VAD marks as voiced (30 ms frames, 300 ms padding window) and writes
    their concatenation to ``<out_root>/<d>/audio/<d>.Mix-Headset.wav``.

    Arguments:
    path - root directory holding one sub-directory per recording.
    out_root - root directory for the silence-stripped copies.
    aggressiveness - mode passed to ``webrtcvad.Vad``.

    A recording that fails is reported together with the error and then
    skipped; the run continues with the next one. The number of converted
    recordings is printed at the end.
    """
    cnt = 0
    for d in sorted(os.listdir(path)):
        if not isdir(join(path, d)):
            continue
        filePath = join(path, d, 'audio', d + '.Mix-Headset.wav')
        try:
            audio, sample_rate = read_wave(filePath)
            vad = webrtcvad.Vad(int(aggressiveness))
            frames = frame_generator(30, audio, sample_rate)
            segments = vad_collector(sample_rate, 30, 300, vad, frames)

            # Concatenate all voiced segments into one byte string.
            joinedaudio = b"".join(segments)
            writePath = join(out_root, d, 'audio')
            Path(writePath).mkdir(parents=True, exist_ok=True)
            write_wave(join(writePath, d + '.Mix-Headset.wav'),
                       joinedaudio, sample_rate)
        except Exception as exc:
            # Best effort per recording. `Exception` (not a bare except)
            # lets Ctrl-C and SystemExit propagate, and the reason is
            # printed instead of being silently discarded.
            print("Skipping: ", filePath, "-", repr(exc))
        else:
            cnt += 1
    print("Converted: ", cnt)


if __name__ == '__main__':
    main()

In [None]:
rm -rf amicorpus_non_silence/

In [None]:
!chmod 755 amiBuild-13720-Mon-Aug-31-2020.wget.sh

In [None]:
!ls -l '/content/drive/My Drive/amicorpus_non_silence' | wc -l

98


In [None]:
!./amiBuild-13720-Mon-Aug-31-2020.wget.sh

In [None]:
!tar -czvf filename.tar.gz amicorpus

In [None]:
!cp filename.tar.gz /content/drive/My\ Drive/amicorpus.tar.gz

In [None]:
!tar -xzvf /content/drive/My\ Drive/amicorpus.tar.gz

In [None]:
join('abc', 'def', 'ncl')

'abc/def/ncl'

In [None]:
d = 'audio'
d + '.Mix-Headset.wav'

'audio.Mix-Headset.wav'

In [None]:
!./ComputeFeatures mfcc.config /content/drive/My\ Drive/amicorpus_non_silence/ES2002b/audio/ES2002b.Mix-Headset.wav frameCepstrum+frameDeltaCepstrum sa1.mfcc 0.06 A

In [None]:
!chmod 755 ComputeFeatures

In [None]:
!chmod 755 mfcc.config

In [None]:
!pip install python_speech_features
from python_speech_features import mfcc

Collecting python_speech_features
  Downloading https://files.pythonhosted.org/packages/ff/d1/94c59e20a2631985fbd2124c45177abaa9e0a4eee8ba8a305aa26fc02a8e/python_speech_features-0.6.tar.gz
Building wheels for collected packages: python-speech-features
  Building wheel for python-speech-features (setup.py) ... [?25l[?25hdone
  Created wheel for python-speech-features: filename=python_speech_features-0.6-cp36-none-any.whl size=5887 sha256=88f523e21311e7a90c81be8fabffb617a3e7c8b57149ce586ff0a65bce641722
  Stored in directory: /root/.cache/pip/wheels/3c/42/7c/f60e9d1b40015cd69b213ad90f7c18a9264cd745b9888134be
Successfully built python-speech-features
Installing collected packages: python-speech-features
Successfully installed python-speech-features-0.6


In [None]:
import scipy.io.wavfile as wav

In [None]:
# Step between consecutive analysis windows, in seconds.
overlap = 0.01 #10 ms window shift
# Recording to analyse; presumably the silence-stripped copy produced by the
# VAD step above (same directory layout) -- verify before running.
fullPath = '/content/drive/My Drive/amicorpus_non_silence/ES2002b/audio/ES2002b.Mix-Headset.wav'
# wav.read returns the sample rate followed by the sample array.
(rate,sig) = wav.read(fullPath)
# One row per frame: 19 cepstral coefficients from a 26-filter bank,
# 30 ms windows advanced by `overlap` seconds.
mfcc_feat = mfcc(sig, rate, numcep = 19, nfilt = 26, winlen=0.03, winstep=overlap)

In [None]:
n, d = mfcc_feat.shape

In [None]:
d

19

In [None]:
# overlap = 0.01 #10 ms window shift
# Duration of each initial uniform segment, in milliseconds.
init_cluster_time = 2500 #2.5sec
# Feature frames per initial segment: segment duration divided by the frame
# shift, both in milliseconds (`overlap` is in seconds, hence the *1000).
init_cluster_len = math.ceil(init_cluster_time/(overlap*1000))

In [None]:
num_of_clusters = math.ceil(n/init_cluster_len)

In [None]:
num_of_clusters

812

In [None]:
t = np.array_split(mfcc_feat, 3)

In [None]:
class GMM:
    """Full-covariance Gaussian mixture seeded by k-means and refined by EM.

    Typical use is ``k_means(X)`` followed by ``EM_GMM(X)``. After both
    calls the instance exposes:

    centroids        - (K, d) component means.
    cluster_assigned - (n,) hard k-means labels.
    cov_mat          - (K, d, d) component covariances.
    pi_prob          - (K,) mixture weights.
    gamma            - (n, K) responsibilities from the last E step.
    log_likelihood   - log-likelihood recorded at every EM iteration.
    LL_diff          - absolute change in log-likelihood per iteration.
    """

    def __init__(self, num_of_clusters):
        self.num_of_clusters = num_of_clusters
        self.log_likelihood = []
        self.LL_diff = []

    def gaussian_prob(self, x, mean, sigma):
        """Return the multivariate normal density of one vector `x`.

        `x` and `mean` are length-d vectors and `sigma` is a (d, d)
        covariance matrix. The result is a (1, 1) array.
        """
        d = x.shape[0]
        diff = (x - mean).reshape(d, 1)
        norm_const = ((2*math.pi)**(-d/2))*(np.linalg.det(sigma)**(-0.5))
        # Solve the linear system instead of forming the explicit inverse.
        mahalanobis = diff.T.dot(np.linalg.solve(sigma, diff))
        return norm_const*np.exp(-0.5*mahalanobis)

    def k_means(self, X, max_iter=5, tol=10):
        """Cluster the rows of `X` with Lloyd's algorithm.

        Centroids start at `num_of_clusters` distinct, randomly chosen rows
        of `X`. Iteration stops when the summed point-to-centroid distance
        changes by less than `tol`, the centroids stop moving, or the
        iteration counter reaches `max_iter`.

        A cluster that receives no points keeps its previous centroid.
        Dividing by its zero count used to turn the centroid into NaN,
        which then attracted every point on the next assignment.

        Sets `self.centroids` and `self.cluster_assigned`.
        """
        n, d = X.shape
        seeds = random.sample(range(n), self.num_of_clusters)
        # Float copy so later in-place mean updates are never truncated.
        self.centroids = X[seeds].astype('float64')
        self.cluster_assigned = np.zeros(n, dtype=int)
        error = 0.0
        itr = 0
        while True:
            print("Now at itr - ", itr)
            # One point at a time keeps memory at O(K*d) rather than the
            # O(n*K) needed for a full distance matrix.
            for i in range(n):
                dist = np.sqrt(np.sum((X[i]-self.centroids)**2, 1))
                self.cluster_assigned[i] = np.argmin(dist)
            new_error = np.sum(np.sqrt(np.sum((X - self.centroids[self.cluster_assigned])**2, 1)))
            if itr > 0:
                print("Error Difference is - ", np.abs(error-new_error))

            counts = np.bincount(self.cluster_assigned, minlength=self.num_of_clusters)
            sums = np.zeros((self.num_of_clusters, d), dtype='float64')
            np.add.at(sums, self.cluster_assigned, X)
            new_centroids = np.array(self.centroids, dtype='float64')
            occupied = counts > 0
            new_centroids[occupied] = sums[occupied]/counts[occupied][:, None]

            if np.abs(new_error-error) < tol or np.array_equal(self.centroids, new_centroids) or itr >= max_iter:
                print("Breaking at itr - ", itr)
                break
            self.centroids = new_centroids
            itr += 1
            error = new_error

    def EM_GMM_INBUILT(self, X, n_components=64, max_iter=1):
        """Fit scikit-learn's GaussianMixture to `X` and return the model.

        Independent of the hand-written EM; none of this instance's
        attributes are read or modified.
        """
        # Imported here so the rest of the class works without scikit-learn.
        from sklearn.mixture import GaussianMixture
        model = GaussianMixture(n_components=n_components, covariance_type='full', max_iter=max_iter)
        model.fit(X)
        print("Created")
        return model

    def EM_GMM(self, X, max_iter=10, tol=100, reg=1e-6):
        """Refine the mixture with expectation-maximisation.

        Requires `self.centroids` and `self.cluster_assigned` (set by
        `k_means`). Initial covariances and weights come from the hard
        assignment. Each E step adds `reg` times the identity to every
        covariance before evaluating densities. Iteration stops when the
        log-likelihood changes by less than `tol` or after `max_iter`
        iterations.

        The E step works in the log domain so that densities too small
        for a float do not produce 0/0 responsibilities. The recorded
        log-likelihood is evaluated with the same parameters that produced
        the responsibilities. Components with zero mass keep their
        previous parameters instead of being divided by zero.
        """
        N, d = X.shape
        K = self.num_of_clusters
        ridge = reg*np.identity(d)
        self.cov_mat = np.zeros((K, d, d), dtype='float64')
        self.gamma = np.zeros((N, K), dtype='float64')
        self.Nk = np.zeros(K, dtype='float64')
        for k in range(K):
            X_k = X[self.cluster_assigned == k]
            self.Nk[k] = X_k.shape[0]
            if X_k.shape[0] == 0:
                # Empty cluster: zero covariance (ridge added in the E step)
                # and zero weight.
                continue
            X_k_centered = X_k - self.centroids[k]
            self.cov_mat[k] = X_k_centered.T.dot(X_k_centered)/self.Nk[k]
        self.pi_prob = self.Nk/N
        print("EM Begins")

        log_pdf = np.zeros((N, K), dtype='float64')
        prev_log_likelihood = 0.0
        itr = 1
        while True:
            #####################################
            ############   E Step   #############
            #####################################
            for k in range(K):
                log_pdf[:, k] = multivariate_normal.logpdf(X, self.centroids[k], self.cov_mat[k] + ridge)
            with np.errstate(divide='ignore'):
                # log(0) = -inf for zero-weight components is intended.
                log_weighted = log_pdf + np.log(self.pi_prob)
            # log-sum-exp over components, shifted by the row maximum.
            row_max = np.max(log_weighted, axis=1, keepdims=True)
            log_norm = row_max[:, 0] + np.log(np.sum(np.exp(log_weighted - row_max), axis=1))
            self.gamma = np.exp(log_weighted - log_norm[:, None])
            new_log_likelihood = np.sum(log_norm)

            #####################################
            ############   M Step   #############
            #####################################
            self.Nk = np.sum(self.gamma, axis=0)
            self.pi_prob = self.Nk/N
            for k in range(K):
                if self.Nk[k] <= 0:
                    continue
                resp = self.gamma[:, k][:, np.newaxis]
                self.centroids[k] = np.sum(X*resp, axis=0)/self.Nk[k]
                X_centered = X - self.centroids[k]
                self.cov_mat[k] = (X_centered*resp).T.dot(X_centered)/self.Nk[k]

            #####################################
            ########   Log Likelihood   #########
            #####################################
            self.log_likelihood.append(new_log_likelihood)
            diff_LL = np.abs(new_log_likelihood-prev_log_likelihood)
            self.LL_diff.append(diff_LL)
            print("Itr = ", itr, " Current LL is - ", new_log_likelihood)
            print("Change In LL is - ", diff_LL)
            if diff_LL < tol or itr >= max_iter:
                print("EM Finished at iteration - ", itr)
                break
            itr += 1
            prev_log_likelihood = new_log_likelihood

In [None]:
# Build a mixture with `num_of_clusters` components, seed it with k-means
# and then refine it with EM, both on the full MFCC feature matrix.
ug = GMM(num_of_clusters)
ug.k_means(mfcc_feat)
ug.EM_GMM(mfcc_feat)

In [None]:
def fitUnimodal(C):
  """Fit one Gaussian to every segment in `C`.

  `C` is an iterable of 2-D arrays with one observation per row. Returns
  `(means, covMatrices)`: two lists, parallel to `C`, holding each
  segment's column-wise mean and its `np.cov` covariance.
  """
  means = [np.mean(segment, axis = 0) for segment in C]
  # np.cov expects variables in rows, hence the transpose.
  covMatrices = [np.cov(segment.T) for segment in C]
  return means, covMatrices

In [None]:
def calc_prob(x, GaussianMeans, GaussianCovMatrices):
  """Average mixture density of the frames in `x`.

  Arguments:
  x - 2-D array, one feature vector per row.
  GaussianMeans - used only for its length (the number of components).
  GaussianCovMatrices - per-component covariance matrices.

  For every component k, the density of each frame is evaluated with mean
  `ug.centroids[k]` and covariance `GaussianCovMatrices[k]` plus a 1e-6
  ridge, weighted by `ug.pi_prob[k]`, summed over frames and components,
  and divided by the number of frames.

  NOTE(review): means and weights come from the notebook-level `ug`
  object while covariances come from the argument; confirm that this
  mix is intended.
  """
  D, dim = x.shape
  # The feature dimension is taken from `x`. The notebook global `d` that
  # was used before is rebound to unrelated values in other cells.
  ridge = 1e-6*np.identity(dim)
  p = 0.0
  for k in range(len(GaussianMeans)):
    # One vectorised density call per component instead of one per frame.
    densities = multivariate_normal.pdf(x, ug.centroids[k], GaussianCovMatrices[k] + ridge)
    p = p + ug.pi_prob[k]*np.sum(densities)
  return p/D

In [None]:
def calcYgivenX(x, GaussianMeans, GaussianCovMatrices, i):
  """Mean posterior probability of component `i` over the frames in `x`.

  Arguments:
  x - 2-D array, one feature vector per row.
  GaussianMeans, GaussianCovMatrices - parallel per-component parameters.
  i - index of the component whose posterior is returned.

  Every frame's posterior over the components is computed with equal
  component weights and the posteriors of component `i` are averaged
  over the frames. Returns a float.

  The previous body could not run: it called `multivariate_normal(...)`
  instead of its `pdf`, referenced `self` outside a class, and reused `i`
  as the loop index. NOTE(review): returning the posterior of component
  `i` is inferred from the signature; confirm against the intended caller.
  """
  numOfClusters = len(GaussianMeans)
  D = x.shape[0]
  logMat = np.zeros((D, numOfClusters), dtype = float)
  for k in range(numOfClusters):
    logMat[:,k] = multivariate_normal.logpdf(x, GaussianMeans[k], GaussianCovMatrices[k])
  # Shift by the row maximum before exponentiating so that very small
  # densities cannot underflow into a 0/0 normalisation. The equal
  # component weights cancel in the ratio.
  logMat = logMat - np.max(logMat, axis = 1, keepdims = True)
  probMat = np.exp(logMat)
  probMat = probMat/(np.sum(probMat, axis = 1)[:,None])
  return float(np.mean(probMat[:,i]))

In [None]:
########################
##### IB Algorithm #####
########################

#Init Variables
# N is the number of initial clusters.
N = num_of_clusters
# Split the feature matrix into `num_of_clusters` contiguous chunks of
# rows; each chunk is one initial cluster.
C = np.array_split(mfcc_feat, num_of_clusters)
# One Gaussian (mean, covariance) per initial cluster.
GaussianMeans, GaussianCovMatrices = fitUnimodal(C)
# Maps a cluster id to the list of initial cluster ids merged into it;
# every cluster starts out containing only itself.
ClusterMapping = dict(zip(range(num_of_clusters), [[i] for i in range(num_of_clusters)]))
# probC[i] is the average density of cluster i's own frames under its own
# Gaussian.
# NOTE(review): these are mean densities and are not normalised to sum
# to 1 here -- confirm this is the intended p(c).
probC = []
for i in range(N):
  p = 0.0
  D = C[i].shape[0]
  for j in range(D):
    s = C[i][j]
    p += multivariate_normal.pdf(s, GaussianMeans[i], GaussianCovMatrices[i])
  p = p/D 
  probC.append(p)

In [None]:
probX = probC.copy()

In [None]:
# For every initial cluster i build:
#   probYgivenC[i] - posterior over the Gaussians, averaged over the
#                    cluster's frames (a vector of length num_of_clusters)
#   probCgivenX[i] - one-hot list of length N with 1.0 at position i
probYgivenC = []
probCgivenX = []
for i in range(N):
  segment = C[i]
  posteriors = np.zeros((segment.shape[0], num_of_clusters), dtype = float)
  for j in range(num_of_clusters):
    posteriors[:,j] = multivariate_normal.pdf(segment, GaussianMeans[j], GaussianCovMatrices[j]).ravel()
  # Normalise each frame's densities across the Gaussians.
  posteriors = posteriors/(np.sum(posteriors, axis = 1)[:,None])
  probYgivenC.append(np.mean(posteriors, axis = 0))
  probCgivenX.append([1.0 if j == i else 0.0 for j in range(N)])
  if i%100 == 0:
    print("Done ",i)

# # prob_cond_y_c = np.zeros((N, N), dtype = float)
# # prob_cond_c_x = np.zeros((N, N), dtype = float)
# del_F = np.zeros((N, N), dtype = float)
# for i in range(N):
#   prob_c(i) = calc_prob(C[i], ug)
#   for j in range(N):
#     prob_cond_y_c[j][i] = calc_cond_prob(j, C[i], ug)
#     if(j == i):
#       prob_cond_c_x[j][i] = 1



#Main Algo


Done  0
Done  100
Done  200
Done  300
Done  400
Done  500
Done  600
Done  700
Done  800


In [None]:
# Pairwise merge costs for the agglomerative IB step. Only the upper
# triangle (i < j) is filled; every other entry stays at +inf so it can
# never be selected as the minimum.
beta = 10.0
del_F = np.full((N, N), np.inf, dtype = float)
# p(x|c) = p(c|x) p(x) / p(c), one row per cluster.
probXgivenC = ((np.array(probCgivenX)*np.array(probX)).T/probC).T
for i in range(N):
  for j in range(i+1, N):
    # probYgivenC[i] is the distribution over y for cluster i, so clusters
    # are compared row against row. The earlier column indexing [:, i]
    # compared transposed slices, and rebuilt the whole N x N array for
    # every pair.
    # NOTE(review): scipy's jensenshannon returns the JS *distance* (the
    # square root of the divergence) with equal weights; confirm this is
    # the quantity wanted here.
    temp1 = distance.jensenshannon(probYgivenC[i], probYgivenC[j])
    temp2 = distance.jensenshannon(probXgivenC[i], probXgivenC[j])
    dij = temp1 - (1/beta)*temp2
    del_F[i][j] = (probC[i] + probC[j])*dij
  if i%100 == 0:
    print("Done ",i)

Done  0
Done  100
Done  200
Done  300
Done  400
Done  500
Done  600
Done  700
Done  800


In [None]:
import pickle
# Persist the intermediate IB tables. Each file is opened in a `with`
# block so the handle is flushed and closed right after the dump.
for file_name, payload in (("probYgivenC.sav", probYgivenC),
                           ("probCgivenX.sav", probCgivenX),
                           ("del_F.sav", del_F)):
  with open(file_name, 'wb') as fh:
    pickle.dump(payload, fh)

In [None]:
# file_name = "probYgivenC.sav"
# probYgivenC = pickle.load(open(file_name, 'rb'))
# file_name = "probCgivenX.sav"
# # pickle.dump(probCgivenY, open(file_name, 'wb'))
# probCgivenX = pickle.load(open(file_name, 'rb'))

In [None]:
multivariate_normal.pdf(C[0][0], GaussianMeans[0], GaussianCovMatrices[0])

4.821832164906279e-30

In [None]:
#IB Algo: repeatedly merge the pair of clusters with the smallest merge
# cost until 4 clusters remain.
#
# Fixes relative to the earlier version of this cell:
#  * a merged-away cluster is retired completely (row AND column of del_F,
#    and its ClusterMapping entry); before, only its column was disabled
#    and the cost refresh re-enabled pairs with retired clusters, so the
#    same cluster was merged repeatedly (duplicate ids in ClusterMapping);
#  * the minimum is located with argmin/unravel_index, which also works
#    when several pairs tie for the minimum;
#  * clusters are compared through rows of probYgivenC (one row per
#    cluster), matching the row-wise update of probYgivenC[i] below.
N = num_of_clusters
# A cluster is still alive if any of its pairwise costs is finite.
finite_cost = np.isfinite(del_F)
active = finite_cost.any(axis = 0) | finite_cost.any(axis = 1)
# NOTE: num_of_clusters itself is used as the countdown, as before, so it
# equals the final cluster count when the loop ends.
while num_of_clusters>4:
  i, j = (int(v) for v in np.unravel_index(np.argmin(del_F), del_F.shape))
  if not np.isfinite(del_F[i][j]):
    print("No finite merge cost left; stopping at ", num_of_clusters)
    break
  probCr = probC[i] + probC[j]

  # Retire cluster j: it can no longer take part in any pair.
  active[j] = False
  del_F[j,:] = np.inf
  del_F[:,j] = np.inf
  ClusterMapping[i] += ClusterMapping.pop(j)

  # Merge j into i: probability-weighted average of p(y|c), summed p(c).
  probYgivenC[i] = (probYgivenC[i]*probC[i] + probYgivenC[j]*probC[j])/probCr
  probC[i] = probCr
  probCgivenX[j] = [0 for idx in probCgivenX[j]]
  probCgivenX[i] = [0 for idx in probCgivenX[i]]
  for idx in ClusterMapping[i]:
    probCgivenX[i][idx] = 1
  probXgivenC = ((np.array(probCgivenX)*np.array(probX)).T/probC).T

  # Refresh the costs between the merged cluster and every live cluster,
  # keeping entries in the upper triangle (smaller index first).
  for idx in np.flatnonzero(active):
    idx = int(idx)
    if idx == i:
      continue
    lo, hi = (idx, i) if idx < i else (i, idx)
    temp1 = distance.jensenshannon(probYgivenC[lo], probYgivenC[hi])
    temp2 = distance.jensenshannon(probXgivenC[lo], probXgivenC[hi])
    dij = temp1 - (1/beta)*temp2
    del_F[lo][hi] = (probC[lo] + probC[hi])*dij

  num_of_clusters = num_of_clusters-1
  if num_of_clusters%100 == 0:
    print("Clusters Rem: ", num_of_clusters)


Clusters Rem:  800
Clusters Rem:  700
Clusters Rem:  600
Clusters Rem:  500
Clusters Rem:  400
Clusters Rem:  300
Clusters Rem:  200
Clusters Rem:  100


In [None]:
data_sav = []

False

In [None]:
ClusterMapping

{0: [0],
 1: [1],
 2: [2],
 3: [3],
 4: [4],
 5: [5],
 6: [6],
 7: [7, 177, 177, 238],
 8: [8],
 9: [9],
 10: [10],
 11: [11],
 12: [12],
 13: [13,
  142,
  246,
  144,
  655,
  730,
  144,
  655,
  730,
  178,
  657,
  781,
  730,
  447,
  166],
 14: [14, 161, 212, 216, 656, 216, 656, 656, 657, 680, 455, 227],
 15: [15],
 16: [16, 48, 680, 619, 361, 185, 653, 756, 361, 432, 432, 453, 20],
 17: [17],
 18: [18],
 19: [19,
  161,
  212,
  216,
  656,
  216,
  656,
  656,
  657,
  680,
  443,
  150,
  246,
  391,
  246,
  391,
  395,
  680,
  687,
  686,
  745],
 20: [20,
  27,
  56,
  404,
  404,
  404,
  440,
  183,
  212,
  216,
  656,
  56,
  404,
  404,
  404,
  440,
  183,
  212,
  216,
  656,
  404,
  440,
  440,
  687,
  56,
  404,
  404,
  404,
  440,
  183,
  212,
  216,
  656,
  404,
  440,
  440,
  687,
  404,
  440,
  440,
  687,
  620,
  799],
 21: [21],
 22: [22],
 23: [23],
 24: [24,
  35,
  406,
  217,
  498,
  653,
  35,
  396,
  781,
  781,
  781,
  781,
  781,
  781,
 

In [None]:
p.pop(1)
p.pop(3-1)

3

In [None]:
p

[0, 2, 4, 5]

In [None]:
p.insert(1, 13)

In [None]:
d = dict(zip(range(10),[[i] for i in range(10)]))

In [None]:
d[0].append(2)

In [None]:
d

{0: [0],
 1: [1],
 2: [2],
 3: [3],
 4: [4],
 5: [5],
 6: [6],
 7: [7],
 8: [8],
 9: [9]}

In [None]:
from scipy.stats import norm

In [None]:
m,c = norm.fit(mfcc_feat[0:100],d=19)

TypeError: ignored

In [None]:
data = norm.rvs(loc=0,scale=2,size=10, )

In [None]:
c.shape

()

In [None]:
mfcc_feat[0:100].shape

(100, 19)

In [None]:
m = np.mean(mfcc_feat[0:100], axis = 0)

In [None]:
c = np.cov(mfcc_feat[0:100].T)

In [None]:
m.shape

(19,)

In [None]:
a = 1
a +=2
a

3

In [None]:
a = [[-10,-3,4], [4,-104,-500]]

In [None]:
i, j = np.argwhere(a == np.min(a)).ravel()

In [None]:
a = [1,2,3] + [3,4,5]

In [None]:
a[0] = [9 for i in a[0]]

In [None]:
a

[[9, 9, 9], [4, -104, -500]]