# Neural network based test script

This script evaluates the test set and converts the results into CSV-formatted files.

In [78]:
import numpy as np
import matplotlib.pyplot as plt
from numpy import log, dot, e
import librosa
from numpy.random import rand
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import pandas as pd
import os

In [29]:
def predict_framewise(model,x_test):
    '''
    Framewise classification of every clip into one of two classes.

    Each clip is passed through ``model.model.predict`` and every returned
    row is turned into a hard one-hot decision: ``[1, 0]`` when the first
    output column is strictly greater than 0.5, otherwise ``[0, 1]``.

    Input:
        model: wrapper object whose ``model`` attribute exposes ``predict``
        x_test: sequence of clips; each clip is whatever ``predict`` accepts
                (presumably an (n_frames, n_features) array -- TODO confirm)
    Output:
        y_pred_framewise = list with one float array of shape (n_frames, 2)
                           per clip, in the same order as x_test
    '''
    y_pred_framewise = []
    for clip in x_test:
        scores = np.asarray(model.model.predict(clip))
        if len(scores) == 0:
            # No frames for this clip: keep the (0, 2) shape callers expect.
            y_pred_framewise.append(np.empty((0, 2)))
            continue
        # One vectorised threshold instead of growing an array row by row;
        # the (n, 1) condition broadcasts against the two one-hot templates.
        decisions = np.where(scores[:, [0]] > 0.5, [1.0, 0.0], [0.0, 1.0])
        y_pred_framewise.append(decisions)
    return y_pred_framewise

In [30]:
def predict_aggregate(y_pred_framewise):
    '''
    Aggregate frame decisions into a single one-hot label per clip.

    A clip is labelled ``[1, 0]`` when strictly more than half of its frames
    have a first column equal to 1; ties and clips without frames are
    labelled ``[0, 1]``.

    Input:
        y_pred_framewise = sequence of per-clip arrays of shape (n_frames, 2),
                           as returned by predict_framewise
    Output:
        y_hat = float array of shape (n_clips, 2) holding one one-hot row per
                clip; shape (0, 2) when no clips are given
    '''
    votes = []
    for frames in y_pred_framewise:
        frames = np.asarray(frames)
        # Count frames voting for the first class; everything else counts
        # for the second class.
        ones = int(np.count_nonzero(frames[:, 0] == 1)) if frames.size else 0
        zeros = len(frames) - ones
        votes.append([1.0, 0.0] if ones > zeros else [0.0, 1.0])

    # reshape keeps the (n_clips, 2) contract even when votes is empty.
    y_hat = np.array(votes, dtype=float).reshape(-1, 2)
    return y_hat

In [31]:
from glob import glob 
def readDir(data, Fs = 16000):
    '''
    Convert one dB-scaled spectrogram into per-frame MFCC feature vectors.

    Despite its name this function reads nothing from disk: it receives an
    already loaded array, converts it from dB to power, projects it onto a
    mel filter bank, and computes 21 MFCCs from the log-mel spectrogram.
    The first coefficient is dropped, and the remaining 20 are stacked with
    their first- and second-order deltas, giving 60 features per frame.

    Inputs:
        data: spectrogram in dB (presumably shape (1 + n_fft/2, n_frames) as
              produced by an STFT -- TODO confirm against the code that
              writes the .npy files)
        Fs: (int) sampling rate used to build the mel filter bank
    Output:
        x: float64 np array of shape (n_frames, 60), one row per frame
    '''
    power_spectrogram = librosa.db_to_power(data)

    # A precomputed spectrogram is passed through S=, so librosa performs no
    # STFT here and needs neither a waveform nor window/hop arguments.
    mel_spectrogram = librosa.feature.melspectrogram(S=power_spectrogram, sr=Fs)
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram)

    mfccs = librosa.feature.mfcc(S=log_mel_spectrogram, n_mfcc=21, sr=Fs)
    mfccs = mfccs[1:]  # discard the first coefficient, keep the other 20

    delta_mfccs = librosa.feature.delta(mfccs)
    delta2_mfccs = librosa.feature.delta(mfccs, order=2)
    features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs))

    # Frames become rows; float64 matches what earlier versions returned.
    x = np.ascontiguousarray(np.transpose(features), dtype=np.float64)
    return x

In [32]:
def load_audio(filename, Fs = 16000):
    '''
    Read an audio file through librosa, resampled to the requested rate.

    Inputs:
        filename: (str) path of the audio file
        Fs: (int) sampling rate handed to librosa.load
    Output:
        x: np array with the decoded samples (the sampling rate reported by
           librosa is discarded)
    '''
    samples, _ = librosa.load(filename, sr=Fs)
    return np.array(samples)

In [5]:
def splitData(X, t, testFraction=0.2, randomize = False):
    """
    Split the data randomly into training and test sets (numpy only).

    The clip indices are shuffled with ``np.random.shuffle``; the first
    ``int(Nclips * testFraction)`` shuffled indices form the test set and the
    rest form the training set. Every selected item is flattened into one
    row, so the outputs are always 2-D.

    Inputs:
        X: (np array of len Nclips) input feature vectors
        t: (np array of len Nclips) targets; one hot vectors
        testFraction: (float) Nclips_test = int(testFraction * Nclips)
        randomize: accepted for interface compatibility but not consulted;
                   the split is always shuffled
    Outputs (in this order):
        X_train: training set
        t_train: training labels
        X_test: test set
        t_test: test labels
        Either partition may have zero rows when the split leaves it empty.
    """

    def _as_rows(values):
        # One flattened row per item, also for zero items.
        arr = np.asarray(values)
        if arr.dtype == object:
            # Array of per-clip arrays: flatten each clip individually.
            arr = np.array([np.ravel(item) for item in arr])
        return arr.reshape(len(arr), int(np.prod(arr.shape[1:])))

    X_rows = _as_rows(X)
    t_rows = _as_rows(t)

    n_clips = len(X_rows)
    test_samples = int(n_clips * testFraction)

    order = np.arange(n_clips)
    np.random.shuffle(order)
    test_idx = order[:test_samples]
    train_idx = order[test_samples:]

    # Fancy indexing gathers all rows at once and copes with an empty
    # index array, which per-row concatenation could not.
    X_train, t_train = X_rows[train_idx], t_rows[train_idx]
    X_test, t_test = X_rows[test_idx], t_rows[test_idx]

    return X_train, t_train, X_test, t_test

In [12]:
class Classifier: 
    '''
    Two-class, frame-level classifier built around a small dense Keras network.

    Attributes:
        W: random vector of length 20 created in __init__. It is the only
           state that save_model/load_model persist; no prediction method
           of this class reads it.
        model: object exposing ``predict``; set by neural() or assigned from
           outside. Starts as the placeholder value 3.
    '''
    def __init__(self):
        self.W=np.random.rand(20,)
        # Placeholder until neural() trains a network or one is assigned.
        self.model=3

    def sigmoid(self, z):
        '''Return the logistic function 1 / (1 + e**(-z)) of z.'''
        return 1 / (1 + e**(-z))

    def neural(self,x,y):
        '''
        Build and train the dense network, then store it in self.model.

        Input:
            x: 2-D training matrix; x.shape[1] sets the input width
            y: training targets (presumably one-hot rows with 2 columns,
               matching the 2-unit softmax output -- TODO confirm)
        Output:
            None
        '''
        model = keras.Sequential([
            tf.keras.layers.Dense(256, input_dim=x.shape[1], activation='relu'),
            keras.layers.Dense(units=192, activation='relu'),
            keras.layers.Dense(units=128, activation='relu'),
            keras.layers.Dense(units=64, activation='relu'),
            keras.layers.Dense(units=2, activation='softmax')
        ])

        # The last layer already applies softmax, so the loss receives
        # probabilities rather than logits.
        model.compile(optimizer='adam',
                      loss=tf.losses.CategoricalCrossentropy(from_logits=False),
                      metrics=['accuracy'])

        model.fit(x, y, epochs=50,batch_size=32)

        self.model=model
        return

    def save_model(self, save_path):
        '''
        Save the weight vector self.W on local disk with np.save.

        Only self.W is written; the Keras network in self.model is not.
        Input:
            save_path: file name to write; np.save appends ".npy" when the
                       name does not already end with it
        Output:
            None
        '''
        np.save(save_path, self.W)
        return

    def load_model(self, load_path):
        '''
        Load the weight vector self.W from local disk with np.load.

        Input:
            load_path: file name to read. The name given to save_model is
                       accepted as well: when it does not exist and lacks
                       the ".npy" suffix, the suffixed name is tried.
        Output:
            None
        '''
        try:
            self.W = np.load(load_path)
        except FileNotFoundError:
            if str(load_path).endswith('.npy'):
                raise
            # np.save added the suffix on writing; mirror that on reading.
            self.W = np.load(str(load_path) + '.npy')
        return

    @staticmethod
    def _one_hot_frames(scores):
        '''Map per-frame scores to [1, 0] where column 0 > 0.5, else [0, 1].'''
        scores = np.asarray(scores)
        if len(scores) == 0:
            return np.empty((0, 2))
        return np.where(scores[:, [0]] > 0.5, [1.0, 0.0], [0.0, 1.0])

    def predict_framewise(self,x_test):
        '''
        Framewise classification of every clip with self.model.

        Input:
            x_test: sequence of clips; each clip is whatever
                    self.model.predict accepts
        Output:
            y_pred_framewise = list with one float array of shape
                               (n_frames, 2) per clip; a row is [1, 0] when
                               the first model output exceeds 0.5, else [0, 1]
        '''
        y_pred_framewise = []
        for clip in x_test:
            scores = self.model.predict(clip)
            y_pred_framewise.append(self._one_hot_frames(scores))
        return y_pred_framewise

    def predict_aggregate(self,y_pred_framewise):
        '''
        Aggregate frame decisions into a single one-hot label per clip.

        A clip is labelled [1, 0] when strictly more than half of its frames
        have a first column equal to 1; ties and empty clips give [0, 1].

        Input:
            y_pred_framewise = sequence of per-clip arrays of shape
                (n_frames, 2), as returned by predict_framewise. A single
                2-D array is treated as one clip; if its shape is
                (2, n_frames) with n_frames != 2 it is assumed to be in the
                transposed layout and is flipped first.
        Output:
            y_hat = float array of shape (n_clips, 2), one one-hot row per
                    clip
        '''
        if isinstance(y_pred_framewise, np.ndarray) and y_pred_framewise.ndim == 2:
            single = y_pred_framewise
            if single.shape[0] == 2 and single.shape[1] != 2:
                single = single.T
            y_pred_framewise = [single]

        votes = []
        for frames in y_pred_framewise:
            frames = np.asarray(frames)
            ones = int(np.count_nonzero(frames[:, 0] == 1)) if frames.size else 0
            zeros = len(frames) - ones
            votes.append([1.0, 0.0] if ones > zeros else [0.0, 1.0])

        # reshape keeps the (n_clips, 2) contract even without any clip.
        y_hat = np.array(votes, dtype=float).reshape(-1, 2)
        return y_hat

In [14]:
# Two wrappers around networks restored from disk: fun() applies model1 to
# every frame first and model2 to the frames of each extracted segment.
model1, model2 = Classifier(), Classifier()
model1.model = tf.keras.models.load_model('C:/Users/HP/Documents/test/models/model_wrong/')
model2.model = tf.keras.models.load_model('C:/Users/HP/Documents/test/models/model_wrong_2/')

In [99]:
# Seed result tables. Each starts with one placeholder row tagged 'rough' so
# that the column layout exists before any file has been processed.
a = np.random.rand(1, 2)
t = ['rough']

# Per-segment table: start time, end time, label, file.
task1 = pd.DataFrame(a, columns=['start time', 'end time']).assign(
    label=t,
    file=['rough'],
)

# Per-file table: music flag, speech flag, file.
task2 = pd.DataFrame(np.array([[1, 0]]), columns=['music', 'speech']).assign(
    file=['rough'],
)
print(task2)

   music  speech   file
0      1       0  rough


In [102]:
def fun(data,task1,task2,fname):
    '''
    Segment one file, label every segment, and append the results.

    Steps:
        1. model1 classifies every frame; each frame becomes [1, 0] when its
           first output exceeds 0.5, else [0, 1] (the original cell calls
           this stage "audio vs Silence"; presumably column 0 marks
           silence -- TODO confirm).
        2. A 16-frame window slides over the frames. A window with at least
           11 frames in class 0 counts as outside a segment; any other
           window is inside one. Transitions between the two states open
           and close segments, whose boundaries are stored in seconds.
        3. The feature rows of every segment are classified by model2
           through predict_framewise/predict_aggregate; an aggregate of
           [1, 0] is reported as 'speech', anything else as 'music'.

    Input:
        data: per-frame feature matrix of the file (rows are frames)
        task1: DataFrame with columns 'start time', 'end time', 'label', 'file'
        task2: DataFrame with columns 'music', 'speech', 'file'
        fname: file name written into the 'file' columns
    Output:
        [task1, task2] with the rows for this file appended: one task1 row
        per segment and one task2 row with 0/1 flags telling whether any
        segment was labelled music / speech
    '''
    sample_rate, hop, n_fft = 16000, 512, 1024
    window = 16

    def to_time(frame):
        return librosa.frames_to_time(frame, sr=sample_rate, hop_length=hop, n_fft=n_fft)

    def to_frame(time):
        return librosa.time_to_frames(time, sr=sample_rate, hop_length=hop, n_fft=n_fft)

    # Stage 1: hard one-hot decision for every frame.
    pred = np.asarray(model1.model.predict(data))
    p = np.where(pred[:, [0]] > 0.5, [1.0, 0.0], [0.0, 1.0])

    # Segment boundaries in seconds; a row left at [0, 0] was never used.
    # At most one segment can open per loop iteration, so len(p) rows always
    # suffice (8 is kept as the minimum for very short inputs).
    sp = np.zeros((max(8, len(p)), 2))
    k = 0

    prev = 1  # the scan starts in the "outside a segment" state
    for i in range(0, len(p) - 17):
        outside = 1 if p[i:i + window, 0].sum() >= 11 else 0
        if outside == 1:
            if prev != 1:
                # Segment ends: close it at the first class-0 frame of the window.
                edge = i
                while p[edge][0] != 1:
                    edge += 1
                sp[k][1] = to_time(edge)
                k += 1
        else:
            if prev == 1:
                # Segment starts at the first class-1 frame of the window and
                # provisionally ends at the end of this window.
                edge = i
                while p[edge][1] != 1:
                    edge += 1
                sp[k][0] = to_time(edge)
                sp[k][1] = to_time(i + window)
            else:
                # Still inside the segment: push its end forward.
                sp[k][1] = to_time(i + window)
        prev = outside

    # Keep only the rows that were actually written; shape stays (m, 2).
    used = ~((sp[:, 0] == 0) & (sp[:, 1] == 0))
    sp = sp[used]

    # Cut the feature rows belonging to each segment.
    clips = []
    for start_time, end_time in sp:
        start = to_frame(start_time)
        end = to_frame(end_time)
        clips.append(np.array(data[start:end]))

    # Stage 2: frame labels, then one label per segment.
    frame_pred = predict_framewise(model2, clips)
    agg = predict_aggregate(frame_pred)

    speech = 0
    music = 0
    label = []
    for i in range(len(sp)):
        label.append('speech' if (agg[i][0] == 1) else 'music')
        if agg[i][0] == 1:
            speech = 1
        if agg[i][0] == 0:
            music = 1
    label = np.array(label)

    new = pd.DataFrame(sp, columns = ['start time', 'end time'])
    new['label'] = label
    new['file'] = np.array([fname] * len(sp))

    new_task2 = pd.DataFrame(np.array([[music, speech]]), columns = ['music', 'speech'])
    new_task2['file'] = [fname]

    # DataFrame.append no longer exists in pandas 2.x; concat is equivalent.
    task1 = pd.concat([task1, new])
    task2 = pd.concat([task2, new_task2])
    return [task1,task2]

In [103]:
# Process every saved .npy array in the test folder, threading the growing
# result tables through each call to fun().
files = glob('C:/Users/HP/Documents/test/unknown/' + '/*.npy')
for f in files:
    fname = os.path.basename(f)
    data = readDir(np.load(f))
    ans = fun(data, task1, task2, fname)
    task1, task2 = ans

In [104]:
# Write the per-segment table (start time, end time, label, file) to disk.
# index=False is not passed, so the row index is written as well.
task1.to_csv('C:/Users/HP/Documents/test/task1.csv')

In [105]:
# Write the per-file table (music flag, speech flag, file) to disk.
# index=False is not passed, so the row index is written as well.
task2.to_csv('C:/Users/HP/Documents/test/task2.csv')