In [77]:
import os

import numpy as np
import matplotlib.pyplot as plt
import librosa

In [2]:
from glob import glob
def readDir(dirname, Fs=16000):
    """Load every ``.wav`` file in a directory and stack the audio into 10 s clips.

    Each file is read with ``load_audio``, the resulting signals are joined
    end to end, and the joined signal is reshaped so that every row holds
    ``10 * Fs`` consecutive samples.

    Parameters
    ----------
    dirname : str
        Directory searched (non-recursively) for ``*.wav`` files.
    Fs : int, optional
        Sampling rate forwarded to ``load_audio``; it also fixes the row
        length of the result (``10 * Fs`` samples).

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(Nclips, 10 * Fs)``. ``Nclips`` is 0 when the
        directory contains no ``.wav`` files.

    Raises
    ------
    ValueError
        From ``np.reshape`` if the joined signal is not a whole number of
        clips (``load_audio`` is expected to trim each file accordingly).
    """
    # os.path.join inserts the platform's separator. The previous hand-written
    # "\*.wav" suffix relied on an invalid escape sequence and only matched
    # files on Windows.
    pattern = os.path.join(dirname, "*.wav")

    # glob returns matches in arbitrary order; sorting keeps the clip order
    # reproducible from run to run.
    files = sorted(glob(pattern))

    # Gather the per-file signals and join them once; np.append inside the
    # loop re-copied the whole buffer for every file.
    signals = [load_audio(file, Fs) for file in files]

    # The leading empty float64 array keeps the result float64 (as before) and
    # lets an empty directory concatenate cleanly into a zero-length signal.
    x = np.concatenate([np.empty(0)] + signals)

    # Split the joined signal into rows of 10 s each.
    Nsamples = 10 * Fs
    Nclips = x.size // Nsamples
    return np.reshape(x, (Nclips, Nsamples))

In [3]:
def load_audio(filename, Fs = 16000):
    """Load an audio file and trim it to a whole number of 10 s clips.

    Parameters
    ----------
    filename : str
        Path of the audio file handed to ``librosa.load``.
    Fs : int, optional
        Target sampling rate requested from ``librosa.load``; it also defines
        the clip length (``10 * Fs`` samples).

    Returns
    -------
    numpy.ndarray
        The leading ``k * 10 * Fs`` samples of the signal, where ``k`` is the
        number of complete 10 s clips it contains. Empty when the file is
        shorter than 10 s.
    """
    # The sampling rate has to be passed by keyword: librosa >= 0.10 made every
    # argument after the path keyword-only, so the positional form raises
    # TypeError there. The keyword form works on older releases as well.
    x, _ = librosa.load(filename, sr=Fs)

    # Keep only complete 10 s clips; the trailing remainder is discarded.
    clip_len = 10 * Fs
    n_clips = len(x) // clip_len

    return x[:n_clips * clip_len]

In [91]:
def create_Data(X, Y, Fs=16000):
    '''
    Create mixed data from audio files

    Every output clip is silent except for two segments copied from two
    different input clips: one placed in the first half of the clip and one
    placed in the second half.

    Parameters
    ----------
    X : 2-D array, shape (N, n_samples)
        Source clips, one per row. Each clip must span at least 10 s at ``Fs``.
    Y : 2-D array, shape (N, >=1)
        Per-clip labels; only column 0 (music=1 | speech=0) is used.
    Fs : int, optional
        Sampling rate of ``X``, used to convert onset/offset times (seconds)
        to sample indices. Defaults to 16000, the previously hard-coded value.

    Returns
    -------
    x_out : array, shape (N, n_samples)
        Mixed clips (N is reduced by one if it was odd). Output clip ``j``
        holds a segment of shuffled source ``j`` and a segment of shuffled
        source ``j - 1`` (wrapping around).
    y_out_bool : array, shape (2N, 4)
        Two rows per output clip, each
        ``[clip_index, music=1 | speech=0, onset, offset]`` with onset and
        offset in seconds.

    Raises
    ------
    ValueError
        If the clips in ``X`` are shorter than 10 s at ``Fs``.

    Notes
    -----
    Random numbers come from NumPy's global generator; call
    ``np.random.seed`` beforehand for reproducible output.
    '''
    N = X.shape[0]
    n_samples = X.shape[1]

    # Segments are placed at up to 10 s into the clip, so shorter clips cannot
    # hold the labelled regions.
    if n_samples < 10 * Fs:
        raise ValueError(
            "create_Data expects clips of at least 10 s "
            "(%d samples at Fs=%d), got %d samples" % (10 * Fs, Fs, n_samples)
        )

    # Shuffle sources and labels together.
    idx = np.random.permutation(N)

    x = X[idx]
    y_bool = Y[idx]

    if N%2:
        N -= 1 # keeping N even for simplicity

    # Output width follows the input instead of a fixed 160000 samples.
    x_out = np.zeros((N, n_samples))
    y_out_bool = np.zeros((2*N, 4))

    # Segment durations: between 1 and max_time seconds.
    max_time = 4
    times = (max_time-1)*np.round(np.random.random(2*N),3)
    times += 1

    # Onsets: the first N fall in [0, 1] s, the last N in [5, 6] s, so the two
    # segments written into one clip never overlap (first ends by 5 s).
    starts = np.round(np.random.random(2*N),3)
    starts[N:] += 5

    ends = starts + times

    # Seconds -> sample indices.
    frame_s = (starts*Fs).astype(int)
    frame_e = (ends*Fs).astype(int)

    for i in range(N):
        # Early segment of source i stays in output clip i.
        x_out[i, frame_s[i]:frame_e[i]] = x[i, frame_s[i]:frame_e[i]]
        y_out_bool[2*i] = [i, y_bool[i,0], starts[i], ends[i]]

        # Late segment of source i goes into the next output clip (wrapping),
        # so every clip mixes two different sources.
        nxt = (i+1)%N
        x_out[nxt, frame_s[N+i]:frame_e[N+i]] = x[i, frame_s[N+i]:frame_e[N+i]]
        y_out_bool[2*nxt+1] = [nxt, y_bool[i,0], starts[N+i], ends[N+i]]

    return x_out, y_out_bool
       
    

In [92]:
if __name__=="__main__":

    Fs = 16000

    # Input folders -- change these as per your directory.
    # Raw strings keep the Windows backslashes literal; in a normal string
    # sequences such as "\A" or "\m" are invalid escapes (a SyntaxWarning on
    # recent Python versions).
    music_dir = r'D:\Acads\EE603 MLSP\Assignment 1\music_try'
    speech_dir = r'D:\Acads\EE603 MLSP\Assignment 1\speech_try'

    # Read audio: one row per 10 s clip
    x_music = readDir(music_dir, Fs)
    x_speech = readDir(speech_dir, Fs)
    X = np.concatenate((x_music, x_speech))

    # Create one-hot labels: [1, 0] = music, [0, 1] = speech.
    # np.tile keeps the (n, 2) shape even when a folder yields no clips, so
    # the concatenation below does not fail on an empty class.
    y_music = np.tile([1, 0], (len(x_music), 1))
    y_speech = np.tile([0, 1], (len(x_speech), 1))
    Y = np.concatenate((y_music, y_speech))

    # Build the mixed dataset and its onset/offset labels
    x, y = create_Data(X, Y)