In [30]:
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import librosa
import skimage.measure
import librosa.display

In [19]:
def mnist_plca(trainX, z, iters=30, epochs=10, eps=0.00001, seed=None, verbose=True):
    """Learn PLCA basis vectors from a non-negative data matrix with EM.

    The transposed input ``V = trainX.T`` (features x frames) is approximated
    by ``p_f_z @ p_z``, where ``p_f_z`` (features x components) holds one basis
    vector per column and ``p_z`` (components x frames) holds the per-frame
    component weights. Both factors start from uniform random values and are
    refined by ``epochs * iters`` EM steps.

    The update equations are the same as the explicit posterior formulation
    (posterior = outer product of a basis column and a weight row, divided by
    the current reconstruction), but are evaluated with matrix products so the
    components x features x frames posterior tensor is never materialised.

    Parameters
    ----------
    trainX : array-like of shape (frames, features)
        Non-negative data, one observation per row.
    z : int
        Number of components (basis vectors) to learn.
    iters : int, default 30
        EM steps per epoch.
    epochs : int, default 10
        Number of epochs; the objective is reported once per epoch.
    eps : float, default 1e-5
        Added to every denominator and inside the logarithm to avoid division
        by zero and ``log(0)``.
    seed : int or None, default None
        ``None`` draws the initial factors from NumPy's global random state
        (so ``np.random.seed`` controls the result); an integer uses a private
        ``RandomState`` and makes the call reproducible on its own.
    verbose : bool, default True
        Print ``sum(V * log10(p_f_z @ p_z + eps))`` after every epoch.

    Returns
    -------
    numpy.ndarray of shape (features, z)
        The learned basis; each column is normalised to sum to (almost) one.
    """
    V = np.asarray(trainX, dtype=float).T
    f, t = V.shape

    # Same draw order as before (weights first, then bases) so a globally
    # seeded run starts from the same initial factors.
    rng = np.random if seed is None else np.random.RandomState(seed)
    p_z = rng.rand(z, t)
    p_f_z = rng.rand(f, z)

    for _ in range(epochs):
        for _ in range(iters):
            # E-step folded into the M-step: ratio[f, t] = V / reconstruction.
            ratio = V / (np.dot(p_f_z, p_z) + eps)

            # Unnormalised M-step numerators, both computed from the factors
            # of the previous step (the posterior is fixed during the M-step).
            z_num = p_z * np.dot(p_f_z.T, ratio)      # (z, t)
            f_num = p_f_z * np.dot(ratio, p_z.T)      # (f, z)

            # Weights are normalised over components for every frame, bases
            # over features for every component.
            p_z = z_num / (z_num.sum(axis=0, keepdims=True) + eps)
            p_f_z = f_num / (f_num.sum(axis=0, keepdims=True) + eps)
        if verbose:
            print(np.sum(V*np.log10(np.dot(p_f_z,p_z)+eps)))
    return p_f_z

In [58]:
# Length of audio analysed from the start of every recording, and the STFT hop
# and analysis-window sizes, all in seconds.
_CLIP_SECONDS = 10
_HOP_SECONDS = 0.01
_WIN_SECONDS = 0.025

# Half-open ranges of indices into `files` used for training: one range per
# class, listed in class-label order 0..3.
_TRAIN_FILE_RANGES = ((11, 15), (21, 25), (31, 35), (41, 45))

# (index into `files`, class label) of the held-out test recordings, in the
# order their frames are stacked.
_TEST_FILES = ((46, 3), (36, 2), (26, 1), (16, 0))


def _magnitude_frames(path):
    """Return the magnitude STFT of the first 10 s of ``path`` as (frames, bins).

    Recordings shorter than the clip length are zero-padded to it, so every
    recording loaded at the same sample rate yields the same number of frames.
    """
    audio, sr = librosa.load(path)
    n_samples = int(_CLIP_SECONDS * sr)
    hop = int(_HOP_SECONDS * sr)
    win = int(_WIN_SECONDS * sr)

    audio = audio[:n_samples]
    if len(audio) < n_samples:
        audio = np.pad(audio, (0, n_samples - len(audio)), mode="constant")

    # The window size goes in `win_length`. `window` expects a window
    # *specification*; a bare number there is not treated as a length (it is
    # handed to scipy.signal.get_window, which reads numbers as a Kaiser beta).
    spec = librosa.stft(audio, hop_length=hop, win_length=win)
    return np.abs(spec).T


def preprocessing(files):
    """Build the training matrix of magnitude-STFT frames.

    Parameters
    ----------
    files : sequence of str
        Audio file paths. Entries 11-14, 21-24, 31-34 and 41-44 are used, four
        recordings for each of the four classes.

    Returns
    -------
    numpy.ndarray, float64, shape (16 * frames, bins)
        Frames of all 16 recordings stacked row-wise, grouped by class in label
        order. The shape is taken from the STFT output; the previous
        hard-coded allocation assumed 1003 frames of 1025 bins per recording.
    """
    blocks = [
        _magnitude_frames(files[i])
        for start, stop in _TRAIN_FILE_RANGES
        for i in range(start, stop)
    ]
    return np.vstack(blocks).astype(np.float64)


def preprocessing_test(files):
    """Build the test matrix and its labels.

    Uses the same feature extraction as ``preprocessing`` so that training and
    test frames are comparable.

    Parameters
    ----------
    files : sequence of str
        Audio file paths. Entries 46, 36, 26 and 16 are used, in that order.

    Returns
    -------
    X : numpy.ndarray, float64, shape (4 * frames, bins)
        Frames of the four test recordings stacked row-wise.
    y : numpy.ndarray, float64, shape (4,)
        Class label of each test recording: ``[3, 2, 1, 0]``.
    """
    blocks = [_magnitude_frames(files[i]) for i, _ in _TEST_FILES]
    y = np.array([label for _, label in _TEST_FILES], dtype=np.float64)
    return np.vstack(blocks).astype(np.float64), y

In [62]:
def plca_classify(X,X_test,y_test,z):
    """Classify test recordings with per-class PLCA dictionaries and print the results.

    A dictionary of ``z`` basis vectors is learned for each of the four classes
    with ``mnist_plca`` and the dictionaries are concatenated column-wise. Test
    frames are projected onto the joint dictionary with its pseudo-inverse;
    every frame is assigned to the class whose ``z`` activations have the
    largest sum, and every test recording to the class most of its frames
    voted for.

    Parameters
    ----------
    X : numpy.ndarray of shape (t, f)
        Training frames. Assumed to hold the same number of frames for each
        class, stacked in class-label order (``t`` must be divisible by 4).
    X_test : numpy.ndarray of shape (t1, f)
        Test frames. Assumed to hold the same number of consecutive frames for
        each test recording, in the order of ``y_test``.
    y_test : numpy.ndarray of shape (n_examples,)
        True class label (0..3) of each test recording.
    z : int
        Number of PLCA components per class.

    Returns
    -------
    None
        Predictions, accuracy and the confusion matrix (rows: true class,
        columns: predicted class) are printed.

    Raises
    ------
    ValueError
        If the frames cannot be split evenly between classes / test recordings.
    """
    num_classes = 4

    t, f = np.shape(X)
    if t % num_classes:
        raise ValueError("X must contain the same number of frames for each of the %d classes" % num_classes)
    frames_per_class = t // num_classes

    # One dictionary per class, stored side by side: columns i*z .. (i+1)*z-1
    # belong to class i.
    Pfz = np.zeros((f, z*num_classes))
    for i in range(num_classes):
        class_rows = slice(i*frames_per_class, (i+1)*frames_per_class)
        Pfz[:, i*z : (i+1)*z] = mnist_plca(X[class_rows], z)
        print("plca done for class "+str(i+1))

    t1, f = np.shape(X_test)
    t_ex = y_test.shape[0]
    if t_ex == 0 or t1 % t_ex:
        raise ValueError("X_test must contain the same number of frames for each test example in y_test")
    frames_per_example = t1 // t_ex

    # Least-squares activations of every test frame: (z*num_classes, t1).
    Pzt = np.linalg.pinv(Pfz).dot(X_test.T)

    # Sum the z activations of each class and pick the strongest class per
    # frame. A plain argmax is used (rather than "largest value above zero")
    # because the pseudo-inverse can yield negative activations; a frame whose
    # class sums are all <= 0 must still go to its largest class instead of
    # falling through to a default.
    class_energy = Pzt.reshape(num_classes, z, t1).sum(axis=1)
    frame_label = np.argmax(class_energy, axis=0)

    # Majority vote over the frames of each test recording.
    y_pred=[]
    for i in range(t_ex):
        example_frames = frame_label[i*frames_per_example:(i+1)*frames_per_example]
        votes = np.bincount(example_frames, minlength=num_classes)
        y_pred.append(np.argmax(votes))
    print(y_pred,y_test)

    score = 0
    CM = np.zeros((num_classes,num_classes))
    for i in range(t_ex):
        CM[int(y_test[i])][int(y_pred[i])]+=1
        if(y_pred[i]==y_test[i]):
            score+=1
    print("accuracy = ",score/t_ex)
    print(" ")
    print("confusion matrix")
    print(CM)

In [None]:
# Run configuration.
DATA_DIR = "./170333"   # directory searched for the .wav recordings
N_COMPONENTS = 2        # PLCA components learned per class
SEED = 0                # fixes the random PLCA initialisation
MIN_FILES = 47          # preprocessing_test reads files[46]

# mnist_plca draws its initial factors from NumPy's global random state, so
# seeding it here makes the whole run repeatable.
np.random.seed(SEED)

files = librosa.util.find_files(DATA_DIR, ext='wav')
if len(files) < MIN_FILES:
    # Fail here with a clear message instead of an IndexError inside preprocessing.
    raise FileNotFoundError(
        "expected at least %d .wav files under %r, found %d" % (MIN_FILES, DATA_DIR, len(files))
    )

X = preprocessing(files)
print("train imported")
X_test, y_test = preprocessing_test(files)
print("test imported")
plca_classify(X, X_test, y_test, N_COMPONENTS)