In [1]:
import numpy as np
import pandas as pd
import math
from sklearn.model_selection import train_test_split

In [2]:
class DeepNeuralNetwork:
    L = 3
    W = None
    b = None
    n_output_neurons = 0
    def __init__(self, n_input_features, n_output_neurons, n_hidden_layers):
        self.n_output_neurons = n_output_neurons
        self.L = len(n_hidden_layers) + 1
        self.W = list()
        self.b = list()
        n_hidden_layers.append(n_output_neurons)
        n = [n_input_features]+n_hidden_layers
        for i in range(0,self.L):
            self.W.append(np.random.rand(n[i],n[i+1]))
            self.b.append(np.random.rand(1,n[i+1]))
            
    def g(self, z):
        sig = 1.0/(1.0+np.exp(-np.float64(z)))
        s = sig.shape
        if s:
            for i in range(0,sig.shape[0]):
                for j in range(0,sig.shape[1]):
                    if sig[i][j] >= 1.0:
                        sig[i][j] = 0.99999999999999994
        else:
            if sig >= 1.0:
                sig = 0.99999999999999994
        return sig
    
    def softmax(self,a):
        o_list = list()
        for i in range(0,a.shape[0]):
            a_sum = 0.0
            y_list = list()
            for j in range(0,a.shape[1]):
                a_sum += np.exp(a[i][j])
            for j in range(0,a.shape[1]):
                s = np.exp(a[i][j])/a_sum
                y_list.append(s)
            o_list.append(y_list)
        return np.array(o_list)
    
    def compute_cost(self,X,Y):
        h = X
        for i in range(0,self.L-1):
            a = np.dot(h, self.W[i]) + self.b[i]
            h = self.g(a)
            
        a = np.dot(h, self.W[self.L-1]) + self.b[self.L-1]
        O = self.softmax(a)
        total_cost = 0.0
        y = Y.values
        m = Y.shape[0]
        for i in range(0,m):
            pred = O[i][y[i][0]]
            total_cost += -1*np.log(pred)
        total_cost = total_cost/float(m)
        return total_cost
    
    def propagate(self,X,Y):
        h = [None]*(self.L)
        
        #initialize h0 to X
        h[0] = X
        
        #forward propagation
        for i in range(0,self.L-1):
            a = np.dot(h[i], self.W[i]) + self.b[i]
            h[i+1] = self.g(a)
        
        a = np.dot(h[self.L-1], self.W[self.L-1]) + self.b[self.L-1]
        O = self.softmax(a)
        
        one_hot_y = np.zeros((1,self.n_output_neurons))
        np.put(one_hot_y, Y[0][0],1)
    
        #back propagation
        dW = list()
        db = list()
        dLda = -1*(one_hot_y - O) 
        for i in range(self.L-1, -1,-1):
            
            dLdW = np.dot(h[i].T,dLda)
            dLdb = dLda
            dW = [dLdW]+dW
            db = [dLdb]+db
            
            dLdh = np.dot(dLda,self.W[i].T)
            dLda = np.multiply(dLdh,np.multiply(h[i],(1.0-h[i])))
            
        return (dW,db)
            
    def fit(self,X,Y):
        for e in range(0,100):
            dW = list()
            db = list()
            m_w = list()
            m_b = list()
            v_w = list()
            v_b = list()
            for j in range(0,self.L):
                dW.append(np.zeros(self.W[j].shape))
                m_w.append(np.zeros(self.W[j].shape))
                v_w.append(np.zeros(self.W[j].shape))
                db.append(np.zeros(self.b[j].shape))
                m_b.append(np.zeros(self.b[j].shape))
                v_b.append(np.zeros(self.b[j].shape))
                
            i = 0
            alpha = 0.01
            eps, beta1, beta2 = 1e-8,0.9,0.999
            for index,row in X.iterrows():
                x = row.values.reshape((1,X.shape[1]))
                y = Y.iloc[i].values.reshape((1,1))
                dLdW,dLdb = self.propagate(x,y)
                for j in range(0,self.L):
                    dW[j] += dLdW[j]
                    db[j] += dLdb[j]
                if (i+1)%2000==0:
                    for j in range(0,self.L):
                        m_w[j] = beta1*m_w[j] + (1-beta1)*dW[j]
                        m_b[j] = beta1*m_b[j] + (1-beta1)*db[j]
                        v_w[j] = beta2*v_w[j] + (1-beta2)*np.square(dW[j])
                        v_b[j] = beta2*v_b[j] + (1-beta2)*np.square(db[j])
                        bi1 = math.pow(beta1,e+1)
                        bi2 = math.pow(beta2,e+1)
                        m_w[j] = m_w[j]/(1-bi1)
                        m_b[j] = m_b[j]/(1-bi1)
                        v_w[j] = v_w[j]/(1-bi2)
                        v_b[j] = v_b[j]/(1-bi2)
                        self.W[j] = self.W[j] - np.multiply((alpha/np.sqrt(v_w[j]+eps)), m_w[j])
                        self.b[j] = self.b[j] - np.multiply((alpha/np.sqrt(v_b[j]+eps)), m_b[j])
                i+=1  
                
            print 'Cost after '+str(e+1)+' epochs : '+str(self.compute_cost(X,Y))
                
    def predict(self,X):
        h = X
        m = len(X)
        for i in range(0,self.L-1):
            a = np.dot(h, self.W[i]) + self.b[i]
            h = self.g(a)
        a = np.dot(h, self.W[self.L-1]) + self.b[self.L-1]
        O = self.softmax(a)
        pred = list()
        for i in range(0,O.shape[0]):
            max_val = 0.0
            max_index = 0
            print O[i]
            for j in range(0,O.shape[1]):
                if O[i][j] > max_val:
                    max_val = O[i][j]
                    max_index = j
            pred.append(max_index)
        return pred
        
        
    def compute_accuracy(self, y_pred, y_actual):
        y_pred_list = y_pred
        y_actual_list = list(y_actual.values)
        hits = 0
        for i in range(0,len(y_pred_list)):
            if y_pred_list[i] == y_actual_list[i]:
                hits += 1
        return float(hits)/float(len(y_pred_list))

In [3]:
data = pd.read_csv('apparel_train.csv')

In [4]:
data.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
input_cols = list()
for i in range(1,783):
    input_cols.append('pixel'+str(i))
X_train, X_test, y_train, y_test = train_test_split(
    data[input_cols],
    data[['label']],
    test_size=0.2,
    random_state=0)

for col in X_train:
    mean = X_train[col].mean()
    std = X_train[col].std()
    X_train[col] = (X_train[col] - mean)/std
    X_test[col] = (X_test[col]-mean)/std
X_train.head()

Unnamed: 0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,...,pixel773,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782
5778,-0.010252,-0.022348,-0.028649,-0.040263,-0.056475,-0.070029,-0.098771,-0.156892,-0.238681,-0.377195,...,-0.728532,-0.702415,-0.59892,-0.476158,-0.395034,-0.406556,-0.439348,-0.396319,-0.288121,-0.158791
5287,-0.010252,-0.022348,-0.028649,-0.040263,-0.056475,-0.070029,-0.098771,-0.156892,-0.238681,-0.351013,...,-0.684884,-0.702415,-0.59892,-0.476158,-0.395034,-0.406556,-0.439348,-0.396319,-0.288121,-0.158791
57167,-0.010252,-0.022348,-0.028649,-0.040263,-0.056475,-0.070029,0.021215,-0.017662,-0.197,-0.377195,...,1.948562,1.92596,2.395477,3.108967,-0.015538,-0.406556,-0.343004,-0.396319,-0.288121,-0.158791
29516,-0.010252,-0.022348,-0.028649,-0.040263,-0.056475,-0.070029,-0.098771,0.887336,4.221257,-0.377195,...,0.9883,0.634562,0.39341,0.05649,-0.395034,-0.406556,-0.420079,-0.396319,-0.288121,-0.158791
58610,-0.010252,-0.022348,-0.028649,-0.040263,-0.056475,-0.070029,-0.098771,-0.156892,-0.238681,0.277353,...,1.846716,1.758838,1.699106,-0.476158,-0.395034,-0.406556,-0.439348,-0.396319,-0.288121,-0.158791


In [6]:
dnn = DeepNeuralNetwork(782,10,[3,3])
dnn.fit(X_train,y_train)

Cost after 1 epochs : 2.3996387437893705
Cost after 2 epochs : 2.387112171005592
Cost after 3 epochs : 2.377678582771072
Cost after 4 epochs : 2.369503254355253
Cost after 5 epochs : 2.3624254328158023
Cost after 6 epochs : 2.355860836460422
Cost after 7 epochs : 2.349926767375165
Cost after 8 epochs : 2.3443674536669072
Cost after 9 epochs : 2.339159155009215
Cost after 10 epochs : 2.3340416502744064
Cost after 11 epochs : 2.329372488111073
Cost after 12 epochs : 2.324908735706197
Cost after 13 epochs : 2.3206261029804773
Cost after 14 epochs : 2.316503594828022
Cost after 15 epochs : 2.312598620325462
Cost after 16 epochs : 2.308886202649392
Cost after 17 epochs : 2.305240136520468
Cost after 18 epochs : 2.301684850126872
Cost after 19 epochs : 2.298246025904123
Cost after 20 epochs : 2.2949191520156917
Cost after 21 epochs : 2.291869299017593
Cost after 22 epochs : 2.288788150240907
Cost after 23 epochs : 2.286015051385496
Cost after 24 epochs : 2.2832455841548516
Cost after 25 epoc

In [8]:
def predict(dnn,X):
    y_pred = list()
    for index,row in X.iterrows():
        h = row
        for i in range(0,dnn.L-1):
            a = np.dot(h,dnn.W[i]) + dnn.b[i]
            h = dnn.g(a)
        a = np.dot(h,dnn.W[dnn.L-1]) + dnn.b[dnn.L-1]
#         print a
        O = dnn.softmax(a)
        max_val = 0.0
        max_index = 0
        for i in range(0,O.shape[1]):
            if O[0][i] > max_val:
                max_val = O[0][i]
                max_index = i
        y_pred.append(max_index)
    return y_pred
        
print predict(dnn,X_train)

[1, 8, 8, 1, 1, 8, 8, 8, 1, 8, 8, 1, 8, 8, 1, 1, 1, 8, 8, 1, 8, 1, 8, 8, 8, 8, 1, 8, 1, 8, 1, 8, 8, 8, 1, 8, 1, 8, 1, 1, 1, 8, 8, 1, 1, 1, 1, 1, 8, 8, 8, 1, 8, 8, 1, 1, 8, 1, 8, 8, 1, 8, 1, 8, 8, 1, 8, 8, 8, 8, 1, 1, 8, 1, 8, 1, 8, 8, 1, 1, 0, 8, 1, 8, 1, 1, 8, 8, 8, 8, 1, 1, 1, 8, 8, 1, 8, 8, 1, 8, 1, 1, 1, 1, 8, 8, 1, 1, 1, 8, 1, 8, 8, 8, 8, 8, 1, 8, 1, 1, 8, 1, 1, 1, 1, 1, 8, 1, 1, 8, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8, 1, 1, 8, 1, 1, 1, 8, 8, 1, 8, 8, 8, 8, 1, 0, 8, 1, 1, 1, 1, 1, 1, 8, 8, 1, 0, 8, 8, 8, 8, 8, 1, 1, 1, 1, 8, 1, 8, 1, 1, 8, 1, 8, 1, 1, 1, 8, 1, 1, 1, 1, 8, 1, 1, 8, 8, 8, 1, 1, 0, 1, 8, 1, 0, 1, 1, 1, 8, 1, 1, 1, 1, 1, 0, 8, 8, 1, 1, 8, 1, 8, 1, 8, 1, 1, 1, 1, 1, 8, 8, 8, 8, 1, 8, 8, 1, 1, 1, 1, 8, 8, 8, 1, 8, 8, 8, 8, 8, 1, 1, 1, 8, 1, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1, 8, 8, 1, 8, 8, 1, 8, 1, 8, 8, 8, 1, 8, 1, 1, 1, 1, 8, 1, 1, 1, 1, 1, 1, 8, 1, 1, 1, 8, 1, 1, 8, 8, 1, 8, 8, 1, 1, 8, 8, 1, 1, 1, 1, 8, 8, 1, 1, 8, 1, 1, 1, 1, 1, 8, 1, 8, 1, 1, 8, 1, 0, 1, 1, 1, 1, 1, 

In [None]:
print y_train['label']