In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat
from scipy import optimize
%matplotlib inline
from sklearn.model_selection import train_test_split

# Load the digit-recognizer CSVs; both paths are relative to the working directory.
# `train` carries a 'label' column that later cells read; `test` is only used
# through its raw values.
train = pd.read_csv('data/digit-recognizer/train.csv')
test = pd.read_csv('data/digit-recognizer/test.csv')

In [None]:
# Preview the first rows of the training frame.
train.head()

## Data Preprocessing

In [None]:
# Separate the 'label' column from the remaining feature columns and divide
# both feature matrices by 255.
y_train = train['label'].to_numpy()
X_train = train.drop('label', axis=1).to_numpy() / 255
X_test = test.to_numpy() / 255

In [None]:
# Class ids 0-9; comparing labels against this vector yields one-hot rows.
range_class = np.arange(10)

# Column 0 is taken as the label and the remaining columns as features
# (assumes 'label' is the first column of the CSV -- TODO confirm).
# np.asfarray was removed in NumPy 2.0; asarray with an explicit float dtype
# produces the same float64 array on every NumPy version.
y = np.asarray(train.iloc[:, 0], dtype=np.float64)
train_x = train.iloc[:, 1:].values

# Hold out 20% of the labelled rows; the fixed random_state keeps the split reproducible.
train_x, test_x, train_y, test_y = train_test_split(train_x, y, test_size=0.2, random_state=42)

# One-hot targets of shape (n_samples, 10). The `np.float` alias was removed in
# NumPy 1.24, so use np.float64; broadcasting replaces the per-label Python loop.
test_ch_y = (test_y[:, np.newaxis] == range_class).astype(np.float64)
train_ch_y = (train_y[:, np.newaxis] == range_class).astype(np.float64)

In [None]:
# Test-set values as a NumPy array, without the /255 scaling applied to X_test.
x_test = test.values

In [None]:
from sklearn.preprocessing import OneHotEncoder

# Encode the labels against a fixed 0-9 category list so the output always
# has ten columns, whichever digits are present.
ohe = OneHotEncoder(categories=[range(10)])
# The encoder expects a 2-D column; densify the sparse result.
y_ohc = ohe.fit_transform(y_train[:, np.newaxis]).toarray()
# Show the first three encodings next to their labels.
y_ohc[:3], y_train[:3]

In [None]:
# Show ten randomly chosen training digits in a 2x5 grid, titled with their labels.
fig, axes = plt.subplots(2, 5, figsize=(12, 5))
axes = axes.flatten()
# Draw indices from the actual number of rows instead of a hard-coded 42000, so
# the cell keeps working if the training set is subsampled or replaced.
idx = np.random.randint(0, X_train.shape[0], size=10)
for i in range(10):
    axes[i].imshow(X_train[idx[i], :].reshape(28, 28), cmap='gray')
    axes[i].axis('off')  # hide the axes ticks
    axes[i].set_title(str(int(y_train[idx[i]])), color='black', fontsize=25)
plt.show()

In [None]:
print("----Traininig data----")
# Per-class sample counts; slot i holds the count for digit i.
y_value = np.zeros((1, 10))
for i in range(10):
    count = np.count_nonzero(y_train == i)
    print("Occurance of ", i, "=", count)
    # Index with i, not i-1: the old i-1 wrapped digit 0 into slot 9 and moved
    # every other digit down one slot, mislabelling the bar chart built from y_value.
    y_value[0, i] = count

In [None]:
# Bar chart of the per-class counts stored in y_value, one bar per digit.
y_value = y_value.ravel()
x_value = list(range(10))

plt.xlabel('label')
plt.ylabel('count')
plt.bar(x_value, y_value, width=0.7)

In [None]:
# Report the shapes of the train / hold-out split and the number of examples in each.
print('Train X : %s' % (train_x.shape,))
print('Train Y : %s\n' % (train_y.shape,))
print('Test X : %s' % (test_x.shape,))
print('Test Y : %s\n' % (test_y.shape,))
print('m = %d training examples' % len(train_y))
print('m = %d test examples' % len(test_x))

# Neural Network model

In [None]:
class NeuralNetwork(object):
    """Single-hidden-layer classifier: sigmoid hidden units, softmax output.

    Samples are laid out one per row: ``X`` is ``(m, input_size)`` and the
    one-hot targets / predicted probabilities are ``(m, output_size)``.
    The network has no bias terms.
    """

    def __init__(self, input_layer ,output_layer, hidden_layer):
        """Create the network with randomly initialised weights.

        Args:
            input_layer: number of input features.
            output_layer: number of output classes.
            hidden_layer: number of hidden units.

        Note the argument order: the output size comes before the hidden size.
        """
        self.input_size = input_layer # 28x28 pixels are flattened
        self.output_size = output_layer # 0-9 classes
        self.hidden_size = hidden_layer
        # Zero-mean normal init with std = fan_in ** -0.5 for each layer.
        self.w1 = np.random.normal(0.0, self.input_size**-0.5, (self.input_size,self.hidden_size))
        self.w2 = np.random.normal(0.0, self.hidden_size**-0.5, (self.hidden_size,self.output_size))

    def sigmoid(self,x):
        """Element-wise logistic function 1 / (1 + exp(-x))."""
        # exp(-x) overflows to inf for very negative x; the quotient is then
        # exactly 0, which is the correct limit, so only the warning is silenced.
        with np.errstate(over='ignore'):
            return 1/(1+np.exp(-x))

    def softmax(self,x):
        """Softmax over the last axis, so each row sums to 1.

        The previous version divided by the sum over the *whole* array, which
        is only correct for a single sample. The row maximum is subtracted
        before exponentiating to avoid overflow; this does not change the result.
        """
        x = np.asarray(x, dtype=float)
        shifted = x - np.max(x, axis=-1, keepdims=True)
        expA = np.exp(shifted)
        return expA / expA.sum(axis=-1, keepdims=True)

    def sigmoidPrime(self,x):
        """Derivative of the sigmoid, computed as s * (1 - s).

        The algebraically equal form exp(-x) / (1 + exp(-x))**2 evaluates to
        inf / inf = nan for very negative x; this form returns 0 there.
        """
        s = self.sigmoid(x)
        return s * (1 - s)

    def forward(self, X):
        """Run a forward pass and return the class probabilities.

        Stores the intermediate values ``z2``, ``a2`` and ``z3`` on the
        instance; ``backprop`` reads them.
        """
        self.z2 = np.dot(X, self.w1)
        self.a2 = self.sigmoid(self.z2)
        self.z3 = np.dot(self.a2, self.w2)
        y_hat = self.softmax(self.z3)
        return y_hat

    def cost(self, y_hat, y):
        """Mean categorical cross-entropy between predictions and one-hot targets.

        Args:
            y_hat: predicted probabilities, one sample per row.
            y: one-hot targets with the same shape as ``y_hat``.

        Returns:
            A scalar: ``-sum(y * log(y_hat)) / m`` with ``m`` the number of rows.

        The previous version took ``m`` from the class axis and used matrix
        products that yield a matrix rather than a scalar for row-major batches.
        """
        y_hat = np.atleast_2d(y_hat)
        y = np.atleast_2d(y)
        m = y_hat.shape[0]
        # Clip so that a probability of exactly 0 cannot produce log(0) = -inf.
        clipped = np.clip(y_hat, 1e-12, 1.0)
        return -np.sum(y * np.log(clipped)) / m

    def costFunction(self, X, y):
        """Half sum-of-squared-errors for ``X``, ``y`` using the stored weights.

        Stores the predictions in ``self.yHat``. Uses ``np.sum`` so the result
        is a scalar; the builtin ``sum`` only reduced over the first axis.
        """
        self.yHat = self.forward(X)
        J = 0.5*np.sum((y-self.yHat)**2)
        return J

    def backprop(self, X, y):
        """Gradients of the mean cross-entropy ``cost`` w.r.t. ``w1`` and ``w2``.

        Args:
            X: inputs, one sample per row.
            y: one-hot targets, one sample per row.

        Returns:
            ``(dJdw1, dJdw2)`` with the same shapes as ``w1`` and ``w2``.
            Predictions for ``X`` are stored in ``self.y_hat``.
        """
        X = np.atleast_2d(X)
        y = np.atleast_2d(y)
        self.y_hat = self.forward(X)
        m = X.shape[0]

        # Softmax output with cross-entropy loss: the error at z3 is simply
        # (y_hat - y); multiplying by sigmoidPrime(z3) was wrong because the
        # output layer is not a sigmoid.
        delta3 = (self.y_hat - y) / m
        dJdw2 = np.dot(self.a2.T, delta3)

        # The hidden layer is a sigmoid, so the chain rule needs its
        # derivative (sigmoidPrime), not the activation itself.
        delta2 = np.dot(delta3, self.w2.T) * self.sigmoidPrime(self.z2)
        dJdw1 = np.dot(X.T, delta2)
        return dJdw1, dJdw2

    def train(self,X,y,y_train,iterations,alpha):
        """Full-batch gradient descent on the cross-entropy cost.

        Args:
            X: inputs, one sample per row.
            y: one-hot targets matching ``X``.
            y_train: class indices matching ``X``, used only to track
                accuracy; pass ``None`` to skip accuracy tracking.
            iterations: number of gradient steps.
            alpha: learning rate.

        Returns:
            List with the cost measured before each update. The same list is
            kept in ``self.cost_history``; per-step accuracy is kept in
            ``self.accuracy_history``.
        """
        self.cost_history = []
        self.accuracy_history = []
        for _ in range(iterations):
            dJdw1, dJdw2 = self.backprop(X, y)
            # self.y_hat was produced with the weights from before this update.
            self.cost_history.append(float(self.cost(self.y_hat, y)))
            if y_train is not None:
                predicted = np.argmax(self.y_hat, axis=1)
                self.accuracy_history.append(float(np.mean(predicted == np.asarray(y_train))))
            self.w1 = self.w1 - alpha * dJdw1
            self.w2 = self.w2 - alpha * dJdw2
        return self.cost_history

In [None]:
# Keyword arguments make the (input, output, hidden) parameter order explicit:
# 784 inputs, 10 output classes, 128 hidden units.
nn = NeuralNetwork(input_layer=784, output_layer=10, hidden_layer=128)

In [None]:
# Forward pass over the whole scaled training matrix with the freshly
# initialised (untrained) weights.
y_hat = nn.forward(X_train)

In [None]:
# Compare the shapes of the predictions, the label vector and the one-hot targets.
y_hat.shape,y_train.shape,y_ohc.shape

In [None]:
#nn.train(X_train, y_ohc, y_train,10,0.01)