In [63]:
import numpy as np
import struct
import sys

def read_file(filename):
    with open(filename,'rb') as fp:
        zero, data_type, dims = struct.unpack('>HBB', fp.read(4))
        shape = tuple(struct.unpack('>I', fp.read(4))[0] for d in range(dims))
        np_array = np.frombuffer(fp.read(), dtype=np.uint8).reshape(shape)
    return np_array

def preprocess(image_file, label_file):
    images = read_file(image_file)
    labels = read_file(label_file)
    if (len(labels) > 10000):
        labels = labels[:10000]
        images = images[:10000]    
    images = images/255.0
    images = images.reshape( (10000, 784))

    labels = labels.reshape(-1,1)
    data = np.concatenate((images, labels), axis=1)
    np.random.shuffle(data)
    return data

def get_features_labels(data, bias):
    examples = data[:,:-1]
    labels = data[:,-1]
    examples = np.append(examples, bias, 1)
    return examples, labels

def get_true_label(digit, perceptron_type):
    if digit == perceptron_type:
        return 1
    return 0

In [56]:
def inference(test_data, weights):
    data_size = len(test_data)
    bias = np.ones((data_size,1))
    examples, labels = get_features_labels(test_data, bias)
    prediction = np.ones(data_size, dtype = int)
    correct = 0
    for i, example in enumerate(examples):
        activation_values = sigmoid(np.sum(weights*example, axis = 1))
        prediction[i] = np.argmax(activation_values)
        if prediction[i] == labels[i]:
            correct += 1
    accuracy = correct*1.0/data_size*100
    return prediction, labels, accuracy

In [20]:
from numpy import linalg as LA
def sigmoid(value):
    return 1/(1+np.exp(-value))

In [66]:
path = "../hw2/DATA_FOLDER/"

train_data = preprocess(path + '/train-images.idx3-ubyte', path + '/train-labels.idx1-ubyte')
# print train_data[1]
test_data = preprocess(path + '/t10k-images.idx3-ubyte', path + '/t10k-labels.idx1-ubyte')



In [117]:
a=np.array([2,3])
b=np.array([[2,2,0],[1,0,1]])
print b.shape
np.dot(a,b)

(2, 3)


array([7, 4, 3])

In [118]:
def propagate(w, X, Y):

    m = X.shape[1]

    A = sigmoid(np.dot(w,X.T)) 
    print A.shape

    cost = -1/m * np.sum(Y * np.log(A) + (1-Y) * (np.log(1-A)))

    dz= (1/m)*(A - Y)
    dw = np.dot(dz,X)
    print dw.shape

    cost = np.squeeze(cost)
    grads = {"dw": dw}

    return grads, cost

In [122]:
def train_gd(train_data, num_epoches, learning_rate, lamda, test_data):
    data_size = len(train_data)
    weights = np.random.uniform(0,0.1,[10,785])
#     weights = np.zeros((10,785))
    bias = np.ones((data_size,1))
    for epoch in range(num_epoches):
        np.random.shuffle(train_data)
        examples, labels = get_features_labels(train_data, bias)
        delta_weights = np.zeros((10,785))
#         Y = np.zeros((10,data_size))
#         for i in range(data_size):
#             Y[i]
            
        
        for i,example in enumerate(examples):
#             print example.shape
#             print weights.shape
            z = np.sum(weights*example, axis = 1)
#             print z
            y_pred = sigmoid(z)
#             print y_pred
#             label = np.zeros()
#             print "label is ", labels[i]
            for j in range(0,10):
                label = get_true_label(labels[i], j)
                # if y_pred[j]*label < 0:
                delta_weights[j] += learning_rate*(y_pred[j]-label)*example
#             print delta_weights[0]
#             print delta_weights[1]

        weights -= delta_weights - lamda*weights
#         print "norm of weights is \n"
#         print LA.norm(weights, axis = 1)
        # cost = -1/examples.shape[1] * np.sum(  )
        train_prediction, train_labels, train_accuracy = inference(train_data[:train_data_size], weights)
        test_prediction, test_labels, test_accuracy = inference(test_data, weights)
        print "train accuracy is ", train_accuracy, " test accuracy is ", test_accuracy
    
    return weights  

In [None]:
train_data_size = 10000
epochs = 150
learning_rate = 0.01
lamda = 0.01
gd_weights = train_gd(train_data[:10000], epochs, learning_rate, lamda, test_data)

  This is separate from the ipykernel package so we can avoid doing imports until


train accuracy is  10.01  test accuracy is  9.8
train accuracy is  10.02  test accuracy is  9.8
train accuracy is  10.11  test accuracy is  9.95
train accuracy is  11.37  test accuracy is  11.25
train accuracy is  17.61  test accuracy is  17.44
train accuracy is  39.06  test accuracy is  37.6
train accuracy is  65.72  test accuracy is  64.46
train accuracy is  74.27  test accuracy is  73.4
train accuracy is  75.05  test accuracy is  74.81
train accuracy is  76.94  test accuracy is  76.25
train accuracy is  79.76  test accuracy is  79.11
train accuracy is  79.68  test accuracy is  78.91
train accuracy is  81.57  test accuracy is  80.91
train accuracy is  80.69  test accuracy is  79.93
train accuracy is  82.59  test accuracy is  81.88
train accuracy is  81.59  test accuracy is  80.67
train accuracy is  83.63  test accuracy is  82.63
train accuracy is  82.39  test accuracy is  81.53
train accuracy is  84.27  test accuracy is  83.36
train accuracy is  83.02  test accuracy is  82.45
train a

In [71]:
a= np.zeros((10,1))
sigmoid(a)

array([[ 0.5],
       [ 0.5],
       [ 0.5],
       [ 0.5],
       [ 0.5],
       [ 0.5],
       [ 0.5],
       [ 0.5],
       [ 0.5],
       [ 0.5]])