In [8]:
import pandas as pd
import numpy as np

In [9]:
#normalization
def normalize_(X, mean, std):
    """Standardize X using statistics that were computed beforehand.

    Computes (X - mean) / std element-wise; broadcasting between the
    operands follows the usual NumPy rules.

    Args:
        X: values to standardize.
        mean: value(s) subtracted from X.
        std: value(s) the centered data is divided by.

    Returns:
        The standardized values.
    """
    centered = X - mean
    return centered / std

def normalize(X):
    """Standardize the columns of X and return the statistics used.

    The mean and (population) standard deviation are taken along axis 0.
    Any standard deviation below 0.001 is replaced by 1 so that
    (near-)constant columns are not divided by a value close to zero.

    Args:
        X: data whose axis 0 indexes the samples.

    Returns:
        A tuple (X_normalized, mean, std), where std is the adjusted
        deviation that was actually used for the division.
    """
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    near_constant = sigma < 0.001
    sigma[near_constant] = 1
    return (X - mu) / sigma, mu, sigma

def prepare_X(X):
    """Prepend a column of ones to X.

    Args:
        X: data with X.shape[0] rows.

    Returns:
        A new array whose first column is all ones, followed by the
        columns of X.
    """
    bias_column = np.ones((X.shape[0], 1))
    return np.column_stack((bias_column, X))

def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), applied element-wise.

    Args:
        z: scalar or array (the original note describes it as a column
            vector).

    Returns:
        The logistic function evaluated at z.
    """
    negated = -1 * z
    denominator = 1 + np.exp(negated)
    return 1 / denominator

def h(X, theta):
    """Logistic-regression hypothesis: sigmoid(X . theta).

    Args:
        X: matrix of inputs; must provide a ``dot`` method.
        theta: parameter vector.

    Returns:
        The sigmoid of the product X.dot(theta).
    """
    return sigmoid(X.dot(theta))

# Cost function (penalty function)
def cost_function(X, y, theta):
    """Mean binary cross-entropy of the logistic model on (X, y).

    J = -(1/m) * sum(y * log(p) + (1 - y) * log(1 - p)), with p = h(X, theta)
    and m = number of rows of X.

    Args:
        X: design matrix (rows are samples).
        y: target values, expected to be 0/1 and shaped like h(X, theta).
        theta: parameter vector.

    Returns:
        The cost J (scalar for 1-D y, one value per column otherwise),
        or None when X has no rows.
    """
    m = X.shape[0]
    if m == 0:
        return None
    p = h(X, theta)
    # The minus sign applies to BOTH log terms of the cross-entropy.
    log_likelihood = y * np.log(p) + (1 - y) * np.log(1 - p)
    # Reduce over the sample axis only, so a column-vector y still yields
    # one value per column rather than being flattened.
    J = -np.sum(log_likelihood, axis=0) / m
    return J

def derivative_theta(X, y, theta):
    """Gradient of the logistic cost with respect to theta.

    Computes X^T . (h(X, theta) - y) / m, where m is the number of rows
    of X. Only the arguments passed in are used; no notebook-level
    variables are read.

    Args:
        X: design matrix (rows are samples).
        y: target values shaped like h(X, theta).
        theta: parameter vector.

    Returns:
        The gradient, or None when X has no rows.
    """
    m = X.shape[0]
    if m == 0:
        return None
    residual = h(X, theta) - y
    d_theta = X.T.dot(residual) / m
    return d_theta

# Gradient descent
def gradient_descent(X, y, theta, alpha, epsilon, num_iters, print_J = True):
    """Minimize the logistic cost by batch gradient descent.

    The caller's ``theta`` is NOT modified: the updates are applied to a
    floating-point copy, so the same initial vector can safely be reused
    for several independent runs.

    Args:
        X: design matrix (rows are samples).
        y: target values.
        theta: initial parameter vector.
        alpha: learning rate (step size).
        epsilon: stop once the cost changes by less than this between two
            consecutive iterations.
        num_iters: maximum number of update steps.
        print_J: when truthy, print the initial cost and then the cost at
            every 1000th iteration.

    Returns:
        A tuple (theta, J_history): the fitted parameters and the list of
        cost values (initial cost first, then one entry per iteration).
    """
    # Work on a private copy; promote non-float input so the update below
    # does not fail on an integer-typed start vector.
    theta = np.array(theta)
    if not np.issubdtype(theta.dtype, np.floating):
        theta = theta.astype(float)

    J_history = []
    J = cost_function(X, y, theta)

    if print_J:
        print(J)
    J_history.append(J)

    # cost_function yields None for an empty design matrix; there is
    # nothing to optimize in that case.
    if J is None:
        return theta, J_history

    for i in range(num_iters):
        theta = theta - alpha * derivative_theta(X, y, theta)
        J = cost_function(X, y, theta)

        J_history.append(J)

        if i % 1000 == 0 and print_J:
            print(J)

        # Converged: the cost barely moved since the previous step.
        if abs(J - J_history[-2]) < epsilon:
            break

    return theta, J_history

#prediction method
def predict(X, mean, std, models,labels):
    """Predict a label for every row of X with one-vs-all models.

    X is standardized with the given mean/std, extended with a column of
    ones, and scored by each model. Each sample receives the label whose
    model gives the largest absolute score; on ties the earliest label
    wins.

    Args:
        X: raw (un-normalized) inputs, one sample per row.
        mean: value(s) subtracted from X during standardization.
        std: value(s) X is divided by during standardization.
        models: sequence of parameter vectors; models[i] scores labels[i].
            Models beyond len(labels) are ignored.
        labels: sequence of labels, parallel to ``models``.

    Returns:
        A list with one predicted label per sample.

    Raises:
        IndexError: if ``labels`` is empty or there are fewer models than
            labels.
    """
    n_labels = len(labels)
    if n_labels == 0 or len(models) < n_labels:
        raise IndexError(
            "predict needs one model per label: got %d model(s) for %d label(s)"
            % (len(models), n_labels)
        )

    features = prepare_X(normalize_(X, mean, std))

    # One column of scores per label. ravel() flattens the scores so both
    # 1-D and column-vector thetas produce a plain vector per model.
    scores = np.column_stack(
        [np.ravel(h(features, theta)) for theta in models[:n_labels]]
    )

    # argmax returns the first maximum, i.e. the earliest label on ties.
    best = np.argmax(np.abs(scores), axis=1)
    return [labels[i] for i in best]

In [10]:
def img_label_display(img,label,pred_label):
    """Draw a 28x28 grayscale image titled with its true and predicted label.

    Relies on a notebook-level ``plt`` (presumably matplotlib.pyplot,
    imported in a cell not shown here -- confirm).

    Args:
        img: sequence of pixel values that can be reshaped to 28x28.
        label: true label, shown in the title.
        pred_label: predicted label, shown in the title.
    """
    pixels = np.array(img)
    caption = "Label: " + str(label) + ",predicted: " + str(pred_label)
    plt.title(caption)
    # Fixed 0..255 intensity range so brightness is comparable across images.
    plt.imshow(pixels.reshape(28, 28), cmap='gray', vmin=0, vmax=255)

In [11]:
def img_display(array):
    """Draw one labelled record as a 28x28 grayscale image.

    The first element of ``array`` is used as the label and the remaining
    elements as pixel values. Relies on a notebook-level ``plt``
    (presumably matplotlib.pyplot, imported in a cell not shown here --
    confirm).

    Args:
        array: sequence whose first item is the label, followed by pixel
            values that can be reshaped to 28x28.
    """
    record = np.array(array)
    label = record[0]
    # Drop the leading label entry; what remains are the pixel values.
    pixels = np.delete(record, 0)
    plt.title("Label: " + str(label))
    plt.imshow(pixels.reshape(28, 28), cmap='gray', vmin=0, vmax=255)

In [13]:
# Load the training data from train.csv (path is relative to the working directory).
dataset = pd.read_csv('train.csv');
# Display per-column summary statistics (count, mean, std, min, quartiles, max).
dataset.describe()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
count,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,...,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0,42000.0
mean,4.456643,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.219286,0.117095,0.059024,0.02019,0.017238,0.002857,0.0,0.0,0.0,0.0
std,2.88773,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6.31289,4.633819,3.274488,1.75987,1.894498,0.414264,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,254.0,254.0,253.0,253.0,254.0,62.0,0.0,0.0,0.0,0.0
