<a href="https://colab.research.google.com/github/seodalzzz/Machine_Learning_implementation/blob/main/one_layer_logistic_regression_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader

import numpy as np
import matplotlib.pyplot as plt


In [None]:
path = './datasets/'

# ToTensor keeps every image as a 1x28x28 tensor; flattening to 784 features
# is done later, when the batches are converted to numpy arrays for training.
transform = transforms.Compose([transforms.ToTensor()])
train_data = MNIST(root=path, train=True, transform=transform, download=True)
test_data = MNIST(root=path, train=False, transform=transform, download=True)

# Turn both splits into a binary problem by keeping only the digits 0 and 1.
# `targets` and `data` are filtered with the same mask so they stay aligned.
for dataset in (train_data, test_data):
    idx = (dataset.targets == 0) | (dataset.targets == 1)
    dataset.targets = dataset.targets[idx]
    dataset.data = dataset.data[idx]

batch_size = 85

# Training batches are shuffled; the test split is served as one un-shuffled
# batch that contains every remaining test sample.
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=len(test_data), shuffle=False)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./datasets/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 102432748.46it/s]


Extracting ./datasets/MNIST/raw/train-images-idx3-ubyte.gz to ./datasets/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./datasets/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 41944492.32it/s]


Extracting ./datasets/MNIST/raw/train-labels-idx1-ubyte.gz to ./datasets/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./datasets/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 28974608.68it/s]


Extracting ./datasets/MNIST/raw/t10k-images-idx3-ubyte.gz to ./datasets/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./datasets/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 13725164.82it/s]


Extracting ./datasets/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./datasets/MNIST/raw



In [None]:

# Seed a dedicated generator so the initial parameters are identical on every
# run (e.g. after Restart Kernel -> Run All).
SEED = 0
rng = np.random.default_rng(SEED)

# One weight per pixel of a flattened 1*28*28 image (784 inputs -> 1 output)
# plus a single bias, both drawn from a standard normal distribution.
w = rng.standard_normal((784,1))
b = rng.standard_normal((1,1))

# learning rate of the gradient-descent update
eta = 1e-4
# small constant added inside log() so that log(0) is never evaluated
delta = 1e-10


In [None]:
def sigmoid(val):
    """Element-wise logistic function 1 / (1 + exp(-val)).

    The value is computed from exp(-|val|), which never exceeds 1, so inputs
    of very large magnitude saturate to 0 or 1 without overflowing.

    Parameters
    ----------
    val : scalar or array-like
        Input value(s).

    Returns
    -------
    Scalar or ndarray with the same shape as `val`, with values in [0, 1].
    """
    val = np.asarray(val)
    # exp(-|val|) lies in (0, 1], so it cannot overflow for any input
    decay = np.exp(-np.abs(val))
    # val >= 0: 1 / (1 + e^-val)      val < 0: e^val / (1 + e^val)
    # (two algebraically equal forms of the same function)
    result = np.where(val >= 0, 1 / (1 + decay), decay / (1 + decay))
    # [()] hands back a scalar for scalar input and the array itself otherwise
    return result[()]

def grad_sigmoid(val):
    """Derivative of the sigmoid at `val`: s * (1 - s), where s = sigmoid(val)."""
    activation = sigmoid(val)
    return activation * (1 - activation)

def compute_loss_and_grad(data_instance,params):
    """Per-sample loss, gradients and hit count for one batch.

    Parameters
    ----------
    data_instance : tuple (x, y)
        x holds one flattened input per row (the training loop passes shape
        (batch_size, 784)); y holds the 0/1 labels as a column of shape
        (batch_size, 1).
    params : tuple (w, b)
        Weight column of shape (n_features, 1) and the bias (shape (1, 1) in
        this notebook).

    Returns
    -------
    loss : ndarray, shape (batch_size, 1)
        Binary cross-entropy of every sample. The module-level `delta` is
        added inside both logarithms so log(0) is never evaluated.
    (grad_w, grad_b) : tuple of ndarray
        Per-sample gradients with shapes (batch_size, n_features) and
        (batch_size, 1); they are NOT averaged over the batch here.
    hit : ndarray, shape (1,)
        Number of samples whose rounded probability equals the label.
    """
    x, y = data_instance
    w, b = params

    # (batch, n_features) @ (n_features, 1) -> (batch, 1); the bias broadcasts
    # over the rows, so it does not have to be tiled to the batch size
    linear = x @ w + b
    p = sigmoid(linear)

    loss = -y*np.log(p+delta) - (1-y)*np.log(1-p+delta)

    # derivative of the loss w.r.t. `linear`; algebraically equal to p - y
    grad = -y*(1-p) + (1-y)*p
    grad_w = grad * x   # chain rule: d(linear)/dw is x
    grad_b = grad       # chain rule: d(linear)/db is 1

    # vectorised count (the builtin sum would loop over the rows in Python and
    # return a plain int for an empty batch)
    hit = np.sum(y == np.round(p), axis=0)
    return loss, (grad_w, grad_b), hit

def update_parameters(params,grads):
    """Apply one gradient-descent step to the weights and the bias.

    `grads` holds per-sample gradients; each is averaged over the batch axis,
    reshaped to a column and scaled by the module-level learning rate `eta`.
    The arrays in `params` are updated in place and also returned.
    """
    weights, bias = params
    per_sample_gw, per_sample_gb = grads

    # batch-averaged gradients as column vectors
    step_w = np.mean(per_sample_gw, axis=0).reshape(-1, 1)
    step_b = np.mean(per_sample_gb, axis=0).reshape(-1, 1)

    weights -= eta * step_w
    bias -= eta * step_b

    return weights, bias


In [None]:
num_epoch = 500

# The test loader serves the whole (un-shuffled) test split as a single batch,
# so it is fetched and flattened once instead of again on every epoch.
te_images, te_labels = next(iter(test_loader))
te_images = te_images.numpy().reshape(-1,784)   # (n_test,1,28,28) -> (n_test,784)
te_labels = te_labels.numpy().reshape(-1,1)     # (n_test,)        -> (n_test,1)

for i in range(num_epoch):

    # ---- train the logistic regression model for one epoch ----
    total_loss_train = 0
    count = 0
    for x, y in train_loader:
        # x: (batch_size,1,28,28) -> (batch_size,784), y: (batch_size,) -> (batch_size,1)
        x, y = x.numpy().reshape(-1,784), y.numpy().reshape(-1,1)
        params = (w, b)
        # the loss and hits are measured with the parameters as they were
        # before this batch's update
        loss, grads, hit = compute_loss_and_grad((x,y),params)
        w, b = update_parameters(params,grads)
        total_loss_train += loss.sum()
        count += hit.sum()

    # average loss and accuracy over the train dataset
    loss_train = total_loss_train/len(train_data)
    acc_train = count/len(train_data)

    # ---- evaluate on the test dataset with the current parameters ----
    # (the gradients returned here are not needed and are ignored)
    loss, _test_grads, hit = compute_loss_and_grad((te_images,te_labels),(w,b))
    total_loss_test = loss.sum()

    # average loss and accuracy over the test dataset
    loss_test = total_loss_test/len(te_labels)
    acc_test = hit.sum()/len(te_labels)

    # report every 10th epoch
    if i % 10 == 0:
        print("Epoch %d Train: %.3f / %.2f %%"%(i,loss_train,acc_train*100))
        print("Epoch %d Test: %.3f / %.2f %%"%(i,loss_test,acc_test*100))
        print()


Epoch 0 Train: 3.472 / 38.06 %
Epoch 0 Test: 3.575 / 36.78 %

Epoch 10 Train: 2.818 / 41.45 %
Epoch 10 Test: 2.885 / 40.05 %

Epoch 20 Train: 2.268 / 46.31 %
Epoch 20 Test: 2.305 / 45.58 %

Epoch 30 Train: 1.836 / 52.35 %
Epoch 30 Test: 1.848 / 50.97 %

Epoch 40 Train: 1.511 / 58.37 %
Epoch 40 Test: 1.502 / 55.89 %

Epoch 50 Train: 1.267 / 63.77 %
Epoch 50 Test: 1.242 / 61.47 %

Epoch 60 Train: 1.082 / 68.21 %
Epoch 60 Test: 1.045 / 66.38 %

Epoch 70 Train: 0.940 / 72.15 %
Epoch 70 Test: 0.892 / 70.83 %

Epoch 80 Train: 0.827 / 75.10 %
Epoch 80 Test: 0.773 / 74.00 %

Epoch 90 Train: 0.737 / 77.87 %
Epoch 90 Test: 0.679 / 77.45 %

Epoch 100 Train: 0.664 / 79.96 %
Epoch 100 Test: 0.602 / 80.00 %

Epoch 110 Train: 0.603 / 81.71 %
Epoch 110 Test: 0.540 / 81.99 %

Epoch 120 Train: 0.553 / 83.17 %
Epoch 120 Test: 0.488 / 83.26 %

Epoch 130 Train: 0.510 / 84.54 %
Epoch 130 Test: 0.444 / 84.96 %

Epoch 140 Train: 0.474 / 85.77 %
Epoch 140 Test: 0.408 / 86.19 %

Epoch 150 Train: 0.443 / 86.89 %

In [None]:
# Report the test-set loss and accuracy (as a percentage) left over from the
# last epoch the training loop completed.
print("loss :", loss_test)
print("accuracy :",acc_test*100)

loss : 0.1109570175199161
accuracy : 97.06855791962174
