In [3]:
# (e)
import pandas as pd
data = pd.read_csv("./data/A3_Q1_data.csv")
x = data[["x1", "x2"]].values
y = data["y"].values

In [4]:
# (g)
from math import e
import numpy as np

class LogisticReg:
    def __init__(self):
        self.w = None
        self.train_cost_history = []
        self.test_cost_history = []
        self.e = None
    def sigmoid(self, z):
        return 1 / (1 + e**(-z))
    def cost(self, X, Y):
        num = len(Y)
        Y_hat = self.sigmoid(X.dot(self.w))
        c = - Y.T.dot(np.log(Y_hat)) - (1-Y).T.dot(np.log(1- Y_hat))
        return c / num
    
    def gradient(self, X, Y):
        Y_hat = self.sigmoid(X.dot(self.w))
        return (1/len(Y)) * X.T.dot(Y_hat - Y)
    
    def fit(self, x_train, y_train, x_test, y_test, lr, epoch=500, show_hist=True):
        # initialize weight
        self.w = np.random.rand(x_train.shape[1])
        
        self.e = 0
        while self.e <= epoch:
            current_w = self.w
            
            # cost
            c_train = self.cost(X=x_train, Y=y_train)
            c_test = self.cost(X=x_test, Y=y_test)
            self.train_cost_history.append(c_train)
            self.test_cost_history.append(c_test)
            
            if (self.e+1) % 50 == 0 and show_hist:
                print("epoch: {}, current train cost: {} , test cost: {}".format(self.e+1, c_train, c_test))
                pass
            
            # training
            y_hat = self.sigmoid(x_train.dot(self.w))
            grad = self.gradient(x_train, y_train)
            self.w = current_w - lr * grad
            
            self.e += 1
            if self.e != 0 and np.abs(np.linalg.norm(self.w, ord=2) - np.linalg.norm(current_w, ord=2)) < 1e-06:
                break
            pass
        print("trianing finished!")
        pass
    
    def predict(self, x):
        return self.sigmoid(x.dot(self.w))

In [5]:
# partition data
shuffle = np.arange(len(y))
np.random.shuffle(shuffle)

x_train = x[:70000]
y_train = y[:70000]

x_test = x[70000:]
y_test = y[70000:]

x_train = (x_train - x_train.mean(axis=0)) / x_train.std(axis=0)
x_test = (x_test - x_test.mean(axis=0)) / x_test.std(axis=0)

# add bias term
x_train = np.concatenate((np.ones((len(x_train), 1)), x_train), axis=1)
x_test = np.concatenate((np.ones((len(x_test), 1)), x_test), axis=1)

In [6]:
logreg = LogisticReg()
logreg.fit(x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test, lr = 0.1)

epoch: 50, current train cost: 0.44165658913902217 , test cost: 0.43928313309908396
epoch: 100, current train cost: 0.3741097707664395 , test cost: 0.37113288860383375
epoch: 150, current train cost: 0.3538193917906498 , test cost: 0.35064490773835016
epoch: 200, current train cost: 0.34436320343378474 , test cost: 0.3410789860481099
epoch: 250, current train cost: 0.3390801150036247 , test cost: 0.3357192002850469
epoch: 300, current train cost: 0.33582948986642586 , test cost: 0.3324086062176542
epoch: 350, current train cost: 0.33370872545516306 , test cost: 0.3302382020348682
epoch: 400, current train cost: 0.33227045609658235 , test cost: 0.3287576093922625
epoch: 450, current train cost: 0.3312681498265778 , test cost: 0.3277186524293822
epoch: 500, current train cost: 0.3305555643491745 , test cost: 0.32697406613678
trianing finished!
