In [180]:
import pandas as pd
import numpy as np
import math

In [181]:
def read_dataset(feature_file, label_file):
    """Load a feature CSV and a label CSV and return both as NumPy arrays.

    Parameters
    ----------
    feature_file : path of the CSV holding the feature columns.
    label_file : path of the CSV holding the label column(s).

    Returns
    -------
    (X, y) : the ``.values`` of each parsed DataFrame, features first.
    """
    # Parse both files in the same order as the arguments and strip the
    # DataFrame wrapper so callers get plain arrays.
    features, labels = (
        pd.read_csv(path).values for path in (feature_file, label_file)
    )
    return features, labels

In [182]:
def normalize_features(X_train, X_test):
    """Standardize both splits using statistics learned from the training split.

    The scaler is fitted on ``X_train`` only and then applied to both
    ``X_train`` and ``X_test``, so the test data never influences the
    fitted statistics.

    Returns
    -------
    (X_train_norm, X_test_norm) : the transformed training and test arrays.
    """
    from sklearn.preprocessing import StandardScaler

    # fit() returns the scaler itself, so construction and fitting chain.
    fitted_scaler = StandardScaler().fit(X_train)
    return fitted_scaler.transform(X_train), fitted_scaler.transform(X_test)

In [243]:
# Load the train/test splits from their CSV files.
(X_train, y_train), (X_test, y_test) = (
    read_dataset('Iris_X_train.csv', 'Iris_y_train.csv'),
    read_dataset('Iris_X_test.csv', 'Iris_y_test.csv'),
)
# Standardize both splits with statistics fitted on the training split.
X_train_norm, X_test_norm = normalize_features(X_train, X_test)

In [249]:
# Sanity check: full shape of the normalized training matrix, then its row count.
print(X_train_norm.shape)
print(X_train_norm.shape[0])

(112, 2)
112


In [288]:
class Logistic_Reg:
    """Binary logistic regression with an L2 penalty, trained by batch gradient descent.

    Parameters
    ----------
    x : array-like of shape (n_samples, n_features)
        Training features.
    y : array-like of shape (n_samples,) or (n_samples, 1)
        Training labels (expected to be 0/1). Stored as a column vector so
        that ``sigmoid - y`` is always element-wise.
    lr : float
        Learning rate (gradient-descent step size).
    ld : float
        L2 regularization strength.

    Raises
    ------
    ValueError
        If the number of labels differs from the number of rows in ``x``.

    Attributes set by the methods
    -----------------------------
    W, b : weights of shape (n_features, 1) and scalar bias, both start at zero.
    sigmoid : predicted probabilities on the training data.
    cost, regu, cost_regu : data loss, penalty term and their sum.
    sigmoid_test : predicted probabilities from the last ``predict`` call.
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logs so a
    # saturated sigmoid cannot produce inf/nan in the loss.
    _EPS = 1e-15

    def __init__(self, x, y, lr, ld):
        self.x = np.asarray(x)
        # Force a column vector: a flat (n,) label array would otherwise
        # broadcast against the (n, 1) probabilities into an (n, n) matrix.
        self.y = np.asarray(y).reshape(-1, 1)
        if self.y.shape[0] != self.x.shape[0]:
            raise ValueError(
                'x has %d rows but y has %d labels'
                % (self.x.shape[0], self.y.shape[0])
            )
        self.lr = lr
        self.b = 0.
        self.W = np.zeros([self.x.shape[1], 1])
        self.ld = ld

    def _probabilities(self, X):
        """Return sigmoid(X @ W + b) for the current parameters."""
        return 1/(1 + np.exp(-np.dot(X, self.W) - self.b))

    def sigmoid_func(self):
        """Refresh ``self.sigmoid`` with the probabilities on the training data."""
        self.sigmoid = self._probabilities(self.x)

    def regu_cost_func(self):
        """Compute the cross-entropy loss, the L2 penalty and their sum.

        Results are stored in ``self.cost``, ``self.regu`` and
        ``self.cost_regu``; nothing is returned.
        """
        self.sigmoid_func()
        n_samples, n_features = self.x.shape
        # Clip only the copy used inside the logs; self.sigmoid stays exact.
        p = np.clip(self.sigmoid, self._EPS, 1 - self._EPS)
        self.cost = (np.sum(-self.y*np.log(p)) - np.sum((1 - self.y)*np.log(1 - p)))/n_samples
        # NOTE(review): the penalty is scaled by the number of features, not
        # the number of samples. gradDec uses the matching gradient, so the
        # objective is self-consistent; confirm this scaling is intended.
        self.regu = (self.ld/(2*n_features))*np.sum(np.dot(self.W.T, self.W))
        self.cost_regu = self.cost + self.regu

    def gradDec(self):
        """Perform one gradient-descent update of ``W`` and ``b``."""
        # Recompute the activation here so the step never uses a missing or
        # stale self.sigmoid (e.g. when called twice in a row).
        self.sigmoid_func()
        n_samples, n_features = self.x.shape
        residual = self.sigmoid - self.y
        dW = 1/n_samples*np.dot(self.x.T, residual) + self.ld/n_features*self.W
        db = 1/n_samples*np.sum(residual)
        self.W = self.W - self.lr*dW
        self.b = self.b - self.lr*db

    def predict(self, X_test):
        """Return 0/1 predictions of shape (n_rows, 1) using a 0.5 threshold.

        The underlying probabilities are kept in ``self.sigmoid_test``.
        """
        self.sigmoid_test = self._probabilities(np.asarray(X_test))
        return np.where(self.sigmoid_test >= 0.5, 1, 0)

  

In [293]:
# Train the regularized logistic-regression model with batch gradient descent.
reglog = Logistic_Reg(X_train_norm, y_train, lr=0.01, ld=0.1)
for i in range(1000):
    # Forward pass, parameter update, then loss evaluated on the updated weights.
    reglog.sigmoid_func()
    reglog.gradDec()
    reglog.regu_cost_func()
    epoch = i + 1
    # Report the regularized loss once every 100 epochs.
    if epoch % 100 == 0:
        print('epoch = %d, current loss = %.5f' % (epoch, reglog.cost_regu))

epoch = 100, current loss = 0.59745
epoch = 200, current loss = 0.54486
epoch = 300, current loss = 0.51422
epoch = 400, current loss = 0.49537
epoch = 500, current loss = 0.48323
epoch = 600, current loss = 0.47513
epoch = 700, current loss = 0.46957
epoch = 800, current loss = 0.46567
epoch = 900, current loss = 0.46289
epoch = 1000, current loss = 0.46088


In [294]:
# Predict on the normalized test split and report accuracy.
y_pred = reglog.predict(X_test_norm)
# Flatten both arrays so the comparison is element-wise whatever the column
# layout, and average the matches in one vectorised step. This avoids the
# builtin sum() over a 2-D array and the deprecated float() conversion of a
# 1-element array.
accuracy = float(np.mean(np.ravel(y_pred) == np.ravel(y_test)))
print(accuracy)

0.8421052631578947
