In [132]:
import numpy as np
import pandas as pd

class LogisticRegression:
    """
    Binary logistic regression on the Iris data, trained by batch gradient descent.

    Intended workflow: ``load_iris()`` -> ``split_data()`` -> ``fit()``.
    ``predict()`` expects a design matrix whose first column is the intercept
    (all ones) and returns sigmoid probabilities.

    Note: the model is a single sigmoid, so it is only meaningful for labels in
    {0, 1}. ``load_iris(multinomial=True)`` keeps all three class codes, which
    this model does not handle properly.
    """

    FEATURE_COLUMNS = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

    def __init__(self):
        self.df = None       # DataFrame produced by load_iris()
        self.X = None        # feature matrix, one row per sample
        self.y = None        # class codes, one per sample
        self.trainX = None
        self.trainY = None
        self.testX = None
        self.testY = None
        self.b = None        # weights, shape (n_features + 1, 1); row 0 is the intercept

    def load_iris(self, multinomial=False, path='iris.txt'):
        """
        Load the Iris data and standardise the four feature columns.

        Parameters
        ----------
        multinomial : bool
            If false, only the samples whose class code is 0 or 1 are kept.
        path : str
            Header-less CSV file with four feature columns followed by the class.
        """
        feature_cols = list(self.FEATURE_COLUMNS)
        self.df = pd.read_csv(path, names=feature_cols + ['class'])
        # Replace the class labels by integer category codes (0, 1, 2, ...).
        self.df['class'] = self.df['class'].astype('category').cat.codes
        for col in feature_cols:
            self.df[col] = (self.df[col] - self.df[col].mean()) / self.df[col].std()

        self.X = self.df[feature_cols].to_numpy(dtype=float)
        self.y = self.df['class'].to_numpy(dtype=float)
        if not multinomial:
            keep = self.y <= 1
            self.X = self.X[keep]
            self.y = self.y[keep]

    def split_data(self, p=0.8):
        """
        Randomly split data into test and train.

        Each sample goes to the training set independently with probability
        ``p``, so the realised split sizes vary from call to call. Randomness
        comes from NumPy's global RNG (``np.random.seed`` makes it repeatable).
        """
        assert self.X is not None, "Need to load data before splitting"

        mask = np.random.rand(self.X.shape[0]) < p

        self.trainX = self.X[mask]
        self.testX = self.X[~mask]

        self.trainY = self.y[mask]
        self.testY = self.y[~mask]

    def fit(self, lr=0.01, n_iter=20, verbose=True):
        """
        Fit the weights with batch gradient descent on the training split.

        Parameters
        ----------
        lr : float
            Learning rate (step size) of each gradient step.
        n_iter : int
            Number of gradient steps.
        verbose : bool
            If true, print train and test loss after every step.
        """
        assert self.trainX is not None and self.testX is not None, \
            "Need to split data before fitting"

        # Prepend a column of ones so that b[0] acts as the intercept.
        X = np.hstack((np.ones((self.trainX.shape[0], 1)), self.trainX))
        y = self.trainY.reshape(-1, 1)

        X_test = np.hstack((np.ones((self.testX.shape[0], 1)), self.testX))
        y_test = self.testY.reshape(-1, 1)

        # Size the weights from the matrix actually used for training.
        self.b = np.random.normal(0, 1, size=(X.shape[1], 1))

        for _ in range(n_iter):
            # Gradient of the (summed) cross entropy with respect to b.
            grad = -np.matmul(X.T, y - self.predict(X))
            self.b = self.b - lr * grad

            if verbose:
                train_loss = self.loss(y, self.predict(X))
                test_loss = self.loss(y_test, self.predict(X_test))
                print("Train Loss: {:.3f}, Test Loss: {:.3f}".format(train_loss, test_loss))

    def predict(self, X):
        """
        Return the predicted probability of class 1 for each row of X.

        X must already contain the leading intercept column, i.e. have as many
        columns as ``self.b`` has rows.
        """
        z = np.asarray(X) @ self.b
        # sigmoid(z) = exp(-log(1 + exp(-z))); logaddexp keeps this free of
        # overflow for large |z|.
        return np.exp(-np.logaddexp(0, -z))

    def loss(self, yTrue, yPred):
        """
        Return the mean cross entropy loss for true and predicted y values.

        Predictions are clipped away from exactly 0 and 1 so that saturated
        probabilities give a finite loss instead of NaN/inf. Returns NaN for
        empty input.
        """
        n = len(yTrue)
        if n == 0:
            return float('nan')
        eps = 1e-15
        p = np.clip(yPred, eps, 1 - eps)
        return -np.sum(yTrue * np.log(p) + (1 - yTrue) * np.log(1 - p)) / n

In [133]:
# Seed NumPy's global RNG: split_data() and fit() both draw from np.random,
# so without a seed the split, the initial weights and the printed losses
# change on every run.
np.random.seed(42)

# Binary Iris problem: load, hold out ~40% of the samples for testing, train.
lr = LogisticRegression()
lr.load_iris()
lr.split_data(p=0.6)
lr.fit()

Train Loss: 0.513, Test Loss: 0.406
Train Loss: 0.402, Test Loss: 0.328
Train Loss: 0.322, Test Loss: 0.272
Train Loss: 0.265, Test Loss: 0.232
Train Loss: 0.224, Test Loss: 0.202
Train Loss: 0.193, Test Loss: 0.179
Train Loss: 0.169, Test Loss: 0.161
Train Loss: 0.151, Test Loss: 0.147
Train Loss: 0.136, Test Loss: 0.135
Train Loss: 0.124, Test Loss: 0.126
Train Loss: 0.114, Test Loss: 0.117
Train Loss: 0.106, Test Loss: 0.111
Train Loss: 0.098, Test Loss: 0.105
Train Loss: 0.092, Test Loss: 0.099
Train Loss: 0.087, Test Loss: 0.095
Train Loss: 0.082, Test Loss: 0.091
Train Loss: 0.078, Test Loss: 0.087
Train Loss: 0.074, Test Loss: 0.083
Train Loss: 0.070, Test Loss: 0.080
Train Loss: 0.067, Test Loss: 0.078
