In [50]:
from scipy.io import arff
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy as np

arff_file_path = './Rice_Cammeo_Osmancik.arff'

data, meta = arff.loadarff(arff_file_path)

df = pd.DataFrame(data)

df = df.sample(frac=1).reset_index(drop=True)

scaler = MinMaxScaler()

column_to_be_normalized = ['Area', 'Perimeter','Major_Axis_Length','Minor_Axis_Length','Convex_Area']

scaler.fit(df[column_to_be_normalized])

df[column_to_be_normalized] = scaler.transform(df[column_to_be_normalized])

features = df.drop("Class", axis=1)
label = df["Class"]


X_train, X_test, y_train, y_test = train_test_split(features, label, test_size=0.16, random_state=42)


class LogisticRegression:
    def __init__(self, data, labels,learning_rate):
        self.weights = np.random.rand(data.shape[1])
        self.data = data
        self.labels = labels
        self.learning_rate = learning_rate
        self.N = len(data)
        self.threshold = 0.004

    def fit(self):
        prev_weights = None
        while not self.termination_condition(self.weights,prev_weights):
            prev_weights = self.weights
            print(str(self.calculate_error(prev_weights)))
            self.predict()
            
        
    def calculate_error(self,weights):
        sum = 0
        for i in range(self.N):
            sum = sum + np.log(1 + np.exp(-self.labels[i]*(self.weights @ self.data[i])))
        sum =  (1/self.N)*sum
        return sum

        
    def termination_condition(self,curr_weights, prev_weights = None):
           print(self.calculate_error(curr_weights))
           print(self.calculate_error(prev_weights))
           if (prev_weights is not None) and abs(self.calculate_error(curr_weights) - self.calculate_error(prev_weights))<self.threshold:
                return True
           return False
           
           
    def predict(self):
        gradient = 0
        for i in range(self.N):
            gradient = gradient + (self.labels[i]*self.data[i])/(1 + np.exp(self.labels[i]*(self.weights @ self.data[i])))
        gradient =  (-1/self.N)*gradient
        direction = -gradient
        self.weights = self.weights + self.learning_rate*direction

In [51]:
def main():
    tranformed_y_train = [-1 if x == b'Cammeo' else 1 for x in y_train.values]
    logistic_regression = LogisticRegression(X_train.values, tranformed_y_train, 0.0004)
    logistic_regression.fit()

if __name__ == "__main__":
    main()

1.4487972451138125
[-0.22545718 -0.26427697 -0.25094942 -0.2279002  -0.32812063 -0.23069746
 -0.23530725]
