In [None]:
import pandas as pd
import numpy as np
import matplotlib.style as style

%run Preprocessing_BCW_Data.ipynb
%run Preprocessing_Adult_Data.ipynb
%run Preprocessing_Ionosphere_Data.ipynb
%run preprocessing_mpg_dataset.ipynb

import matplotlib.pyplot as plt

In [None]:
def kFoldCrossVal(k, X, y, rate, iter):
    """Estimate logistic-regression accuracy with k-fold cross-validation.

    The rows are split, in their existing order, into ``k`` contiguous folds of
    ``len(y) // k`` rows each. Every fold is held out once while the model is
    trained from zero-initialised parameters on all remaining rows (including
    any leftover rows after the last full fold); the held-out accuracies are
    then averaged.

    Parameters
    ----------
    k : int
        Number of folds; must satisfy ``2 <= k <= len(y)``.
    X : numpy.ndarray
        2-D design matrix, one row per sample.
    y : numpy.ndarray
        Labels, one entry (or row) per sample.
    rate : float
        Learning rate forwarded to ``LogisticRegression.fit``.
    iter : int
        Number of gradient-descent iterations forwarded to ``fit``. The name
        shadows the builtin inside this function only; it is kept so existing
        callers keep working.

    Returns
    -------
    float
        Mean accuracy over the ``k`` held-out folds.

    Raises
    ------
    ValueError
        If ``k`` is outside ``2 .. len(y)``.
    """
    n_samples = len(y)
    if not 2 <= k <= n_samples:
        raise ValueError(
            "k must be between 2 and the number of samples ({}), got {}".format(n_samples, k)
        )

    fold_size = n_samples // k
    row_ids = np.arange(n_samples)
    model = LogisticRegression()
    # Start every fold from zeros; the trailing shape follows y so that both
    # 1-D labels and (m, 1) column labels broadcast correctly inside fit().
    initial_params = np.zeros((np.shape(X)[1],) + np.shape(y)[1:])

    total_accuracy = 0.0
    for fold in range(k):
        start = fold * fold_size
        stop = start + fold_size

        # Build the training rows explicitly. An open-ended slice inside np.r_
        # (e.g. np.r_[0:a, b:]) expands to arange(b), i.e. rows 0..b-1, which
        # would put the held-out fold back into the training set.
        train_ids = np.concatenate((row_ids[:start], row_ids[stop:]))

        _, fitted_params = model.fit(X[train_ids], y[train_ids], initial_params, rate, iter)
        predictions = model.predict(X[start:stop], fitted_params)
        total_accuracy += model.evaluate_acc(y[start:stop], predictions)

    return total_accuracy / k

In [None]:
class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    The model holds no state: parameters are passed in to, and returned from,
    each method explicitly.
    """

    def sigmoid(self, x):
        """Return the logistic function 1 / (1 + e^-x), element-wise."""
        denominator = 1 + np.exp(-x)
        return 1 / denominator

    def compute_cost(self, X, y, theta):
        """Return the mean cross-entropy cost of ``theta`` on ``(X, y)``.

        A small constant (1e-5) is added inside both logarithms so that a
        predicted probability of exactly 0 or 1 does not produce log(0).
        """
        n_samples = len(y)
        probabilities = self.sigmoid(X @ theta)
        eps = 1e-5
        positive_term = (-y).T @ np.log(probabilities + eps)
        negative_term = (1 - y).T @ np.log(1 - probabilities + eps)
        return (1 / n_samples) * (positive_term - negative_term)

    def fit(self, X, y, params, learning_rate, iterations):
        """Run ``iterations`` gradient-descent steps starting from ``params``.

        Returns a tuple ``(cost_history, params)``: an ``(iterations, 1)`` array
        holding the cost after each step, and the final parameters. The
        ``params`` argument itself is not modified in place.
        """
        n_samples = len(y)
        costs = np.zeros((iterations, 1))
        weights = params

        for step in range(iterations):
            residual = self.sigmoid(X @ weights) - y
            gradient = X.T @ residual
            weights = weights - (learning_rate / n_samples) * gradient
            costs[step] = self.compute_cost(X, y, weights)

        return costs, weights

    def predict(self, X, params):
        """Return predicted probabilities rounded to the nearest label (0 or 1)."""
        probabilities = self.sigmoid(X @ params)
        return np.round(probabilities)

    def evaluate_acc(self, test_y, predicted_y):
        """Return the fraction of entries where ``predicted_y`` equals ``test_y``."""
        n_correct = np.sum(predicted_y == test_y)
        return n_correct / len(test_y)

# Candidate numbers of gradient-descent iterations: 1000, 1100, ..., 1900.
iterations = list(range(1000, 2000, 100))

# Disabled alternative: additionally try 10, 20, ..., 100 iterations.
# iterations.extend(range(10, 110, 10))

# Candidate learning rates: 1..9 times each of 1e-4, 1e-3, 1e-2 and 1e-1.
# Dividing two integers yields exactly the same float as the decimal literal
# (e.g. 3 / 10000 == 0.0003), which multiplying by 0.0001 would not.
learning_rate = [digit / 10 ** exponent
                 for exponent in (4, 3, 2, 1)
                 for digit in range(1, 10)]

# Disabled alternative: a coarse four-point grid.
# learning_rate= [0.0001, 0.001, 0.01, 0.1]
lr = LogisticRegression()

# --- Dataset selection -------------------------------------------------------
# Nothing below assigns X (feature matrix) or y (labels) while all four blocks
# stay commented out; uncomment exactly one of them before running this cell.
# NOTE(review): the data frames referenced here (df, df_ion, DataFrame, df_bcw)
# are presumably created by the %run preprocessing notebooks -- confirm.

# features = df.drop(['salary'] , axis=1)
# X = features.values
# output = df['salary']
# y = output.values

# features = df_ion.drop(['classification'] , axis=1)
# X = features.values
# output = df_ion['classification']
# y = output.values

# features = DataFrame.drop(['mpg'] , axis=1)
# X = features.values
# output = DataFrame['mpg']
# y = output.values

# features = df_bcw.drop(['Class'] , axis=1)
# X = features.values
# output = df_bcw['Class']
# y = output.values

# --- Design matrix and initial parameters ------------------------------------
# Turn the labels into an (m, 1) column. reshape(-1, 1) gives the same result as
# y[:, np.newaxis] for 1-D labels but does not add another axis on a re-run.
y = np.asarray(y).reshape(-1, 1)

sns.set_style('white')
# Scatter of the first two feature columns, coloured by label. x and y are
# passed by keyword because seaborn >= 0.12 accepts only `data` positionally.
# Note that scatterplot returns a matplotlib Axes, despite the name `fig`.
fig = sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=y.reshape(-1));

m = len(y)
# Prepend a column of ones so the first parameter acts as the intercept.
# This line is not idempotent: re-running the cell adds another ones column.
X = np.hstack((np.ones((m, 1)), X))
n = np.size(X, 1)
params = np.zeros((n, 1))
initial_cost = lr.compute_cost(X, y, params)

# Exhaustive grid search over (learning rate, iteration count), scoring each
# combination by cross-validated accuracy.
K_FOLDS = 5

optimal_score = 0
optimal_rate = 0
optimal_iter = 0

for rate in learning_rate:
    # The loop variable is deliberately not called `iter`, so that the builtin
    # iter() is not shadowed for the rest of the notebook.
    for n_iterations in iterations:
        score = kFoldCrossVal(K_FOLDS, X, y, rate, n_iterations)
        # A disabled earlier variant fitted on all of X, y and scored the
        # predictions on that same data instead of cross-validating.

        # Strict comparison: on ties the first combination tried is kept.
        if score > optimal_score:
            optimal_score = score
            optimal_rate = rate
            optimal_iter = n_iterations

print("Optimal score is {}, with rate: {} and {} iterations".format(optimal_score, optimal_rate, optimal_iter))
