In [None]:
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%run assignment1_Kaustav.ipynb

In [None]:
class LogReg(object):
    """Binary logistic regression trained with batch gradient descent.

    Attributes:
        learning_rate: Default step size used by ``fit``.
        iterations: Default number of gradient-descent steps used by ``fit``.
        params: Weight column vector of shape ``(n_features, 1)``; set by
            ``fit`` (or supplied up front through the constructor).
        cost_history: ``(iterations, 1)`` array holding the cost after each
            step of the most recent ``fit`` call.

    No intercept column is added here: ``X @ params`` is used as-is, so the
    caller must include a bias column in ``X`` if one is wanted.
    """

    def __init__(self, learning_rate=None, iterations=None, params = None, cost_history = None):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.params = params
        # Keep the argument instead of discarding it, so that the dictionary
        # returned by get_params() can be fed back into the constructor.
        self.cost_history = cost_history

    def sigmoid(self, x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

    def compute_cost(self, X, y, theta):
        """Return the mean cross-entropy cost of ``theta`` on ``(X, y)``.

        Args:
            X: Feature matrix with one row per sample.
            y: Binary labels, either flat ``(m,)`` or column ``(m, 1)``.
            theta: Weight vector compatible with ``X @ theta``.

        Returns:
            The cost, as produced by the matrix products below (a ``(1, 1)``
            array when ``theta`` is a column vector).
        """
        m = len(y)
        h = self.sigmoid(X @ theta)
        # Give y the same shape as h; a flat y against a column h would
        # otherwise broadcast ``1 - y`` / ``-y`` into the wrong product.
        y = np.asarray(y, dtype=float).reshape(h.shape)
        # Small offset keeps log() finite when h saturates at 0 or 1.
        epsilon = 1e-5
        cost = (1/m)*(((-y).T @ np.log(h + epsilon))-((1-y).T @ np.log(1-h + epsilon)))
        return cost

    def fit(self, X, y, params = None, learning_rate = None, iterations = None):
        """Run batch gradient descent and store the learned weights.

        Args:
            X: Feature matrix of shape ``(m, n_features)``.
            y: Binary labels, flat ``(m,)`` or column ``(m, 1)``.
            params: Optional starting weights. When omitted, training starts
                from zeros on every call (no warm start from a previous fit).
            learning_rate: Step size; defaults to ``self.learning_rate``.
            iterations: Number of steps; defaults to ``self.iterations``.

        Returns:
            Tuple ``(cost_history, params)`` for this call.

        Raises:
            ValueError: If neither the call nor the instance provides
                ``learning_rate`` and ``iterations``.
        """
        if learning_rate is None:
            learning_rate = self.learning_rate

        if iterations is None:
            iterations = self.iterations

        if learning_rate is None or iterations is None:
            raise ValueError(
                "learning_rate and iterations must be set on the model or passed to fit()"
            )

        # Work with a column vector so that ``sigmoid(X @ params) - y`` is an
        # element-wise residual; a flat y would broadcast to an (m, m) matrix.
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        m = len(y)

        if params is None:
            self.params = np.zeros((np.size(X, 1), 1))
        else:
            # Honour explicitly supplied starting weights.
            self.params = np.asarray(params, dtype=float).reshape(-1, 1)

        # Always allocate a fresh history: reusing the previous array breaks
        # when ``iterations`` changes and would overwrite results that an
        # earlier fit() call already handed back to the caller.
        self.cost_history = np.zeros((iterations, 1))

        for i in range(iterations):
            residual = self.sigmoid(X @ self.params) - y
            self.params = self.params - (learning_rate/m) * (X.T @ residual)
            self.cost_history[i] = self.compute_cost(X, y, self.params)

        return (self.cost_history, self.params)

    def predict(self, X):
        """Return rounded sigmoid outputs (0.0 or 1.0) for each row of ``X``."""
        return np.round(self.sigmoid(X @ self.params))

    def score(self, test_y, predicted_y):
        """Return the fraction of predictions equal to the true labels.

        Both inputs are flattened first, so a column of predictions can be
        compared with flat labels without broadcasting to a matrix.
        """
        matches = np.ravel(predicted_y) == np.ravel(test_y)
        return (np.sum(matches) / len(test_y))

    def evaluate_acc(self, test_y, predicted_y):
        """Alias of ``score``; this is the method name ``kFoldCrossVal`` calls."""
        return self.score(test_y, predicted_y)

    def get_params(self, deep=True):
        """Return the constructor arguments as a dictionary.

        ``deep`` is accepted but not used.
        """
        return {"learning_rate": self.learning_rate, 
                "iterations": self.iterations, 
                "params": self.params,
                "cost_history": self.cost_history}

    def set_params(self, **parameters):
        """Set each keyword as an attribute on the instance and return ``self``."""
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

In [None]:
def kFoldCrossVal(k, X, y, model=None):
    """Estimate accuracy with k-fold cross-validation over contiguous folds.

    The first ``k * (len(y) // k)`` rows are cut into ``k`` equal, contiguous
    test folds. Any leftover rows at the end are never used for testing and
    are part of every training set.

    Args:
        k: Number of folds (integer, at least 2).
        X: Feature array indexable by an integer index array.
        y: Label array with one entry per row of ``X``.
        model: Object with ``fit(X, y)``, ``predict(X)`` and either
            ``evaluate_acc(true_y, predicted_y)`` or
            ``score(true_y, predicted_y)``. When omitted, the module-level
            variable named ``model`` is used.

    Returns:
        Mean accuracy over the ``k`` folds. The accuracy of each fold is also
        printed as it is computed.

    Raises:
        NameError: If ``model`` is not given and no global ``model`` exists.
        ValueError: If ``k`` is below 2 or exceeds the number of samples.
    """
    if model is None:
        # Fall back to the notebook-level ``model`` for existing callers.
        model = globals().get("model")
        if model is None:
            raise NameError("name 'model' is not defined")

    n_samples = len(y)
    if k < 2:
        raise ValueError("k must be at least 2")
    size = n_samples // k
    if size == 0:
        raise ValueError("k must not exceed the number of samples")

    # Prefer evaluate_acc; fall back to a score(true_y, predicted_y) method.
    scorer = getattr(model, "evaluate_acc", None)
    if scorer is None:
        scorer = model.score

    accuracy = 0
    for i in range(k):
        start, stop = size * i, size * (i + 1)
        # The upper bound must be explicit: an open-ended slice inside np.r_
        # expands to 0..stop-1, which would put the test fold into the
        # training set and drop every row after it.
        train_idx = np.r_[0:start, stop:n_samples]
        train_x = X[train_idx]
        train_y = y[train_idx]
        test_x = X[start:stop]
        test_y = y[start:stop]

        model.fit(train_x, train_y)
        predictions = model.predict(test_x)
        run_accuracy = scorer(test_y, predictions)
        accuracy = accuracy + run_accuracy
        print(run_accuracy)

    return accuracy / k

In [None]:
# Candidate values for LogReg's `iterations` and `learning_rate` settings
# (presumably the grid for a hyperparameter search -- confirm against usage).
parameters = dict(
    iterations=[10, 20, 30, 40, 50],
    learning_rate=[1e-1, 1e-2, 1e-3, 1e-4],
)

# features = df.drop(['salary'] , axis=1)
# X = features.values
# output = df['salary']
# y = output.values

# y = y[:,np.newaxis]
# sns.set_style('white')
# sns.scatterplot(X[:,0],X[:,1],hue=y.reshape(-1));

# m = len(y)

# X = np.hstack((np.ones((m,1)),X))
# n = np.size(X,1)
# params = np.zeros((n,1))

# iterations = 5
# learning_rate = 0.001

# initial_cost = compute_cost(X, y, params)

# print("Initial Cost is: {} \n".format(initial_cost))

# (cost_history, params_optimal) = gradient_descent(X, y, params, learning_rate, iterations)

# print("Optimal Parameters are: \n", params_optimal, "\n")

# plt.figure()
# sns.set_style('white')
# plt.plot(range(len(cost_history)), cost_history, 'r')
# plt.title("Convergence Graph of Cost Function")
# plt.xlabel("Number of Iterations")
# plt.ylabel("Cost")
# plt.show()

# y_pred = predict(X, params_optimal)
# score = float(sum(y_pred == y))/ float(len(y))

# print(score)
