In [116]:
import numpy as np
import timeit

from scipy.optimize import minimize
from sklearn.model_selection import train_test_split

import warnings
#warnings.filterwarnings("ignore")


In [117]:
class logistic_regression(object):
    def __init__(self, features, obs, d):
      self.x = features
      self.y = obs
      self.d = d

    def logLoss(self, weights):
      y_hat = self.sigmoid(self.x.dot(weights))
      y_hat_inv = 1- y_hat
      return - np.nansum(self.y * np.log(y_hat) + y_hat_inv* np.log(y_hat_inv))

    def sigmoid(self, t):
      return 1/(1+np.exp(-t))

    def derivative(self, weights):
      y_hat = self.sigmoid(self.x.dot(weights))
      return -self.x.T.dot(self.y-y_hat)

    def solve(self, show_result = False):
      #seed just to make sure don't generate bad number afterwards (still under 10s in that case)
      np.random.seed(4)
      max = 0.015
      min = -0.015
      result = minimize(self.logLoss, min + (max-min)*np.random.rand(10), method='BFGS', jac=self.derivative)
      if show_result:
        print('Status : %s' % result['message'])
        print('Total Evaluations: %d' % result['nfev'])
      self.weights = result['x']
      return self.weights

    def predict(self, X, threshold):
        y_predicted = self.sigmoid(np.dot(X, self.weights))
        y_predicted_cls = [1 if i > threshold else 0 for i in y_predicted]
        return np.array(y_predicted_cls)

In [118]:
#IMPORT DATA HERE
x = np.load("feature.npy")
y = np.load("obs.npy")

xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.1, random_state=3)
d = x.shape[1]

predictor = logistic_regression(xtrain, ytrain, d)

In [119]:
#DO NOT CHANGE THIS CELL
tic = timeit.default_timer()

#Your solver goes here. Do not add any code here.
theta = predictor.solve()

toc = timeit.default_timer()


print(toc - tic)
print(theta)

3.7618093230000795
[1.17668529 0.26183304 0.24005343 0.31594052 1.25534288 0.13334372
 0.52273025 1.05709867 0.47065912 0.88363106]


In [120]:
class LogisticRegression2:
    def __init__(self, x, y, learning_rate, iteration):
        self.lr = learning_rate
        self.iteration = iteration
        self.x = x
        self.y = y
        self.N = self.x.shape[0]
    def solve(self, show_message = False):
        self.weights = np.zeros(self.x.shape[1])

        # gradient descent
        for _ in range(self.iteration):
            y_predicted = self.sigmoid(np.dot(self.x, self.weights))

            dw = np.dot(self.x.T, (y_predicted - self.y))/self.N
            if sum(abs(dw)) < 0.5:
              if show_message:
                print("The weights has converged")
              break
            # update parameters
            self.weights -= self.lr * dw

    def predict(self, X, threshold):
        linear_model = np.dot(X, self.weights)
        y_predicted = self.sigmoid(linear_model)
        y_predicted_cls = [1 if i > threshold else 0 for i in y_predicted]
        return np.array(y_predicted_cls)

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy

threshold = 0.5
ytest = (ytest > threshold).astype('int')
predictions = predictor.predict(xtest, threshold)
print("BGFS classification accuracy:", accuracy(ytest, predictions))
predictor2 = LogisticRegression2(xtrain, ytrain, 0.1, 100)
predictor2.solve()
predictions2 = predictor2.predict(xtest, threshold)
print("GD classification accuracy:", accuracy(ytest, predictions2))
division = predictor.weights/predictor2.weights
print("division of the 2 weights from 2 methods: ", division)
print("standard deviation of the division: ", np.std(division))

BGFS classification accuracy: 0.99119
GD classification accuracy: 0.99938
division of the 2 weights from 2 methods:  [2.68262139 2.82689988 2.77191614 2.44134482 2.52657637 2.57213395
 2.71050829 2.64885754 2.59654052 2.58731487]
standard deviation of the division:  0.10985191276551118
