This is code for performing logistic regression in two ways:
1) Using a gradient descent optimizer that I built from scratch.
2) Using scikit-learn's built-in tool.

The models are tested using datasets that were taken from Andrew Ng's Machine Learning course on Coursera.

In [1]:
import numpy as np
import Single_Layer_Optimizers # This defines various shallow learning classes, such as linear and logistic regression.
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

In [None]:
# Load a basic classification dataset: each non-blank line of the text file is
# one row of comma-separated numbers, parsed into a 2D float array.
# The context manager guarantees the file is closed even if a row fails to
# parse, and blank lines (e.g. an empty last line) are skipped instead of
# crashing on float('').
with open('classification_data.txt') as file:
    data = np.array([[float(number) for number in line.split(',')]
                     for line in file if line.strip()])

In [None]:
# Build the from-scratch logistic regression model on the loaded data and fit it
# with the settings below (no reg_coef is passed here, so the class default applies).
lr = Single_Layer_Optimizers.Logistic_Regression(data)
fit_settings = {
    'alpha': .3,
    'iterations': 1000,
    'init': 'zeros',
    'val_set_fraction': 0,  # presumably: hold nothing out for validation — TODO confirm
}
lr.fit(**fit_settings)

# Show the cost plot, then predict on the training set and report the accuracy
lr.plot_cost()
lr.predict(dataset='train')
lr.Classification_Accuracy()

In [None]:
# 2D visualization of the data predictions:
# split the training rows by whether the rounded prediction equals 1, then
# scatter columns 1 and 2 of each group (column 0 is not plotted — presumably
# a bias column; TODO confirm).
class_bool = np.rint(lr.predictions) == 1
class1 = lr.train_data[class_bool, :]                  # rows predicted as 1
class2 = lr.train_data[np.logical_not(class_bool), :]  # all remaining rows

plt.plot(class1[:, 1], class1[:, 2], 'bo')  # blue circles
plt.plot(class2[:, 1], class2[:, 2], 'rx')  # red crosses
plt.xlabel('feature1')
plt.ylabel('feature2')
plt.show()

In [None]:
#### COMPARE RESULTS TO SKLEARN, USING SAME TRAINING DATA ####
lr.Classification_Accuracy()

# sklearn gets every column of the training data except column 0
# (presumably a bias column that sklearn replaces with its own intercept — TODO confirm).
# NOTE(review): LogisticRegression() is used with its default settings here;
# check that they match the regularization used by the from-scratch fit.
sk_features = lr.train_data[:, 1:]
sk_target = lr.train_target
LR = LogisticRegression()
LR.fit(sk_features, sk_target)
sk_accuracy = LR.score(sk_features, sk_target)
print("sklearn results in an accuracy of ", sk_accuracy)

In [None]:
# Load a challenging classification dataset: each non-blank line of the text
# file is one row of comma-separated numbers, parsed into a 2D float array.
# The context manager guarantees the file is closed even if a row fails to
# parse, and blank lines (e.g. an empty last line) are skipped instead of
# crashing on float('').
with open('classification_data_2.txt') as file:
    data = np.array([[float(number) for number in line.split(',')]
                     for line in file if line.strip()])

In [None]:
# Add squared terms to the data.
# New column layout: the first two columns unchanged, the square of column 0,
# the square of column 1, the product of those two squares, and finally the
# original last column.
first_two_columns = data[:, 0:2]
col0_squared = np.square(data[:, 0])
col1_squared = np.square(data[:, 1])
last_column = data[:, -1]
data = np.column_stack((
    first_two_columns,
    col0_squared,
    col1_squared,
    col0_squared * col1_squared,
    last_column,
))

In [None]:
# Build the from-scratch logistic regression model on the expanded data and fit
# it with regularization (reg_coef) using the settings below.
lr = Single_Layer_Optimizers.Logistic_Regression(data)
fit_settings = {
    'alpha': .3,
    'iterations': 200,
    'init': 'zeros',
    'val_set_fraction': 0,  # presumably: hold nothing out for validation — TODO confirm
    'reg_coef': 0.25,
}
lr.fit(**fit_settings)

# Show the cost plot, then predict on the training set and report the accuracy
lr.plot_cost()
lr.predict(dataset='train')
lr.Classification_Accuracy()

In [None]:
#### COMPARE RESULTS TO SKLEARN, USING SAME TRAINING DATA ####
lr.Classification_Accuracy()

# sklearn gets every column of the training data except column 0
# (presumably a bias column that sklearn replaces with its own intercept — TODO confirm).
# NOTE(review): C=2 is sklearn's inverse regularization strength; confirm it
# corresponds to the reg_coef used for the from-scratch fit.
sk_features = lr.train_data[:, 1:]
sk_target = lr.train_target
LR = LogisticRegression(C=2)
LR.fit(sk_features, sk_target)
sk_accuracy = LR.score(sk_features, sk_target)
print("sklearn results in an accuracy of ", sk_accuracy)

In [None]:
# Print the parameters learned by each model so they can be compared by eye:
# first the from-scratch model's weights, then sklearn's intercept and coefficients.
my_weights = lr.weights
print('My weights were ', my_weights)

sk_intercept, sk_coefficients = LR.intercept_, LR.coef_
print('Sklearn weights were ', sk_intercept, sk_coefficients)

In [None]:
# 2D visualization of the data predictions:
# split the training rows by whether the rounded prediction equals 1, then
# scatter columns 1 and 2 of each group (column 0 is not plotted — presumably
# a bias column; TODO confirm).
class_bool = np.rint(lr.predictions) == 1
class1 = lr.train_data[class_bool, :]                  # rows predicted as 1
class2 = lr.train_data[np.logical_not(class_bool), :]  # all remaining rows

plt.plot(class1[:, 1], class1[:, 2], 'bo')  # blue circles
plt.plot(class2[:, 1], class2[:, 2], 'rx')  # red crosses
plt.xlabel('feature1')
plt.ylabel('feature2')
plt.show()