In [1]:
import numpy as np
from scipy.special import softmax
from sklearn.preprocessing import PolynomialFeatures, LabelBinarizer
from sklearn import datasets
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression

class MyLogisticRegression:
    '''
    Multinomial (softmax) logistic regression trained with mini-batch
    stochastic gradient ascent on the log-likelihood.

    Inputs are expanded with ``PolynomialFeatures(order)`` before the linear
    model is applied, both when fitting and when predicting.

    Parameters
    ----------
    learning_rate : float
        Step size applied to the mini-batch gradient. The gradient is summed
        (not averaged) over the batch, so the effective step grows with
        ``batch_size``.
    max_iter : int
        Maximum number of mini-batch updates.
    batch_size : int, default=1
        Number of samples drawn, with replacement, for every update.
    order : int, default=1
        Degree handed to ``PolynomialFeatures``.
    tolerance : float, default=1e-15
        Early-stopping threshold: training stops once, for every class, the
        squared ratio between the norm of the last update and the norm of
        that class's weight row falls below this value.
    random_state : int or None, default=None
        Seed of a private ``np.random.RandomState`` used for the weight
        initialisation and the batch sampling. ``None`` uses the global
        NumPy random state (seedable through ``np.random.seed``).

    Attributes set by ``fit``
    -------------------------
    classes : sorted unique labels seen in ``y``.
    n_classes, n_features, n_datapts : sizes of the training problem
        (``n_features`` counts the expanded polynomial features).
    weights : array of shape (n_classes, n_features).
    '''

    def __init__(self, learning_rate, max_iter, batch_size=1, order=1,
                 tolerance=1e-15, random_state=None):

        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.batch_size = batch_size
        self.order = order
        self.tolerance = tolerance
        self.random_state = random_state

    def fit(self, X, y):
        '''
        Learns self.weights with mini-batch stochastic gradient updates.

        X is the (n_samples, n_raw_features) design matrix and y the
        matching 1-D sequence of class labels (any dtype np.unique can sort).
        Returns self so calls can be chained.
        '''

        labels = np.asarray(y).ravel()
        # `label_idx[i]` is the position of labels[i] inside self.classes.
        self.classes, label_idx = np.unique(labels, return_inverse=True)

        XX = PolynomialFeatures(self.order).fit_transform(X)

        self.n_classes = len(self.classes)
        self.n_features = np.shape(XX)[1]
        self.n_datapts = np.shape(XX)[0]

        # Explicit one-hot targets with exactly one column per class.
        # (LabelBinarizer collapses two-class problems to a single column,
        # which does not line up with the (n_classes, n_features) weights.)
        r = np.eye(self.n_classes)[label_idx]

        # Both the `np.random` module and RandomState expose uniform/choice;
        # with random_state=None the global stream is consumed in the same
        # order as before (one `uniform`, then one `choice` per iteration).
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        self.weights = rng.uniform(low=-0.01,
                                   high=0.01,
                                   size=(self.n_classes,
                                         self.n_features))

        for _ in range(self.max_iter):

            # Mini-batch drawn with replacement from the training rows.
            idx = rng.choice(self.n_datapts, self.batch_size)
            x = XX[idx]

            # Class scores o[t, i] = weights[i] . x[t], then row-wise softmax.
            o = np.einsum('ij,tj->ti', self.weights, x, optimize=True)
            p = softmax(o, axis=1)

            # Log-likelihood gradient summed over the batch:
            # dW[i, j] = sum_t (r[t, i] - p[t, i]) * x[t, j].
            dW = np.einsum('ti,tj->ij', r[idx] - p, x, optimize=True)
            step = self.learning_rate * dW
            self.weights += step

            # Relative size of the update per class row. The check is made
            # on the current mini-batch only, so it is a noisy criterion.
            converged = (np.linalg.norm(step, axis=1)
                         / np.linalg.norm(self.weights, axis=1))**2 < self.tolerance

            if converged.all():
                break

        return self

    def predict_proba(self, X):
        '''
        Returns an (n_samples, n_classes) array of class probabilities;
        column k corresponds to self.classes[k]. Requires a prior fit().
        '''

        XX = PolynomialFeatures(self.order).fit_transform(X)

        return softmax(np.matmul(XX, np.transpose(self.weights)), axis=1)

    def predict(self, X):
        '''
        Returns the most probable class label for every row of X.
        '''

        # Map the winning column back to the original label so that labels
        # other than 0..n_classes-1 are reported correctly.
        return self.classes[np.argmax(self.predict_proba(X), axis=1)]

In [2]:
# Load the scikit-learn digits data set and hold out 20% of it for testing.
digits_df = datasets.load_digits()
X = digits_df.data
y = digits_df.target
random_seed = 1  # shared by the train/test split and the SGD training below
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, 
                                                                    test_size=0.2, 
                                                                    random_state=random_seed)

# Hyper-parameters of MyLogisticRegression.
learning_rate = 0.1
max_iter = 1000
batch_size = len(y_test) // 5  # Mini-batch size (samples drawn per update).
order = 1

title = 'Dataset: sklearn.datasets.load_digits()' 
print(title)
print('=' * len(title))
print()

# MyLogisticRegression draws its initial weights and mini-batches from the
# global NumPy random state; seed it so the reported accuracy is reproducible.
np.random.seed(random_seed)
clf = MyLogisticRegression(learning_rate, max_iter, batch_size, order).fit(X_train, y_train)
predicts = clf.predict(X_test)
# Mean of the boolean hit mask; unlike indexing the counts of np.unique this
# also works when every prediction is right (or every one is wrong).
accuracy = np.mean(predicts == y_test)
print(f'Accuracy of MyLogisticRegression is {accuracy:.2f}')
print()

# Reference model from scikit-learn.
# NOTE(review): newer scikit-learn releases may deprecate `multi_class`;
# confirm against the installed version.
clf_sk = LogisticRegression(multi_class='ovr', solver='liblinear').fit(X_train, y_train)
predicts_sk = clf_sk.predict(X_test)
accuracy_sk = np.mean(predicts_sk == y_test)
print(f'Accuracy of LogisticRegression is {accuracy_sk:.2f}')

Dataset: sklearn.datasets.load_digits()

Accuracy of MyLogisticRegression is 0.95

Accuracy of LogisticRegression is 0.97
