In [19]:
import numpy as np

In [20]:
class softmaxregression:
    """Multinomial (softmax) logistic regression trained with full-batch gradient descent.

    Labels may be any values ``np.unique`` can sort (integers that do not start
    at 0, non-contiguous integers, strings, ...). They are mapped internally to
    column indices ``0..K-1`` and mapped back again by ``predict``.

    Parameters
    ----------
    learning_rate : float, default=0.1
        Step size applied to the sample-averaged gradient on every iteration.
    iter : int, default=1000
        Number of gradient descent iterations performed by ``fit``.

    Attributes
    ----------
    weights : ndarray of shape (n_features, K_classes); None until ``fit`` runs.
    bias : ndarray of shape (1, K_classes); None until ``fit`` runs.
    classes_ : ndarray of shape (K_classes,); None until ``fit`` runs.
        Sorted distinct labels seen by ``fit``. Column ``k`` of ``weights`` and
        ``bias`` belongs to ``classes_[k]``.
    """

    def __init__(self, learning_rate=0.1, iter=1000):
        self.learning_rate = learning_rate
        self.iter = iter
        self.weights = None
        self.bias = None
        self.classes_ = None

    def _softmax(self, z):
        """Return the row-wise softmax of a 2-D array of logits."""
        # Subtracting each row's maximum does not change the softmax value but
        # keeps np.exp from overflowing on large logits.
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def _onehot(self, y):
        """One-hot encode ``y`` into a float array of shape (len(y), K_classes).

        Column ``k`` corresponds to the k-th smallest distinct label in ``y``,
        so the encoding is valid for any label values, not only ``0..K-1``.
        """
        y = np.asarray(y).ravel()
        # return_inverse gives, for every sample, the position of its label in
        # the sorted array of distinct labels, i.e. the column to switch on.
        classes, class_idx = np.unique(y, return_inverse=True)
        encode = np.zeros((y.shape[0], classes.shape[0]))
        encode[np.arange(y.shape[0]), class_idx.ravel()] = 1
        return encode

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from training data.

        Parameters
        ----------
        X : array-like of shape (m_samples, n_features)
        y : array-like of shape (m_samples,)
            Class label of each row of ``X``.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of samples.
        """
        X = np.asarray(X)
        y = np.asarray(y).ravel()
        m_samples, n_features = X.shape
        if y.shape[0] != m_samples:
            raise ValueError(
                f"X has {m_samples} samples but y has {y.shape[0]} labels"
            )

        # Sorted distinct labels; the column order matches the one _onehot uses.
        self.classes_ = np.unique(y)
        K_classes = self.classes_.shape[0]

        # Initialise the parameters.
        self.weights = np.zeros((n_features, K_classes))
        self.bias = np.zeros((1, K_classes))

        y_one_hot = self._onehot(y)

        # Learning rate and the 1/m averaging factor are loop-invariant.
        step = (1 / m_samples) * self.learning_rate

        # Gradient descent.
        for _ in range(self.iter):
            # Forward pass: logits Z = XW + b, then class probabilities.
            logit = np.dot(X, self.weights) + self.bias
            y_prediction = self._softmax(logit)

            # (probabilities - one-hot targets) is the gradient of the
            # cross-entropy loss with respect to the logits.
            error = y_prediction - y_one_hot

            # Update the parameters.
            self.weights = self.weights - step * np.dot(X.T, error)
            self.bias = self.bias - step * np.sum(error, axis=0, keepdims=True)

    def predict(self, X):
        """Return the predicted class label for every row of ``X``.

        ``X`` holds one sample per row, shape (n_samples, n_features); a single
        sample is passed as a 1-by-n matrix. ``fit`` must be called first,
        because ``weights`` and ``bias`` are None until then.
        """
        logits = np.dot(X, self.weights) + self.bias
        prob = self._softmax(logits)
        class_idx = np.argmax(prob, axis=1)
        classes = getattr(self, "classes_", None)
        if classes is None:
            # Parameters were assigned by hand rather than learned by fit, so
            # no label mapping exists; fall back to raw column indices.
            return class_idx
        return classes[class_idx]

In [21]:
# Compare our implementation with scikit-learn's and measure its accuracy on the MNIST data
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split


In [22]:
# Fetch OpenML dataset 'mnist_784' (version 1); as_frame=False asks for arrays
# instead of a pandas DataFrame.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X = mnist.data
# Cast the targets to integers so they can be compared with integer predictions.
y = mnist.target.astype(int)

In [23]:
# Scale the features by 1/255. Reading from mnist.data rather than from X keeps
# this cell idempotent: re-running it does not divide already-scaled data again.
X = mnist.data / 255.0
# Split the first 10,000 samples into a training set (80%) and a test set (20%).
X_train, X_test, y_train, y_test = train_test_split(X[:10000], y[:10000], test_size=0.2, random_state=42)

In [24]:
# Train the from-scratch softmax regression on the training split.
model = softmaxregression(iter=500, learning_rate=0.5)
model.fit(X_train, y_train)

In [30]:
# Predict labels for the test split with the learned weights and report the
# fraction of predictions that equal y_test.
prediction = model.predict(X_test)
accuracy = (prediction == y_test).mean()
accuracy

np.float64(0.9245)