**Implement Batch Gradient Descent with early stopping for Softmax Regression (without using Scikit-Learn).**

In [1]:
# Import data
from sklearn.datasets import load_iris

# Load the dataset once; return_X_y=True yields the (data, target) pair directly
# instead of building the whole dataset object twice to read one attribute each time.
X, y = load_iris(return_X_y=True)

In [2]:
import numpy as np


class SoftmaxRegression():
    """Softmax (multinomial logistic) regression trained with batch gradient descent.

    Attributes set by ``fit``:
        theta: weight matrix, one row per feature and one column per class.
        bias: intercept term, one value per class.
        classes_: sorted unique labels seen in the training targets.
        n_iter_: number of gradient steps actually performed.
        loss_: cross-entropy loss of the final parameters on the training data.
    """

    def __init__(self):
        pass

    def softmax_pont(self, X, theta):
        """Return the softmax scores (logits) ``X . theta``, one column per class."""
        return np.dot(X, theta)

    def softmax_prob(self, s):
        """Turn a 2-D array of scores into row-wise softmax probabilities."""
        s = np.asarray(s, dtype=float)
        # Subtracting each row's maximum does not change the softmax value but
        # keeps exp() from overflowing to inf (which would produce nan rows).
        e = np.exp(s - np.max(s, axis=1, keepdims=True))
        return e / np.sum(e, axis=1, keepdims=True)

    def y_k_maker(self, y):
        """Return the one-hot encoding of ``y``.

        The result has one row per instance and one column per distinct label;
        columns follow the sorted order of the distinct labels, so labels do
        not need to be the integers 0..k-1.
        """
        classes, positions = np.unique(np.asarray(y).ravel(), return_inverse=True)
        positions = positions.ravel()
        y_k = np.zeros((len(positions), len(classes)))
        y_k[np.arange(len(positions)), positions] = 1
        return y_k

    def loss_cost(self, y_k, p, epsilon):
        """Return the mean cross-entropy between one-hot targets and probabilities.

        ``epsilon`` is added inside the logarithm so that a probability of
        exactly zero does not produce ``-inf``.
        """
        # Sum over classes first (picks out the log-probability of the true
        # class for each instance), then average over instances.
        return -np.mean(np.sum(y_k * np.log(p + epsilon), axis=1))

    def gradient(self, m, X, err):
        """Return the cross-entropy gradient ``X^T . err / m`` with respect to theta."""
        return 1/m * X.T.dot(err)

    def fit(self, X, y, eta=0.1, n_iterations=10**5, m=None, epsilon=10**-7):
        """Learn weights and intercepts with batch gradient descent.

        Args:
            X: 2-D array-like of training features, one row per instance.
            y: 1-D array-like of class labels, one per instance.
            eta: learning rate.
            n_iterations: maximum number of gradient steps.
            m: divisor used to average the gradient; defaults to the number
                of rows of the ``X`` passed to this call.
            epsilon: used both as the smoothing term inside the loss logarithm
                and as the tolerance for stopping: training ends as soon as
                every gradient entry has absolute value <= epsilon.

        Raises:
            ValueError: if ``X`` is not a non-empty 2-D array or if ``X`` and
                ``y`` have different lengths.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        if X.ndim != 2 or len(X) == 0:
            raise ValueError("X must be a non-empty 2-D array")
        if len(X) != len(y):
            raise ValueError("X and y must contain the same number of instances")
        # Resolve the default here, at call time; a default written in the
        # signature would be evaluated once, when the method is defined.
        if m is None:
            m = len(X)

        self.classes_ = np.unique(y)
        y_k = self.y_k_maker(y)
        # A leading column of ones lets the first row of the parameter matrix
        # act as the intercept.
        X_with_bias = np.c_[np.ones([len(X), 1]), X]

        # Gradient descent starts from a random parameter matrix
        params = np.random.randn(X_with_bias.shape[1], len(self.classes_))

        self.n_iter_ = 0
        for iteration in range(n_iterations):
            p = self.softmax_prob(self.softmax_pont(X_with_bias, params))
            grad = self.gradient(m, X_with_bias, p - y_k)
            params = params - eta * grad
            self.n_iter_ = iteration + 1
            # Stop once the whole gradient is numerically flat. Comparing
            # magnitudes matters: a large negative entry is not "small".
            if np.all(np.abs(grad) <= epsilon):
                break

        final_p = self.softmax_prob(self.softmax_pont(X_with_bias, params))
        self.loss_ = self.loss_cost(y_k, final_p, epsilon)
        # Store the intercept separately so that theta keeps one row per feature.
        self.bias = params[0]
        self.theta = params[1:]

    def predict(self, X):
        """Return the most probable class label for each row of ``X``."""
        X = np.asarray(X, dtype=float)
        s = self.softmax_pont(X, self.theta) + self.bias
        p = self.softmax_prob(s)
        # Map column positions back to the labels seen during fit
        return self.classes_[np.argmax(p, axis=1)]

In [3]:
# Train the classifier on the full dataset, then predict labels for the same samples
sr = SoftmaxRegression()
sr.fit(X, y)
y_pred = sr.predict(X)

In [4]:
# Compute the fraction of correct predictions (accuracy on the training data)
from numpy import mean

# The element-wise comparison yields booleans; their mean is the share of matches
score = (y_pred == y).mean()
score

0.9666666666666667