In [None]:
import numpy as np
import matplotlib as plt
import pandas as pd

In [None]:
class SoftmaxRegression():
  """Multinomial (softmax) regression trained with full-batch gradient descent.

  Parameters
  ----------
  n_epochs : int, default=100
      Number of gradient-descent passes over the whole training set.
  lr : float, default=0.1
      Learning rate applied to both the weight and the bias updates.
  random_state : int or None, default=None
      Seed for the weight initialisation. ``None`` draws from NumPy's global
      random state, so a prior ``np.random.seed`` call still controls the run.

  Attributes (set by ``fit``)
  ---------------------------
  weights : ndarray of shape (n_classes, n_features)
  bias : ndarray of shape (1, n_classes)
  classes_ : ndarray of shape (n_classes,)
      Sorted distinct training labels; column ``k`` of every probability
      matrix refers to ``classes_[k]``.
  """

  def __init__(self, n_epochs=100, lr=0.1, random_state=None):
    self.n_epochs, self.lr = n_epochs, lr
    self.random_state = random_state

  def softmax(self, X):
    """Return the class-probability matrix for ``X`` (each row sums to 1)."""
    z = np.dot(np.asarray(X, dtype=float), self.weights.T) + self.bias
    # Softmax is invariant to a per-row shift. Subtracting the row maximum keeps
    # every exponent <= 0, so np.exp can no longer overflow to inf (inf/inf = NaN).
    z = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(z)
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

  def predict_proba(self, X):
    """Return per-class probabilities; identical to ``softmax(X)``."""
    return self.softmax(X)

  def predict(self, X):
    """Return the most probable class for every row of ``X``.

    After ``fit`` the result is expressed in the original label values
    (``classes_``). If the parameters were assigned by hand and ``classes_``
    is absent, the raw column indices are returned.
    """
    best = np.argmax(self.softmax(X), axis=1)
    classes = getattr(self, "classes_", None)
    return best if classes is None else classes[best]

  def initialize_params(self, X, y):
    """Create small random weights and zero biases.

    ``X`` supplies the feature count (``X.shape[1]``) and the one-hot ``y``
    supplies the class count (``y.shape[1]``).

    Returns
    -------
    (weights, biases) with shapes (n_classes, n_features) and (1, n_classes).
    """
    n_features, n_classes = X.shape[1], y.shape[1]
    # Keep using the global generator when no seed was requested so existing
    # notebooks that call np.random.seed(...) behave exactly as before.
    if self.random_state is None:
      rng = np.random
    else:
      rng = np.random.RandomState(self.random_state)
    weights = rng.randn(n_classes, n_features) * 0.01
    biases = np.zeros((1, n_classes))
    return weights, biases

  def one_hot(self, y):
    """One-hot encode a 1-D label collection.

    Accepts any array-like (list, ndarray, pandas Series). Columns follow the
    sorted distinct labels, so labels need not be the integers ``0..K-1``.
    """
    labels = np.asarray(y).ravel()
    classes, codes = np.unique(labels, return_inverse=True)
    n_rows = labels.shape[0]
    encoded = np.zeros((n_rows, classes.shape[0]))
    # `codes` is each label's position in `classes`, i.e. its column index.
    encoded[np.arange(n_rows), codes.ravel()] = 1
    return encoded

  def cross_entropy(self, X, y):
    """Mean cross-entropy of the current parameters; ``y`` must be one-hot."""
    # Clip so a probability that underflowed to 0 gives a large finite penalty
    # instead of 0 * log(0) = NaN.
    pk = np.clip(self.softmax(X), 1e-15, 1.0)
    return np.sum(y * np.log(pk)) / -np.shape(X)[0]

  def fit(self, X, y):
    """Learn ``weights`` and ``bias`` from features ``X`` and labels ``y``.

    Runs ``n_epochs`` full-batch gradient steps and prints the training loss
    every 100 epochs. Returns ``None``.
    """
    # Convert once up front rather than on every epoch inside softmax.
    X = np.asarray(X, dtype=float)
    m = X.shape[0]
    self.classes_ = np.unique(np.asarray(y).ravel())
    y = self.one_hot(y)
    self.weights, self.bias = self.initialize_params(X,y)
    for epoch in range(self.n_epochs):
      # Both gradients are built from the same residual, computed before
      # either parameter is updated.
      error = self.softmax(X) - y
      w_gradient = np.dot(error.T, X) / m
      b_gradient = np.sum(error, axis=0) / m
      self.weights -= self.lr * w_gradient
      self.bias -= self.lr * b_gradient.ravel()

      if epoch % 100 == 0:
        print(f"epoch num {epoch}, Loss = ",self.cross_entropy(X,y))

In [None]:
from sklearn.datasets import load_iris

# Load iris in pandas form and unpack the feature table and the label column.
iris = load_iris(as_frame=True)
X, y = iris.data, iris.target

In [None]:
# Seed NumPy's global generator first: the model's initial weights are drawn
# from it, so without a fixed seed the losses printed by fit() change per run.
np.random.seed(42)
our_model = SoftmaxRegression(n_epochs=1000)
our_model.fit(X, y)

epoch num 0, Loss =  1.0254290549358605
epoch num 100, Loss =  0.46841623458536846
epoch num 200, Loss =  0.26147596377133964
epoch num 300, Loss =  0.21850473275236346
epoch num 400, Loss =  0.19122980773985815
epoch num 500, Loss =  0.17224595636613682
epoch num 600, Loss =  0.15821936676537465
epoch num 700, Loss =  0.14739769415145312
epoch num 800, Loss =  0.13877108142112551
epoch num 900, Loss =  0.13171621812269793


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss

# Create and train the reference model.
# `multi_class` is intentionally not passed: the argument is deprecated in
# recent scikit-learn releases, and the lbfgs solver already fits a multinomial
# (softmax) model when the target has more than two classes.
# NOTE(review): LogisticRegression applies L2 regularisation by default, while
# our model is unregularised, so the two losses are not expected to match exactly.
model = LogisticRegression(solver='lbfgs', max_iter=1000)
model.fit(X, y)

# Class probabilities from both models on the training data
y_pred_proba = model.predict_proba(X)
our_preds = our_model.predict_proba(X)


# Calculate cross-entropy loss
loss = log_loss(y, y_pred_proba)
our_loss = log_loss(y, our_preds)

# Print cross-entropy loss
print(f'Cross-Entropy Loss Sklearn Model: {loss}')
print(f'Cross-Entropy Loss For Our Model: {our_loss}')

# Print the weights and bias
# print(f'Weights: {model.coef_}')
# print(f'Bias: {model.intercept_}')

Cross-Entropy Loss Sklearn Model: 0.11963578904962134
Cross-Entropy Loss For Our Model: 0.12588131421695473
