In [1]:
import numpy as np 
import pandas as pd 

In [2]:
def score(x_instance, class_weights):
    """Return the linear score(s) ``x_instance · class_weights``.

    Both arguments are passed straight to ``np.dot``, so the result is a
    scalar for two 1-D vectors and a matrix product for 2-D inputs.
    """
    linear_score = np.dot(x_instance, class_weights)
    return linear_score

# true_labels is a one-hot vector: 1 for the class the instance belongs to, 0 otherwise
# loss = 0
# for x_instance in X:
#     all_scores = [score(x_instance, class_weights) for class_weights in all_weights]
#     exp_scores = [exp(class_score) for class_score in all_scores]
#     sum_of_exp_scores = sum(exp_scores)
#     probs = [exp_score / sum_of_exp_scores for exp_score in exp_scores]

#     true_labels and probs must be in the same class order
#     cross_entropies = [-true_labels[c] * log(probs[c]) for c in range(len(true_labels))]
#     loss += sum(cross_entropies)
# loss = loss / X.shape[0]

In [3]:
def predict_proba(X, weights):
    """Return softmax class probabilities for every row of ``X``.

    Parameters:
    - X: 2-D array of samples, one row per sample.
    - weights: 2-D array with one row of feature weights per class.

    Returns:
    - 2-D array with one row per sample and one column per class; each row
      sums to 1.
    """
    all_scores = np.dot(X, weights.T)

    # Softmax is unchanged by subtracting a per-row constant. Shifting by the
    # row maximum keeps every exponent <= 0, so np.exp cannot overflow to inf
    # (which previously turned the whole row into NaN).
    shifted_scores = all_scores - np.max(all_scores, axis=1, keepdims=True)
    exp_scores = np.exp(shifted_scores)

    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

In [4]:
# def predict_proba(X, weights):
#     all_scores = [score(X, class_weights) for class_weights in weights]
#     exp_scores = [np.exp(class_score) for class_score in all_scores]
#     sum_of_exp_scores = sum(exp_scores)
#     print(sum_of_exp_scores)
#     probs = [exp_score / sum_of_exp_scores for exp_score in exp_scores]

#     return probs

In [5]:
# Toy two-class dataset: 20 samples with 10 features each.
pos_val = list(range(10))       # feature row shared by every class-0 sample
neg_val = list(range(10, 20))   # feature row shared by every class-1 sample

# Even sample ids get the class-0 row, odd ids get the class-1 row.
data = {i: (neg_val if i % 2 else pos_val) for i in range(20)}
y = np.array([i % 2 for i in range(20)])

# The dict keys become DataFrame columns, so transpose to get one sample per row.
X = pd.DataFrame(data).T.to_numpy()

In [6]:
# Gradient-descent step size.
# NOTE(review): no cell shown in this notebook reads this global; both
# SoftmaxGD definitions take their own `learning_rate` constructor argument.
learning_rate = 0.1


In [7]:
import numpy as np 
import pandas as pd 
from sklearn.base import BaseEstimator, ClassifierMixin

# Seed NumPy's global RNG so the np.random.randn weight initialisation in
# SoftmaxGD.fit is repeatable.
np.random.seed(42)

class SoftmaxGD(BaseEstimator, ClassifierMixin):
    """Softmax (multinomial logistic) classifier trained by full-batch gradient descent.

    Scores are ``X @ weights.T`` with no intercept term.

    NOTE: a later cell in this notebook redefines ``SoftmaxGD`` with
    regularisation and early stopping; this is the minimal baseline version.

    Parameters:
    - n_iter: Number of gradient-descent steps performed by each ``fit`` call.
    - learning_rate: Step size applied to the gradient.
    - warm_start: If True, a later ``fit`` call continues from the current
      weights instead of drawing new random ones.

    Attributes set by ``fit``:
    - classes_: Sorted unique labels seen when the weights were initialised.
    - weights: Array of shape (n_classes, n_features).
    - true_labels: One-hot encoding of the most recent ``y``.
    """

    # None until the first fit; fit() then stores a per-instance array.
    weights = None

    def __init__(self, n_iter = 1, learning_rate = 0.1, warm_start = True) -> None:
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.warm_start = warm_start

    def predict_proba(self, X):
        """Return an (n_samples, n_classes) array of softmax probabilities."""
        all_scores = np.dot(X, self.weights.T)
        # Subtracting the per-row maximum leaves softmax unchanged but keeps
        # np.exp from overflowing to inf (which would produce NaN rows).
        all_scores = all_scores - np.max(all_scores, axis=1, keepdims=True)
        exp_scores = np.exp(all_scores)

        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def predict(self, X):
        """Return the most probable class label for every row of ``X``."""
        probs = self.predict_proba(X)
        # Map the winning column back to the label it stands for, so labels
        # other than 0..K-1 are reported correctly.
        return self.classes_[np.argmax(probs, axis=1)]

    def fit(self, X, y):
        """Run ``n_iter`` gradient-descent steps on (X, y) and return ``self``.

        Raises:
        - ValueError: if ``y`` contains a label that is not in ``classes_``
          (possible when warm-starting on new data).
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = X.shape[0]

        if self.weights is None or not self.warm_start:
            self.classes_ = np.unique(y)  # Use sklearn convention for class labels
            self.weights = np.random.randn(len(self.classes_), X.shape[1])

        # Rebuild the one-hot targets on every call: with warm_start the
        # weights are kept, but the targets must describe the *current* y.
        class_idx = np.clip(np.searchsorted(self.classes_, y), 0, len(self.classes_) - 1)
        if not np.array_equal(self.classes_[class_idx], y):
            raise ValueError("y contains labels that are not present in classes_")
        self.true_labels = np.zeros((n_samples, len(self.classes_)))
        self.true_labels[np.arange(n_samples), class_idx] = 1

        for _ in range(self.n_iter):
            probs = self.predict_proba(X)
            # Mean cross-entropy gradient for all classes at once:
            # row c is mean_i (p_ic - y_ic) * x_i.
            gradients = (probs - self.true_labels).T @ X / n_samples
            self.weights = self.weights - self.learning_rate * gradients

        return self

In [8]:
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import root_mean_squared_error
from sklearn.base import clone

# Seed NumPy's global RNG so the np.random.randn weight initialisation in
# SoftmaxGD.fit is repeatable.
np.random.seed(42)

class SoftmaxGD(BaseEstimator, ClassifierMixin):
    """Softmax (multinomial logistic) classifier trained by full-batch gradient descent.

    Scores are ``X @ weights.T`` with no intercept term. Optional L1/L2
    regularisation and validation-based early stopping are supported.

    Attributes set by ``fit``:
    - classes_: Sorted unique labels seen when the weights were initialised.
    - weights: Array of shape (n_classes, n_features).
    - true_labels: One-hot encoding of the most recent ``y``.
    - best_weights_: Weights with the lowest validation error in the most
      recent fit, or None if no validation pass was run.
    """

    # None until the first fit; fit() then stores a per-instance array.
    weights = None

    def __init__(
        self, 
        n_iter=1, 
        learning_rate=0.1, 
        warm_start=True, 
        early_stopping=False, 
        tol=1e-4, 
        patience=10,
        alpha=None,
        reg_type=None,
    ):
        """
        Parameters:
        - n_iter: Maximum number of epochs (full-batch gradient steps) per fit call.
        - learning_rate: Step size for gradient descent.
        - warm_start: If True, reuse the weights learned from the previous fit.
        - early_stopping: If True, stop training early when validation error stops improving.
        - tol: Minimum decrease in validation error that counts as an improvement.
        - patience: Number of epochs with no improvement before stopping.
        - alpha: Regularisation strength; larger values shrink the weights more.
          None disables regularisation.
        - reg_type: "l1", "l2" or None (no regularisation).
        """
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.warm_start = warm_start
        self.early_stopping = early_stopping
        self.tol = tol
        self.patience = patience
        self.alpha = alpha
        self.reg_type = reg_type
        self.best_weights_ = None

    def get_reg_term_for_grad(self, type="l1", alpha=0.1):
        """Return the penalty gradient, an array shaped like ``self.weights``.

        Parameters:
        - type: "l1", "l2" or None.
        - alpha: Regularisation strength, or None.

        Returns zeros when ``type`` or ``alpha`` is None, so the estimator's
        default configuration trains without a penalty instead of failing on
        ``gradients + None``.

        Raises:
        - ValueError: if ``type`` is not one of the supported values.
        """
        if type is None or alpha is None:
            return np.zeros_like(self.weights)
        if type == "l1":
            # d|w| / dw = sign(w), i.e. -1, 0 or +1 per weight.
            return alpha * np.sign(self.weights)
        if type == "l2":
            # Gradient of (alpha / 2) * sum(w ** 2): alpha times each class's weights.
            return alpha * self.weights
        raise ValueError(f"Unknown reg_type {type!r}; expected 'l1', 'l2' or None")

    def predict_proba(self, X):
        """Return an (n_samples, n_classes) array of softmax probabilities."""
        all_scores = np.dot(X, self.weights.T)
        # Subtracting the per-row maximum leaves softmax unchanged but keeps
        # np.exp from overflowing to inf (which would produce NaN rows).
        all_scores = all_scores - np.max(all_scores, axis=1, keepdims=True)
        exp_scores = np.exp(all_scores)

        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    
    def predict(self, X):
        """Return the most probable class label for every row of ``X``."""
        probs = self.predict_proba(X)
        # Map the winning column back to the label it stands for, so labels
        # other than 0..K-1 are reported correctly.
        return self.classes_[np.argmax(probs, axis=1)]

    def _one_hot(self, y):
        """Encode labels ``y`` as an (n_samples, n_classes) 0/1 matrix using ``classes_``."""
        class_idx = np.clip(np.searchsorted(self.classes_, y), 0, len(self.classes_) - 1)
        if not np.array_equal(self.classes_[class_idx], y):
            raise ValueError("y contains labels that are not present in classes_")
        encoded = np.zeros((len(y), len(self.classes_)))
        encoded[np.arange(len(y)), class_idx] = 1
        return encoded

    def fit(self, X, y, X_val=None, y_val=None):
        """
        Fit the model to the data.

        Parameters:
        - X: Training data.
        - y: Target labels for training data.
        - X_val: Optional, validation data to use for early stopping.
        - y_val: Optional, validation labels to use for early stopping.

        Returns:
        - self

        Raises:
        - ValueError: if y contains a label that is not in classes_ (possible
          when warm-starting on new data) or reg_type is unsupported.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = X.shape[0]

        if self.weights is None or not self.warm_start:
            self.classes_ = np.unique(y)  # Use sklearn convention for class labels
            self.weights = np.random.randn(len(self.classes_), X.shape[1])

        # Rebuild the one-hot targets on every call: with warm_start the
        # weights are kept, but the targets must describe the *current* y.
        self.true_labels = self._one_hot(y)

        # Forget the snapshot of any previous fit; otherwise a fit without
        # validation data would be overwritten by stale weights at the end.
        self.best_weights_ = None

        # Early stopping variables
        best_val_error = float("inf")
        epochs_no_improvement = 0

        for epoch in range(self.n_iter):
            probs = self.predict_proba(X)
            # Mean cross-entropy gradient for all classes at once:
            # row c is mean_i (p_ic - y_ic) * x_i.
            gradients = (probs - self.true_labels).T @ X / n_samples

            reg_term = self.get_reg_term_for_grad(alpha=self.alpha, type=self.reg_type)

            self.weights = self.weights - self.learning_rate * (gradients + reg_term)

            if self.early_stopping and X_val is not None and y_val is not None:
                # Validation error: mean squared difference between true and
                # predicted label values.
                # NOTE(review): this assumes numeric labels; a misclassification
                # rate would be the label-agnostic choice.
                y_val_pred = self.predict(X_val)
                val_error = root_mean_squared_error(y_val, y_val_pred) ** 2

                # Early stopping logic
                if val_error < best_val_error - self.tol:
                    best_val_error = val_error
                    self.best_weights_ = self.weights.copy() 
                    epochs_no_improvement = 0  # Reset patience counter
                else:
                    epochs_no_improvement += 1

                if epochs_no_improvement >= self.patience:
                    print(f"Stopping early at epoch {epoch}. No improvement after {self.patience} epochs.")
                    break

        # Roll back to the best validated weights, if a validation pass ran.
        if self.early_stopping and self.best_weights_ is not None:
            self.weights = self.best_weights_  

        return self  # Return self for compatibility with scikit-learn
    

In [9]:
# Scratch check: np.argmax returns the index of the largest element (2 for this list).
np.argmax([1, 0, 2])

np.int64(2)

In [22]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split into train / validation / test sets (the test set is held out for a later cell)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Instantiate and train SoftmaxGD with L2 regularisation and early stopping on the validation split
sgd_reg = SoftmaxGD(n_iter=1000, learning_rate=0.1, warm_start=True, early_stopping=True, tol=1e-7, patience=200, alpha=0.01, reg_type='l2')
sgd_reg.fit(X_train, y_train, X_val=X_val, y_val=y_val)

# Predictions with SoftmaxGD
y_pred_sgd = sgd_reg.predict(X_val)

# Accuracy of SoftmaxGD (the validation split also drove early stopping, so this is optimistic)
sgd_accuracy = accuracy_score(y_val, y_pred_sgd)

# Reference model: scikit-learn's LogisticRegression. With the lbfgs solver it
# already fits a multinomial (softmax) model for multiclass targets, so the
# deprecated `multi_class` argument is not passed.
log_reg = LogisticRegression(max_iter=1000, solver='lbfgs')
log_reg.fit(X_train, y_train)

# Predictions with Logistic Regression
y_pred_logreg = log_reg.predict(X_val)

# Accuracy of Logistic Regression
logreg_accuracy = accuracy_score(y_val, y_pred_logreg)

# Display the comparison of accuracies
import pandas as pd

results = pd.DataFrame({
    "Model": ["SoftmaxGD", "Logistic Regression (Softmax)"],
    "Accuracy": [sgd_accuracy, logreg_accuracy]
})
results

Stopping early at epoch 203. No improvement after 200 epochs.




Unnamed: 0,Model,Accuracy
0,SoftmaxGD,0.916667
1,Logistic Regression (Softmax),0.958333


In [23]:
# Score both fitted models on the held-out test split.
y_pred_sgd = sgd_reg.predict(X_test)
y_pred_logreg = log_reg.predict(X_test)

sgd_accuracy = accuracy_score(y_test, y_pred_sgd)
logreg_accuracy = accuracy_score(y_test, y_pred_logreg)

# Side-by-side accuracy table, one row per model.
import pandas as pd

results = pd.DataFrame(
    [
        ("SoftmaxGD", sgd_accuracy),
        ("Logistic Regression (Softmax)", logreg_accuracy),
    ],
    columns=["Model", "Accuracy"],
)
results

Unnamed: 0,Model,Accuracy
0,SoftmaxGD,0.966667
1,Logistic Regression (Softmax),1.0
