# Clasificación multiclase softmax

La clasificación softmax es una generalización de la regresión logística binaria, donde en vez de dividir o separar mediante un valor delimitante se asignan valores de probabilidad mediante la 'normalización' de valores de salida o las clases mediante la funcion *softmax* o también conocido como *función exponencial normalizada*

La función se define de l

In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.utils import Bunch
import logging

logging.basicConfig(level=logging.INFO)
#  logging.disable()

## Desarrollo de teoria

## Funciones de preparacion de datos y entrenamiento

In [2]:
def one_hot_encoder(target: np.ndarray) -> np.ndarray:
    n_classes: int = np.unique(target).shape[0]
    y_encode: np.ndarray = np.zeros((target.shape[0], n_classes))
    for idx, val in enumerate(target):
        y_encode[idx, val] = 1.0
    return y_encode

In [3]:
def sigmoid(z: np.ndarray) -> np.ndarray:
    return np.exp(z) / np.sum(np.exp(z), axis=1, keepdims=True)

In [4]:
def model_fit(
    data: np.ndarray, target: np.ndarray, eta: float = 0.55, iterations: int = 100000
) -> np.ndarray:
    m = len(target)
    logging.info(f"target: {m}")

    theta = np.random.randn(data.shape[1], target.shape[1])

    logging.info(f"theta: {theta}")

    for _ in range(iterations):
        gradients = (1 / m) * (data.T @ (sigmoid(data @ theta) - target))
        theta = theta - eta * gradients

    logging.info(f"theta: {theta}")

    return theta

In [5]:
def model_test(
    sepal_length: float,
    sepal_width: float,
    petal_length: float,
    petal_width: float,
    weights: np.ndarray,
) -> list[int]:
    list1 = [0, 0, 0]

    for i in range(len(list1)):
        a0 = weights.T[i][0]
        a1 = weights.T[i][1]
        a2 = weights.T[i][2]
        a3 = weights.T[i][3]
        a4 = weights.T[i][4]
        list1[i] = np.exp(
            a0 + a1 * sepal_length + a2 * sepal_width + a3 * petal_length + a4 * petal_width
        )

    maxP = np.argmax([z / sum(list1) for z in list1])

    pred = [0, 0, 0]

    pred[maxP] = 1

    return pred


In [6]:
def model_predict(
    data: np.ndarray, target: np.ndarray, weights: np.ndarray
) -> tuple[np.ndarray, float]:
    predict_list = []
    test_list = []
    for i in data:
        predict_list.append(np.argmax(model_test(i[0], i[1], i[2], i[3], weights)))
    for j in target:
        test_list.append(np.argmax(j))
    num = 0
    for k in range(len(predict_list)):
        if predict_list[k] == test_list[k]:
            num = num + 1

    final_list: np.ndarray = np.array([predict_list, test_list], ndmin=2)
    effi = num / len(predict_list)

    return final_list, effi

## Base de datos: Iris

In [7]:
iris: Bunch = datasets.load_iris()  # type: ignore

x_data: np.ndarray = iris["data"]  # type: ignore
y_data: np.ndarray = iris["target"]  # type: ignore

x_c: np.ndarray = np.c_[np.ones((len(x_data), 1)), x_data]
y_c = one_hot_encoder(y_data)

## Preparación de datos, división en split

In [8]:
train_test_data: list[np.ndarray] = train_test_split(x_c, y_c, train_size=0.35)  # type: ignore
x_train, x_test, y_train, y_test = train_test_data

logging.info(f"x_train shape: {x_train.shape}")
logging.info(f"x_test shape: {x_test.shape}")
logging.info(f"y_train shape: {y_train.shape}")
logging.info(f"y_test shape: {y_test.shape}")

INFO:root:x_train shape: (52, 5)
INFO:root:x_test shape: (98, 5)
INFO:root:y_train shape: (52, 3)
INFO:root:y_test shape: (98, 3)


## Entrenamiento

In [9]:
a = model_fit(x_train, y_train)

INFO:root:target: 52
INFO:root:theta: [[ 0.01995625  0.14691127  0.23582594]
 [ 0.40811085  0.47765865 -0.49195017]
 [ 1.78964902  1.4857198  -0.99024404]
 [-0.04004406  1.80288804  0.42819982]
 [-0.0761417  -0.4166329  -1.18100824]]
INFO:root:theta: [[  1.74882945  -1.51744812   0.17131213]
 [  4.64504527   3.63647363  -7.88769957]
 [ 14.21899524   9.87114761 -21.80501807]
 [-13.30193549  -4.26935289  19.76233218]
 [ -8.61337495 -10.28452533  17.22411744]]


## Prediccón

In [10]:
predictions, efficiency = model_predict(x_test, y_test, a)

logging.info(f"Predictions: {predictions}")
logging.info(f"Efficiency: {efficiency}")

INFO:root:Predictions: [[0 1 1 1 1 0 1 0 1 0 0 0 0 2 1 1 0 2 0 0 0 0 1 0 2 0 1 1 0 0 0 0 0 0 2 1
  0 1 0 0 0 0 0 0 1 0 2 0 1 0 0 0 1 0 1 0 1 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0
  1 2 1 1 0 1 0 1 1 1 0 0 1 2 1 1 0 1 1 0 0 1 0 0 0 0]
 [1 0 0 0 0 1 0 2 0 2 1 2 1 2 0 0 1 2 2 1 2 0 0 0 2 1 0 0 2 2 1 2 2 1 2 0
  0 0 1 2 1 2 1 0 0 2 2 2 0 1 1 2 0 2 0 1 0 2 0 2 1 0 1 1 1 2 1 1 2 0 2 1
  0 2 0 0 2 0 0 0 0 0 1 1 0 1 0 0 2 0 0 2 2 0 1 1 1 1]]
INFO:root:Efficiency: 0.11224489795918367
