### Softmax Regression from scratch

In [2]:
class SoftmaxRegression:
    """Multinomial logistic (softmax) regression trained with batch gradient descent.

    Parameters
    ----------
    learning_rate : float
        Step size of each gradient-descent update.
    n_iters : int
        Maximum number of gradient-descent iterations.
    reg_strength : float
        L2 regularization coefficient applied to the weights (not the bias).
        A value of 0 disables regularization.
    tol : float
        Training stops once the absolute change in loss between two
        consecutive iterations drops below this threshold.

    Attributes
    ----------
    weights : ndarray of shape (n_features, n_classes), or None before fit
    bias : ndarray of shape (n_classes,), or None before fit
    n_classes : int, or None before fit
    classes_ : ndarray holding the sorted unique training labels, or None before fit
    loss_history : list of float
        Loss recorded at every iteration of the most recent ``fit`` call.
    """

    def __init__(self, learning_rate=0.01, n_iters=1000, reg_strength=0.01, tol=1e-4):
        self.learning_rate = learning_rate
        self.n_iters = n_iters
        self.reg_strength = reg_strength  # L2 regularization to limit overfitting
        self.tol = tol  # convergence threshold on the loss change
        self.weights = None
        self.bias = None
        self.n_classes = None
        self.classes_ = None  # original label values, indexed by class column
        self.loss_history = []  # per-iteration loss of the latest fit

    def softmax(self, z):
        """Return the row-wise softmax of a 2-D score matrix, computed stably."""
        # Subtract each row's maximum so np.exp never overflows.
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def one_hot(self, y, n_classes):
        """Convert integer class indices ``y`` (values in [0, n_classes)) to one-hot rows."""
        y = np.asarray(y)
        one_hot_y = np.zeros((len(y), n_classes))
        one_hot_y[np.arange(len(y)), y] = 1
        return one_hot_y

    def cross_entropy_loss(self, y_true, y_pred):
        """Mean cross-entropy between one-hot ``y_true`` and ``y_pred``, plus the L2 penalty.

        Reads ``self.weights`` when ``reg_strength`` is positive; the penalty
        ``0.5 * reg_strength * sum(weights ** 2)`` is divided by the number of
        samples, matching the gradient used in ``fit``.
        """
        # Keep probabilities away from 0 and 1 to avoid log(0).
        epsilon = 1e-12
        y_pred = np.clip(y_pred, epsilon, 1. - epsilon)

        n_samples = y_true.shape[0]
        log_likelihood = -np.sum(y_true * np.log(y_pred)) / n_samples

        if self.reg_strength > 0:
            reg_loss = 0.5 * self.reg_strength * np.sum(self.weights ** 2)
            log_likelihood += reg_loss / n_samples

        return log_likelihood

    def fit(self, X, y, verbose=False):
        """Train the model on ``X`` (n_samples, n_features) and labels ``y``.

        Labels may be any values ``np.unique`` can sort (they do not have to be
        0..K-1); they are encoded to class indices internally and decoded again
        in ``predict``. Every call starts from freshly initialized parameters
        and an empty ``loss_history``.

        When ``verbose`` is true, the loss is printed every 100 iterations and
        a message is printed when the convergence criterion is met.
        """
        n_samples, n_features = X.shape

        # Encode labels as indices 0..K-1 so arbitrary label values are supported.
        self.classes_, y_idx = np.unique(np.asarray(y).ravel(), return_inverse=True)
        self.n_classes = len(self.classes_)

        # Small random initial weights, zero bias.
        self.weights = np.random.randn(n_features, self.n_classes) * 0.01
        self.bias = np.zeros(self.n_classes)

        # Start a fresh history so repeated fits do not accumulate old losses.
        self.loss_history = []

        y_one_hot = self.one_hot(y_idx, self.n_classes)
        prev_loss = float('inf')

        for i in range(self.n_iters):
            # Forward pass
            linear_model = np.dot(X, self.weights) + self.bias
            probas = self.softmax(linear_model)

            loss = self.cross_entropy_loss(y_one_hot, probas)
            self.loss_history.append(loss)

            # Convergence check (performed before the parameter update).
            if abs(prev_loss - loss) < self.tol:
                if verbose:
                    print(f"Converged at iteration {i}")
                break
            prev_loss = loss

            # Backward pass: gradient of the mean cross-entropy.
            error = probas - y_one_hot
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error, axis=0)

            # Gradient of the L2 penalty (weights only).
            if self.reg_strength > 0:
                dw += (self.reg_strength / n_samples) * self.weights

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

            if verbose and i % 100 == 0:
                print(f"Iteration {i}, Loss: {loss:.4f}")

    def predict_proba(self, X):
        """Return class probabilities of shape (n_samples, n_classes).

        Column ``k`` corresponds to ``classes_[k]``.
        """
        linear_model = np.dot(X, self.weights) + self.bias
        return self.softmax(linear_model)

    def predict(self, X):
        """Return the most probable label for each row of ``X``."""
        probas = self.predict_proba(X)
        class_idx = np.argmax(probas, axis=1)
        if self.classes_ is None:
            # Parameters were set without calling fit; fall back to column indices.
            return class_idx
        return self.classes_[class_idx]

    def score(self, X, y):
        """Return the accuracy (fraction of correct predictions) on ``X``, ``y``."""
        predictions = self.predict(X)
        return np.mean(predictions == y)

* Dataset

In [3]:
# Load the Iris dataset and keep the feature matrix and label vector separately.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Build a DataFrame view (feature columns plus a `target` column) for inspection.
iris_df = pd.DataFrame(X, columns=iris.feature_names).assign(target=y)
iris_df.sample(5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
87,6.3,2.3,4.4,1.3,1
27,5.2,3.5,1.5,0.2,0
84,5.4,3.0,4.5,1.5,1
14,5.8,4.0,1.2,0.2,0
92,5.8,2.6,4.0,1.2,1


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column.
iris_df.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
count,150.0,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333,1.0
std,0.828066,0.435866,1.765298,0.762238,0.819232
min,4.3,2.0,1.0,0.1,0.0
25%,5.1,2.8,1.6,0.3,0.0
50%,5.8,3.0,4.35,1.3,1.0
75%,6.4,3.3,5.1,1.8,2.0
max,7.9,4.4,6.9,2.5,2.0


In [5]:
# Count how many samples belong to each class label.
iris_df["target"].value_counts()

target
0    50
1    50
2    50
Name: count, dtype: int64

* Train

In [6]:
# Hold out 20% of the samples as a test set (test_size=0.2) with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# SoftmaxRegression.fit draws its initial weights from np.random.randn, so seed
# NumPy's global RNG to make this training run reproducible.
np.random.seed(42)

# Train the from-scratch model and evaluate it on the held-out test set.
model_scratch = SoftmaxRegression(learning_rate=0.1, n_iters=1000)
model_scratch.fit(X_train, y_train)
predictions_scratch = model_scratch.predict(X_test)
accuracy_scratch = accuracy_score(y_test, predictions_scratch)

In [11]:
# Format the fractional accuracy as a percentage; int() would truncate any
# value below 1.0 to 0 and report "0%".
print(f"Độ chính xác (from scratch): {accuracy_scratch:.2%}")

Độ chính xác (from scratch): 100%


### Softmax Regression with scikit-learn

In [14]:
# Reference model trained on the same split, for comparison with the from-scratch one.
# NOTE(review): SklearnSoftmaxRegression is not defined in the visible cells —
# presumably an import alias for scikit-learn's LogisticRegression; confirm.
model_sklearn = SklearnSoftmaxRegression(solver='lbfgs', max_iter=1000)
model_sklearn.fit(X_train, y_train)
predictions_sklearn = model_sklearn.predict(X_test)
accuracy_sklearn = accuracy_score(y_test, predictions_sklearn)

In [16]:
# Report the scikit-learn model's accuracy (accuracy_sklearn, not the
# from-scratch score), formatted as a percentage instead of truncated by int().
print(f"Độ chính xác (from sklearn): {accuracy_sklearn:.2%}")

Độ chính xác (from sklearn): 100%
