In [30]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [31]:
# Synthetic binary-classification dataset: 1000 samples with 10 features,
# of which 8 are informative and 2 are redundant.
dataset_options = {
    'n_samples': 1000,
    'n_features': 10,
    'n_informative': 8,
    'n_redundant': 2,
    'n_classes': 2,
    'random_state': 42,  # fixed seed so the generated data is reproducible
}
X, y = make_classification(**dataset_options)

# Hold out 20% of the samples as the test set; the split is seeded as well.
splits = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = splits

In [32]:
# Hyperparameters read by the training loop.
config = {
    'lr': 0.01,      # multiplier applied to the gradient at each weight update
    'lambda': 0.1,   # scales the regularization term added to the gradient
    'num_iter': 10,  # number of gradient-descent iterations
}

In [33]:
# Fit a logistic-regression weight vector with full-batch gradient descent.
# Reads X_train, y_train and config; leaves the learned weights in `w`.
# Note: the model has no intercept term -- scores are just X_train @ w.
n, m = X_train.shape  # n = number of training samples, m = number of features
w = np.zeros(m)       # start from all-zero weights

for i in range(config['num_iter']):
    # Linear scores for every training sample.
    z = np.dot(X_train, w)

    # Predicted probabilities via the sigmoid, written with the identity
    #   1 / (1 + exp(-z)) == 0.5 * (1 + tanh(z / 2)).
    # tanh saturates at +/-1 instead of overflowing, so large |z| does not
    # trigger NumPy's "overflow encountered in exp" RuntimeWarning.
    y_pred = 0.5 * (1.0 + np.tanh(0.5 * z))

    # Gradient averaged over the n samples ...
    gradient = (1 / n) * np.dot(X_train.T, (y_pred - y_train))
    # ... plus the regularization term (lambda / n) * w.
    gradient += (config['lambda'] / n) * w

    # Gradient-descent step (in place on w).
    w -= config['lr'] * gradient

In [38]:
# Score the held-out samples with the learned weights and map the scores
# to probabilities with the logistic function.
z = X_test.dot(w)
proba = 1.0 / (1.0 + np.exp(-z))

# Threshold at 0.5: probabilities at or above it become class 1, otherwise 0.
y_pred = np.where(proba >= 0.5, 1, 0)

# Report the fraction of test labels predicted correctly.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.4f}")