In [19]:
# Standard Logistic Regression implemented by numpy

# import numpy as np
# class LogisticRegression:
#     def __init__(self, learning_rate=0.01, epoch=100):
#         self.learning_rate = learning_rate
#         self.epoch = epoch
#         self.weights = None
#         self.bias = None

#     def sigmoid(self, z):
#         return 1 / (1 + np.exp(-z))

#     def fit(self, X, y):
#         n_samples, n_features = X.shape
#         self.weights = np.zeros(n_features)
#         self.bias = 0

#         # Gradient descent
#         for _ in range(self.epoch):
#             linear_model = np.dot(X, self.weights) + self.bias
#             y_predicted = self.sigmoid(linear_model)

#             # calculate gradients
#             dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
#             db = (1 / n_samples) * np.sum(y_predicted - y)

#             # update parameters
#             self.weights -= self.learning_rate * dw
#             self.bias -= self.learning_rate * db

#     def predict(self, X, threshold=0.5):
#         z = np.dot(X, self.weights) + self.bias
#         y_predicted = self.sigmoid(z)
#         return [1 if i > threshold else 0 for i in y_predicted]

In [None]:
# For simplifying, we implemente the logistic regression model through torch.

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

num_epochs = 100

In [None]:
class CancerDataset(Dataset):
    def __init__(self, data, targets):
        scaler = StandardScaler()
        self.data = torch.tensor(scaler.fit_transform(data), dtype=torch.float32)
        self.targets = torch.tensor(targets, dtype=torch.float32).view(-1, 1)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]

In [None]:
# 定义PyTorch逻辑回归模型（显式实现sigmoid）
class LogisticRegression(nn.Module):
    def __init__(self, input_dim):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(input_dim, 1)
        nn.init.xavier_uniform_(self.linear.weight)
        nn.init.zeros_(self.linear.bias)

    def forward(self, x):
        # 只返回线性输出，不应用sigmoid
        return self.linear(x)

    def sigmoid(self, z):
        return 1 / (1 + torch.exp(-z))


def binary_cross_entropy_with_logits(logits, targets):
    probs = torch.sigmoid(logits)
    epsilon = 1e-7
    probs = torch.clamp(probs, epsilon, 1 - epsilon)
    loss = -torch.mean(
        targets * torch.log(probs) + (1 - targets) * torch.log(1 - probs)
    )
    return loss.mean()

In [23]:
# Datasets
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

train_dataset = CancerDataset(X_train, y_train)
test_dataset = CancerDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
input_dim = X_train.shape[1]
model = LogisticRegression(input_dim)
optimizer = optim.SGD(model.parameters(), lr=0.01)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for inputs, labels in train_loader:
        logits = model(inputs)
        loss = binary_cross_entropy_with_logits(logits, labels)
        total_loss += loss.item() * inputs.size(0)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    avg_loss = total_loss / len(train_loader.dataset)

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

Epoch [10/100], Loss: 0.2333
Epoch [20/100], Loss: 0.1672
Epoch [30/100], Loss: 0.1407
Epoch [40/100], Loss: 0.1259
Epoch [50/100], Loss: 0.1165
Epoch [60/100], Loss: 0.1096
Epoch [70/100], Loss: 0.1043
Epoch [80/100], Loss: 0.0999
Epoch [90/100], Loss: 0.0965
Epoch [100/100], Loss: 0.0936


In [None]:
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        logits = model(inputs)
        probs = model.sigmoid(logits)
        preds = (probs > 0.5).float()
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

accuracy = accuracy_score(all_labels, all_preds)
print(f"Test Accuracy: {accuracy:.4f}")

Test Accuracy: 0.9825
