In [56]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np
import matplotlib as plt


In [57]:
import numpy as np

class SimpleFullyConnectedNN:
    def __init__(self, input_size, hidden_size, output_size):
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def relu(self, z):
        return np.maximum(0, z)

    def softmax(self, z):
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def forward(self, x):
        self.z1 = np.dot(x, self.W1) + self.b1
        self.a1 = self.relu(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        return self.softmax(self.z2)

    def compute_loss(self, y_pred, y_true):
        m = y_true.shape[0]
        log_probs = -np.log(y_pred[np.arange(m), y_true] + 1e-9)  # 加小量避免log(0)
        loss = np.sum(log_probs) / m
        return loss


    def backward(self, x, y_true, y_pred):
        m = x.shape[0]
        dz2 = y_pred.copy()
        dz2[range(m), y_true] -= 1
        dz2 /= m
        dw2 = np.dot(self.a1.T, dz2)
        db2 = np.sum(dz2, axis=0, keepdims=True)
        da1 = np.dot(dz2, self.W2.T)
        dz1 = da1 * (self.a1 > 0)
        dw1 = np.dot(x.T, dz1)
        db1 = np.sum(dz1, axis=0, keepdims=True)
        return dw1, db1, dw2, db2

    def update_params(self, grads, lr=0.1):
        dw1, db1, dw2, db2 = grads
        self.W1 -= lr * dw1
        self.b1 -= lr * db1
        self.W2 -= lr * dw2
        self.b2 -= lr * db2






In [58]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [59]:
def train(model, train_loader, learning_rate=0.1):
    """Train the model for one epoch and return the average loss."""
    train_loss_accum = 0
    for x, y_true in train_loader:
        x_flat = x.reshape(x.shape[0], -1)  # Flatten the images
        y_pred = model.forward(x_flat)
        loss = model.compute_loss(y_pred, y_true)
        grads = model.backward(x_flat, y_true, y_pred)
        model.update_params(grads, lr=learning_rate)
        train_loss_accum += loss
    return train_loss_accum / len(train_loader)

In [60]:
def evaluate(model, test_loader):
    """Evaluate the model on the test set and return loss and accuracy."""
    test_loss_accum = 0
    correct = 0
    total = 0
    for x, y_true in test_loader:
        x_flat = x.reshape(x.shape[0], -1)
        y_pred = model.forward(x_flat)
        loss = model.compute_loss(y_pred, y_true)
        test_loss_accum += loss
        predictions = np.argmax(y_pred, axis=1)

        # 请确保y_true是整数数组，不是独热编码
        correct += np.sum(predictions == y_true)
        total += len(y_true)

    average_test_loss = test_loss_accum / len(test_loader)
    test_accuracy = correct / total
    return average_test_loss, test_accuracy


In [61]:
def main(train_loader, test_loader, epochs=10):
    model = SimpleFullyConnectedNN(784, 256, 10)
    train_losses, test_losses, test_accuracies = [], [], []
    
    for epoch in range(epochs):
        train_loss = train(model, train_loader)
        test_loss, test_accuracy = evaluate(model, test_loader)
        
        train_losses.append(train_loss)
        test_losses.append(test_loss)
        test_accuracies.append(test_accuracy)
        
        print(f"Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

    # Plotting
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(test_losses, label='Test Loss')
    plt.title('Loss vs. Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(test_accuracies, label='Test Accuracy')
    plt.title('Accuracy vs. Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

In [62]:
main(train_loader, test_loader)

Epoch 1: Train Loss: 0.5666, Test Loss: 0.4557, Test Accuracy: 0.0000
Epoch 2: Train Loss: 0.4027, Test Loss: 0.4499, Test Accuracy: 0.0000
Epoch 3: Train Loss: 0.3606, Test Loss: 0.3853, Test Accuracy: 0.0000
Epoch 4: Train Loss: 0.3344, Test Loss: 0.4042, Test Accuracy: 0.0000
Epoch 5: Train Loss: 0.3138, Test Loss: 0.3715, Test Accuracy: 0.0000
Epoch 6: Train Loss: 0.2994, Test Loss: 0.3949, Test Accuracy: 0.0000
Epoch 7: Train Loss: 0.2864, Test Loss: 0.3407, Test Accuracy: 0.0000
Epoch 8: Train Loss: 0.2744, Test Loss: 0.3669, Test Accuracy: 0.0000
Epoch 9: Train Loss: 0.2631, Test Loss: 0.3990, Test Accuracy: 0.0000
Epoch 10: Train Loss: 0.2537, Test Loss: 0.3853, Test Accuracy: 0.0000


AttributeError: module 'matplotlib' has no attribute 'figure'