In [1]:
print("hello world")

hello world


In [2]:
import torch
print(torch.__version__) 

2.5.0+cu124


In [3]:
import numpy as np

In [4]:
inner_size = 128

image = np.random.rand(64, 28*28)
weight = np.random.rand(28*28, inner_size)
bias = np.random.rand(inner_size)
print(image.shape, weight.shape, bias.shape)

result = image @ weight + bias
print(result.shape)

(64, 784) (784, 128) (128,)
(64, 128)


In [5]:
from torchvision import datasets, transforms

In [6]:
from torch.utils.data import DataLoader

In [7]:

# 数据预处理
transform = transforms.Compose([
    transforms.ToTensor(),  # 将图像转换为张量
    # transforms.Normalize((0.5,), (0.5,))  # 归一化
    transforms.Lambda(lambda x: x.view(-1)),  # 张量转换为一维
])
target_transform = transforms.Compose([
    transforms.Lambda(lambda y: torch.zeros(10, dtype=torch.float).scatter_(dim=0, index=torch.tensor(y), value=1)),  # onehot
])

# 加载训练和测试数据集
# m * (torch.Size([1, 28, 28]), int), 每个样本为(X, y)元组, 其中X是图片, y是标签
train_dataset = datasets.MNIST(
    root='./data', 
    train=True, 
    download=True, 
    transform=transform,
    target_transform=target_transform
    )
test_dataset = datasets.MNIST(
    root='./data', 
    train=False, 
    download=True, 
    transform=transform,
    target_transform=target_transform
    )

train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100.0%

Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw






In [8]:
for images, labels in train_loader:
    pass
    # print(images.shape)
    # print(labels.shape)

In [9]:
# 定义一个简单的神经网络
class SimpleNN(torch.nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        # 定义层：一个两层的全连接网络
        self.fc1 = torch.nn.Linear(28 * 28, inner_size)  # 输入层到隐藏层
        self.fc2 = torch.nn.Linear(inner_size, 10)  # 隐藏层到输出层

    def forward(self, x):
        x = x.view(-1, 28 * 28)  # 展平输入（假设输入是28x28图像）
        x = torch.relu(self.fc1(x))  # 激活函数 ReLU
        x = self.fc2(x)  # 输出层
        return x


In [33]:
def d_CrossEntropyLoss(outputs, labels):
    # outputs: m * 10, labels: m * 10 -> m * 10
    # result = d   [- sum (y_i + log sum e^y ) / N] / dy_i = 
    outputs = outputs.numpy()
    labels = labels.numpy()
    
    exp_tar = np.exp(outputs)
    exp_sum = np.sum(np.exp(outputs), axis=1).reshape(len(labels), 1)
    # result = - torch.log(exp_tar / exp_sum) / len(labels)
    
    pre_d_value = exp_tar / exp_sum
    
    pre_d_value -= outputs * labels
    
    pre_d_value *= 1 / len(labels)
    return pre_d_value
    

In [14]:
import torch.optim as optim
# 训练函数
def train(model, train_loader, criterion, optimizer, num_epochs=5, my_model=True):
    for epoch in range(num_epochs):
        for images, labels in train_loader:
            # 前向传播
            if my_model:
                images = images.numpy()
                outputs = model.forward(images)
                outputs = torch.from_numpy(outputs)
            else:
                outputs = model(images)
            
            loss = criterion(outputs, labels)
            
            if my_model:
                # Loss = -1/N (y - log sum e^hat_y)
                # d log sum e^hat_y / d hat_y = e^hat_y / log sum e^hat_y
                pre_d_value = d_CrossEntropyLoss(outputs, labels)
                model.backward(pre_d_value, lr=1e-3)
            else:
                # 反向传播和优化
                optimizer.zero_grad()  # 清除上一步的梯度
                loss.backward()  # 计算梯度
                optimizer.step()  # 更新参数

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

In [11]:
def evaluate(model, test_loader, my_model=True):
    if not my_model:
        model.eval()  # 切换到评估模式
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            if my_model:
                images = images.numpy()
                outputs = model.forward(images)
                outputs = torch.from_numpy(outputs)
            else:
                outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            _, labels = torch.max(labels, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy of the model on the test dataset: {100 * correct / total:.2f}%')


In [17]:
from NeuralNetworks import NeuralNetwork
from NeuralNetworks import Layer

# 定义自己的神经网络
class MyNN(NeuralNetwork):
    def __init__(self, input_size, output_size):
        # 定义层：一个两层的全连接网络
        self.fc1 = Layer(input_size, inner_size)  # 输入层到隐藏层
        self.fc2 = Layer(inner_size, output_size, activation_function=lambda x:x)  # 隐藏层到输出层
        super(MyNN, self).__init__(input_size, output_size, [self.fc1, self.fc2])


In [34]:
# 创建网络实例
model = MyNN(28*28, 10)
optimizer = None  # 损失函数和优化器
criterion = torch.nn.CrossEntropyLoss()  # 交叉熵损失（用于分类任务）

# 开始训练
train(model, train_loader, criterion, optimizer, my_model=True)

Epoch [1/5], Loss: 2.3026
Epoch [2/5], Loss: 2.3025
Epoch [3/5], Loss: 2.3026
Epoch [4/5], Loss: 2.3025
Epoch [5/5], Loss: 2.2993


In [35]:
evaluate(model, test_loader, my_model=True)

Accuracy of the model on the test dataset: 10.67%


In [12]:
# 创建网络实例
model = SimpleNN()
optimizer = optim.Adam(model.parameters(), lr=0.001)  # 使用 Adam 优化器# 损失函数和优化器
criterion = torch.nn.CrossEntropyLoss()  # 交叉熵损失（用于分类任务）

# 开始训练
train(model, train_loader, criterion, optimizer, my_model=False)

Epoch [1/5], Loss: 0.0349
Epoch [2/5], Loss: 0.3161
Epoch [3/5], Loss: 0.1635
Epoch [4/5], Loss: 0.0341
Epoch [5/5], Loss: 0.0544


In [None]:
# 评估模型
evaluate(model, test_loader, my_model=False)