In [5]:
import sys
import os

# 获取当前工作目录
current_dir = os.getcwd()

# 获取父目录（即 python_dir）
project_dir = os.path.dirname(current_dir)
sys.path.append(project_dir)


import numpy as np
import cupy as cp
import trytorch as torch
import trytorch.ops as ops
import trytorch.nn as nn
import trytorch.optim as optim
import trytorch.datas as data
from trytorch.array_device import *

In [6]:
class SimpleResNet(nn.Module):
    def __init__(self, device=None, dtype="float32"):
        super().__init__()
        self.block1 = nn.ConvBN(1, 16, 7, 4, device=device, dtype=dtype)
        self.block2 = nn.ConvBN(16, 32, 3, 2, device=device, dtype=dtype)
        self.res1 = nn.Residual(
            nn.Sequential(
                nn.ConvBN(32, 32, 3, 1, device=device, dtype=dtype),
                nn.ConvBN(32, 32, 3, 1, device=device, dtype=dtype)
            )
        )
        self.block3 = nn.ConvBN(32, 64, 3, 2, device=device, dtype=dtype)
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(64 * 2 * 2, 256, device=device, dtype=dtype)
        self.relu = nn.ReLU()
        self.drop = nn.Dropout(0.4)
        self.linear2 = nn.Linear(256, 10, device=device, dtype=dtype)

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.res1(x)
        x = self.block3(x)
        x = self.flatten(x)
        x = self.linear1(x)
        x = self.relu(x)
        x = self.drop(x)
        x = self.linear2(x)
        return x

In [7]:
batch_size=256
epochs=20
net = SimpleResNet(device=cpu())
optimizer=optim.Adam(net.parameters(),lr=0.001,weight_decay=0.001)
criterion= nn.SoftmaxLoss()

### 测试模型是否能够正向计算

In [8]:
#测试跑通模型
x_array = np.random.rand(1,1,28,28)
x = torch.Tensor(x_array)
x.to('cpu')
net(x)

tensor([[ 2.05167669  0.5062783  -1.19929996  0.40338946  1.04257181 -0.42037018
  -0.62244584 -1.08543943  2.4519655   2.37690865]], dtpye=float64)

In [9]:
from pathlib import Path

project_path = Path(project_dir)
print(project_path)

# 训练数据集
train_dataset = data.MNISTDataset(
    project_path / "data" / "MNIST" / "train-images-idx3-ubyte.gz",
    project_path / "data" / "MNIST" / "train-labels-idx1-ubyte.gz"
)

train_dataloader = data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True
)

test_dataset = data.MNISTDataset(
    project_path / "data" / "MNIST" / "t10k-images-idx3-ubyte.gz",
    project_path / "data" / "MNIST" / "t10k-labels-idx1-ubyte.gz"
)

test_dataloader = data.DataLoader(
    dataset = test_dataset,
    batch_size=batch_size,
    shuffle=True
)

d:\AIExperienments\TryTorch


In [10]:
for epoch in range(epochs):
    total_loss = 0
    total_rights = 0
    total_examples = 0
    total_batches = 0
    for inputs, label in train_dataloader:
        
        net.train()
        
        optimizer.reset_grad()

        pred = net(inputs)

        loss = criterion(pred, label)

        loss.backward()

        optimizer.step()
        # (batch, features) -> (batch, 1)
        label_pred = np.argmax(pred.numpy(), axis = 1)

        rights = np.equal(label_pred, label.numpy()).sum()

        total_loss += loss.numpy()
        total_rights += rights
        total_batches += 1
        total_examples += inputs.shape[0]

    avg_loss = total_loss / total_batches
    avg_accuracy = total_rights / total_examples
    print(f"EPOCH {epoch}: {avg_accuracy=}, {avg_loss=}")



ValueError: cannot reshape array of size 65536 into shape (1,64)