# 1.数据处理

## Step1.加载数据集（包含数据预处理） 


### 数据预处理


In [None]:
# Data preprocessing: list everything torchvision.transforms exposes.
import inspect

from torchvision import transforms


def _is_class_or_function(obj):
    """Return True when ``obj`` is a class or a plain Python function."""
    return inspect.isclass(obj) or inspect.isfunction(obj)


# getmembers applies the predicate to each member value and yields
# (name, value) pairs; only the names are kept.
transform_methods = [
    member_name
    for member_name, _member in inspect.getmembers(transforms, _is_class_or_function)
]

# Print the result: a count line followed by one name per line.
print(f"共找到 {len(transform_methods)} 个 transforms 方法：")
for name in sorted(transform_methods):
    print(name)

共找到 41 个 transforms 方法：
AugMix
AutoAugment
AutoAugmentPolicy
CenterCrop
ColorJitter
Compose
ConvertImageDtype
ElasticTransform
FiveCrop
GaussianBlur
Grayscale
InterpolationMode
Lambda
LinearTransformation
Normalize
PILToTensor
Pad
RandAugment
RandomAdjustSharpness
RandomAffine
RandomApply
RandomAutocontrast
RandomChoice
RandomCrop
RandomEqualize
RandomErasing
RandomGrayscale
RandomHorizontalFlip
RandomInvert
RandomOrder
RandomPerspective
RandomPosterize
RandomResizedCrop
RandomRotation
RandomSolarize
RandomVerticalFlip
Resize
TenCrop
ToPILImage
ToTensor
TrivialAugmentWide


In [5]:
# Usage: chain individual preprocessing steps into one callable pipeline.
# Per-channel mean / std handed to Normalize (three values, one per channel).
NORM_MEAN = (0.4914, 0.4822, 0.4465)
NORM_STD = (0.2023, 0.1994, 0.2010)

# Steps run in list order: random 32x32 crop (with 4 px padding), random
# horizontal flip, conversion to a tensor, then normalization.
preprocessing_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
]
transform = transforms.Compose(preprocessing_steps)

### 方法一：加载标准数据集

In [None]:
import inspect

from torchvision import datasets


def _is_vision_dataset(obj):
    """Return True when ``obj`` is a class deriving from ``datasets.VisionDataset``."""
    return inspect.isclass(obj) and issubclass(obj, datasets.VisionDataset)


# Keep only the names of the members accepted by the predicate.
datasets_classes = [
    member_name
    for member_name, _member in inspect.getmembers(datasets, _is_vision_dataset)
]

print(f"共找到 {len(datasets_classes)} 个数据集：")
for name in sorted(datasets_classes):
    print(name)


共找到 72 个数据集：
CIFAR10
CIFAR100
CLEVRClassification
CREStereo
Caltech101
Caltech256
CarlaStereo
CelebA
Cityscapes
CocoCaptions
CocoDetection
Country211
DTD
DatasetFolder
EMNIST
ETH3DStereo
EuroSAT
FER2013
FGVCAircraft
FakeData
FallingThingsStereo
FashionMNIST
Flickr30k
Flickr8k
Flowers102
FlyingChairs
FlyingThings3D
Food101
GTSRB
HD1K
HMDB51
INaturalist
ImageFolder
ImageNet
Imagenette
InStereo2k
KMNIST
Kinetics
Kitti
Kitti2012Stereo
Kitti2015Stereo
KittiFlow
LFWPairs
LFWPeople
LSUN
LSUNClass
MNIST
Middlebury2014Stereo
MovingMNIST
Omniglot
OxfordIIITPet
PCAM
PhotoTour
Places365
QMNIST
RenderedSST2
SBDataset
SBU
SEMEION
STL10
SUN397
SVHN
SceneFlowStereo
Sintel
SintelStereo
StanfordCars
UCF101
USPS
VOCDetection
VOCSegmentation
VisionDataset
WIDERFace


In [7]:
# Usage: load the CIFAR-10 training split rooted at ./data (download enabled)
# and run every sample through the `transform` pipeline.
dataset = datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:16<00:00, 10641587.75it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data


### 方法二：加载自定义数据集

In [6]:
from torch.utils.data import Dataset
import os
from PIL import Image
import torch
import numpy as np 
class SegmentationDataset(Dataset):
    """Paired image / mask dataset read from two directories.

    Every file name found in ``img_dir`` is paired with the file in
    ``label_dir`` that has the same stem and a ``.png`` extension.

    Args:
        img_dir: Directory holding the input images.
        label_dir: Directory holding the label masks (``<stem>.png``).
        transforms: Optional callable applied to the RGB image only. The mask
            is never passed through it, so random geometric transforms
            (crop, flip, ...) would leave image and mask misaligned.
    """

    def __init__(self, img_dir, label_dir, transforms=None):
        self.img_dir = img_dir
        self.label_dir = label_dir
        # Store the argument itself. Previously a notebook-global `transform`
        # was read here, silently ignoring whatever the caller passed.
        self.transform = transforms
        # Sorted so the index -> file mapping is stable; os.listdir returns
        # entries in arbitrary order.
        self.img_names = sorted(os.listdir(img_dir))

    def __len__(self):
        """Return the number of image files found in ``img_dir``."""
        return len(self.img_names)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the ``idx``-th image file.

        ``image`` is the RGB image, passed through ``self.transform`` when one
        was given. ``label`` is a ``torch.long`` tensor built from the
        single-channel mask's pixel values.
        """
        img_name = self.img_names[idx]
        img_path = os.path.join(self.img_dir, img_name)

        # The mask shares the image's stem but always uses a .png extension.
        label_name = os.path.splitext(img_name)[0] + ".png"
        label_path = os.path.join(self.label_dir, label_name)

        # convert() returns a new, fully loaded image, so the file handles
        # can be released as soon as the conversion is done.
        with Image.open(img_path) as img_file:
            image = img_file.convert("RGB")
        with Image.open(label_path) as label_file:
            label = label_file.convert("L")

        if self.transform is not None:
            image = self.transform(image)

        # Always convert the mask, regardless of whether an image transform
        # is configured, so the label type does not depend on `transforms`.
        label = torch.tensor(np.array(label), dtype=torch.long)

        return image, label
# Build the custom dataset from the image / label folders, handing the
# pipeline held in `transform` to the `transforms` keyword.
data = SegmentationDataset(
    img_dir='train_and_label/img',
    label_dir='train_and_label/label',
    transforms=transform)

## Step2.数据集划分

In [8]:
import torch
from torch.utils.data import random_split

# 80 / 10 / 10 split. The test share takes the remainder so the three sizes
# always add up to len(dataset) despite the int() truncation.
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size

# Seed the split so that re-running the notebook assigns the same samples to
# train / val / test; an unseeded split changes membership on every run.
SPLIT_SEED = 42
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)


## Step3.数据集读取

In [9]:
from torch.utils.data import DataLoader

# One batch size shared by all three loaders.
BATCH_SIZE = 32

# Only the training loader shuffles; validation and test keep a fixed order.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# 2.模型搭建

## 方法一 容器搭建

In [None]:
from torch import nn

# Container-style model: layers run in the order they are listed.
# The data pipeline yields 3 x 32 x 32 image tensors, so the batch is first
# flattened to (batch, 3072); a 784-wide first layer cannot accept it.
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(3 * 32 * 32, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)


## 方法二 自定义模型

In [12]:
from torch import nn
from torch.nn import Module


class MLP(Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features per sample fed to the first layer.
        hidden_size: Width of the hidden layer.
        output_size: Number of output values per sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the network on ``x`` and return the raw output scores.

        Inputs whose last dimension already equals ``input_size`` are used
        as-is. Inputs with more than two dimensions and a different last
        dimension (e.g. image batches) are flattened to ``(batch, -1)`` first.
        """
        # Flatten only inputs that could not be consumed otherwise, so every
        # input that worked before keeps producing the same output shape.
        if x.dim() > 2 and x.shape[-1] != self.fc1.in_features:
            x = x.flatten(start_dim=1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x


# The data pipeline yields 3 x 32 x 32 image tensors, i.e. 3072 features per
# sample once flattened; an input size of 784 would fail in the first layer.
model = MLP(input_size=3 * 32 * 32, hidden_size=256, output_size=10)

# 3.损失函数

In [13]:
# Loss function: a cross-entropy loss instance, later called as
# criterion(outputs, labels) in the training loop.
from torch.nn import CrossEntropyLoss
criterion = CrossEntropyLoss()

# 4.优化器

In [14]:
from torch import optim

# Step size used by the optimizer.
LEARNING_RATE = 0.001

# Adam over every parameter of the current `model`.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)


# 5.模型训练

In [None]:
num_epochs = 5  # number of passes over the training loader
for epoch in range(num_epochs):
    model.train()  # put the model in training mode
    running_loss, correct, total = 0.0, 0, 0

    for inputs, labels in train_dataloader:
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        optimizer.zero_grad()  # clear gradients left over from the last step
        loss.backward()        # compute gradients
        optimizer.step()       # update the weights

        # Accumulate per-epoch statistics
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)  # index of the highest score per sample
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

    # Loss is averaged over batches; accuracy is a percentage over samples.
    epoch_loss = running_loss / len(train_dataloader)
    epoch_acc = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%")
