In [20]:
import requests
import zipfile
import torch
from torch.utils.data import Dataset
from pathlib import Path
from PIL import Image

In [21]:
url = "https://firebasestorage.googleapis.com/v0/b/grandmacan-2dae4.appspot.com/o/ML_data%2Fone_piece_full.zip?alt=media&token=937656fd-f5c1-44f5-b174-1e2d590b8ef3"
archive_name = "one_piece_full.zip"

# Download only when no valid archive is present. `is_zipfile` returns False for
# both a missing file and a corrupt/partial one, so a broken earlier download is
# replaced instead of being reused.
if not zipfile.is_zipfile(archive_name):
  # Fetch first and fail loudly on HTTP errors, so an error page or an empty
  # body is never written to disk as if it were the archive.
  req = requests.get(url, timeout=60)
  req.raise_for_status()
  with open(archive_name, "wb") as f:
    f.write(req.content)

# Unpack the archive into the `one_piece_full` directory.
with zipfile.ZipFile(archive_name, "r") as zip_file:
  zip_file.extractall("one_piece_full")

In [22]:
# Use the GPU when PyTorch reports one as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"

In [23]:
def accuracy_fn(y_pred, y_true):
  """Return the percentage (0-100) of positions where `y_pred` equals `y_true`.

  Args:
    y_pred: Predicted labels. In this notebook it is called with the argmax of
      the model output.
    y_true: Reference labels, compared element-wise against `y_pred`.

  Returns:
    Matching-element count divided by `len(y_true)`, times 100.
  """
  matches = y_pred == y_true
  return matches.sum() / len(y_true) * 100

def train_step(dataloader, model, cost_fn, optimizer, accuracy_fn, device):
  """Run one training epoch over `dataloader` and print the mean cost and accuracy.

  Args:
    dataloader: Sized iterable yielding `(x, y)` batches.
    model: Module to train; it is moved to `device` and put in train mode.
    cost_fn: Callable `(predictions, targets) -> scalar loss tensor`.
    optimizer: Optimizer that updates `model`'s parameters.
    accuracy_fn: Callable `(predicted_labels, targets) -> accuracy` for a batch.
    device: Device that the model and each batch are moved to.

  Returns:
    Tuple `(mean_cost, mean_accuracy)` as Python floats, averaged over batches.
  """
  # Moving the model and selecting train mode do not depend on the batch,
  # so do both once instead of on every iteration.
  model.to(device)
  model.train()

  train_cost = 0.0
  train_acc = 0.0
  for x, y in dataloader:
    x = x.to(device)
    y = y.to(device)

    y_pred = model(x)
    cost = cost_fn(y_pred, y)

    # Accumulate plain floats. Summing the `cost` tensor itself would keep every
    # batch's autograd graph referenced until the end of the epoch.
    train_cost += cost.item()
    train_acc += float(accuracy_fn(y_pred.argmax(dim=1), y))

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

  # Average over the loader that was passed in, not a notebook-level global.
  num_batches = len(dataloader)
  train_cost /= num_batches
  train_acc /= num_batches

  print(f"\nTrain Cost: {train_cost:.4f}, Train Acc: {train_acc:.2f}")
  return train_cost, train_acc


def test_step(dataloader, model, cost_fn, accuracy_fn, device):
  test_cost = 0
  test_acc = 0
  model.eval()
  with torch.inference_mode():
    for x, y in dataloader:
      x = x.to(device)
      y = y.to(device)
      model.to(device)

      test_pred = model(x)

      test_cost += cost_fn(test_pred, y)
      test_acc += accuracy_fn(test_pred.argmax(dim=1), y)

    test_cost /= len(test_dataloader)
    test_acc /= len(test_dataloader)

  print(f"Test Cost: {test_cost:.4f}, Test Acc: {test_acc:.2f} \n")

In [24]:
class ImageDataset(Dataset):
  """Image-classification dataset read from a directory tree.

  Layout read by this class:
    `<root>/classnames.txt`  - one class name per line; the line position is
                               the integer label.
    `<root>/train/...` or `<root>/test/...` - image files; the name of the
                               directory directly containing a file is its class.

  Args:
    root: Dataset root directory (string or path).
    train: If truthy, read the `train` split; otherwise read `test`.
    transform: Optional callable applied to each RGB PIL image.
  """

  def __init__(self, root, train, transform=None):
    root = Path(root)
    image_root = root / ("train" if train else "test")

    with open(root / "classnames.txt", "r") as f:
      self.classes = [line.strip() for line in f]

    # `rglob` yields entries in filesystem order, which varies between machines
    # and runs. Sorting makes the index -> sample mapping deterministic.
    self.paths = sorted(p for p in image_root.rglob("*") if p.is_file())
    self.transform = transform

  def __getitem__(self, index):
    """Return `(image, class_index)` for the sample at `index`.

    Raises:
      ValueError: If the parent directory name is not listed in `classnames.txt`.
    """
    path = self.paths[index]

    # Close the underlying file deterministically; `convert` returns a new,
    # fully loaded image that does not depend on the open handle.
    with Image.open(path) as raw:
      img = raw.convert("RGB")

    class_idx = self.classes.index(path.parent.name)

    if self.transform:
      img = self.transform(img)
    return img, class_idx

  def __len__(self):
    """Return the number of image files found for the selected split."""
    return len(self.paths)

In [25]:
import torchvision
# EfficientNet_B0 seems to have run into a problem after an update, so EfficientNet_B1 is used instead.
# weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
# model = torchvision.models.efficientnet_b0(weights=weights)

# Build EfficientNet-B1 initialised from torchvision's DEFAULT weights entry.
# `weights` is kept as its own name because later cells call `weights.transforms()`.
weights = torchvision.models.EfficientNet_B1_Weights.DEFAULT
model = torchvision.models.efficientnet_b1(weights=weights)
# Bare last expression: the notebook displays the module structure.
model

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [26]:
# Display the preprocessing bundled with the selected weights
# (resize/crop sizes, normalisation mean/std and interpolation mode).
weights.transforms()

ImageClassification(
    crop_size=[240]
    resize_size=[255]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)

In [27]:
# Keep the weights' own preprocessing under a name; the dataset cells pass it as `transform`.
efficientnet_b1_transforms = weights.transforms()

In [28]:
# Build both splits from the same root with the same preprocessing; only the
# `train` flag differs between the two datasets.
train_dataset, test_dataset = (
  ImageDataset(
    root="one_piece_full",
    train=is_train,
    transform=efficientnet_b1_transforms,
  )
  for is_train in (True, False)
)

In [29]:
from torch.utils.data import DataLoader

BATCH_SIZE = 16

# Shuffle the training batches each epoch; keep the evaluation order fixed.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [30]:
# Number of batches per epoch in each loader, as (train, test).
len(train_dataloader), len(test_dataloader)

(189, 47)

In [31]:
!pip install torchinfo
from torchinfo import summary



In [32]:
# Tabulate per-layer input/output shapes, parameter counts and trainability
# for a dummy batch of sixteen 3x64x64 inputs.
summary(
  model,
  input_size=(16, 3, 64, 64),
  col_names=["input_size", "output_size", "num_params", "trainable"],
  row_settings=["var_names"],
)

Layer (type (var_name))                                      Input Shape               Output Shape              Param #                   Trainable
EfficientNet (EfficientNet)                                  [16, 3, 64, 64]           [16, 1000]                --                        True
├─Sequential (features)                                      [16, 3, 64, 64]           [16, 1280, 2, 2]          --                        True
│    └─Conv2dNormActivation (0)                              [16, 3, 64, 64]           [16, 32, 32, 32]          --                        True
│    │    └─Conv2d (0)                                       [16, 3, 64, 64]           [16, 32, 32, 32]          864                       True
│    │    └─BatchNorm2d (1)                                  [16, 32, 32, 32]          [16, 32, 32, 32]          64                        True
│    │    └─SiLU (2)                                         [16, 32, 32, 32]          [16, 32, 32, 32]          --                

In [33]:
from torch import nn

# Replace the final linear layer with an 18-way head. The input width is read
# from the layer being replaced rather than hard-coded, so this cell stays
# correct if a different backbone variant is loaded above.
# NOTE(review): 18 is presumably the number of entries in classnames.txt - confirm.
model.classifier[1] = nn.Linear(
  in_features=model.classifier[1].in_features,
  out_features=18,
  bias=True,
)

In [34]:
# Freeze the feature extractor: turn off gradient tracking for every parameter
# under `model.features`. The trailing `;` stops the notebook from echoing the
# returned module.
model.features.requires_grad_(False);

In [35]:
# Cross-entropy loss on the model outputs, optimised with Adam at a learning rate of 1e-3.
cost_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [36]:
from tqdm.auto import tqdm

epochs = 20

# Each epoch: one pass over the training loader, then one evaluation pass over
# the test loader. Both steps print their own metrics.
for epoch in tqdm(range(epochs)):
  print(f"Epoch: {epoch}\n-------")

  train_step(
    dataloader=train_dataloader,
    model=model,
    cost_fn=cost_fn,
    optimizer=optimizer,
    accuracy_fn=accuracy_fn,
    device=device,
  )
  test_step(
    dataloader=test_dataloader,
    model=model,
    cost_fn=cost_fn,
    accuracy_fn=accuracy_fn,
    device=device,
  )


  0%|          | 0/20 [00:00<?, ?it/s]

Epoch: 0
-------

Train Cost: 2.3285, Train Acc: 52.14
Test Cost: 1.9285, Test Acc: 67.69 

Epoch: 1
-------

Train Cost: 1.5938, Train Acc: 71.34
Test Cost: 1.4718, Test Acc: 72.34 

Epoch: 2
-------

Train Cost: 1.2171, Train Acc: 77.11
Test Cost: 1.2485, Test Acc: 75.13 

Epoch: 3
-------

Train Cost: 1.0249, Train Acc: 79.84
Test Cost: 1.1177, Test Acc: 77.53 

Epoch: 4
-------

Train Cost: 0.8830, Train Acc: 82.01
Test Cost: 1.0020, Test Acc: 78.32 

Epoch: 5
-------

Train Cost: 0.8009, Train Acc: 82.90
Test Cost: 0.9360, Test Acc: 79.26 

Epoch: 6
-------

Train Cost: 0.7171, Train Acc: 85.18
Test Cost: 0.8847, Test Acc: 79.77 

Epoch: 7
-------

Train Cost: 0.6756, Train Acc: 86.14
Test Cost: 0.8284, Test Acc: 80.97 

Epoch: 8
-------

Train Cost: 0.6207, Train Acc: 86.84
Test Cost: 0.8170, Test Acc: 80.74 

Epoch: 9
-------

Train Cost: 0.6017, Train Acc: 86.59
Test Cost: 0.7714, Test Acc: 81.78 

Epoch: 10
-------

Train Cost: 0.5715, Train Acc: 86.88
Test Cost: 0.7483, Test 

In [40]:
from torchvision import transforms

# Manually defined preprocessing pipeline (described by the original author as
# the standard EfficientNet-B1 preprocessing).
# NOTE(review): this rebinds `efficientnet_b1_transforms`, which earlier held
# `weights.transforms()`. The sizes here (resize 256 / crop 224) differ from the
# resize_size=255 / crop_size=240 shown in that object's output, so inference
# uses different preprocessing than training did - confirm this is intended.
# The 224 crop is also relied on by the hard-coded reshape in the next cell.
efficientnet_b1_transforms = transforms.Compose([
    transforms.Resize(256),  # resize to a larger size first, preserving the aspect ratio
    transforms.CenterCrop(224),  # then crop to exactly 224x224
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [44]:
# Classify a single image file with the trained model.
# Open the file in a context manager so the handle is closed as soon as the
# RGB copy has been made.
with Image.open("luffy.png") as raw_img:
  img = raw_img.convert("RGB")
img = efficientnet_b1_transforms(img)
# Add the leading batch dimension without hard-coding the spatial size, so this
# cell keeps working if the crop size of the transform changes.
img = img.unsqueeze(0)

model.eval()
with torch.inference_mode():
  y_pred = model(img.to(device))

# Turn the raw outputs into per-class probabilities and take the most likely class.
y_pred = torch.softmax(y_pred, dim=1)
class_idx = y_pred.argmax(dim=1)
# `.item()` converts the one-element tensor to a plain int before indexing the list.
train_dataset.classes[class_idx.item()]

'Luffy'