In [1]:
from IPython.display import display
import os

# Upload a file (a file picker will pop up).
from ipywidgets import FileUpload

# `upload.value` is read by the next cell, so a file must be chosen in the
# widget before that cell is run.
upload = FileUpload()
display(upload)


FileUpload(value={}, description='Upload')

In [2]:
import os
from pathlib import Path

# Assumes a single file was uploaded; only the first entry is used.
if not upload.value:
    # Nothing was selected in the widget, so there is nothing to save.
    raise FileNotFoundError("No file was uploaded.")

if isinstance(upload.value, dict):
    # ipywidgets 7.x: {filename: {'metadata': {...}, 'content': bytes}}
    filename = next(iter(upload.value))
    fileinfo = upload.value[filename]
else:
    # ipywidgets 8.x: a tuple of dicts, each carrying 'name' and 'content'.
    fileinfo = upload.value[0]
    filename = fileinfo['name']


In [4]:
# Show the structure of the upload for debugging, leaving out the raw bytes:
# they are the Kaggle API credentials and must not end up in saved cell output.
print({key: value for key, value in fileinfo.items() if key != 'content'})

# Save kaggle.json (filename was already obtained by the previous cell).
content = fileinfo['content']

kaggle_dir = Path.home() / ".kaggle"
kaggle_dir.mkdir(parents=True, exist_ok=True)

kaggle_json_path = kaggle_dir / "kaggle.json"
with open(kaggle_json_path, "wb") as f:
    f.write(content)

{'metadata': {'name': 'kaggle.json', 'type': 'application/json', 'size': 64, 'lastModified': 1745215961986}, 'content': b'<redacted: Kaggle API credentials>'}


In [5]:
# Restrict permissions (recommended on Linux/macOS) so only the owner can
# read the credentials file.
os.chmod(kaggle_json_path, 0o600)

# Confirmation message: "<filename> was saved successfully to <path>".
print(f"{filename} 已成功儲存至 {kaggle_json_path}")

# Smoke-test the credentials by listing datasets that match "cifar".
!kaggle datasets list -s cifar

# NOTE(review): the kaggle CLI is upgraded only after it has already been
# invoked above, and neither install is version-pinned.
!pip install -U kaggle
!pip install --upgrade pandas
import os
import zipfile

kaggle.json 已成功儲存至 /root/.kaggle/kaggle.json
ref                                                     title                                                  size  lastUpdated                 downloadCount  voteCount  usabilityRating  
------------------------------------------------------  ----------------------------------------------  -----------  --------------------------  -------------  ---------  ---------------  
fedesoriano/cifar100                                    CIFAR-100 Python                                  168517809  2020-12-26 08:37:10.143000          12506        178  1.0              
pankrzysiu/cifar10-python                               CIFAR-10 Python                                   340613496  2018-01-27 13:42:40.967000          15053        255  0.75             
petitbonney/cifar10-image-recognition                   CIFAR-10                                         1007971063  2019-10-01 12:50:23.227000           2965         27  0.8235294        
valentynsi

In [6]:
# 建立 Kaggle 資料夾
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# 下載 Dog Breed Identification 資料集
!kaggle competitions download -c dog-breed-identification --force
!unzip -oq dog-breed-identification.zip -d dog-breed-identification

cp: cannot stat 'kaggle.json': No such file or directory
Downloading dog-breed-identification.zip to /content
 95% 654M/691M [00:00<00:00, 1.24GB/s]
100% 691M/691M [00:00<00:00, 1.26GB/s]


In [8]:
import os
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import cv2
from pathlib import Path
from PIL import Image

In [9]:
# ===================== Config & Seed =====================
def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (CPU and
    all CUDA devices), then switches cuDNN to deterministic mode with
    benchmarking turned off.

    Args:
        seed: Integer seed applied to every generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic, cudnn.benchmark = True, False

# Seed all generators once, before any random data or weights are created.
seed_everything(42)
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [10]:
# ===================== Image Transform =====================
# Training pipeline: convert the input to a PIL image, apply random
# augmentation, then convert back to a tensor.
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomResizedCrop(64, scale=(0.8, 1.0)),  # random crop, output size 64
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),  # rotation range of 10 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
])

# Validation/test pipeline: no random augmentation, only a fixed resize.
val_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])

In [12]:
# ===================== Region Extraction =====================
def extract_regions(image_path, size=(64, 64)):
    """Crop three fixed regions out of an image and resize each to ``size``.

    No detection is performed; the regions are fixed fractions of the frame:

    * ear:  rows 0%-30%,   columns 25%-75%
    * nose: rows 30%-60%,  columns 35%-65%
    * tail: rows 70%-100%, columns 30%-70%

    Args:
        image_path: Location of the image file (``str`` or path-like).
        size: Target size handed to ``cv2.resize`` for every crop.

    Returns:
        ``(ear, tail, nose)`` as RGB arrays. Note the order: tail comes
        before nose.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
        ValueError: If the image is too small for one of the crops to
            contain any pixels.
    """
    # cv2.imread wants a plain string and signals failure by returning None
    # rather than raising, so check the result explicitly.
    img = cv2.imread(str(image_path))
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    h, w, _ = img.shape

    # (top, bottom, left, right) as fractions of the image height / width.
    boxes = {
        "ear": (0.0, 0.3, 0.25, 0.75),
        "nose": (0.3, 0.6, 0.35, 0.65),
        "tail": (0.7, 1.0, 0.3, 0.7),
    }

    crops = {}
    for name, (top, bottom, left, right) in boxes.items():
        roi = img[int(h * top):int(h * bottom), int(w * left):int(w * right)]
        if roi.size == 0:
            raise ValueError(
                f"Image {image_path} ({w}x{h}) is too small to extract the {name} region"
            )
        crops[name] = cv2.resize(roi, size)

    return crops["ear"], crops["tail"], crops["nose"]

In [13]:
# ===================== Dataset =====================
class DogDataset(Dataset):
    """Labelled dataset that serves an (ear, tail, nose) triple per sample.

    Args:
        ears, tails, noses: Indexable collections of per-sample tensors.
            When a transform is set, each item is permuted with
            ``(1, 2, 0)`` and converted to NumPy first, so items are
            presumably channel-first (C, H, W) tensors.
        labels: Indexable collection of targets; its length defines the
            dataset length.
        transform: Optional callable applied to every region.
    """

    def __init__(self, ears, tails, noses, labels, transform=None):
        self.ears, self.tails, self.noses = ears, tails, noses
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``((ear, tail, nose), label)`` for sample ``idx``."""
        sources = (self.ears, self.tails, self.noses)
        if not self.transform:
            regions = tuple(source[idx] for source in sources)
        else:
            # Channel-first tensor -> channel-last ndarray before transforming.
            regions = tuple(
                self.transform(source[idx].permute(1, 2, 0).numpy())
                for source in sources
            )
        return regions, self.labels[idx]

class TestDogDataset(Dataset):
    """Unlabelled dataset that serves an (ear, tail, nose) triple per sample.

    Args:
        ears, tails, noses: Indexable collections of per-sample tensors;
            the length of ``ears`` defines the dataset length. When a
            transform is set, each item is permuted with ``(1, 2, 0)`` and
            converted to NumPy first, so items are presumably channel-first
            (C, H, W) tensors.
        transform: Optional callable applied to every region.
    """

    def __init__(self, ears, tails, noses, transform=None):
        self.ears, self.tails, self.noses = ears, tails, noses
        self.transform = transform

    def __len__(self):
        return len(self.ears)

    def __getitem__(self, idx):
        """Return the ``(ear, tail, nose)`` tuple for sample ``idx``."""
        sources = (self.ears, self.tails, self.noses)
        if not self.transform:
            return tuple(source[idx] for source in sources)
        # Channel-first tensor -> channel-last ndarray before transforming.
        return tuple(
            self.transform(source[idx].permute(1, 2, 0).numpy())
            for source in sources
        )

In [27]:
# 多輸入 CNN 模型定義
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models

class MultiInputCNN(nn.Module):
    """Three-branch classifier over ear, tail and nose crops.

    Each input is passed through its own VGG16-BN convolutional stack,
    globally average-pooled to a 512-dim vector, and the three vectors are
    concatenated and classified by a small MLP head.

    Args:
        num_classes: Number of output logits.
        pretrained: When True (default) every branch starts from the
            ImageNet VGG16-BN weights; pass False to build randomly
            initialised branches without downloading anything.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()

        # torchvision deprecated the boolean `pretrained=` flag in favour of
        # explicit weight enums; IMAGENET1K_V1 is what `pretrained=True` loaded.
        weights = models.VGG16_BN_Weights.IMAGENET1K_V1 if pretrained else None

        # One independent VGG16-BN backbone per region (no weight sharing).
        # Attribute names are kept so saved state_dicts remain loadable.
        self.base_model_ears = models.vgg16_bn(weights=weights).features
        self.base_model_tail = models.vgg16_bn(weights=weights).features
        self.base_model_nose = models.vgg16_bn(weights=weights).features

        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))

        # Fully connected head that fuses the three 512-dim feature vectors.
        self.classifier = nn.Sequential(
            nn.Linear(512 * 3, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x_ears, x_tail, x_nose):
        """Return logits of shape ``(batch, num_classes)`` for the three image batches."""
        branches = (
            (self.base_model_ears, x_ears),
            (self.base_model_tail, x_tail),
            (self.base_model_nose, x_nose),
        )
        # Pool each branch to (batch, 512, 1, 1) and flatten to (batch, 512).
        features = [
            torch.flatten(self.global_pool(backbone(x)), 1)
            for backbone, x in branches
        ]
        return self.classifier(torch.cat(features, dim=1))


In [28]:
# Label Smoothing 損失
class LabelSmoothingCrossEntropy(nn.Module):
    """Cross-entropy loss with uniform label smoothing.

    The loss is ``(1 - smoothing) * NLL(target) + smoothing * mean_c(-log p_c)``
    averaged over the batch.

    Args:
        smoothing: Weight given to the uniform term.
    """

    def __init__(self, smoothing=0.1):
        super().__init__()
        self.smoothing = smoothing

    def forward(self, pred, target):
        """Return the scalar smoothed loss for logits ``pred`` and class indices ``target``."""
        log_p = F.log_softmax(pred, dim=-1)

        # Negative log-likelihood of the true class, one value per sample.
        target_term = -log_p.gather(dim=-1, index=target.unsqueeze(1)).squeeze(1)
        # Average negative log-probability across all classes.
        uniform_term = -log_p.mean(dim=-1)

        keep = 1.0 - self.smoothing
        per_sample = keep * target_term + self.smoothing * uniform_term
        return per_sample.mean()

In [29]:
# 虛擬資料集類別 (需改為實際資料讀取邏輯)
from torch.utils.data import Dataset
import torch

class MultiInputDogDataset(Dataset):
    """Synthetic stand-in dataset producing random images and labels.

    Every access draws fresh random tensors; the requested index is ignored.
    Replace with real image loading before drawing any conclusions from
    training metrics.

    Args:
        num_samples: Value reported by ``len()``.
        num_classes: Labels are drawn uniformly from ``[0, num_classes)``.
    """

    def __init__(self, num_samples=1000, num_classes=120):
        self.num_samples, self.num_classes = num_samples, num_classes

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        """Return ``(ears, tail, nose, label)`` with 3x224x224 noise images."""
        # Simulated ear / tail / nose inputs, drawn in that order; a real
        # implementation would load and preprocess the matching image crops.
        ears_img, tail_img, nose_img = (torch.randn(3, 224, 224) for _ in range(3))
        target = torch.randint(0, self.num_classes, (1,)).item()
        return ears_img, tail_img, nose_img, target

In [30]:
# ===================== Dummy Data Preparation =====================
def prepare_pytorch_data(image_dir, labels_path, image_size=(64, 64)):
    """Return placeholder training tensors (random data).

    ``image_dir`` and ``labels_path`` are accepted but not read; the function
    prints a warning and fabricates 1000 samples.

    Args:
        image_dir: Unused placeholder for the training image directory.
        labels_path: Unused placeholder for the labels file.
        image_size: ``(height, width)`` of the generated images.

    Returns:
        ``(X_ear, X_tail, X_nose, y)``: three uniform-random tensors of shape
        ``(1000, 3, height, width)`` and integer labels in ``[0, 120)``.
    """
    print("\u26a0\ufe0f Dummy data generated, replace with real data processing logic.")
    sample_count = 1000
    height, width = image_size[0], image_size[1]
    # Drawn in ear, tail, nose order.
    ear_batch, tail_batch, nose_batch = (
        torch.rand(sample_count, 3, height, width) for _ in range(3)
    )
    targets = torch.randint(0, 120, (sample_count,))
    return ear_batch, tail_batch, nose_batch, targets

def prepare_data_with_local(test_image_dir, test_df, image_size):
    """Return placeholder test tensors (random data).

    ``test_image_dir`` is accepted but not read; only ``len(test_df)`` is
    used, to decide how many samples to fabricate.

    Args:
        test_image_dir: Unused placeholder for the test image directory.
        test_df: Any sized object; its length is the number of samples.
        image_size: ``(height, width)`` of the generated images.

    Returns:
        A tuple of three uniform-random tensors, each of shape
        ``(len(test_df), 3, height, width)``.
    """
    print("\u26a0\ufe0f Dummy test data generated, replace with real test logic.")
    sample_count = len(test_df)
    height, width = image_size[0], image_size[1]
    return tuple(torch.rand(sample_count, 3, height, width) for _ in range(3))


In [31]:
# 訓練與驗證主程式
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
from tqdm import tqdm
import torch.optim as optim


In [32]:
# AMP
from torch.cuda.amp import autocast, GradScaler

In [33]:
def _run_epoch(model, loader, criterion, device, desc, use_amp, optimizer=None, scaler=None):
    """Run one pass over ``loader`` and return ``(avg_loss, accuracy)``.

    Trains (with gradient scaling) when ``optimizer`` is given, otherwise
    evaluates with gradients disabled. Both values are NaN for an empty loader.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum, n_correct, n_seen = 0.0, 0, 0
    with torch.set_grad_enabled(training):
        for x_ears, x_tail, x_nose, labels in tqdm(loader, desc=desc):
            x_ears, x_tail, x_nose, labels = (
                x_ears.to(device), x_tail.to(device), x_nose.to(device), labels.to(device)
            )
            if training:
                optimizer.zero_grad()

            with autocast(enabled=use_amp):
                outputs = model(x_ears, x_tail, x_nose)
                loss = criterion(outputs, labels)

            if training:
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()

            # Weight the batch-mean loss by batch size so the epoch average is
            # correct even when the last batch is smaller.
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            n_correct += (outputs.argmax(dim=1) == labels).sum().item()
            n_seen += batch_size

    if n_seen == 0:
        # Empty loader: report NaN instead of dividing by zero.
        return float("nan"), float("nan")
    return loss_sum / n_seen, n_correct / n_seen


def train_and_validate(model, train_loader, val_loader, device, num_epochs=10):
    """Train ``model`` with label-smoothed cross-entropy and validate every epoch.

    Uses Adam (lr=1e-4). Mixed precision and gradient scaling are enabled
    only when ``device`` is a CUDA device. Per-epoch loss and accuracy are
    printed and logged to TensorBoard under ``Train/*`` and ``Val/*``.

    Args:
        model: Module called as ``model(x_ears, x_tail, x_nose)``.
        train_loader: Iterable of ``(x_ears, x_tail, x_nose, labels)`` batches.
        val_loader: Iterable with the same batch layout, used for validation.
        device: Device (or device string) the batches are moved to.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        A list with one dict per epoch holding ``epoch``, ``train_loss``,
        ``train_acc``, ``val_loss`` and ``val_acc``.
    """
    criterion = LabelSmoothingCrossEntropy(smoothing=0.1)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    # CUDA AMP is meaningless on CPU and an enabled GradScaler rejects CPU
    # losses, so only switch both on for CUDA devices.
    use_amp = torch.device(device).type == "cuda"
    scaler = GradScaler(enabled=use_amp)

    history = []
    writer = SummaryWriter()
    try:
        for epoch in range(num_epochs):
            avg_loss, acc = _run_epoch(
                model, train_loader, criterion, device,
                desc=f"Epoch {epoch+1}/{num_epochs} - Train",
                use_amp=use_amp, optimizer=optimizer, scaler=scaler,
            )
            writer.add_scalar('Train/Loss', avg_loss, epoch)
            writer.add_scalar('Train/Accuracy', acc, epoch)

            # Validation phase.
            val_avg_loss, val_acc = _run_epoch(
                model, val_loader, criterion, device,
                desc="Validation", use_amp=use_amp,
            )
            writer.add_scalar('Val/Loss', val_avg_loss, epoch)
            writer.add_scalar('Val/Accuracy', val_acc, epoch)

            print(f"Epoch {epoch+1}: Train Loss={avg_loss:.4f}, Acc={acc:.4f} | Val Loss={val_avg_loss:.4f}, Acc={val_acc:.4f}")
            history.append({
                "epoch": epoch + 1,
                "train_loss": avg_loss,
                "train_acc": acc,
                "val_loss": val_avg_loss,
                "val_acc": val_acc,
            })
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()

    return history


In [34]:
if __name__ == "__main__":
    # Use the GPU when CUDA is available, otherwise fall back to the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Size of the classifier output and range of the synthetic labels.
    num_classes = 120

    model = MultiInputCNN(num_classes=num_classes).to(device)

    # NOTE(review): MultiInputDogDataset yields random tensors and random
    # labels, so the metrics reported by this run can only hover around
    # chance level (1/120).
    train_dataset = MultiInputDogDataset(num_samples=1000, num_classes=num_classes)
    val_dataset = MultiInputDogDataset(num_samples=200, num_classes=num_classes)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32)

    train_and_validate(model, train_loader, val_loader, device, num_epochs=10)

    # Save the model weights.
    torch.save(model.state_dict(), "multi_input_cnn.pth")


Downloading: "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth" to /root/.cache/torch/hub/checkpoints/vgg16_bn-6c64b313.pth
100%|██████████| 528M/528M [00:02<00:00, 246MB/s]
  scaler = GradScaler()
  with autocast():
Epoch 1/10 - Train: 100%|██████████| 32/32 [00:28<00:00,  1.11it/s]
  with autocast():
Validation: 100%|██████████| 7/7 [00:02<00:00,  3.24it/s]


Epoch 1: Train Loss=4.7885, Acc=0.0050 | Val Loss=4.7839, Acc=0.0100


Epoch 2/10 - Train: 100%|██████████| 32/32 [00:27<00:00,  1.16it/s]
Validation: 100%|██████████| 7/7 [00:02<00:00,  3.18it/s]


Epoch 2: Train Loss=4.7902, Acc=0.0080 | Val Loss=4.7877, Acc=0.0050


Epoch 3/10 - Train: 100%|██████████| 32/32 [00:28<00:00,  1.13it/s]
Validation: 100%|██████████| 7/7 [00:02<00:00,  3.08it/s]


Epoch 3: Train Loss=4.7883, Acc=0.0100 | Val Loss=4.7850, Acc=0.0050


Epoch 4/10 - Train: 100%|██████████| 32/32 [00:29<00:00,  1.09it/s]
Validation: 100%|██████████| 7/7 [00:02<00:00,  3.08it/s]


Epoch 4: Train Loss=4.7892, Acc=0.0080 | Val Loss=4.7906, Acc=0.0100


Epoch 5/10 - Train: 100%|██████████| 32/32 [00:29<00:00,  1.10it/s]
Validation: 100%|██████████| 7/7 [00:02<00:00,  3.09it/s]


Epoch 5: Train Loss=4.7852, Acc=0.0090 | Val Loss=4.7891, Acc=0.0150


Epoch 6/10 - Train: 100%|██████████| 32/32 [00:28<00:00,  1.11it/s]
Validation: 100%|██████████| 7/7 [00:02<00:00,  3.10it/s]


Epoch 6: Train Loss=4.7884, Acc=0.0070 | Val Loss=4.7843, Acc=0.0100


Epoch 7/10 - Train: 100%|██████████| 32/32 [00:29<00:00,  1.10it/s]
Validation: 100%|██████████| 7/7 [00:02<00:00,  3.07it/s]


Epoch 7: Train Loss=4.7882, Acc=0.0100 | Val Loss=4.7862, Acc=0.0100


Epoch 8/10 - Train: 100%|██████████| 32/32 [00:28<00:00,  1.11it/s]
Validation: 100%|██████████| 7/7 [00:02<00:00,  3.09it/s]


Epoch 8: Train Loss=4.7874, Acc=0.0050 | Val Loss=4.7893, Acc=0.0050


Epoch 9/10 - Train: 100%|██████████| 32/32 [00:28<00:00,  1.11it/s]
Validation: 100%|██████████| 7/7 [00:02<00:00,  3.10it/s]


Epoch 9: Train Loss=4.7886, Acc=0.0030 | Val Loss=4.7865, Acc=0.0150


Epoch 10/10 - Train: 100%|██████████| 32/32 [00:28<00:00,  1.11it/s]
Validation: 100%|██████████| 7/7 [00:02<00:00,  3.11it/s]


Epoch 10: Train Loss=4.7890, Acc=0.0090 | Val Loss=4.7880, Acc=0.0100


In [35]:
# 多輸入 CNN 模型定義
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models

# NOTE(review): this cell defines MultiInputCNN a second time; the class is
# already defined in an earlier cell of this notebook. Objects created before
# this cell runs keep using the earlier class object.
class MultiInputCNN(nn.Module):
    """Three-branch classifier over ear, tail and nose crops.

    Each input is passed through its own VGG16-BN convolutional stack,
    globally average-pooled to a 512-dim vector, and the three vectors are
    concatenated and classified by a small MLP head.

    Args:
        num_classes: Number of output logits.
        pretrained: When True (default) every branch starts from the
            ImageNet VGG16-BN weights; pass False to build randomly
            initialised branches without downloading anything.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()

        # torchvision deprecated the boolean `pretrained=` flag in favour of
        # explicit weight enums; IMAGENET1K_V1 is what `pretrained=True` loaded.
        weights = models.VGG16_BN_Weights.IMAGENET1K_V1 if pretrained else None

        # One independent VGG16-BN backbone per region (no weight sharing).
        # Attribute names are kept so saved state_dicts remain loadable.
        self.base_model_ears = models.vgg16_bn(weights=weights).features
        self.base_model_tail = models.vgg16_bn(weights=weights).features
        self.base_model_nose = models.vgg16_bn(weights=weights).features

        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))

        # Fully connected head that fuses the three 512-dim feature vectors.
        self.classifier = nn.Sequential(
            nn.Linear(512 * 3, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x_ears, x_tail, x_nose):
        """Return logits of shape ``(batch, num_classes)`` for the three image batches."""
        branches = (
            (self.base_model_ears, x_ears),
            (self.base_model_tail, x_tail),
            (self.base_model_nose, x_nose),
        )
        # Pool each branch to (batch, 512, 1, 1) and flatten to (batch, 512).
        features = [
            torch.flatten(self.global_pool(backbone(x)), 1)
            for backbone, x in branches
        ]
        return self.classifier(torch.cat(features, dim=1))


In [36]:
# Define LabelSmoothingCrossEntropy here or in a previous cell
class LabelSmoothingCrossEntropy(torch.nn.Module):
    """Cross-entropy against label-smoothed one-hot targets.

    The target distribution is ``(1 - eps) * one_hot + eps / num_classes``.

    Args:
        eps: Smoothing factor.
        smoothing: Alias for ``eps``. The training loop in this notebook
            constructs the loss as ``LabelSmoothingCrossEntropy(smoothing=0.1)``,
            so the keyword must stay accepted. When given, it takes
            precedence over ``eps``.
    """

    def __init__(self, eps=0.1, smoothing=None):
        super().__init__()
        if smoothing is not None:
            eps = smoothing
        self.eps = eps
        self.log_softmax = torch.nn.LogSoftmax(dim=1)

    def forward(self, output, target):
        """Return the batch-mean loss for logits ``output`` (N, C) and class indices ``target`` (N,)."""
        num_classes = output.size(1)
        log_probs = self.log_softmax(output)
        # one_hot requires integer (int64) class indices.
        target = target.long()
        target = torch.nn.functional.one_hot(target, num_classes).float()
        smooth_target = (1 - self.eps) * target + self.eps / num_classes
        return (-smooth_target * log_probs).sum(dim=1).mean()


In [37]:
from torch.utils.tensorboard import SummaryWriter

In [44]:
# Predict on the test set and write the submission CSV.
# Relies on `model`, `device`, `val_transform`, `TestDogDataset` and
# `prepare_data_with_local` from earlier cells.
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
from tqdm import tqdm

test_df = pd.read_csv('dog-breed-identification/sample_submission.csv')

# Image size handed to the data-preparation helper.
image_size = (64, 64)

# The helper's documented return order is (ear, tail, nose), matching
# prepare_pytorch_data; unpack in that order so the tail and nose tensors
# are not swapped once real data replaces the dummy tensors.
X_test_ear, X_test_tail, X_test_nose = prepare_data_with_local(
    'dog-breed-identification/test', test_df, image_size
)

# Build the test Dataset & DataLoader (no shuffling: row order must match test_df).
test_dataset = TestDogDataset(X_test_ear, X_test_tail, X_test_nose, transform=val_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Predict class probabilities batch by batch.
model.eval()
all_preds = []

with torch.no_grad():
    for x_ears, x_tail, x_nose in tqdm(test_loader, desc="Testing"):
        x_ears, x_tail, x_nose = x_ears.to(device), x_tail.to(device), x_nose.to(device)
        outputs = model(x_ears, x_tail, x_nose)
        probs = torch.softmax(outputs, dim=1)
        all_preds.append(probs.cpu().numpy())

# Stack the per-batch predictions into one (num_samples, num_classes) array.
all_preds = np.vstack(all_preds)

# Build the submission: one probability column per breed, in sorted order.
labels_df = pd.read_csv('dog-breed-identification/labels.csv')
breeds = sorted(labels_df['breed'].unique())

if all_preds.shape != (len(test_df), len(breeds)):
    raise ValueError(
        f"Prediction matrix has shape {all_preds.shape}, expected "
        f"({len(test_df)}, {len(breeds)}) to match the test rows and breed columns"
    )

submission = pd.DataFrame(all_preds, columns=breeds)
submission.insert(0, 'id', test_df['id'])
submission.to_csv('submission_multi_input_cnn.csv', index=False)

⚠️ Dummy test data generated, replace with real test logic.


Testing: 100%|██████████| 324/324 [00:23<00:00, 14.06it/s]


In [41]:
# Test-set prediction and submission CSV.
# NOTE(review): this cell originally used a Keras-style `model.predict`,
# unpacked four tensors from a helper that returns three, and read an
# undefined `labels` frame. It now runs against the PyTorch model defined in
# this notebook and writes the same output file as the other prediction cell.
test_df = pd.read_csv('dog-breed-identification/sample_submission.csv')
X_test_ear, X_test_tail, X_test_nose = prepare_data_with_local(
    'dog-breed-identification/test', test_df, image_size
)

test_loader = DataLoader(
    TestDogDataset(X_test_ear, X_test_tail, X_test_nose, transform=val_transform),
    batch_size=32,
    shuffle=False,  # keep predictions aligned with test_df rows
)

model.eval()
with torch.no_grad():
    preds = np.vstack([
        torch.softmax(
            model(x_ears.to(device), x_tail.to(device), x_nose.to(device)), dim=1
        ).cpu().numpy()
        for x_ears, x_tail, x_nose in test_loader
    ])

# One probability column per breed; get_dummies yields the breeds in sorted order.
labels = pd.read_csv('dog-breed-identification/labels.csv')
submission = pd.DataFrame(preds, columns=pd.get_dummies(labels['breed']).columns)
submission.insert(0, 'id', test_df['id'])
submission.to_csv('submission_multi_input_cnn.csv', index=False)