<a href="https://colab.research.google.com/github/tj191073-droid/tj191073/blob/main/%E2%80%9Cfishnetattention%E2%80%9D%E7%9A%84%E5%89%AF%E6%9C%AC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Clone the Fish-Vista dataset repository from the Hugging Face Hub into ./fish-vista.
# The recorded run materialised about 11 GiB of content, so this cell is slow.
!git clone https://huggingface.co/datasets/imageomics/fish-vista

Cloning into 'fish-vista'...
remote: Enumerating objects: 80157, done.[K
remote: Total 80157 (delta 0), reused 0 (delta 0), pack-reused 80157 (from 1)[K
Receiving objects: 100% (80157/80157), 212.04 MiB | 20.63 MiB/s, done.
Resolving deltas: 100% (104/104), done.
Updating files: 100% (75833/75833), done.
Filtering content: 100% (75730/75730), 11.04 GiB | 19.38 MiB/s, done.


In [None]:
%cd fish-vista
!git lfs install
!git lfs pull

/content/fish-vista
Updated git hooks.
Git LFS initialized.


In [None]:
import os
# Sanity check: report whether the training metadata CSV is present in the clone.
train_csv_found = os.path.exists("/content/fish-vista/classification_train.csv")
print(train_csv_found)

True


In [None]:
import pandas as pd
import os

# 1. Load the raw training metadata.
df = pd.read_csv("/content/fish-vista/classification_train.csv", low_memory=False)

# The raw `family` column mixes capitalisations (e.g. "Ictaluridae" and
# "ictaluridae"). Left as-is, one family is split into several groups, which
# distorts the per-species counts and the "top 5 families" selection.
# Normalise on a derived frame so that `df` itself stays untouched.
df_norm = df.assign(family=df["family"].str.strip().str.capitalize())

# 2. Count images per (family, species).
grouped = (
    df_norm.groupby(["family", "standardized_species"])
    .size()
    .reset_index(name="count")
)

# 3. Keep only species with at least 30 images.
grouped = grouped[grouped["count"] >= 30]

# 4. Keep the 5 families with the most images among the eligible species.
selected_families = (
    grouped.groupby("family")["count"]
    .sum()
    .sort_values(ascending=False)
    .head(5)
    .index.tolist()
)
filtered = grouped[grouped["family"].isin(selected_families)]

# 5. Randomly pick at most 2 species per family.
#    Shuffle once, then take the first rows of each group: this handles groups
#    smaller than the limit and avoids `groupby.apply`, which newer pandas
#    versions warn about when the applied function sees the grouping columns.
selected_species = (
    filtered.sample(frac=1, random_state=42)
    .groupby("family")
    .head(2)
    .reset_index(drop=True)
)

# 6. Pull the image records of the selected species.
sub_df = df_norm[df_norm["standardized_species"].isin(selected_species["standardized_species"])]

# 7. Randomly keep at most 30 images per species (same shuffle-then-head idea),
#    then order the rows by species so each class forms one contiguous run.
subset = (
    sub_df.sample(frac=1, random_state=42)
    .groupby("standardized_species")
    .head(30)
    .sort_values("standardized_species", kind="stable")
    .reset_index(drop=True)
)

# 8. Build the full on-disk image path for every record.
def resolve_path(file_name):
    """Map a metadata ``file_name`` to the image's location in the local clone.

    The chunk id is read from the second "/"-separated component of
    ``file_name``, taking the part after its first "_" (for example
    ``"Images/chunk_3/a.jpg"`` yields ``"3"``). The result is
    ``/content/fish-vista/Images/chunk_<id>/<basename>``.

    Returns:
        The resolved path as a string, or ``None`` when ``file_name`` is not a
        string (e.g. a missing value) or does not have the expected structure.
    """
    try:
        chunk = file_name.split('/')[1].split('_')[1]
    except (AttributeError, IndexError, TypeError):
        # Only malformed input is treated as "no path". A bare `except:` would
        # also swallow KeyboardInterrupt / SystemExit.
        return None
    return os.path.join("/content/fish-vista/Images", f"chunk_{chunk}", os.path.basename(file_name))

# Attach the resolved path, then drop rows whose path could not be built or
# whose file is missing on disk. `.assign` returns a new frame each time, so no
# column is ever written onto a boolean-filtered slice (which can trigger
# SettingWithCopyWarning on pandas versions without copy-on-write).
subset = subset.assign(image_path=subset['file_name'].apply(resolve_path))
subset = subset[subset['image_path'].notnull()]
subset = subset.assign(image_exists=subset['image_path'].apply(os.path.exists))
subset = subset[subset['image_exists']].reset_index(drop=True)

# 9. Show a summary of the final subset.
print(f"✅ 最终子集包含 {subset['family'].nunique()} 个鱼科，{subset['standardized_species'].nunique()} 个物种，{len(subset)} 张图像")
print(subset[['family', 'standardized_species']].value_counts())

✅ 最终子集包含 8 个鱼科，10 个物种，299 张图像
family         standardized_species
Centrarchidae  lepomis megalotis       30
               lepomis miniatus        30
Cottidae       cottus perplexus        30
Cyprinidae     notropis rubellus       30
Esocidae       esox americanus         30
Ictaluridae    noturus exilis          30
Esocidae       esox lucius             30
Cottidae       cottus carolinae        29
Cyprinidae     notropis telescopus     28
Ictaluridae    noturus eleutherus      28
ictaluridae    noturus eleutherus       2
cottidae       cottus carolinae         1
cyprinidae     notropis telescopus      1
Name: count, dtype: int64


  .apply(lambda x: x.sample(n=min(len(x), 2), random_state=42))
  subset = sub_df.groupby('standardized_species', group_keys=False).apply(


In [None]:
import shutil
from sklearn.model_selection import train_test_split

# Output locations for the mini dataset.
base_dir = "/content/fish_dataset_mini"
train_dir = os.path.join(base_dir, "train")
val_dir = os.path.join(base_dir, "val")

# Start from a clean slate: remove any previous export, then recreate the roots.
if os.path.exists(base_dir):
    shutil.rmtree(base_dir)
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# Split every species 80/20 and copy its images into <split>/<species>/.
for cls in subset["standardized_species"].unique():
    class_rows = subset.loc[subset["standardized_species"] == cls, "image_path"]
    # Guard against non-string entries such as None.
    image_paths = [path for path in class_rows.tolist() if isinstance(path, str)]

    train_imgs, val_imgs = train_test_split(image_paths, test_size=0.2, random_state=42)

    train_target = os.path.join(train_dir, cls)
    val_target = os.path.join(val_dir, cls)
    for target in (train_target, val_target):
        os.makedirs(target, exist_ok=True)

    for target, images in ((train_target, train_imgs), (val_target, val_imgs)):
        for src in images:
            shutil.copy(src, os.path.join(target, os.path.basename(src)))

print("✅ 图像划分完成（train/val）")

✅ 图像划分完成（train/val）


In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision.models import mobilenet_v2
import matplotlib.pyplot as plt

# ==== Data locations ====
data_dir = "/content/fish_dataset_mini"
train_dir = os.path.join(data_dir, "train")
val_dir = os.path.join(data_dir, "val")

# ==== Hyper-parameters ====
batch_size = 32
num_epochs = 20
learning_rate = 0.001

# Seed torch's global RNG so shuffling, random flips and the initialisation of
# the new layers are repeatable between runs (42 matches the seed used for the
# pandas / scikit-learn sampling earlier in the notebook).
torch.manual_seed(42)

# ==== Augmentation and preprocessing ====
# Channel mean / std expected by torchvision's ImageNet-pretrained models.
normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])
transform_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize,
])

train_dataset = datasets.ImageFolder(train_dir, transform=transform_train)
val_dataset = datasets.ImageFolder(val_dir, transform=transform_val)

# Derive the class count from the folders instead of hard-coding it, and make
# sure train and val agree on the label <-> index mapping.
num_classes = len(train_dataset.classes)
assert val_dataset.classes == train_dataset.classes, "train/val class folders differ"

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# ==== CBAM block ====
class CBAM_Module(nn.Module):
    """Channel attention followed by spatial attention over a 4-D feature map.

    Args:
        channels: Number of channels ``C`` of the input feature map.
        reduction: Bottleneck ratio of the shared MLP; its hidden width is
            ``channels // reduction``, clamped to at least 1.
        kernel_size: Kernel size of the spatial-attention convolution. It should
            be odd: with ``padding = kernel_size // 2`` an even kernel changes
            the spatial size and the final multiplication no longer lines up.

    ``forward`` takes a tensor of shape ``(B, C, H, W)`` and returns a tensor of
    the same shape, scaled by both attention maps.
    """

    def __init__(self, channels, reduction=16, kernel_size=7):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        # Without the clamp, channels < reduction would give a zero-width
        # bottleneck: the MLP would output zeros and every channel weight would
        # collapse to sigmoid(0) = 0.5.
        hidden = max(channels // reduction, 1)
        self.shared_mlp = nn.Sequential(
            nn.Linear(channels, hidden, bias=False),
            nn.ReLU(),
            nn.Linear(hidden, channels, bias=False)
        )
        self.sigmoid_channel = nn.Sigmoid()

        # Two input planes: the per-pixel mean and max across channels.
        self.conv_spatial = nn.Conv2d(2, 1, kernel_size, padding=kernel_size // 2, bias=False)
        self.sigmoid_spatial = nn.Sigmoid()

    def forward(self, x):
        batch, chans, _, _ = x.size()

        # Channel attention: the shared MLP scores the avg- and max-pooled
        # descriptors; the summed scores gate each channel.
        avg_score = self.shared_mlp(self.avg_pool(x).view(batch, chans))
        max_score = self.shared_mlp(self.max_pool(x).view(batch, chans))
        channel_attn = self.sigmoid_channel(avg_score + max_score).view(batch, chans, 1, 1)
        x = x * channel_attn

        # Spatial attention: a conv over the channel-wise mean and max maps
        # gates each spatial position.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_attn = self.sigmoid_spatial(self.conv_spatial(torch.cat([mean_map, max_map], dim=1)))
        return x * spatial_attn

# ==== MobileNetV2 + CBAM ====
class MobileNetV2_CBAM(nn.Module):
    """ImageNet-pretrained MobileNetV2 with a CBAM block before the classifier.

    Args:
        num_classes: Number of output logits of the replacement classifier head.

    ``forward`` runs the backbone's feature extractor, applies CBAM to the
    resulting feature map, global-average-pools it and feeds the pooled vector
    to a dropout + linear head.
    """

    def __init__(self, num_classes):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision >= 0.13; the `weights`
        # argument selects the same ImageNet checkpoint explicitly.
        self.base = mobilenet_v2(weights="IMAGENET1K_V1")
        # Width of the backbone's final feature map (1280 for the default model).
        feat_dim = self.base.last_channel
        self.cbam = CBAM_Module(channels=feat_dim)
        self.base.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(feat_dim, num_classes)
        )

    def forward(self, x):
        x = self.base.features(x)
        x = self.cbam(x)
        # Global average pool to (B, C, 1, 1), then flatten to (B, C).
        x = torch.flatten(nn.functional.adaptive_avg_pool2d(x, 1), 1)
        return self.base.classifier(x)

# ==== Model, loss and optimizer ====
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MobileNetV2_CBAM(num_classes=num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


def _count_hits(logits, targets):
    """Return how many rows of `logits` have their arg-max equal to `targets`."""
    return (logits.argmax(dim=1) == targets).sum().item()


def _train_one_epoch():
    """Run one optimisation pass over `train_loader`; return the training accuracy."""
    model.train()
    hits = seen = 0
    for batch, targets in train_loader:
        batch, targets = batch.to(device), targets.to(device)
        logits = model(batch)
        loss = criterion(logits, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        hits += _count_hits(logits, targets)
        seen += targets.size(0)
    return hits / seen


def _evaluate():
    """Score the model on `val_loader` without gradients; return the accuracy."""
    model.eval()
    hits = seen = 0
    with torch.no_grad():
        for batch, targets in val_loader:
            batch, targets = batch.to(device), targets.to(device)
            hits += _count_hits(model(batch), targets)
            seen += targets.size(0)
    return hits / seen


# ==== Training loop ====
best_val_acc = 0.0
for epoch in range(1, num_epochs + 1):
    train_acc = _train_one_epoch()
    val_acc = _evaluate()

    print(f"Epoch {epoch}: Train Acc = {train_acc:.4f}, Val Acc = {val_acc:.4f}")

    # Checkpoint whenever validation accuracy improves on the best so far.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "/content/mobilenetv2_cbam_fish.pth")
        print("✅ 模型已保存")

print("🎉 训练完成。最佳验证精度：", best_val_acc)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 83.4MB/s]


Epoch 1: Train Acc = 0.3975, Val Acc = 0.6833
✅ 模型已保存
Epoch 2: Train Acc = 0.7950, Val Acc = 0.7500
✅ 模型已保存
Epoch 3: Train Acc = 0.8410, Val Acc = 0.7833
✅ 模型已保存
Epoch 4: Train Acc = 0.8577, Val Acc = 0.7500
Epoch 5: Train Acc = 0.8870, Val Acc = 0.7500
Epoch 6: Train Acc = 0.9331, Val Acc = 0.7667
Epoch 7: Train Acc = 0.9582, Val Acc = 0.7333
Epoch 8: Train Acc = 0.9582, Val Acc = 0.7333
Epoch 9: Train Acc = 0.9331, Val Acc = 0.7667
Epoch 10: Train Acc = 0.9289, Val Acc = 0.8167
✅ 模型已保存
Epoch 11: Train Acc = 0.9665, Val Acc = 0.7667
Epoch 12: Train Acc = 0.9289, Val Acc = 0.7500
Epoch 13: Train Acc = 0.9289, Val Acc = 0.6833
Epoch 14: Train Acc = 0.9623, Val Acc = 0.7667
Epoch 15: Train Acc = 0.9582, Val Acc = 0.8333
✅ 模型已保存
Epoch 16: Train Acc = 0.9665, Val Acc = 0.8333
Epoch 17: Train Acc = 0.9874, Val Acc = 0.8167
Epoch 18: Train Acc = 0.9833, Val Acc = 0.7667
Epoch 19: Train Acc = 0.9874, Val Acc = 0.8333
Epoch 20: Train Acc = 0.9833, Val Acc = 0.7500
🎉 训练完成。最佳验证精度： 0.833333333333

In [None]:
from torchvision.datasets import ImageFolder

# Root of the training split; replace with your own path if it differs.
train_dir = '/content/fish_dataset_mini/train'

# Read the class names from the folder structure. The ImageFolder is used only
# for its `.classes` list and is deliberately NOT bound to `train_dataset`, so
# the transformed dataset built for training is not replaced by a
# transform-less one.
class_names = ImageFolder(train_dir).classes

print(f"✅ 共检测到 {len(class_names)} 个类别：\n{class_names}")

✅ 共检测到 10 个类别：
['cottus carolinae', 'cottus perplexus', 'esox americanus', 'esox lucius', 'lepomis megalotis', 'lepomis miniatus', 'notropis rubellus', 'notropis telescopus', 'noturus eleutherus', 'noturus exilis']


In [None]:

from google.colab import files
from PIL import Image
import torch
from torchvision import transforms
import os

# Preprocessing for inference: same steps as the validation transform used in training.
predict_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# After training, `model` holds the weights of the LAST epoch, not the best one.
# Restore the best checkpoint written by the training loop when it exists.
best_ckpt_path = "/content/mobilenetv2_cbam_fish.pth"
if os.path.exists(best_ckpt_path):
    model.load_state_dict(torch.load(best_ckpt_path, map_location=device))
# Set inference mode explicitly rather than relying on the state training left
# behind (dropout must be off and batch-norm must use its running statistics).
model.eval()

# Upload one or more images through the Colab file picker.
uploaded = files.upload()

# Predict each uploaded image; a failure on one file does not stop the others.
for fn in uploaded:
    try:
        # Close the file handle as soon as the pixels have been converted.
        with Image.open(fn) as raw_img:
            img = raw_img.convert('RGB')
        input_tensor = predict_tf(img).unsqueeze(0).to(device)

        with torch.no_grad():
            output = model(input_tensor)
            pred_class = torch.argmax(output, dim=1).item()

        print(f"✅ 图像 `{fn}` 的预测类别是：{class_names[pred_class]}")
    except Exception as e:
        print(f"❌ 图像 `{fn}` 处理失败，错误信息：{e}")

Saving 100975_lat_FMNH_FZ#3.jpg to 100975_lat_FMNH_FZ#3.jpg
✅ 图像 `100975_lat_FMNH_FZ#3.jpg` 的预测类别是：notropis rubellus
