<a href="https://colab.research.google.com/github/tj191073-droid/tj191073/blob/main/%E2%80%9Cfishnet2%E2%80%9D%E7%9A%84%E5%89%AF%E6%9C%AC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Clone the fish-vista dataset repository from the Hugging Face Hub into ./fish-vista.
!git clone https://huggingface.co/datasets/imageomics/fish-vista

Cloning into 'fish-vista'...
remote: Enumerating objects: 80157, done.[K
remote: Total 80157 (delta 0), reused 0 (delta 0), pack-reused 80157 (from 1)[K
Receiving objects: 100% (80157/80157), 212.04 MiB | 27.02 MiB/s, done.
Resolving deltas: 100% (104/104), done.
Updating files: 100% (75833/75833), done.
Filtering content: 100% (75733/75733), 11.04 GiB | 17.11 MiB/s, done.


In [None]:
# Switch the notebook's working directory to the cloned repository; later cells
# that use relative paths run from here.
%cd fish-vista
# Set up Git LFS, then fetch the LFS-tracked files of the repository.
!git lfs install
!git lfs pull

In [None]:
import os
# Sanity check: report whether the training CSV is present in the cloned dataset.
print(os.path.exists(os.path.join("/content/fish-vista", "classification_train.csv")))

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Output locations for the split dataset, laid out as <split>/<class>/<image>.
base_dir = "/content/fish_dataset_split"
train_dir = os.path.join(base_dir, "train")
val_dir = os.path.join(base_dir, "val")

# `subset` is not created in this cell. Check for it *before* wiping the previous
# split, so a fresh-kernel run fails with a clear message and without data loss.
if "subset" not in globals():
    raise NameError(
        "`subset` is not defined: run the cell that builds the DataFrame with "
        "'standardized_species' and 'image_path' columns before this one."
    )

# Remove any previous split so stale files cannot leak into the new one.
if os.path.exists(base_dir):
    shutil.rmtree(base_dir)

# Recreate the top-level directory structure.
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

skipped_paths = 0      # rows whose image_path is missing or empty
skipped_classes = []   # classes with too few usable images to split

# Split the images class by class. Rows without a species label are dropped,
# since they cannot be assigned to a class folder.
for cls in subset['standardized_species'].dropna().unique():
    cls_df = subset[subset['standardized_species'] == cls]

    # Keep only non-empty string paths. NaN is truthy, so a bare `if src:` check
    # would let it through and shutil.copy would then fail.
    all_paths = cls_df['image_path'].tolist()
    img_paths = [p for p in all_paths if isinstance(p, str) and p]
    skipped_paths += len(all_paths) - len(img_paths)

    # train_test_split cannot produce both a train and a val part from < 2 samples.
    if len(img_paths) < 2:
        skipped_classes.append(cls)
        continue

    train_imgs, val_imgs = train_test_split(
        img_paths, test_size=0.2, random_state=42
    )

    # Create the class sub-directory in each split and copy its images there.
    for split_dir, split_imgs in ((train_dir, train_imgs), (val_dir, val_imgs)):
        cls_dir = os.path.join(split_dir, cls)
        os.makedirs(cls_dir, exist_ok=True)
        for src in split_imgs:
            shutil.copy(src, os.path.join(cls_dir, os.path.basename(src)))

if skipped_paths:
    print(f"Skipped {skipped_paths} rows with a missing or empty image_path.")
if skipped_classes:
    print(f"Skipped {len(skipped_classes)} classes with fewer than 2 usable images: {skipped_classes}")

print("✅ 图像划分完成！")

✅ 图像划分完成！


In [None]:
import torch
from torch import nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import os

# Pick the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed torch so the new classifier head's initialisation and the shuffling
# order are repeatable across runs.
torch.manual_seed(42)

# Hyper-parameters and dataset locations.
image_size = 128
batch_size = 16
num_epochs = 20
data_dir = "/content/fish_dataset_split"
train_dir = os.path.join(data_dir, "train")
val_dir = os.path.join(data_dir, "val")

# Preprocessing: both splits are resized to a square image; only the training
# split gets random horizontal flips as augmentation.
train_transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])
val_transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor()
])

# Load the datasets from the <split>/<class>/<image> directory layout.
train_dataset = datasets.ImageFolder(train_dir, transform=train_transform)
val_dataset = datasets.ImageFolder(val_dir, transform=val_transform)

# Size the classifier from the classes ImageFolder actually discovered. A raw
# os.listdir() count would also include stray entries (e.g. `.ipynb_checkpoints`
# or loose files) that never become labels.
num_classes = len(train_dataset.classes)

# Labels are indices into each dataset's own class list, so the two lists must
# be identical for validation accuracy to be meaningful.
if val_dataset.classes != train_dataset.classes:
    raise ValueError(
        "Train and val splits contain different class folders; "
        "label indices would not line up."
    )

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

# Load an ImageNet-pretrained MobileNetV2 and replace its final linear layer
# with one that outputs `num_classes` logits.
model = models.mobilenet_v2(weights="IMAGENET1K_V1")
model.classifier[1] = nn.Linear(model.last_channel, num_classes)
model.to(device)

# Loss function and optimizer (all model parameters are fine-tuned).
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Training loop
def train_model(model, train_loader, val_loader, epochs,
                criterion=None, optimizer=None, device=None,
                save_path="best_fish_model.pth"):
    """Train ``model`` and checkpoint the weights with the best validation accuracy.

    Each epoch runs one pass over ``train_loader`` (with gradient updates) and one
    pass over ``val_loader`` (without gradients), prints both accuracies, and saves
    ``model.state_dict()`` to ``save_path`` whenever validation accuracy improves.

    Args:
        model: The network to train; called as ``model(images)`` to get logits.
        train_loader: Iterable yielding ``(images, labels)`` training batches.
        val_loader: Iterable yielding ``(images, labels)`` validation batches.
        epochs: Number of epochs to run.
        criterion: Loss called as ``criterion(outputs, labels)``. Defaults to the
            notebook-level ``criterion``.
        optimizer: Optimizer that updates ``model``. Defaults to the notebook-level
            ``optimizer``.
        device: Device the batches are moved to. Defaults to the notebook-level
            ``device``.
        save_path: Where the best checkpoint is written.

    Returns:
        The best validation accuracy seen (0.0 if it never rose above zero).
    """
    # Fall back to the notebook-level objects so existing four-argument calls keep
    # working. The parameters shadow those names, hence the globals() lookup.
    if criterion is None:
        criterion = globals()["criterion"]
    if optimizer is None:
        optimizer = globals()["optimizer"]
    if device is None:
        device = globals()["device"]

    best_val_acc = 0.0
    for epoch in range(epochs):
        # Training pass; accuracy is accumulated from the training-mode outputs.
        model.train()
        train_correct, train_total = 0, 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            _, preds = torch.max(outputs, 1)
            train_correct += (preds == labels).sum().item()
            train_total += labels.size(0)
        # Guard against an empty loader instead of raising ZeroDivisionError.
        train_acc = train_correct / train_total if train_total else 0.0

        # Validation pass: eval mode, no gradient tracking.
        model.eval()
        val_correct, val_total = 0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, preds = torch.max(outputs, 1)
                val_correct += (preds == labels).sum().item()
                val_total += labels.size(0)
        val_acc = val_correct / val_total if val_total else 0.0

        # Report this epoch's accuracies.
        print(f"Epoch {epoch+1}: Train Acc = {train_acc:.4f}, Val Acc = {val_acc:.4f}")

        # Keep only the weights with the best validation accuracy so far.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), save_path)
            print(f"📦 模型更新，保存 {save_path}（Val Acc: {val_acc:.4f}）")

    return best_val_acc

# Kick off training for the configured number of epochs.
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=num_epochs,
)

Epoch 1: Train Acc = 0.7732, Val Acc = 0.8450
📦 模型更新，保存 best_fish_model.pth（Val Acc: 0.8450）
Epoch 2: Train Acc = 0.8722, Val Acc = 0.9050
📦 模型更新，保存 best_fish_model.pth（Val Acc: 0.9050）
Epoch 3: Train Acc = 0.9135, Val Acc = 0.8850
Epoch 4: Train Acc = 0.9398, Val Acc = 0.9400
📦 模型更新，保存 best_fish_model.pth（Val Acc: 0.9400）
Epoch 5: Train Acc = 0.9637, Val Acc = 0.9450
📦 模型更新，保存 best_fish_model.pth（Val Acc: 0.9450）
Epoch 6: Train Acc = 0.9649, Val Acc = 0.9550
📦 模型更新，保存 best_fish_model.pth（Val Acc: 0.9550）
Epoch 7: Train Acc = 0.9449, Val Acc = 0.8900
Epoch 8: Train Acc = 0.9273, Val Acc = 0.9500
Epoch 9: Train Acc = 0.9386, Val Acc = 0.9250
Epoch 10: Train Acc = 0.9712, Val Acc = 0.9550
Epoch 11: Train Acc = 0.9799, Val Acc = 0.9700
📦 模型更新，保存 best_fish_model.pth（Val Acc: 0.9700）
Epoch 12: Train Acc = 0.9925, Val Acc = 0.9850
📦 模型更新，保存 best_fish_model.pth（Val Acc: 0.9850）
Epoch 13: Train Acc = 0.9787, Val Acc = 0.9250
Epoch 14: Train Acc = 0.9586, Val Acc = 0.7350
Epoch 15: Train Acc = 

In [None]:
from PIL import Image
import torchvision.transforms as transforms
import torch
import os
from torchvision import models
from torch import nn
from google.colab import files
import io

# Open Colab's upload dialog; the result is a dict keyed by the saved filename.
uploaded = files.upload()

# An empty result (e.g. the dialog was cancelled) would otherwise surface as an
# opaque IndexError when picking the first file.
if not uploaded:
    raise RuntimeError("No file was uploaded; re-run this cell and choose an image.")

# Only the first uploaded file is used for prediction.
img_path = next(iter(uploaded))

# Show the filename the image was saved under.
print(f"📷 上传的文件: {img_path}")

Saving JFBM-FISH-0017324-14.jpg to JFBM-FISH-0017324-14 (1).jpg
📷 上传的文件: JFBM-FISH-0017324-14 (1).jpg


In [None]:
# Inference settings; image_size matches the value used by the training cell.
image_size = 128
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Class labels in the order ImageFolder assigns indices: the sorted names of the
# sub-directories only. A raw os.listdir() would also pick up loose files or
# hidden entries and shift every label after them.
data_dir = "/content/fish_dataset_split/train"
class_names = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)

# Read the checkpoint first so the classifier head can be sized from the saved
# weights themselves rather than from the directory listing.
state_dict = torch.load("best_fish_model.pth", map_location=device)
num_outputs = state_dict["classifier.1.weight"].shape[0]
if num_outputs != len(class_names):
    print(
        f"Warning: checkpoint has {num_outputs} outputs but "
        f"{len(class_names)} class folders were found in {data_dir}."
    )

# Build the architecture without downloading ImageNet weights: every parameter
# is overwritten by the checkpoint below.
model = models.mobilenet_v2(weights=None)
model.classifier[1] = nn.Linear(model.last_channel, num_outputs)

# Load the fine-tuned weights and switch to inference mode.
model.load_state_dict(state_dict)
model.to(device)
model.eval()

# Same preprocessing as the validation transform in the training cell.
transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor()
])

# Read the uploaded image (closing the file handle afterwards) and turn it into
# a single-image batch on the target device.
with Image.open(img_path) as raw_img:
    img = raw_img.convert("RGB")
input_tensor = transform(img).unsqueeze(0).to(device)

# Predict: take the index of the largest logit and map it to a class name.
with torch.no_grad():
    output = model(input_tensor)
    _, pred = torch.max(output, 1)
    pred_idx = pred.item()
    if pred_idx < len(class_names):
        predicted_class = class_names[pred_idx]
    else:
        # Only reachable when the checkpoint has more outputs than class folders.
        predicted_class = f"<unknown class index {pred_idx}>"

print(f"🔍 预测结果：{predicted_class}")

🔍 预测结果：fundulus catenatus
