In [None]:
import torch

# Choose the compute backend once, in order of preference:
# CUDA if available, otherwise Apple's Metal backend (MPS), otherwise the CPU.
if torch.cuda.is_available():
    default_device_name = 'cuda'
elif torch.backends.mps.is_available():
    default_device_name = 'mps'
else:
    default_device_name = 'cpu'

# Derive the device object from the name chosen above instead of repeating the
# detection chain, so the two values can never disagree.
device = torch.device(default_device_name)

In [None]:
import kagglehub

# Download the latest version of the fruit dataset through kagglehub and keep
# the value returned by `dataset_download` for the cells that follow.
fruit_dataset_handle = "araraltawil/fruit-101-dataset"
fruit_path = kagglehub.dataset_download(fruit_dataset_handle)

print("Path to dataset files:", fruit_path)

In [None]:
import kagglehub

# Download the latest version of the flower dataset through kagglehub and keep
# the value returned by `dataset_download` for the cells that follow.
flower_dataset_handle = "olgabelitskaya/flower-color-images"
flower_path = kagglehub.dataset_download(flower_dataset_handle)

print("Path to dataset files:", flower_path)

In [None]:
# Import the required libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Seed PyTorch's random number generator with a fixed value.
torch.manual_seed(42)

# Preprocessing pipeline: the only step is converting each image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

# Settings shared by the training and test splits.
mnist_batch_size = 256
mnist_options = dict(root='data', download=True, transform=transform)

# Training split and its data loader (shuffled).
train_dataset = datasets.MNIST(train=True, **mnist_options)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=mnist_batch_size)

# Test split and its data loader (not shuffled).
test_dataset = datasets.MNIST(train=False, **mnist_options)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=mnist_batch_size)

In [None]:
from torchvision.datasets import ImageFolder

# Preprocessing pipeline applied to every image of the dataset.
fruit_preprocessing_steps = [
    transforms.Resize((128, 128)),  # resize each image to 128x128
    transforms.ToTensor(),          # convert the image to a tensor
]
transform = transforms.Compose(fruit_preprocessing_steps)

# Build the image dataset.
# ImageFolder walks the sub-directories of the given root and treats the image
# files inside each sub-directory as samples of one class.
dataset = ImageFolder(root=fruit_path, transform=transform)

In [None]:
# Display the number of samples together with the dataset's class names.
(len(dataset), dataset.classes)

In [None]:
def plot(dataset, shuffle=True):
    """Draw up to 16 samples of ``dataset`` in a 4x4 grid of titled images.

    Args:
        dataset: Dataset accepted by ``DataLoader`` whose batches unpack into
            ``(images, labels)``, with ``images`` a 4-D channel-first tensor
            (batch, channel, height, width). If the dataset has a ``classes``
            attribute, it is indexed with the label to obtain each title;
            otherwise the label itself is used as the title.
        shuffle: Forwarded to ``DataLoader``; when true the displayed batch is
            drawn in random order.

    Raises:
        StopIteration: If the dataset yields no batch at all (it is empty).
    """
    # A single batch of at most 16 samples is all the grid can show.
    loader = DataLoader(dataset, batch_size=16, shuffle=shuffle)
    images, labels = next(iter(loader))

    # Move the channel axis last, which is the layout matplotlib expects, and
    # hand matplotlib a plain NumPy array instead of a torch tensor.
    images = images.permute(0, 2, 3, 1).numpy()

    class_names = getattr(dataset, 'classes', None)

    # 4x4 grid of sub-plots.
    fig, axes = plt.subplots(nrows=4, ncols=4, figsize=(8, 8))

    for i, ax in enumerate(axes.flat):
        ax.axis('off')  # hide the axes, also on cells that stay empty

        # The batch is shorter than 16 when the dataset is small; leave the
        # remaining cells blank instead of indexing past the end.
        if i >= len(images):
            continue

        ax.imshow(images[i])

        # Turn a scalar tensor label into a plain Python number so the title
        # reads "3" rather than "tensor(3)" and list indexing is explicit.
        label = labels[i]
        if torch.is_tensor(label) and label.ndim == 0:
            label = label.item()

        if class_names is not None:
            # Predefined class names are available: use them as titles.
            ax.set_title(class_names[label], fontsize=12)
        else:
            # Otherwise fall back to the label value itself.
            ax.set_title(label, fontsize=12)

    plt.show()

In [None]:
# Preview one randomly ordered batch of the current dataset.
plot(dataset, shuffle=True)

In [None]:
import os
from PIL import Image # pip install Pillow
from torch.utils.data import Dataset

In [None]:
class Flowers(Dataset):
    """Image dataset whose integer labels are encoded in the file names.

    Each image file in ``data_dir`` is expected to be named
    ``<label>_<rest>``: the text before the first underscore is parsed as the
    integer class label. Files are indexed in sorted file-name order.

    Args:
        data_dir: Directory that directly contains the image files.
        transform: Optional callable applied to the RGB PIL image in
            ``__getitem__``.
        extensions: Optional iterable of file extensions (with leading dot,
            case-insensitive) to accept. Defaults to ``IMAGE_EXTENSIONS``.

    Raises:
        ValueError: If an accepted image file name does not start with an
            integer label.
    """

    # Extensions accepted by default; the same set torchvision's ImageFolder
    # recognises as images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm',
                        '.tif', '.tiff', '.webp')

    def __init__(self, data_dir, transform=None, extensions=None):
        self.image_paths = []
        self.labels = []
        self.transform = transform

        if extensions is None:
            extensions = self.IMAGE_EXTENSIONS
        extensions = tuple(ext.lower() for ext in extensions)

        # Walk the directory in sorted order and record path + label per image.
        for filename in sorted(os.listdir(data_dir)):
            image_path = os.path.join(data_dir, filename)

            # Ignore sub-directories and non-image files (for example a CSV of
            # labels stored next to the images) instead of failing on them.
            if not os.path.isfile(image_path):
                continue
            if not filename.lower().endswith(extensions):
                continue

            prefix = filename.split('_')[0]
            try:
                label = int(prefix)
            except ValueError:
                raise ValueError(
                    f"Cannot parse an integer label from file name {filename!r}; "
                    "expected the form '<label>_<rest>'"
                ) from None

            self.image_paths.append(image_path)
            self.labels.append(label)

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``.

        The image is loaded as RGB and passed through ``transform`` when one
        was given; the label is returned as a ``torch.long`` tensor.
        """
        # `convert` returns a new image, so the file opened by `Image.open`
        # can be closed as soon as the conversion is done.
        with Image.open(self.image_paths[idx]) as raw_image:
            image = raw_image.convert('RGB')
        label = self.labels[idx]

        # Apply the optional preprocessing pipeline.
        if self.transform:
            image = self.transform(image)

        # Return the label as a PyTorch tensor.
        label = torch.tensor(label, dtype=torch.long)

        return image, label

In [None]:
# Preprocessing pipeline applied to every flower image.
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # resize each image to 128x128
    transforms.ToTensor(),          # convert the image to a tensor
])

# The images live two levels below the download directory. Build the path with
# os.path.join rather than concatenating a hard-coded '/' separator.
flowers_dir = os.path.join(flower_path, 'flowers', 'flowers')
dataset = Flowers(flowers_dir, transform=transform)

plot(dataset)