In [1]:
import pandas as pd
import pyarrow.parquet as pq
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from PIL import Image
import io
import numpy as np
import matplotlib.pyplot as plt

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.is_available()

False

In [3]:
# 加载数据集
table1 = pq.read_table("/Users/portia/Documents/AAA-College/AI/ai4astro/galaxy/train-00000-of-00001.parquet")
table2 = pq.read_table("/Users/portia/Documents/AAA-College/AI/ai4astro/galaxy/test-00000-of-00001.parquet")


df1 = table1.to_pandas()
df2 = table2.to_pandas()

df = pd.concat([df1, df2], ignore_index=True)

In [4]:
def process_image_from_bytes(image_bytes, mode="RGB", channel_index=None, transform=None):
    """
    从字节流中读取图像，转换为指定模式，并提取指定通道或转换为灰度图。

    Args:
        image_bytes (bytes): 图像的字节流数据。
        mode (str, optional): 图像模式（"RGB"、"L" 或 "single"）。默认为 "RGB"。
        channel_index (int, optional): 要提取的通道索引（0: R, 1: G, 2: B）。默认为 None。
        transform (callable, optional): 图像预处理变换。默认为 None。

    Returns:
        torch.Tensor: 处理后的图像张量。
    """
    # 从字节流加载图像
    image = Image.open(io.BytesIO(image_bytes))

    # 处理不同模式
    if mode == "RGB":
        # 转换为 RGB 模式
        image = image.convert("RGB")
        if channel_index is not None:
            # 提取指定通道
            if 0 <= channel_index <= 2:
                image = image.split()[channel_index]
            else:
                raise ValueError("Invalid channel index for RGB mode")
    elif mode == "L":
        # 转换为灰度图
        image = image.convert("L")
    elif mode == "single":
        # 提取单通道
        if channel_index is not None and 0 <= channel_index <= 2:
            image = image.convert("RGB")
            image = image.split()[channel_index]
        else:
            raise ValueError("Invalid channel index for single mode")
    else:
        raise ValueError("Mode must be 'RGB', 'L', or 'single'")

    # 应用预处理变换
    if transform:
        image = transform(image)

    return image

In [5]:
# 定义图像预处理变换
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # 调整大小
    transforms.ToTensor(),        # 转换为张量
    transforms.Normalize(mean=[0.5], std=[0.5])  # 归一化
])

# 假设 image_bytes 是从字节流中读取的图像数据
image_bytes = df['image'][0]['bytes']  # 你的字节流数据

# 提取原始三通道图像
rgb_image = process_image_from_bytes(image_bytes, mode="RGB", transform=transform)
print(f"RGB 图像张量形状: {rgb_image.shape}")

# 提取 R 通道
r_channel = process_image_from_bytes(image_bytes, mode="single", channel_index=0, transform=transform)
print(f"R 通道张量形状: {r_channel.shape}")

# 提取 G 通道
g_channel = process_image_from_bytes(image_bytes, mode="single", channel_index=1, transform=transform)
print(f"G 通道张量形状: {g_channel.shape}")

# 提取 B 通道
b_channel = process_image_from_bytes(image_bytes, mode="single", channel_index=2, transform=transform)
print(f"B 通道张量形状: {b_channel.shape}")

# 转换为灰度图
gray_image = process_image_from_bytes(image_bytes, mode="L", transform=transform)
print(f"灰度图张量形状: {gray_image.shape}")

RGB 图像张量形状: torch.Size([3, 64, 64])
R 通道张量形状: torch.Size([1, 64, 64])
G 通道张量形状: torch.Size([1, 64, 64])
B 通道张量形状: torch.Size([1, 64, 64])
灰度图张量形状: torch.Size([1, 64, 64])


In [6]:
class ParquetDatasetProcessed(torch.utils.data.Dataset):
    def __init__(self, df, mode="RGB", channel_index=None, transform=None):
        self.df = df
        self.mode = mode
        self.channel_index = channel_index
        self.transform = transform
        self.labels = df["summary"].astype("category").cat.codes.astype(np.int64)
        self.label_mapping = dict(enumerate(df["summary"].astype("category").cat.categories))

    def __getitem__(self, idx):
        image_bytes = self.df.iloc[idx]["image"]["bytes"]
        processed_image = process_image_from_bytes(
            image_bytes,
            mode=self.mode,
            channel_index=self.channel_index,
            transform=self.transform
        )

        label = self.labels[idx]
        return processed_image, torch.tensor(label, dtype=torch.long)

    def __len__(self):
        return len(self.df)

In [7]:
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # 调整大小
    transforms.ToTensor(),        # 转换为张量
    transforms.Normalize(mean=[0.5], std=[0.5])  # 归一化
])

<font size="3">**RGB**</font>

In [8]:
# 创建数据集
mode = "RGB"  # 或 "L" 用于灰度图，或 "single" 用于单通道
channel_index = None  # 用于 RGB 模式；0: R, 1: G, 2: B 用于 single 模式
full_dataset = ParquetDatasetProcessed(df, mode=mode, channel_index=channel_index, transform=transform)

# 分割数据集
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])

# 创建数据加载器
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

In [9]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes=7, input_channels=3):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(input_channels, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(32 * 16 * 16, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [10]:
# 训练配置
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleCNN(num_classes=len(full_dataset.label_mapping), input_channels=3).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.001)

# 训练循环
for epoch in range(10):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # 计算准确率
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # 计算平均损失和准确率
    running_loss += loss.item()
    running_acc = 100 * correct / total

    print(f"Epoch {epoch+1} | Loss: {running_loss/len(train_loader):.4f} | Accuracy: {running_acc:.2f}%")

Epoch 1 | Loss: 1.0376 | Accuracy: 73.17%
Epoch 2 | Loss: 0.9798 | Accuracy: 73.61%
Epoch 3 | Loss: 0.9628 | Accuracy: 73.61%
Epoch 4 | Loss: 0.9493 | Accuracy: 73.61%
Epoch 5 | Loss: 0.9238 | Accuracy: 73.61%
Epoch 6 | Loss: 0.9054 | Accuracy: 73.61%
Epoch 7 | Loss: 0.8722 | Accuracy: 73.70%
Epoch 8 | Loss: 0.8191 | Accuracy: 73.86%
Epoch 9 | Loss: 0.7686 | Accuracy: 74.45%
Epoch 10 | Loss: 0.7151 | Accuracy: 75.42%


In [11]:
# 测试代码
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy_test: {100 * correct / total:.2f}%")

Accuracy_test: 70.96%


<font size="3">**Grayscale**</font>

In [12]:
# 创建数据集
mode = "L"  # 或 "L" 用于灰度图，或 "single" 用于单通道
channel_index = None  # 用于 RGB 模式；0: R, 1: G, 2: B 用于 single 模式
full_dataset = ParquetDatasetProcessed(df, mode=mode, channel_index=channel_index, transform=transform)

# 分割数据集
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])

# 创建数据加载器
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

In [13]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes=7, input_channels=1):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(input_channels, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(32 * 16 * 16, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [14]:
# 训练配置
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_channels = 1  # 如果是单通道
# input_channels = 3  # 如果是三通道
model = SimpleCNN(num_classes=len(full_dataset.label_mapping), input_channels=1).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.001)

In [15]:
# 训练循环
for epoch in range(10):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # 计算准确率
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # 计算平均损失和准确率
    running_loss += loss.item()
    running_acc = 100 * correct / total

    print(f"Epoch {epoch+1} | Loss: {running_loss/len(train_loader):.4f} | Accuracy: {running_acc:.2f}%")

Epoch 1 | Loss: 1.0477 | Accuracy: 72.86%
Epoch 2 | Loss: 0.9784 | Accuracy: 73.76%
Epoch 3 | Loss: 0.9713 | Accuracy: 73.76%
Epoch 4 | Loss: 0.9745 | Accuracy: 73.76%
Epoch 5 | Loss: 0.9332 | Accuracy: 73.76%
Epoch 6 | Loss: 0.9081 | Accuracy: 73.76%
Epoch 7 | Loss: 0.8569 | Accuracy: 73.89%
Epoch 8 | Loss: 0.8093 | Accuracy: 74.26%
Epoch 9 | Loss: 0.7598 | Accuracy: 74.73%
Epoch 10 | Loss: 0.7303 | Accuracy: 75.52%


In [16]:
# 测试代码
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy_test: {100 * correct / total:.2f}%")

Accuracy_test: 69.59%


<font size="3">**R**</font>

In [17]:
# 创建数据集
mode = "single"  # 或 "L" 用于灰度图，或 "single" 用于单通道
channel_index = 0  # 用于 RGB 模式；0: R, 1: G, 2: B 用于 single 模式
full_dataset = ParquetDatasetProcessed(df, mode=mode, channel_index=channel_index, transform=transform)

# 分割数据集
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])

# 创建数据加载器
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

In [18]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes=7, input_channels=1):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(input_channels, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(32 * 16 * 16, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [19]:
# 训练配置
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_channels = 1  # 如果是单通道
# input_channels = 3  # 如果是三通道
model = SimpleCNN(num_classes=len(full_dataset.label_mapping), input_channels=1).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.001)

In [20]:
# 训练循环
for epoch in range(10):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # 计算准确率
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # 计算平均损失和准确率
    running_loss += loss.item()
    running_acc = 100 * correct / total

    print(f"Epoch {epoch+1} | Loss: {running_loss/len(train_loader):.4f} | Accuracy: {running_acc:.2f}%")

Epoch 1 | Loss: 1.0271 | Accuracy: 72.89%
Epoch 2 | Loss: 0.9869 | Accuracy: 73.58%
Epoch 3 | Loss: 0.9784 | Accuracy: 73.58%
Epoch 4 | Loss: 0.9480 | Accuracy: 73.58%
Epoch 5 | Loss: 0.9357 | Accuracy: 73.58%
Epoch 6 | Loss: 0.8806 | Accuracy: 73.54%
Epoch 7 | Loss: 0.8290 | Accuracy: 73.95%
Epoch 8 | Loss: 0.7971 | Accuracy: 74.08%
Epoch 9 | Loss: 0.7501 | Accuracy: 74.98%
Epoch 10 | Loss: 0.7093 | Accuracy: 75.55%


In [21]:
# 测试代码
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy_test: {100 * correct / total:.2f}%")

Accuracy_test: 71.21%


<font size="3">**G**</font>

In [22]:
# 创建数据集
mode = "single"  # 或 "L" 用于灰度图，或 "single" 用于单通道
channel_index = 1  # 用于 RGB 模式；0: R, 1: G, 2: B 用于 single 模式
full_dataset = ParquetDatasetProcessed(df, mode=mode, channel_index=channel_index, transform=transform)

# 分割数据集
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])

# 创建数据加载器
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

In [23]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes=7, input_channels=1):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(input_channels, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(32 * 16 * 16, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [24]:
# 训练配置
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_channels = 1  # 如果是单通道
# input_channels = 3  # 如果是三通道
model = SimpleCNN(num_classes=len(full_dataset.label_mapping), input_channels=1).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.001)

In [25]:
# 训练循环
for epoch in range(10):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # 计算准确率
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # 计算平均损失和准确率
    running_loss += loss.item()
    running_acc = 100 * correct / total

    print(f"Epoch {epoch+1} | Loss: {running_loss/len(train_loader):.4f} | Accuracy: {running_acc:.2f}%")

Epoch 1 | Loss: 1.0594 | Accuracy: 72.67%
Epoch 2 | Loss: 0.9964 | Accuracy: 73.45%
Epoch 3 | Loss: 0.9909 | Accuracy: 73.45%
Epoch 4 | Loss: 0.9645 | Accuracy: 73.45%
Epoch 5 | Loss: 0.9656 | Accuracy: 73.45%
Epoch 6 | Loss: 0.9379 | Accuracy: 73.45%
Epoch 7 | Loss: 0.9263 | Accuracy: 73.45%
Epoch 8 | Loss: 0.9011 | Accuracy: 73.51%
Epoch 9 | Loss: 0.8652 | Accuracy: 73.54%
Epoch 10 | Loss: 0.8326 | Accuracy: 73.58%


In [26]:
# 测试代码
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy_test: {100 * correct / total:.2f}%")

Accuracy_test: 72.59%


<font size="3">**B**</font>

In [27]:
# 创建数据集
mode = "single"  # 或 "L" 用于灰度图，或 "single" 用于单通道
channel_index = 2  # 用于 RGB 模式；0: R, 1: G, 2: B 用于 single 模式
full_dataset = ParquetDatasetProcessed(df, mode=mode, channel_index=channel_index, transform=transform)

# 分割数据集
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])

# 创建数据加载器
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

In [28]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes=7, input_channels=1):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(input_channels, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(32 * 16 * 16, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [29]:
# 训练配置
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_channels = 1  # 如果是单通道
# input_channels = 3  # 如果是三通道
model = SimpleCNN(num_classes=len(full_dataset.label_mapping), input_channels=1).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.001)

In [30]:
# 训练循环
for epoch in range(10):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # 计算准确率
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # 计算平均损失和准确率
    running_loss += loss.item()
    running_acc = 100 * correct / total

    print(f"Epoch {epoch+1} | Loss: {running_loss/len(train_loader):.4f} | Accuracy: {running_acc:.2f}%")

Epoch 1 | Loss: 1.0556 | Accuracy: 72.14%
Epoch 2 | Loss: 0.9992 | Accuracy: 72.95%
Epoch 3 | Loss: 0.9692 | Accuracy: 72.95%
Epoch 4 | Loss: 0.9497 | Accuracy: 72.95%
Epoch 5 | Loss: 0.9225 | Accuracy: 72.95%
Epoch 6 | Loss: 0.9002 | Accuracy: 72.92%
Epoch 7 | Loss: 0.8754 | Accuracy: 73.01%
Epoch 8 | Loss: 0.8348 | Accuracy: 73.26%
Epoch 9 | Loss: 0.7817 | Accuracy: 73.76%
Epoch 10 | Loss: 0.7529 | Accuracy: 74.08%


In [31]:
# 测试代码
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy_test: {100 * correct / total:.2f}%")

Accuracy_test: 74.09%
