In [None]:
#進度條
from tqdm.auto import tqdm 
#處理資料
import os
import csv
import shutil
# Locations of the image folder and of the ground-truth CSV file.
# Example of absolute Windows paths; a backslash is an escape character in a
# normal string literal, so each one has to be doubled ('\\'):
# image_dir = 'C:\\Decktop\\DeepLearning\\hw2_music_test\\music_train'
# csv_file = 'C:\\Decktop\\DeepLearning\\hw2_music_test\\train_truth.csv'
image_dir = "./music_train"
csv_file = "./train_truth.csv"
# NOTE(review): the class folders are created *inside* image_dir, while the
# training cell loads './music_classified' -- confirm the folder is moved or
# renamed in between, or point both cells at the same directory.
#
# Read the CSV row by row and copy every image into a sub-folder named after
# its label, i.e. <image_dir>/<label>/<filename>.
# newline='' is what the csv module asks for so that it can handle line
# endings (and newlines embedded in quoted fields) by itself.
with open(csv_file, 'r', newline='') as file:
    reader = csv.reader(file)
    next(reader, None)  # skip the header row; tolerate a completely empty file
    for row in tqdm(reader):
        # Blank lines (e.g. a trailing newline at the end of the file) yield
        # an empty list and would break the unpacking below.
        if not row:
            continue
        # Each data row is expected to be "<filename>,<label>".
        filename, label = row
        # Round-trip through int so that "3" and "03" map to the same folder
        # and a non-numeric label fails loudly.
        label = int(label)
        label_dir = os.path.join(image_dir, str(label))
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(label_dir, exist_ok=True)
        # Copy (not move) the image, so the original flat folder stays intact.
        src_path = os.path.join(image_dir, filename)
        dst_path = os.path.join(label_dir, filename)
        shutil.copy(src_path, dst_path)

In [None]:
import torch #PyTorch
import torch.nn as nn #神經網路
import torch.optim as optim #優化器
from torch.utils.data import random_split #數據集切割
import torchvision.datasets as datasets #數據集
import torchvision.transforms as transforms #圖片轉換
from torch.utils.data import DataLoader #數據加載器
import pandas as pd #處理CSV檔案
from tqdm.auto import tqdm #進度條
import os #處理資料
from PIL import Image #圖片處理
# Preprocessing applied to every image, both for training and for inference.
data_transform = transforms.Compose([
    # Fixed 224x224 input: Net's first fully connected layer is sized for
    # exactly this resolution (224 / 2 / 2 / 2 = 28).
    transforms.Resize((224, 224)),
    transforms.ToTensor(),  # PIL image -> tensor
    # Per-channel mean/std conventionally used with ResNet-50 style models.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# Load the labelled images; ImageFolder derives one class per sub-folder.
dataset = datasets.ImageFolder('./music_classified', transform=data_transform)
# Split the labelled data: 85% for training, the remaining 15% held out.
train_size = int(0.85 * len(dataset))
test_size = len(dataset) - train_size
# NOTE(review): the split is not seeded, so it differs from run to run; pass
# a seeded torch.Generator to random_split if reproducibility is needed.
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])
# Mini-batches of 32 samples. Only the training data is shuffled: the order
# of the held-out samples has no influence on the accuracy computed later.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
# Model definition
class Net(nn.Module):
    """Small convolutional classifier for 3-channel 224x224 images.

    Three ``conv(3x3, padding=1) -> ReLU -> 2x2 max-pool`` stages
    (3 -> 32 -> 64 -> 128 channels) halve the spatial size three times
    (224 -> 112 -> 56 -> 28). The resulting 128x28x28 feature map is
    flattened and passed through two fully connected layers.

    Args:
        num_classes: Number of output logits. Defaults to 88, the value the
            original network was hard-coded to.
    """

    def __init__(self, num_classes: int = 88) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        # A single pooling layer is shared by all three stages (it has no
        # parameters).
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        # 128 channels * 28 * 28 positions -- only valid for 224x224 inputs.
        self.fc1 = nn.Linear(128 * 28 * 28, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw class logits of shape ``(batch, num_classes)``.

        Args:
            x: Image batch of shape ``(batch, 3, 224, 224)``.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce the
                ``128 * 28 * 28`` features ``fc1`` expects.
        """
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = self.pool(nn.functional.relu(self.conv3(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 128 * 28 * 28), this can never silently merge several
        # samples into one row when the input has an unexpected size; the
        # mismatch surfaces as an error in fc1 instead.
        x = x.flatten(1)
        x = nn.functional.relu(self.fc1(x))
        # No softmax here: the logits are meant for nn.CrossEntropyLoss.
        return self.fc2(x)
# Build the network and the loss function.
model = Net()
criterion = nn.CrossEntropyLoss()
# Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')
# Move both the network and the loss module to the selected device.
model, criterion = model.to(device), criterion.to(device)
# AdamW with its default hyper-parameters.
optimizer = optim.AdamW(model.parameters())
# Training: switch to training mode once, then run a fixed number of epochs.
model.train()
num_epochs = 10
for epoch in tqdm(range(num_epochs)):
    running_loss = 0.0  # sum of the per-batch losses of this epoch
    for inputs, labels in train_loader:
        # The batch has to live on the same device as the model.
        inputs = inputs.to(device)
        labels = labels.to(device)
        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()
        # Forward pass, loss, backward pass, parameter update.
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean batch loss of the epoch (epochs are printed 1-based).
    epoch_loss = running_loss / len(train_loader)
    print('Epoch {} loss: {:.4f}'.format(epoch+1, epoch_loss))
# Evaluate the trained model on the held-out split.
model.eval()  # evaluation mode
correct, total = 0, 0
with torch.no_grad():  # pure prediction, no gradients needed
    for images, labels in test_loader:
        # Move the batch to the device the model lives on.
        images, labels = images.to(device), labels.to(device)
        logits = model(images)
        # torch.max over dim 1 returns (values, indices); the index of the
        # largest logit is the predicted class.
        predicted = torch.max(logits.data, 1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
# Accuracy on the held-out split, as a percentage.
accuracy = 100 * correct / total
print('Test accuracy: {:.2f}%'.format(accuracy))

# Collect the images in the music_test folder.
exam_dir = './music_test'
exam_imgs = os.listdir(exam_dir)
# Predict a label for every image in music_test and collect
# [filename, category] rows in `results`.
model.eval()  # evaluation mode
results = []
with torch.no_grad():  # pure prediction, no gradients needed
    for img_name in exam_imgs:
        # Load the file and force three channels, as the transform and the
        # network expect RGB input.
        picture = Image.open(os.path.join(exam_dir, img_name)).convert('RGB')
        # Same preprocessing as for training, plus a leading batch dimension.
        batch = data_transform(picture).unsqueeze(0).to(device)
        _, predicted = torch.max(model(batch).data, 1)
        class_index = predicted.cpu().numpy().tolist()[0]
        # dataset.classes maps the predicted index back to the class
        # (folder) name of the training data.
        results.append([img_name, dataset.classes[class_index]])
# Write the predictions to ans.csv (without the DataFrame index column).
df = pd.DataFrame(results, columns=['filename', 'category'])
df.to_csv('ans.csv', index=False)