### Import Packages

In [1]:
import os
from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as functional
from sklearn.model_selection import train_test_split

### Load Data

In [2]:
def load_data(base_dirs, stack):
    all_data = []
    all_labels = []
    
    # 遍歷每個目錄
    for label, directory in enumerate(base_dirs):
        dir_path = Path(directory)
        print(f"Processing directory: {dir_path} (Label: {label})")
        
        # 取得目錄下所有的 .npy 檔案
        npy_files = list(dir_path.glob("*.npy"))
        
        for npy_file in npy_files:
            try:
                # 載入 npy 檔案
                data = np.load(npy_file)
                
                # 檢查數據是否是 2D
                if len(data.shape) != 2:
                    raise ValueError(f"Expected 2D data, got shape {data.shape}")
                if data.shape[1] != 157:
                    data = data[:, 0:157]
                # 將數據和標籤加入列表
                all_data.append(data)
                all_labels.append(label)  # 每個檔案對應一個標籤
                
                print(f"Loaded {npy_file.name}: Shape {data.shape}")
                
            except Exception as e:
                print(f"Error loading {npy_file}: {str(e)}")

        
        all_data = np.stack(all_data, axis=0)  # (N, H, W)
        all_labels = np.array(all_labels)  # (N,)
    
        all_data = np.expand_dims(all_data, axis=1)
    # 正規化數據到 [0, 1] 區間
    # all_data = all_data.astype(np.float32) / 255.0
    print(f"\nFinal dataset shape: {all_data.shape}")
    print(f"Labels shape: {all_labels.shape}")
    print(f"Unique labels: {np.unique(all_labels)}")
    
    return all_data, all_labels

In [3]:
class MelDataset(Dataset):
    def __init__(self, data, labels):
        self.data = torch.FloatTensor(data)
        self.labels = torch.LongTensor(labels)
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

### Model

In [4]:
class MelClassifier(nn.Module):
    def __init__(self, input_channels):
        super(MelClassifier, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(input_channels, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        
        self.classifier = None  # 延遲初始化
        
    def forward(self, x):
        x = self.features(x)
        if self.classifier is None:  # 第一次執行 forward 時初始化
            num_features = x.view(x.size(0), -1).size(1)
            self.classifier = nn.Sequential(
                nn.Flatten(),
                nn.Linear(num_features, 512),
                nn.ReLU(),
                nn.Dropout(0.5),
                nn.Linear(512, 3)
            ).to(x.device)
        x = self.classifier(x)
        return x

In [5]:
# Hyperparameters
learning_rate = 0.005
num_epochs = 20
batch_size = 64

In [6]:
class MelDataset(Dataset):
    def __init__(self, data, labels):
        self.data = torch.FloatTensor(data)
        self.labels = torch.LongTensor(labels)
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

### Training

In [7]:
def train_model(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        # print(f"Model output shape: {outputs.shape}")
        # print(f"Labels shape: {labels.shape}")
        loss = criterion(outputs, labels)
        
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
    
    return running_loss / len(train_loader), 100. * correct / total



def evaluate_model(model, test_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    
    return running_loss / len(test_loader), 100. * correct / total
    

In [8]:
def save_classified_results_npy(model, test_data, output_dirs, device):
    """
    保存分類結果為 .npy 檔案到不同資料夾
    """
    model.eval()
    os.makedirs(output_dirs[0], exist_ok=True)  # human_output
    os.makedirs(output_dirs[1], exist_ok=True)  # machine_output
    os.makedirs(output_dirs[2], exist_ok=True)  # nature_output
    
    for idx, sample in enumerate(test_data):
            # 增加 batch 和通道維度 (1, C, H, W)
            # sample = np.expand_dims(sample, axis=0)  # (1, H, W)
            sample = np.expand_dims(sample, axis=1)  # (1, 1, H, W)
            print(sample.shape)
            sample = torch.FloatTensor(sample).to(device)
            
            # 推理
            output = model(sample)
            predicted = output.argmax(dim=1).item()
            
            # 保存結果
            output_dir = output_dirs[predicted]
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            np.save(f"{output_dir}/sample_{idx}.npy", sample.cpu().numpy())

In [9]:
# label of each directory: 0, 1, 2
base_dirs = ["../human/mixed", "../machine/mixed", "../nature/mixed"]

# load data
data, labels = load_data(base_dirs, True)

# train, test split
X_train, X_test, y_train, y_test = train_test_split(
        data, labels, test_size=0.2, random_state=42, stratify=labels)
train_dataset = MelDataset(X_train, y_train)
test_dataset = MelDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size, shuffle=False)


Processing directory: ..\human\mixed (Label: 0)
Loaded 0.npy: Shape (128, 157)
Loaded 1.npy: Shape (128, 157)
Loaded 10.npy: Shape (128, 157)
Loaded 100.npy: Shape (128, 157)
Loaded 1000.npy: Shape (128, 157)
Loaded 1001.npy: Shape (128, 157)
Loaded 1002.npy: Shape (128, 157)
Loaded 1003.npy: Shape (128, 157)
Loaded 1004.npy: Shape (128, 157)
Loaded 1005.npy: Shape (128, 157)
Loaded 1006.npy: Shape (128, 157)
Loaded 1007.npy: Shape (128, 157)
Loaded 1008.npy: Shape (128, 157)
Loaded 1009.npy: Shape (128, 157)
Loaded 101.npy: Shape (128, 157)
Loaded 1010.npy: Shape (128, 157)
Loaded 1011.npy: Shape (128, 157)
Loaded 1012.npy: Shape (128, 157)
Loaded 1013.npy: Shape (128, 157)
Loaded 1014.npy: Shape (128, 157)
Loaded 1015.npy: Shape (128, 157)
Loaded 1016.npy: Shape (128, 157)
Loaded 1017.npy: Shape (128, 157)
Loaded 1018.npy: Shape (128, 157)
Loaded 1019.npy: Shape (128, 157)
Loaded 102.npy: Shape (128, 157)
Loaded 1020.npy: Shape (128, 157)
Loaded 1021.npy: Shape (128, 157)
Loaded 1022

In [10]:
# for train_image, labels in train_loader:
#     print(train_image.shape)
#     break

In [11]:

input_channel = data.shape[1]
model = MelClassifier(input_channel)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_losses = []
train_accs = []
test_losses = []
test_accs = []
for epoch in range(num_epochs):
        train_loss, train_acc = train_model(model, train_loader, criterion, optimizer, device)
        test_loss, test_acc = evaluate_model(model, test_loader, criterion, device)
        
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        test_losses.append(test_loss)
        test_accs.append(test_acc)
        
        print(f'Epoch [{epoch+1}/{num_epochs}]')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')

final_test_loss, final_test_acc = evaluate_model(model, test_loader, criterion, device)
print(f"\nFinal Test Accuracy: {final_test_acc:.2f}%")
print(f"Final Test Loss: {final_test_loss:.4f}")


    


RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [64, 1, 1, 1, 128, 157]

In [None]:
# 保存測試分類結果
test_data_dir = ["../test_data"]
test_data, _ = load_data(test_data_dir, False)
output_dirs = {0: "human_output", 1: "machine_output", 2: "nature_output"}
save_classified_results_npy(model, test_data, output_dirs, device)

Processing directory: ..\test_data (Label: 0)
Loaded 1034-121119-0049.npy: Shape (128, 157)
Loaded 1040-133433-0080.npy: Shape (128, 157)
Loaded 1081-125237-0053.npy: Shape (128, 157)
Loaded 1081-125237-0085.npy: Shape (128, 157)
Loaded 1183-133256-0005.npy: Shape (128, 157)
Loaded 1246-124550-0005.npy: Shape (128, 157)
Loaded 125-121124-0076.npy: Shape (128, 157)
Loaded 1334-135589-0002.npy: Shape (128, 157)
Loaded 1355-39947-0035.npy: Shape (128, 157)
Loaded 1578-140049-0004.npy: Shape (128, 157)
Loaded 1624-142933-0019.npy: Shape (128, 157)
Loaded 1898-145702-0006.npy: Shape (128, 157)
Loaded 19-198-0034.npy: Shape (128, 157)
Loaded 196-122150-0032.npy: Shape (128, 157)
Loaded 198-126831-0000.npy: Shape (128, 157)
Loaded 200-126784-0026.npy: Shape (128, 157)
Loaded 2002-139469-0093.npy: Shape (128, 157)
Loaded 2007-132570-0059.npy: Shape (128, 157)
Loaded 2092-145706-0067.npy: Shape (128, 157)
Loaded 2136-5143-0025.npy: Shape (128, 157)
Loaded 2136-5143-0028.npy: Shape (128, 157)
Lo

In [None]:
torch.save(model, "classify_model.pth") # TODO