### Import Packages

In [13]:
import os
from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as functional
from sklearn.model_selection import train_test_split

### Load Data

In [14]:
def load_data(base_dirs):
    all_data = []
    all_labels = []
    
    # 遍歷每個目錄
    for label, directory in enumerate(base_dirs):
        dir_path = Path(directory)
        print(f"Processing directory: {dir_path} (Label: {label})")
        
        # 取得目錄下所有的 .npy 檔案
        npy_files = list(dir_path.glob("*.npy"))
        
        for npy_file in npy_files:
            try:
                # 載入 npy 檔案
                data = np.load(npy_file)
                
                # 確保數據形狀正確
                if len(data.shape) == 3:  # 如果是單張圖片 (H, W, C)
                    data = data.transpose(2, 0, 1)  # 轉換為 (C, H, W)
                    data = np.expand_dims(data, 0)  # 添加批次維度 (1, C, H, W)
                elif len(data.shape) == 4:  # 如果是多張圖片 (N, H, W, C)
                    data = data.transpose(0, 3, 1, 2)  # 轉換為 (N, C, H, W)
                
                # 將數據和標籤加入列表
                all_data.append(data)
                all_labels.extend([label] * len(data))
                
                print(f"Loaded {npy_file.name}: Shape {data.shape}")
                
            except Exception as e:
                print(f"Error loading {npy_file}: {str(e)}")
    
    # 將所有數據合併成一個 numpy array
    all_data = np.concatenate(all_data, axis=0)
    all_labels = np.array(all_labels)
    
    # 正規化數據到 [0, 1] 區間
    all_data = all_data.astype(np.float32) / 255.0
    
    print(f"\nFinal dataset shape: {all_data.shape}")
    print(f"Labels shape: {all_labels.shape}")
    print(f"Unique labels: {np.unique(all_labels)}")
    
    return all_data, all_labels

In [15]:
class MelDataset(Dataset):
    def __init__(self, data, labels):
        self.data = torch.FloatTensor(data)
        self.labels = torch.LongTensor(labels)
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

### Model

In [16]:
class MelClassifier(nn.Module):
    def __init__(self, input_channels):
        super(MelClassifier, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(input_channels, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 4 * 4, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 3)  # 3個類別
        )
        
    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

In [None]:
# Hyperparameters
learning_rate = 0.005
num_epochs = 20
batch_size = 64

In [18]:
class MelDataset(Dataset):
    def __init__(self, data, labels):
        self.data = torch.FloatTensor(data)
        self.labels = torch.LongTensor(labels)
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

### Training

In [19]:
def train_model(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
    
    return running_loss / len(train_loader), 100. * correct / total

def evaluate_model(model, test_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    
    return running_loss / len(test_loader), 100. * correct / total
    

In [None]:
# label of each directory: 0, 1, 2
base_dirs = ["../human/mixed", "../machine/mixed", "../nature/mixed"]

# load data
data, labels = load_data(base_dirs)

# train, test split
X_train, X_test, y_train, y_test = train_test_split(
        data, labels, test_size=0.2, random_state=42, stratify=labels)
train_dataset = MelDataset(X_train, y_train)
test_dataset = MelDataset(X_test, y_test)
    
train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size, shuffle=False)

input_channel = data.shape[1]
model = MelClassifier(input_channel)
criterion = nn.CrossEntropyLoss
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

train_losses = []
train_accs = []
test_losses = []
test_accs = []
for epoch in range(num_epochs):
        train_loss, train_acc = train_model(model, train_loader, criterion, optimizer, device)
        test_loss, test_acc = evaluate_model(model, test_loader, criterion, device)
        
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        test_losses.append(test_loss)
        test_accs.append(test_acc)
        
        print(f'Epoch [{epoch+1}/{num_epochs}]')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')
    


Processing directory: ..\human\mixed (Label: 0)
Loaded 1013.npy: Shape (128, 157)
Loaded 1016.npy: Shape (128, 157)
Loaded 1038.npy: Shape (128, 157)
Loaded 104.npy: Shape (128, 157)
Loaded 1053.npy: Shape (128, 157)
Loaded 1054.npy: Shape (128, 157)
Loaded 1055.npy: Shape (128, 157)
Loaded 1085.npy: Shape (128, 157)
Loaded 1163.npy: Shape (128, 157)
Loaded 1177.npy: Shape (128, 157)
Loaded 1181.npy: Shape (128, 157)
Loaded 1191.npy: Shape (128, 157)
Loaded 1207.npy: Shape (128, 157)
Loaded 1213.npy: Shape (128, 157)
Loaded 1217.npy: Shape (128, 157)
Loaded 1234.npy: Shape (128, 157)
Loaded 1260.npy: Shape (128, 157)
Loaded 1263.npy: Shape (128, 157)
Loaded 1312.npy: Shape (128, 157)
Loaded 1334.npy: Shape (128, 157)
Loaded 1343.npy: Shape (128, 157)
Loaded 1348.npy: Shape (128, 157)
Loaded 1358.npy: Shape (128, 157)
Loaded 1365.npy: Shape (128, 157)
Loaded 1367.npy: Shape (128, 157)
Loaded 1371.npy: Shape (128, 157)
Loaded 1398.npy: Shape (128, 157)
Loaded 1406.npy: Shape (128, 157)
L

KeyboardInterrupt: 