#### 基本設置 + 設cuda

In [1]:
# Load in relevant libraries, and alias where appropriate
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# Hyperparameters shared by the data loaders, the model and the training loop.
batch_size, num_classes = 64, 2
learning_rate, num_epochs = 1e-3, 20

# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


#### 分割資料集

In [7]:
import torchvision.datasets as dset

# Preprocessing pipeline: resize every image to 128x480, then convert it to a tensor.
resize_step = transforms.Resize((128, 480))
to_tensor_step = transforms.ToTensor()
transform = transforms.Compose([resize_step, to_tensor_step])

# Load the image dataset from the project folder; only this root folder is considered.
dataset = dset.ImageFolder(root='D:/Project/CH_AiSound', transform=transform)

# Report, in order: the class names (taken from the sub-folder names), the
# class -> index mapping (0, 1, ...), and the number of samples in the dataset.
for dataset_info in (dataset.classes, dataset.class_to_idx, len(dataset)):
    print(dataset_info)

['fake', 'real']
{'fake': 0, 'real': 1}
778


In [8]:
from torch.utils.data import DataLoader, SubsetRandomSampler, ConcatDataset

# Fraction of the dataset held out for validation, and the seed that fixes
# which samples end up in each subset.
valid_split = 0.25
split_seed = 42

# Derive the subset sizes; the training set takes whatever the held-out set does not.
dataset_size = len(dataset)
valid_size = int(valid_split * dataset_size)
train_size = dataset_size - valid_size

# Use a dedicated, seeded generator so the split is identical on every run.
# Without it each kernel restart reshuffles the samples, which makes validation
# accuracy incomparable between runs and lets a previously saved model be
# evaluated on images it was trained on.
split_generator = torch.Generator().manual_seed(split_seed)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, valid_size], generator=split_generator
)

# Build the loaders. Only the training loader is shuffled; `test_loader` wraps
# the held-out subset that the notebook uses for validation.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
print(len(train_loader))
print(len(test_loader))

10
4


#### 創立model

In [9]:
# Create CNN Model
class CNN_Model(nn.Module):
    """Three-stage convolutional classifier for fixed-size RGB images.

    Each stage is Conv2d -> ReLU -> 2x2 pooling (max, average, max). The pooled
    feature maps are flattened and passed through three fully connected layers
    that emit one raw score per class.

    Args:
        num_classes: Number of output scores produced by the last layer.
            Defaults to 2.
        input_size: ``(height, width)`` of the images the model will receive.
            Defaults to ``(128, 480)``, which yields ``32 * 13 * 57`` flattened
            features going into ``fc1``.
    """

    def __init__(self, num_classes=2, input_size=(128, 480)):
        super().__init__()
        # Convolution 1 (shape comments below are for the default 3x128x480 input)
        self.cnn1 = nn.Conv2d(in_channels=3, out_channels=25, kernel_size=3, stride=1, padding=0)  # -> (25,126,478)
        self.relu1 = nn.ReLU()
        # Max pool 1
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)  # -> (25,63,239)
        # Convolution 2
        self.cnn2 = nn.Conv2d(in_channels=25, out_channels=32, kernel_size=6, stride=1, padding=0)  # -> (32,58,234)
        self.relu2 = nn.ReLU()
        # Avg pool 1
        self.avgpool1 = nn.AvgPool2d(kernel_size=2)  # -> (32,29,117)
        # Convolution 3
        self.cnn3 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=4, stride=1, padding=0)  # -> (32,26,114)
        self.relu3 = nn.ReLU()
        # Max pool 2
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)  # -> (32,13,57)

        # Measure the flattened feature count by pushing one all-zero image
        # through the convolutional stack, instead of hard-coding 32 * 13 * 57.
        # This draws no random numbers, so weight initialisation is unaffected.
        with torch.no_grad():
            probe = torch.zeros(1, 3, *input_size)
            flat_features = self._extract_features(probe).size(1)

        # Fully connected 1
        self.fc1 = nn.Linear(flat_features, 1024)
        # Fully connected 2
        self.fc2 = nn.Linear(1024, 64)
        # Fully connected 3: one output per class
        self.fc3 = nn.Linear(64, num_classes)

    def _extract_features(self, x):
        """Run the three conv/activation/pool stages and flatten each sample to a vector."""
        out = self.maxpool1(self.relu1(self.cnn1(x)))
        out = self.avgpool1(self.relu2(self.cnn2(out)))
        out = self.maxpool2(self.relu3(self.cnn3(out)))
        # Keep the batch dimension and collapse channels/height/width.
        return out.view(out.size(0), -1)

    def forward(self, x):
        """Return a ``(batch, num_classes)`` tensor of raw class scores for ``x``."""
        out = self._extract_features(x)
        # NOTE(review): there is no activation between the fully connected
        # layers, so together they compose into a single linear map. Left as-is
        # to keep the behaviour of already-trained weights; consider adding
        # non-linearities (and retraining) if more capacity is intended.
        out = self.fc1(out)
        out = self.fc2(out)
        out = self.fc3(out)
        return out

In [13]:
import torchinfo

# Instantiate the network once just to list its layers and parameter counts.
model = CNN_Model()
print(f"Model's structure:\n {torchinfo.summary(model)}")

Model's structure:
Layer (type:depth-idx)                   Param #
CNN_Model                                --
├─Conv2d: 1-1                            700
├─ReLU: 1-2                              --
├─MaxPool2d: 1-3                         --
├─Conv2d: 1-4                            28,832
├─ReLU: 1-5                              --
├─AvgPool2d: 1-6                         --
├─Conv2d: 1-7                            16,416
├─ReLU: 1-8                              --
├─MaxPool2d: 1-9                         --
├─Linear: 1-10                           24,282,112
├─Linear: 1-11                           65,600
├─Linear: 1-12                           130
Total params: 24,393,790
Trainable params: 24,393,790
Non-trainable params: 0


#### 訓練模型

In [18]:
import time


def evaluate_accuracy(net, loader, device):
    """Return the fraction of samples in ``loader`` that ``net`` classifies correctly.

    Gradients are disabled while scoring. The train/eval mode of ``net`` is not
    changed here; the caller decides which mode to evaluate in.

    Args:
        net: Model that maps a batch of inputs to per-class scores.
        loader: DataLoader yielding ``(inputs, labels)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Number of correct predictions divided by ``len(loader.dataset)``.
    """
    correct = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            # The predicted class is the index of the highest score.
            predictions = net(images).argmax(dim=1)
            correct += (predictions == labels).sum().item()
    return correct / len(loader.dataset)


# Move model to "device".
model = CNN_Model().to(device)
# File name.
modelSaveName = "MyModel.pt"
# Create an optimizer.
optimizer = torch.optim.AdamW(params=model.parameters(), lr=learning_rate)
# Create a loss function
lossFunc = nn.CrossEntropyLoss()

# Accuracy is measured (and the best model checkpointed) every `eval_interval` epochs.
eval_interval = 5
num_train_samples = len(train_loader.dataset)

bestValidAcc = 0
for epoch in range(1, num_epochs+1):
    start_time = time.time()  # start of this epoch
    # Training mode.
    model.train()
    train_loss = 0.0
    # `images`/`labels` are used instead of `input`/`iter` so the builtins of
    # the same name are not shadowed for the rest of the notebook.
    for images, labels in train_loader:
        # Move data to "device".
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass and loss.
        logits = model(images)
        loss = lossFunc(logits, labels)
        # Clear old gradients, backpropagate, update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Accumulate the summed (not averaged) loss of this batch.
        train_loss += loss.item() * images.size(0)

    # Average loss per training sample.
    train_loss = train_loss / num_train_samples

    if epoch % eval_interval == 0:
        # Evaluation mode.
        model.eval()
        trainAcc = evaluate_accuracy(model, train_loader, device)
        validAcc = evaluate_accuracy(model, test_loader, device)

        # The reported time for these epochs includes the two evaluation passes.
        epoch_time = time.time() - start_time
        print("Epoch:{}/{}, Time:{:.2f} seconds, Train acc:{}, Train Loss: {:.4f}, Valid acc:{}".format(epoch, num_epochs, epoch_time, trainAcc, train_loss, validAcc))
        if validAcc > bestValidAcc:
            bestValidAcc = validAcc
            # NOTE(review): this pickles the whole model object, which ties the
            # file to this class definition. Saving `model.state_dict()` is more
            # portable, but the format is kept so existing loading code still works.
            torch.save(model, modelSaveName)
    else:
        epoch_time = time.time() - start_time
        print("Epoch:{}/{}, Time:{:.2f} seconds, Train Loss: {:.4f}".format(epoch, num_epochs, epoch_time, train_loss))

print("best valid acc:",bestValidAcc)

Epoch:1/20, Time:18.45 seconds, Train Loss: 0.8888
Epoch:2/20, Time:18.04 seconds, Train Loss: 0.6686
Epoch:3/20, Time:17.23 seconds, Train Loss: 0.6333
Epoch:4/20, Time:17.35 seconds, Train Loss: 0.4957
Epoch:5/20, Time:39.11 seconds, Train acc:0.9811643835616438, Train Loss: 0.2327, Valid acc:0.9948453608247423
Epoch:6/20, Time:17.39 seconds, Train Loss: 0.0638
Epoch:7/20, Time:17.60 seconds, Train Loss: 0.0644
Epoch:8/20, Time:17.04 seconds, Train Loss: 0.3024
Epoch:9/20, Time:17.26 seconds, Train Loss: 0.0391
Epoch:10/20, Time:39.43 seconds, Train acc:0.9982876712328768, Train Loss: 0.0350, Valid acc:0.979381443298969
Epoch:11/20, Time:17.11 seconds, Train Loss: 0.0127
Epoch:12/20, Time:17.64 seconds, Train Loss: 0.0081
Epoch:13/20, Time:17.47 seconds, Train Loss: 0.0042
Epoch:14/20, Time:17.13 seconds, Train Loss: 0.0040
Epoch:15/20, Time:40.94 seconds, Train acc:1.0, Train Loss: 0.0029, Valid acc:0.9896907216494846
Epoch:16/20, Time:17.87 seconds, Train Loss: 0.0015
Epoch:17/20, 

KeyboardInterrupt: 