In [1]:
import numpy as np 
import PIL as Image
import torch 
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [12]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # resize ảnh về kích thước cố định
    transforms.ToTensor(),          # chuyển ảnh thành tensor [0,1]
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # chuẩn hóa
])

train_dataset = datasets.ImageFolder(root=r'D:\classified_music\mel-images\train', transform=transform)
val_dataset = datasets.ImageFolder(root=r'D:\classified_music\mel-images\val', transform=transform)

In [13]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)


In [15]:

image,label = train_dataset[0]

In [16]:
image.size()

torch.Size([3, 224, 224])

In [17]:
class_names = train_dataset.classes
print(class_names)


['TruTinh', 'bolero', 'cailuong', 'cheo', 'danca']


In [39]:
class NeueralNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 12, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(12, 24, 5)
        self.fc1 = nn.Linear(24 * 53 * 53, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 7)  # <--- CHỈNH SỬA Ở ĐÂY

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [40]:
net  = NeueralNet()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001,momentum=0.9)


In [43]:
for epoch in range(30):
    print(f"Training epoch {epoch}.....")
    runing_loss = 0.0
    for i,data in enumerate(train_loader):
        inputs,label = data
        optimizer.zero_grad()
        out_put = net(inputs)
        loss= loss_function(out_put,label)
        loss.backward()
        optimizer.step()
        running_loss = loss.item()
        
    print(f'Loss:{running_loss/len(train_loader):.4f}')

Training epoch 0.....
Loss:0.0147
Training epoch 1.....
Loss:0.0110
Training epoch 2.....
Loss:0.0116
Training epoch 3.....
Loss:0.0104
Training epoch 4.....
Loss:0.0057
Training epoch 5.....
Loss:0.0057
Training epoch 6.....
Loss:0.0065
Training epoch 7.....
Loss:0.0066
Training epoch 8.....
Loss:0.0025
Training epoch 9.....
Loss:0.0057
Training epoch 10.....
Loss:0.0035
Training epoch 11.....
Loss:0.0022
Training epoch 12.....
Loss:0.0012
Training epoch 13.....
Loss:0.0011
Training epoch 14.....
Loss:0.0011
Training epoch 15.....
Loss:0.0005
Training epoch 16.....
Loss:0.0004
Training epoch 17.....
Loss:0.0002
Training epoch 18.....
Loss:0.0002
Training epoch 19.....
Loss:0.0003
Training epoch 20.....
Loss:0.0000
Training epoch 21.....
Loss:0.0000
Training epoch 22.....
Loss:0.0001
Training epoch 23.....
Loss:0.0000
Training epoch 24.....
Loss:0.0000
Training epoch 25.....
Loss:0.0000
Training epoch 26.....
Loss:0.0000
Training epoch 27.....
Loss:0.0000
Training epoch 28.....
Loss:0.