# Import Library

In [None]:
import cv2
import glob
import numpy as np
import matplotlib.pyplot as plt
from skimage.transform import resize
from IPython.display import clear_output
from matplotlib.pyplot import imshow
import pandas as pd
from sklearn.model_selection import train_test_split
import datetime
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchinfo import summary
from tqdm import tqdm
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from torchvision.transforms import ToTensor
import wandb
from sklearn.metrics import confusion_matrix
import random
import os
from sklearn.preprocessing import OneHotEncoder
from torchvision import transforms

  check_for_updates()


In [2]:
import kagglehub

path = kagglehub.dataset_download("pypiahmad/realistic-action-recognition-ucf50-dataset")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/realistic-action-recognition-ucf50-dataset


In [3]:
output_file = "/kaggle/working/train.txt"
ucf11 = r"/kaggle/input/realistic-action-recognition-ucf50-dataset/UCF11_updated_mpg"
ucf101 = "/kaggle/input/ucf101/UCF101"
with open(output_file, "w") as f:
    x = -1
    for action_folder in os.listdir(ucf11):
        action_folder_path = os.path.join(ucf11, action_folder)
        x +=1
        if os.path.isdir(action_folder_path):
            print(action_folder_path)
            for group_folder in os.listdir(action_folder_path):
                
                group_folder_path = os.path.join(action_folder_path, group_folder)
                
                if os.path.isdir(group_folder_path):
                    # Duyệt qua các video trong nhóm
                    for video_file in os.listdir(group_folder_path):
                        video_file_path = os.path.join(group_folder_path, video_file)
                        if video_file.endswith(".mpg") or video_file.endswith(".avi"):
                            f.write(f"{video_file_path} {x}\n")

/kaggle/input/realistic-action-recognition-ucf50-dataset/UCF11_updated_mpg/biking
/kaggle/input/realistic-action-recognition-ucf50-dataset/UCF11_updated_mpg/trampoline_jumping
/kaggle/input/realistic-action-recognition-ucf50-dataset/UCF11_updated_mpg/swing
/kaggle/input/realistic-action-recognition-ucf50-dataset/UCF11_updated_mpg/walking
/kaggle/input/realistic-action-recognition-ucf50-dataset/UCF11_updated_mpg/golf_swing
/kaggle/input/realistic-action-recognition-ucf50-dataset/UCF11_updated_mpg/soccer_juggling
/kaggle/input/realistic-action-recognition-ucf50-dataset/UCF11_updated_mpg/tennis_swing
/kaggle/input/realistic-action-recognition-ucf50-dataset/UCF11_updated_mpg/volleyball_spiking
/kaggle/input/realistic-action-recognition-ucf50-dataset/UCF11_updated_mpg/basketball
/kaggle/input/realistic-action-recognition-ucf50-dataset/UCF11_updated_mpg/horse_riding
/kaggle/input/realistic-action-recognition-ucf50-dataset/UCF11_updated_mpg/diving


In [4]:
train_df = pd.read_csv("train.txt",sep = " ",header = None,names = ['path','class'])
train_df['path'] = train_df['path'].str.replace(ucf11, '', regex=False).str.lstrip("\\")
train_df

Unnamed: 0,path,class
0,/biking/v_biking_05/v_biking_05_05.mpg,0
1,/biking/v_biking_05/v_biking_05_04.mpg,0
2,/biking/v_biking_05/v_biking_05_08.mpg,0
3,/biking/v_biking_05/v_biking_05_01.mpg,0
4,/biking/v_biking_05/v_biking_05_02.mpg,0
...,...,...
1595,/diving/v_diving_21/v_diving_21_02.mpg,10
1596,/diving/v_diving_18/v_diving_18_04.mpg,10
1597,/diving/v_diving_18/v_diving_18_01.mpg,10
1598,/diving/v_diving_18/v_diving_18_03.mpg,10


In [5]:
class_counts = train_df['class'].value_counts()
print(class_counts)

class
9     198
6     167
10    156
5     156
0     145
4     142
8     141
2     137
3     123
1     119
7     116
Name: count, dtype: int64


In [None]:
def prepareData2(video_path, _resize, num_classes, num_frames):
    content = []
    
    for i in range(6,num_classes):
        df_temp = train_df[train_df['class'] == i]  
        if not df_temp.empty: 
            path = df_temp['path'].tolist() 
            content.extend(path) 
        else:
            print(f"Class {i} fail")

    if len(content) == 0:
        raise ValueError("None video found!")

    content = np.array(content) 
    videos = []

    for j in range(len(content)):
        print(f"Processing {np.round(100 * j / len(content), 3)}%: {content[j]}")
        x = video_path + '/' + content[j]
        vcap = cv2.VideoCapture(x)
        total_frames = int(vcap.get(cv2.CAP_PROP_FRAME_COUNT))

        if total_frames < num_frames:
            print("total frames < num frames")
            selected_indices = range(total_frames)  
        else:
            selected_indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)  

        frames = []
        count = 0
        success = True
        while success:
            success, image = vcap.read()
            if not success:
                break
            if count in selected_indices:
                try:
                    image = resize(image, _resize)  
                    frames.append(image)
                except Exception as e:
                    print(f"Error in frame {count} in video {content[j]}: {e}")
            count += 1

        if len(frames) == num_frames: 
            videos.append(frames)
        else:
            print(f"Video {content[j]} not enough frame")

        vcap.release()
    videos = np.array(videos)
    print(f"Shape của videos: {videos.shape}")

    y = []
    for i in range(6, num_classes):
        count_videos = len(train_df[train_df['class'] == i])
        y.extend([i] * count_videos)
    y = np.array(y[:len(videos)]) 
    print(f"Shape của labels: {y.shape}")
    return videos, y


In [None]:
def prepareData(video_path, _resize, num_classes, num_frames):
    content = []
    
    for i in range(num_classes):
        df_temp = train_df[train_df['class'] == i]  
        if not df_temp.empty: 
            path = df_temp['path'].tolist() 
            content.extend(path)  
        else:
            print(f"Class {i} fail")

    content = np.array(content)  
    videos = []

    for j in range(len(content)):
        
        print(np.round(100 * j / len(content), 3))
        x = video_path + '/' + content[j]
        print(x)
        vcap = cv2.VideoCapture(x)
        total_frames = int(vcap.get(cv2.CAP_PROP_FRAME_COUNT))
        selected_indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
        success = True

        frames = []
        count = 0
        while success:
            try:
                success, image = vcap.read()
                count += 1
                if count in selected_indices:
                    print(image.shape)
                    image = resize(image, _resize)
                    print(image.shape)
                    frames.append(image)
                    print(f"Number of images per frame: {len(frames)}")
            except Exception as e:
                print(e)

        videos.append(frames)
        print(f"Number of videos: {len(videos)}")
    vcap.release()

    videos = np.array(videos)
    print(videos.shape)
    videos = videos.reshape((videos.shape[0], 3, videos.shape[1], videos.shape[2], videos.shape[3]))

    y = np.array([i for i in range(num_classes) for _ in range(len(train_df[train_df['class'] == i]))])
    y = y.reshape(-1)
    print(y.shape)
    return videos, y

In [8]:
VIDEO_PATH = ucf11
RESIZE = (64,64)
NUM_CLASSES = 101
NUM_FRAMES = 17
BATCH_SIZE = 16
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {DEVICE}")

Using device: cuda


In [None]:
class PrepareData(Dataset):
    def __init__(self, data, target, transform=None):
        self.data = data
        self.target = target
        self.transform = transform
        self.target = target

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        data = self.data[index]
        target = self.target[index]
    
        if isinstance(data, torch.Tensor):
            data = data.numpy()
        elif not isinstance(data, np.ndarray):
            data = np.array(data)
    
        data = torch.tensor(data, dtype=torch.float)
    
        if self.transform:
            data = self.transform(data) 
        data = data.squeeze()
        return data, target

In [10]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("wandb_api_key")


In [11]:
import wandb
wandb.login(key=secret_value_0)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [12]:
wandb.init(
    project = "Project_DL"
)

[34m[1mwandb[0m: Currently logged in as: [33mnguyenhoangson_hsgs19[0m ([33mnguyenhoangson_hsgs19-hanoi-university-of-science-and-te[0m). Use [1m`wandb login --relogin`[0m to force relogin


# Prepare Data

In [16]:
data1, targets1 = prepareData(video_path=VIDEO_PATH,
                            _resize=RESIZE,
                            num_classes=6,
                            num_frames=NUM_FRAMES)

0.0
/kaggle/input/realistic-action-recognition-ucf50-dataset/UCF11_updated_mpg//biking/v_biking_05/v_biking_05_05.mpg
(240, 320, 3)
(64, 64, 3)
Number of images per frame: 1
(240, 320, 3)
(64, 64, 3)
Number of images per frame: 2
(240, 320, 3)
(64, 64, 3)
Number of images per frame: 3
(240, 320, 3)
(64, 64, 3)
Number of images per frame: 4
(240, 320, 3)
(64, 64, 3)
Number of images per frame: 5
(240, 320, 3)
(64, 64, 3)
Number of images per frame: 6
(240, 320, 3)
(64, 64, 3)
Number of images per frame: 7
(240, 320, 3)
(64, 64, 3)
Number of images per frame: 8
(240, 320, 3)
(64, 64, 3)
Number of images per frame: 9
(240, 320, 3)
(64, 64, 3)
Number of images per frame: 10
(240, 320, 3)
(64, 64, 3)
Number of images per frame: 11
(240, 320, 3)
(64, 64, 3)
Number of images per frame: 12
(240, 320, 3)
(64, 64, 3)
Number of images per frame: 13
(240, 320, 3)
(64, 64, 3)
Number of images per frame: 14
(240, 320, 3)
(64, 64, 3)
Number of images per frame: 15
(240, 320, 3)
(64, 64, 3)
Number of 

In [17]:
print(data1.shape)
print(targets1.shape)

(822, 3, 16, 64, 64)
(822,)


In [15]:
# np.save('/kaggle/working/data1.npy', data1)  
# np.save('/kaggle/working/targets1.npy', targets1)

In [18]:
data2, targets2 = prepareData2(video_path=VIDEO_PATH,
                            _resize=RESIZE,
                            num_classes=11,
                            num_frames=16)


Processing 0.0%: /tennis_swing/v_tennis_14/v_tennis_14_05.mpg
Processing 0.129%: /tennis_swing/v_tennis_14/v_tennis_14_06.mpg
Processing 0.257%: /tennis_swing/v_tennis_14/v_tennis_14_02.mpg
Processing 0.386%: /tennis_swing/v_tennis_14/v_tennis_14_03.mpg
Processing 0.514%: /tennis_swing/v_tennis_14/v_tennis_14_07.mpg
Processing 0.643%: /tennis_swing/v_tennis_14/v_tennis_14_04.mpg
Processing 0.771%: /tennis_swing/v_tennis_14/v_tennis_14_01.mpg
Processing 0.9%: /tennis_swing/v_tennis_05/v_tennis_05_05.mpg
Processing 1.028%: /tennis_swing/v_tennis_05/v_tennis_05_06.mpg
Processing 1.157%: /tennis_swing/v_tennis_05/v_tennis_05_07.mpg
Processing 1.285%: /tennis_swing/v_tennis_05/v_tennis_05_04.mpg
Processing 1.414%: /tennis_swing/v_tennis_05/v_tennis_05_02.mpg
Processing 1.542%: /tennis_swing/v_tennis_05/v_tennis_05_03.mpg
Processing 1.671%: /tennis_swing/v_tennis_05/v_tennis_05_01.mpg
Processing 1.799%: /tennis_swing/v_tennis_23/v_tennis_23_05.mpg
Processing 1.928%: /tennis_swing/v_tennis_23

In [20]:
print(data2.shape)
print(targets2.shape)

(775, 16, 64, 64, 3)
(775,)


In [21]:
data2 = np.transpose(data2, (0, 4, 1, 2, 3))
data2.shape

(775, 3, 16, 64, 64)

In [22]:
print(data1.shape)
print(data2.shape)

(822, 3, 16, 64, 64)
(775, 3, 16, 64, 64)


In [None]:
data = np.concatenate((data1, data2), axis=0)
targets = np.concatenate((targets1, targets2), axis=0)
print(data.shape)
print(targets.shape)

In [None]:
classes, counts = np.unique(targets, return_counts=True)

for class_id, count in zip(classes, counts):
    print(f"Class {class_id}: {count} samples")

In [None]:
class_counts = train_df['class'].value_counts()
print(class_counts)

In [None]:
class VideoNormalize:
    def __init__(self, mean, std):

        self.mean = torch.tensor(mean).view(1, -1, 1, 1, 1) 
        self.std = torch.tensor(std).view(1, -1, 1, 1, 1)

    def __call__(self, x):
        x = x.float()
        return (x - self.mean) / self.std


In [None]:
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

class ScaleTransform:
    def __call__(self, x):
        return x / 255.0
transform = transforms.Compose([
    ScaleTransform(),
    transforms.RandomHorizontalFlip(p=0.3) 
])

In [27]:
X_train, X_test, y_train, y_test = train_test_split(data, targets, shuffle=True, test_size = 0.2, random_state = 42, stratify= targets)


In [28]:
print(2)
train_dataset = PrepareData(X_train, y_train, transform=transform)
test_dataset = PrepareData(X_test, y_test, transform=transform)
print(1)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=True)

2
1


# Load Dataset

*Run from here***** 

In [30]:
# import torch
# torch.save(train_dataset, '/kaggle/working/train_dataset.pt')
# torch.save(test_dataset, '/kaggle/working/test_dataset.pt')

In [31]:
data, target = next(iter(train_loader))

print("Data shape:", data.shape)
print("Target shape:", target.shape)

Data shape: torch.Size([16, 3, 16, 64, 64])
Target shape: torch.Size([16])


# Create Model

In [33]:
class Conv3D(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(Conv3D, self).__init__()
        self.conv = nn.Conv3d(in_channels=in_channels, out_channels=out_channels, kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1), bias=False)
        self.bn = nn.BatchNorm3d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x

In [None]:
class ResBlock3D(nn.Module):
    def __init__(self, in_channels, mid_channels, out_channels, stride=1):
        super(ResBlock3D, self).__init__()
        self.conv1 = nn.Conv3d(in_channels, mid_channels, kernel_size=(1, 1, 1), stride=1, bias=False)
        self.bn1 = nn.BatchNorm3d(mid_channels)
        self.conv2 = nn.Conv3d(mid_channels, mid_channels, kernel_size=(3, 3, 3), stride=stride, padding=(1, 1, 1), bias=False)
        self.bn2 = nn.BatchNorm3d(mid_channels)
        self.conv3 = nn.Conv3d(mid_channels, out_channels, kernel_size=(1, 1, 1), stride=1, bias=False)
        self.bn3 = nn.BatchNorm3d(out_channels)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv3d(in_channels, out_channels, kernel_size=(1, 1, 1), stride=stride, bias=False),
                nn.BatchNorm3d(out_channels)
            )
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


In [None]:
class Bottleneck3D(nn.Module):
    def __init__(self, in_channels, mid_channels, out_channels, stride=1):
        super(Bottleneck3D, self).__init__()
        self.conv1 = nn.Conv3d(in_channels, mid_channels, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm3d(mid_channels)
        self.conv2 = nn.Conv3d(mid_channels, mid_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm3d(mid_channels)
        self.conv3 = nn.Conv3d(mid_channels, out_channels, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm3d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        self.downsample = None
        if stride != 1 or in_channels != out_channels:
            self.downsample = nn.Sequential(
                nn.Conv3d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm3d(out_channels)
            )

    def forward(self, x):
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        out += identity
        out = self.relu(out)
        return out

In [None]:
class SEBlock3D(nn.Module):
    def __init__(self, channels, reduction=16):
        super(SEBlock3D, self).__init__()
        self.fc1 = nn.Linear(channels, channels // reduction)
        self.fc2 = nn.Linear(channels // reduction, channels)

    def forward(self, x):
        B, C, T, H, W = x.size()
        y = x.mean(dim=(2, 3, 4))  
        y = torch.relu(self.fc1(y))
        y = torch.sigmoid(self.fc2(y))
        y = y.view(B, C, 1, 1, 1)
        return x * y

In [None]:
class BottleneckWithSE3D(Bottleneck3D):
    def __init__(self, in_channels, mid_channels, out_channels, stride=1):
        super().__init__(in_channels, mid_channels, out_channels, stride)
        self.se = SEBlock3D(out_channels)

    def forward(self, x):
        out = super().forward(x)
        out = self.se(out)
        return out

In [None]:
class Res1(nn.Module): # base model
    def __init__(self):
        super(Res1, self).__init__()
        self.layer = self._make_layer(in_channels=32, mid_channels=72, out_channels=162, num_blocks=5, stride=2)

    def _make_layer(self, in_channels, mid_channels, out_channels, num_blocks, stride):
        layers = []
        for i in range(num_blocks):
            if i == 0:
                layers.append(ResBlock3D(in_channels, mid_channels, out_channels, stride))
            else:
                layers.append(ResBlock3D(out_channels, mid_channels, out_channels, stride=1))
        return nn.Sequential(*layers)
    def forward(self, x):
        return self.layer(x)
class Res2(nn.Module):
    def __init__(self):
        super(Res2, self).__init__()
        self.layer = self._make_layer(in_channels=32, mid_channels=80, out_channels=200, num_blocks=10, stride=2)

    def _make_layer(self, in_channels, mid_channels, out_channels, num_blocks, stride):
        layers = []
        for i in range(num_blocks):
            if i == 0:
                layers.append(ResBlock3D(in_channels, mid_channels, out_channels, stride))
            else:
                layers.append(ResBlock3D(out_channels, mid_channels, out_channels, stride=1))
        return nn.Sequential(*layers)

    def forward(self, x):
        return self.layer(x)
class Res3(nn.Module):
    def __init__(self):
        super(Res3, self).__init__()
        self.layer = self._make_layer(in_channels=32, mid_channels=64, out_channels=128, num_blocks=5, stride=2)

    def _make_layer(self, in_channels, mid_channels, out_channels, num_blocks, stride):
        layers = []
        for i in range(num_blocks):
            if i == 0:
                layers.append(ResBlock3D(in_channels, mid_channels, out_channels, stride))
            else:
                layers.append(ResBlock3D(out_channels, mid_channels, out_channels, stride=1))
        return nn.Sequential(*layers)
    def forward(self, x):
        return self.layer(x)
class Res4(nn.Module):
    def __init__(self):
        super(Res4, self).__init__()
        self.layer = self._make_layer(in_channels=32, mid_channels=72, out_channels=162, num_blocks=5, stride=2)

    def _make_layer(self, in_channels, mid_channels, out_channels, num_blocks, stride):
        layers = []
        for i in range(num_blocks):
            if i == 0:
                layers.append(BottleneckWithSE3D(in_channels, mid_channels, out_channels, stride))
            else:
                layers.append(BottleneckWithSE3D(out_channels, mid_channels, out_channels, stride=1))
        return nn.Sequential(*layers)

    def forward(self, x):
        return self.layer(x)


In [None]:
class ChannelWise3D(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ChannelWise3D, self).__init__()
        self.conv3d = nn.Conv3d(  in_channels=in_channels,out_channels=out_channels,kernel_size=(1, 1, 1), stride=(1, 1, 1),    padding=(0, 0, 0),    bias=False)
        self.bn = nn.BatchNorm3d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv3d(x)
        x = self.bn(x)
        x = self.relu(x)
        return x

In [None]:
class DL_Project7(nn.Module):
    def __init__(self, num_classes):
        super(DL_Project7, self).__init__()
        self.conv1 = Conv3D(in_channels=3, out_channels=32) 
        self.res1 = Res4() # change to Res1(), Res2(), Res3() if you want to test
        self.conv2 = ChannelWise3D(in_channels=162, out_channels=128)
        self.globalaveragepooling = nn.AdaptiveAvgPool3d(1) 
        self.fc1 = nn.Linear(in_features=128, out_features=64) 
        self.fc2 = nn.Linear(in_features=64, out_features=num_classes)
     
        self.dropout2 = nn.Dropout(p=0.5) 
    def forward(self, x):
        x = self.conv1(x)
        x = self.res1(x) 
        x = self.conv2(x)
        x = self.globalaveragepooling(x) 
        x = x.view(x.size(0), -1) 
       

        x = self.fc1(x) 
        x = self.dropout2(x)
        x = self.fc2(x)
        return x


In [None]:
model = DL_Project7(NUM_CLASSES)

# Check output

In [80]:

b, seq_len, c, h, w = 8, 3, 16, 64,64  
input_tensor = torch.randn(b, seq_len, c, h, w)

print("Input shape:", input_tensor.shape)

output = model(input_tensor)
print("Output shape:", output.shape)

Input shape: torch.Size([8, 3, 16, 64, 64])
Output shape: torch.Size([8, 11])


In [81]:
NUM_EPOCHS = 50
LEARNING_RATE = 0.0001
CHECKPOINT_PATH = "/kaggle/working/model.pth"
project = "Project_DL"
resume = 'allow'
class_weights = torch.tensor([2.5 if i in [0, 3] else 1.0 for i in range(NUM_CLASSES)], dtype=torch.float32).to(DEVICE)
loss_fn = nn.CrossEntropyLoss(
    weight=class_weights.clone().detach(),
    label_smoothing=0.1
)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE,weight_decay= 0.0001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=4)
scheduler2 = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)


In [82]:
wandb.finish()

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
learning_rate,███████████████████▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▁▁▁▁▁
train_accuracy,▁▂▃▃▃▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇███████████████████
train_loss,█▇▆▆▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▄▄▅▅▅▆▆▆▆▆▇▇▇▆▇▇▆▇▇████████████████████
val_loss,█▅▄▄▃▄▂▂▂▂▂▂▂▂▁▁▂▂▂▁▁▁▁▂▂▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁

0,1
epoch,50.0
learning_rate,0.0
train_accuracy,99.13861
train_loss,0.63761
val_accuracy,85.0
val_loss,1.08151


In [None]:
wandb.init(
    project=project,
    resume= resume,
    config={
        "learning_rate": LEARNING_RATE,
        "epochs": NUM_EPOCHS,
        "batch_size": BATCH_SIZE,
    },
    name="Project_DL_model_9"
)

In [85]:

def train_and_validate(model, train_loader, val_loader, optimizer, criterion, scheduler, epochs):
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []
    
    best_val_loss = float('inf') 
    model = model.to(DEVICE)
    for epoch in range(epochs):
        print(sum(p.numel() for p in model.parameters()))
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        # Training loop
        for images, labels in train_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE).long()  
            optimizer.zero_grad() 
            outputs = model(images) 
            loss = criterion(outputs, labels)  
            loss.backward() 
            optimizer.step()  
            running_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct / total
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)

        # Validation loop
        model.eval()
        val_running_loss = 0.0
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(DEVICE), labels.to(DEVICE).long()
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_running_loss += loss.item()

                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_loss = val_running_loss / len(val_loader)
        val_accuracy = 100 * val_correct / val_total
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)

        scheduler.step(val_loss) # cho scheduler
        # scheduler.step() # cho scheduler2

        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_accuracy": train_accuracy,
            "val_loss": val_loss,
            "val_accuracy": val_accuracy,
            "learning_rate": optimizer.param_groups[0]["lr"]
        })

        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(),"model.pth")
            wandb.save("model.pth")

        print(f"Epoch [{epoch + 1}/{epochs}]")
        print(f"    Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%")
        print(f"    Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")

    return train_losses, val_losses, train_accuracies, val_accuracies

# Train Model

In [86]:
train_and_validate(model, train_loader, test_loader, optimizer, loss_fn, scheduler, NUM_EPOCHS)

863639
Epoch [1/50]
    Train Loss: 2.2372, Train Accuracy: 19.03%
    Val Loss: 2.3757, Val Accuracy: 13.75%
863639
Epoch [2/50]
    Train Loss: 2.0241, Train Accuracy: 27.80%
    Val Loss: 1.8762, Val Accuracy: 38.12%
863639
Epoch [3/50]
    Train Loss: 1.8819, Train Accuracy: 34.14%
    Val Loss: 1.8235, Val Accuracy: 44.69%
863639
Epoch [4/50]
    Train Loss: 1.7518, Train Accuracy: 40.64%
    Val Loss: 1.5424, Val Accuracy: 50.00%
863639
Epoch [5/50]
    Train Loss: 1.6748, Train Accuracy: 46.44%
    Val Loss: 1.5193, Val Accuracy: 52.50%
863639
Epoch [6/50]
    Train Loss: 1.5956, Train Accuracy: 48.47%
    Val Loss: 1.3993, Val Accuracy: 56.88%
863639
Epoch [7/50]
    Train Loss: 1.5582, Train Accuracy: 51.53%
    Val Loss: 1.4091, Val Accuracy: 54.06%
863639
Epoch [8/50]
    Train Loss: 1.4564, Train Accuracy: 57.17%
    Val Loss: 1.3989, Val Accuracy: 58.44%
863639
Epoch [9/50]
    Train Loss: 1.4367, Train Accuracy: 56.15%
    Val Loss: 1.3741, Val Accuracy: 58.44%
863639
Epo

([2.2372146040201186,
  2.0240764051675795,
  1.881879797577858,
  1.7517979711294174,
  1.6748097971081735,
  1.5956433773040772,
  1.5581915453076363,
  1.4563925981521606,
  1.4366832822561264,
  1.3898507088422776,
  1.359531431645155,
  1.274062331020832,
  1.2256345458328723,
  1.1847680047154427,
  1.1655121803283692,
  1.1031337812542916,
  1.0321773804724217,
  1.0423201702535152,
  0.9835062317550183,
  0.9881703376770019,
  0.9600533574819565,
  0.9112992726266385,
  0.9081620424985886,
  0.8945342361927032,
  0.871157082170248,
  0.8461157143115997,
  0.8182184092700482,
  0.7598855376243592,
  0.7802198581397534,
  0.7513516798615456,
  0.7364035062491894,
  0.7654045417904853,
  0.7541856452822685,
  0.7311099246144295,
  0.7079723540693521,
  0.6806116320192814,
  0.6455041497945786,
  0.664447533339262,
  0.6492742396891117,
  0.6562551192939281,
  0.6442303784191609,
  0.6302946213632822,
  0.6443831607699394,
  0.6405522886663675,
  0.6339060012251139,
  0.63868442848