In [None]:
# Mount Google Drive into the Colab filesystem so that the dataset paths under
# /content/drive used by later cells are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Install fvcore (presumably required by the PyTorchVideo hub model loaded later).
# %pip installs into the running kernel's environment, and the version is pinned
# to the release this notebook was last run with so that re-runs are reproducible.
%pip install -q fvcore==0.1.5.post20221221

Collecting fvcore
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/50.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting yacs>=0.1.6 (from fvcore)
  Downloading yacs-0.1.8-py3-none-any.whl.metadata (639 bytes)
Collecting iopath>=0.1.7 (from fvcore)
  Downloading iopath-0.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting portalocker (from iopath>=0.1.7->fvcore)
  Downloading portalocker-3.1.1-py3-none-any.whl.metadata (8.6 kB)
Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Downloading portalocker-3.1.1-py3-none-any.whl (19 kB)
Building wheels for collected packages: fvcore, iop

In [None]:
import torch
import torch.utils.data as data
import torchvision.transforms as transforms
import cv2
import os
import numpy as np

class VideoDataset(data.Dataset):
    """Video-classification dataset yielding fixed-length, normalised clips.

    Every non-blank line of ``list_file`` must contain a video file name
    (relative to ``video_dir``) followed by an integer class label. The label
    is the last whitespace-separated token, so file names may contain spaces.

    Args:
        list_file: Path of the text file listing ``<file name> <label>`` pairs.
        video_dir: Directory that contains the listed video files.
        transform: Optional callable applied to the already normalised
            ``(C, D, H, W)`` float clip. Non-callable values are ignored.
            Normalisation itself is always applied, also when this is ``None``.
        num_frames: Number of frames ``D`` in every returned clip.
        frame_size: ``(width, height)`` handed to ``cv2.resize`` for each frame.

    Each item is a ``(clip, label)`` pair where ``clip`` is a float32 tensor of
    shape ``(3, num_frames, height, width)`` in RGB order and ``label`` an int.
    """

    # Per-channel RGB statistics (the usual ImageNet mean / std).
    # NOTE(review): the PyTorchVideo hub examples normalise with mean 0.45 and
    # std 0.225 — confirm which statistics the pretrained weights expect.
    _MEAN = (0.485, 0.456, 0.406)
    _STD = (0.229, 0.224, 0.225)

    def __init__(self, list_file, video_dir, transform=None, num_frames=16, frame_size=(224, 224)):
        self.video_list = self._load_video_list(list_file)
        self.video_dir = video_dir
        self.transform = transform
        self.num_frames = num_frames
        self.frame_size = frame_size
        # Shaped (C, 1, 1, 1) so they broadcast over a (C, D, H, W) clip.
        self._mean = torch.tensor(self._MEAN).view(3, 1, 1, 1)
        self._std = torch.tensor(self._STD).view(3, 1, 1, 1)

    def _load_video_list(self, list_file):
        """Parse ``list_file`` into a list of ``(file_name, label)`` tuples.

        Raises:
            ValueError: If a non-blank line has no label or a non-integer label.
        """
        video_list = []
        with open(list_file, 'r') as f:
            for line_no, raw_line in enumerate(f, start=1):
                line = raw_line.strip()
                if not line:
                    # Blank or trailing empty lines are harmless; skip them.
                    continue
                # Split from the right so file names containing spaces survive.
                parts = line.rsplit(maxsplit=1)
                if len(parts) != 2:
                    raise ValueError(
                        f"{list_file}:{line_no}: expected '<file name> <label>', got {line!r}"
                    )
                file_name, label = parts
                video_list.append((file_name, int(label)))
        return video_list

    def _load_video(self, video_path):
        """Decode every frame of ``video_path`` and resize it to ``frame_size``.

        Returns a list of BGR ``uint8`` arrays as produced by OpenCV.

        Raises:
            RuntimeError: If OpenCV cannot open the file.
        """
        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                raise RuntimeError(f"Could not open video: {video_path}")
            frames = []
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                frames.append(cv2.resize(frame, self.frame_size))
        finally:
            # Release the capture handle even if decoding or resizing fails.
            cap.release()
        return frames

    def _sample_frames(self, frames):
        """Return exactly ``num_frames`` frames taken from ``frames``.

        Longer videos are sampled at evenly spaced indices; shorter ones are
        padded by repeating the last frame.

        Raises:
            ValueError: If ``frames`` is empty.
        """
        total_frames = len(frames)
        if total_frames == 0:
            raise ValueError("cannot sample frames from an empty video")
        if total_frames >= self.num_frames:
            indices = np.linspace(0, total_frames - 1, self.num_frames).astype(int)
            return [frames[i] for i in indices]
        return frames + [frames[-1]] * (self.num_frames - total_frames)

    def __len__(self):
        return len(self.video_list)

    def __getitem__(self, idx):
        """Load, sample and normalise the clip at position ``idx``.

        Raises:
            RuntimeError: If the video cannot be opened or yields no frames.
                (An IndexError here would be mistaken for "end of dataset"
                by plain iteration, so an explicit error is raised instead.)
        """
        video_file, label = self.video_list[idx]
        video_path = os.path.join(self.video_dir, video_file)

        # Load the video and pick a fixed number of frames.
        frames = self._load_video(video_path)
        if not frames:
            raise RuntimeError(f"No frames could be decoded from video: {video_path}")
        sampled_frames = self._sample_frames(frames)

        # Stack to (D, H, W, C), flip BGR -> RGB, then move channels first to
        # obtain (C, D, H, W) scaled to [0, 1].
        clip = np.ascontiguousarray(np.stack(sampled_frames)[..., ::-1])
        frames_tensor = (
            torch.from_numpy(clip).permute(3, 0, 1, 2).contiguous().float().div(255.0)
        )

        # Normalisation is always applied. It used to be guarded by
        # ``if self.transform`` and was therefore skipped whenever the dataset
        # was built with ``transform=None``.
        frames_tensor = (frames_tensor - self._mean) / self._std

        # Optional user transform on the normalised clip.
        if callable(self.transform):
            frames_tensor = self.transform(frames_tensor)

        return frames_tensor, label

# No external transform is passed; any normalisation is left to VideoDataset itself.
transform = None

# Build the train and test datasets with identical clip settings;
# only the list file and the video directory differ between the splits.
train_dataset, test_dataset = (
    VideoDataset(
        list_file=split_list,
        video_dir=split_dir,
        transform=transform,
        num_frames=16,
        frame_size=(224, 224),
    )
    for split_list, split_dir in (
        ('/content/drive/MyDrive/urp/train_list.txt', '/content/drive/MyDrive/urp/train_videos'),
        ('/content/drive/MyDrive/urp/test_list.txt', '/content/drive/MyDrive/urp/test_videos'),
    )
)

# Create the DataLoaders: shuffle only the training split.
train_loader = data.DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4)
test_loader = data.DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=4)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm  # tqdm 추가

# 1. Load the pretrained backbone and replace its classification head
model_name = "i3d_r50"
model = torch.hub.load("facebookresearch/pytorchvideo:main", model=model_name, pretrained=True)
head_block = model.blocks[-1]
head_block.proj = nn.Linear(head_block.proj.in_features, 4)  # 4 output classes

# 2. Select the compute device (GPU when available) and move the model onto it
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = model.to(device)

# 3. Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# 4. Training function with a tqdm progress bar
def train_epoch(model, loader, optimizer, criterion, epoch):
    """Train ``model`` for one pass over ``loader`` and report the metrics.

    Args:
        model: Network to optimise; it is switched to training mode.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer that is zeroed and stepped once per batch.
        criterion: Loss called as ``criterion(outputs, targets)``. The epoch
            loss assumes it returns the mean over the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        epoch: Zero-based epoch index; displayed one-based.

    Returns:
        ``(mean_loss, accuracy_percent)`` over the samples actually processed,
        or ``(nan, nan)`` when the loader yields no batch.
    """
    # Use the model's own device instead of relying on a notebook-level
    # global, so the function also works after the model has been moved.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    loss_sum = 0.0
    correct = 0
    seen = 0

    # Wrap the loader in tqdm to display progress.
    with tqdm(loader, desc=f'Epoch {epoch+1} [Train]', unit='batch') as t:
        for inputs, targets in t:
            inputs, targets = inputs.to(run_device), targets.to(run_device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            batch_size = targets.size(0)
            batch_loss = loss.item()
            # Weight by batch size so a short final batch is not over-counted.
            loss_sum += batch_loss * batch_size
            seen += batch_size
            _, predicted = outputs.max(1)
            correct += predicted.eq(targets).sum().item()

            # Show the current batch loss on the progress bar.
            t.set_postfix(loss=batch_loss)

    mean_loss = loss_sum / seen if seen else float("nan")
    accuracy = 100. * correct / seen if seen else float("nan")
    print(f'\nTrain Epoch: {epoch+1} | Loss: {mean_loss:.4f} | Accuracy: {accuracy:.2f}%')
    return mean_loss, accuracy

# 5. Evaluation function with a tqdm progress bar
def evaluate(model, loader, criterion, epoch):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss called as ``criterion(outputs, targets)``. The reported
            loss assumes it returns the mean over the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        epoch: Zero-based epoch index; displayed one-based.

    Returns:
        ``(mean_loss, accuracy_percent)`` over the samples actually processed,
        or ``(nan, nan)`` when the loader yields no batch.
    """
    # Use the model's own device instead of relying on a notebook-level global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        # Wrap the loader in tqdm to display progress.
        with tqdm(loader, desc=f'Epoch {epoch+1} [Test]', unit='batch') as t:
            for inputs, targets in t:
                inputs, targets = inputs.to(run_device), targets.to(run_device)
                outputs = model(inputs)
                loss = criterion(outputs, targets)

                batch_size = targets.size(0)
                batch_loss = loss.item()
                # Weight by batch size so a short final batch is not over-counted.
                loss_sum += batch_loss * batch_size
                total += batch_size
                _, predicted = outputs.max(1)
                correct += predicted.eq(targets).sum().item()

                # Show the current loss and running accuracy on the progress bar.
                t.set_postfix(loss=batch_loss, accuracy=100.*correct/total)

    # Final figures use the same sample count as the running accuracy above.
    mean_loss = loss_sum / total if total else float("nan")
    accuracy = 100. * correct / total if total else float("nan")
    print(f'\nTest Epoch: {epoch+1} | Loss: {mean_loss:.4f} | Accuracy: {accuracy:.2f}%')
    return mean_loss, accuracy

# 6. Data loaders
# NOTE(review): this rebinds train_loader / test_loader, which the dataset cell
# already created with num_workers=4. The versions here omit num_workers, so
# batches are loaded in the main process — confirm that this is intended.
train_loader, test_loader = (
    DataLoader(split_dataset, batch_size=8, shuffle=should_shuffle)
    for split_dataset, should_shuffle in ((train_dataset, True), (test_dataset, False))
)

# 7. Training loop: one training pass, then one evaluation pass per epoch
num_epochs = 10
for epoch in range(num_epochs):
    train_epoch(model, train_loader, optimizer, criterion, epoch)
    evaluate(model, test_loader, criterion, epoch)
    print(50 * '-')

Using cache found in /root/.cache/torch/hub/facebookresearch_pytorchvideo_main
Epoch 1 [Train]: 100%|██████████| 26/26 [05:52<00:00, 13.56s/batch, loss=1.56]



Train Epoch: 1 | Loss: 1.4435 | Accuracy: 20.10%


Epoch 1 [Test]: 100%|██████████| 7/7 [01:52<00:00, 16.04s/batch, accuracy=19.2, loss=1.35]



Test Epoch: 1 | Loss: 3.3119 | Accuracy: 19.23%
--------------------------------------------------


Epoch 2 [Train]: 100%|██████████| 26/26 [05:23<00:00, 12.44s/batch, loss=1.39]



Train Epoch: 2 | Loss: 1.4130 | Accuracy: 19.61%


Epoch 2 [Test]: 100%|██████████| 7/7 [01:15<00:00, 10.80s/batch, accuracy=25, loss=1.3]



Test Epoch: 2 | Loss: 1.5106 | Accuracy: 25.00%
--------------------------------------------------


Epoch 3 [Train]: 100%|██████████| 26/26 [05:23<00:00, 12.46s/batch, loss=1.36]



Train Epoch: 3 | Loss: 1.3901 | Accuracy: 28.92%


Epoch 3 [Test]: 100%|██████████| 7/7 [01:16<00:00, 10.93s/batch, accuracy=19.2, loss=1.53]



Test Epoch: 3 | Loss: 1.4029 | Accuracy: 19.23%
--------------------------------------------------


Epoch 4 [Train]: 100%|██████████| 26/26 [05:20<00:00, 12.34s/batch, loss=1.49]



Train Epoch: 4 | Loss: 1.3914 | Accuracy: 24.02%


Epoch 4 [Test]: 100%|██████████| 7/7 [01:16<00:00, 10.91s/batch, accuracy=25, loss=1.9]



Test Epoch: 4 | Loss: 1.6816 | Accuracy: 25.00%
--------------------------------------------------


Epoch 5 [Train]:  54%|█████▍    | 14/26 [03:10<02:43, 13.61s/batch, loss=1.39]


KeyboardInterrupt: 