# Fine-Tuning Pretrained SlowFast Model on HEROES Dataset

In [None]:
# Mount Google Drive and (optionally) unpack the dataset archive.
from google.colab import drive
import zipfile
import os

# Make the user's Drive available under /content/drive.
drive.mount('/content/drive')

# Archive location and the directory it is unpacked into.
zip_file_path = '/content/drive/MyDrive/HEROES.zip'
extract_dir = '/content/drive/MyDrive/URP/dataset'

# Extraction stays disabled by default, exactly as before, but is now an
# explicit switch instead of code hidden inside a string literal (which was
# also echoed as the cell's output). Set to True for the first run.
EXTRACT_DATASET = False

if EXTRACT_DATASET:
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)

In [None]:
# Refresh the apt package index, then install the ffmpeg system package
# non-interactively (-y).
!apt update
!apt install -y ffmpeg

In [None]:
# Install the `av` package from PyPI.
# NOTE(review): presumably the decoding backend used by torchvision.io.read_video below — confirm.
!pip install av

In [None]:
# Install pytorchvideo; a later cell imports `pytorchvideo.models` from it.
!pip install pytorchvideo

In [None]:
# 데이터셋 분리 및 로드
import os
import random
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import torchvision.io as io
import torch
import torch.nn as nn
import pytorchvideo.models as models
import torch.optim as optim
import torch.nn.functional as F

# Dataset root and the emotion classes; a class's label is its index in `emotions`.
data_dir = '/content/drive/MyDrive/URP/dataset/HEROES'
emotions = ['BOREDOM', 'DISGUST', 'HAPPINESS', 'INTEREST']


def collect_samples(root_dir, class_names):
    """Return ``(file_path, label)`` pairs found under ``root_dir``.

    The directory layout walked is ``root_dir/<class name>/<id folder>/<file>``.
    The label of a file is the position of its class name in ``class_names``.

    Directory listings are sorted so the returned order (and therefore any
    seeded split made from it) is the same on every run; ``os.listdir`` alone
    returns entries in arbitrary order. Hidden entries and anything that is
    not a regular file inside an id folder are skipped, as are non-directory
    entries sitting next to the id folders.
    """
    found = []
    for label, class_name in enumerate(class_names):
        class_dir = os.path.join(root_dir, class_name)
        for id_folder in sorted(os.listdir(class_dir)):
            id_dir = os.path.join(class_dir, id_folder)
            if id_folder.startswith('.') or not os.path.isdir(id_dir):
                continue  # stray file or hidden folder, not an id folder
            for file_name in sorted(os.listdir(id_dir)):
                file_path = os.path.join(id_dir, file_name)
                if file_name.startswith('.') or not os.path.isfile(file_path):
                    continue
                found.append((file_path, label))
    return found


# Collect every file path together with its class label.
samples = collect_samples(data_dir, emotions)

# Train / test split (80% train, 20% test).
train_samples, test_samples = train_test_split(samples, test_size=0.2, random_state=42)

# Validation split: 12.5% of the training portion (10% of all samples).
train_samples, val_samples = train_test_split(train_samples, test_size=0.125, random_state=42)

print(f"Train samples: {len(train_samples)}")
print(f"Validation samples: {len(val_samples)}")
print(f"Test samples: {len(test_samples)}")

In [None]:
# Dataset class definition
class EmotionDataset(Dataset):
    """Video dataset yielding fixed-length clips and their integer labels.

    Args:
        samples: sequence of ``(file_path, label)`` pairs.
        transform: optional callable applied to the whole clip, a float
            tensor laid out as ``(T, C, H, W)``.
        num_frames: number of frames every clip is resampled to. A fixed
            length is required so the default DataLoader collation can stack
            clips from videos of different durations.

    Each item is ``(clip, label)`` where ``clip`` is a float32 tensor of shape
    ``(num_frames, C, H, W)`` with values in ``[0, 1]`` (before ``transform``).
    """

    def __init__(self, samples, transform=None, num_frames=32):
        if num_frames < 1:
            raise ValueError("num_frames must be a positive integer")
        self.samples = samples
        self.transform = transform
        self.num_frames = num_frames

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        file_path, label = self.samples[idx]
        # read_video returns (video, audio, info); only the frames are used.
        # The frames come back as a uint8 tensor laid out as (T, H, W, C).
        video, _, _ = io.read_video(file_path, pts_unit='sec')
        total_frames = video.shape[0]
        if total_frames == 0:
            raise RuntimeError(f"No video frames could be decoded from {file_path}")

        # Pick `num_frames` indices spread evenly over the whole video; short
        # videos repeat frames, long ones are subsampled.
        frame_idx = torch.linspace(0, total_frames - 1, self.num_frames).long()
        # (T, H, W, C) uint8 -> (T, C, H, W) float in [0, 1], the layout the
        # image transforms and the training loop's permute both rely on.
        clip = video[frame_idx].permute(0, 3, 1, 2).float() / 255.0

        if self.transform is not None:
            clip = self.transform(clip)
        return clip, label

# Clip transforms. Resize and CenterCrop act on the trailing (H, W) axes of a
# tensor, so they process every frame of the (T, C, H, W) clip at once.
# ToTensor is intentionally absent: it only accepts PIL images / ndarrays and
# the dataset already returns scaled float tensors.
transform = transforms.Compose([
    transforms.Resize((256, 256), antialias=True),
    transforms.CenterCrop(224),
])

# Build one dataset per split
train_dataset = EmotionDataset(train_samples, transform=transform)
val_dataset = EmotionDataset(val_samples, transform=transform)
test_dataset = EmotionDataset(test_samples, transform=transform)

# DataLoader setup; only the training split is shuffled.
batch_size = 8
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Load SlowFast R50 with Kinetics-400 pretrained weights and adapt it to 4 classes.
# The previous call, models.create_slowfast(model_num_class=4), only built the
# architecture with randomly initialised weights, so nothing was being fine-tuned.
from pytorchvideo.models.hub import slowfast_r50

NUM_CLASSES = 4

model = slowfast_r50(pretrained=True)

# The last block is the classification head; swap its final projection (trained
# for the 400 Kinetics classes) for a fresh layer sized for this task.
head = model.blocks[-1]
head.proj = nn.Linear(head.proj.in_features, NUM_CLASSES)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Pick the compute device: CUDA when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Place the model's parameters on the selected device.
model = model.to(device)

# Fine-tuning helpers and training loop
def _pack_slowfast_pathways(clips, alpha):
    """Split a (B, C, T, H, W) batch into the ``[slow, fast]`` list SlowFast takes.

    The fast pathway is the batch itself; the slow pathway keeps
    ``T // alpha`` (at least one) frames spread evenly over the clip.
    """
    num_frames = clips.shape[2]
    slow_idx = torch.linspace(
        0, num_frames - 1, max(num_frames // alpha, 1), device=clips.device
    ).long()
    return [clips.index_select(2, slow_idx), clips]


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=1,
                slowfast_alpha=4):
    """Train ``model`` and report the mean train/validation loss per epoch.

    Args:
        model: network to optimise. Batches are moved to the device its
            parameters live on (CPU if it has no parameters).
        train_loader: iterable of ``(inputs, labels)`` batches with inputs
            laid out as (batch, time, channel, height, width).
        val_loader: iterable of validation batches in the same layout.
        criterion: loss callable taking ``(outputs, labels)``.
        optimizer: optimizer already bound to ``model``'s parameters.
        num_epochs: number of passes over ``train_loader``.
        slowfast_alpha: temporal stride between the fast and slow pathway.
            SlowFast networks take a ``[slow, fast]`` list rather than a
            single tensor, so batches are packed that way by default. Pass
            ``None`` to feed the plain (B, C, T, H, W) tensor to the model.

    Returns:
        A list with one ``{"train_loss", "val_loss"}`` dict per epoch. A loss
        is NaN when the corresponding loader produced no batches.
    """
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    def batch_loss(inputs, labels):
        # Move the batch next to the model, then reorder to
        # (batch, channel, time, height, width).
        inputs = inputs.to(run_device).permute(0, 2, 1, 3, 4)
        labels = labels.to(run_device)
        if slowfast_alpha is not None:
            inputs = _pack_slowfast_pathways(inputs, slowfast_alpha)
        return criterion(model(inputs), labels)

    history = []
    for epoch in range(num_epochs):
        model.train()
        running_loss, train_batches = 0.0, 0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            loss = batch_loss(inputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            train_batches += 1

        model.eval()
        val_loss, val_batches = 0.0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                val_loss += batch_loss(inputs, labels).item()
                val_batches += 1

        # Batches are counted while iterating so an empty loader gives NaN
        # instead of a ZeroDivisionError.
        train_avg = running_loss / train_batches if train_batches else float("nan")
        val_avg = val_loss / val_batches if val_batches else float("nan")
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_avg:.4f}, Val Loss: {val_avg:.4f}")
        history.append({"train_loss": train_avg, "val_loss": val_avg})

    return history

# Run fine-tuning for ten epochs.
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10,
)

In [None]:
# Evaluation function definition
def evaluate_model(model, test_loader, slowfast_alpha=4):
    """Return the classification accuracy of ``model`` over ``test_loader``.

    Args:
        model: network to evaluate; it is switched to eval mode. Batches are
            moved to the device its parameters live on (CPU if it has none),
            so inputs, predictions and labels are always on the same device.
        test_loader: iterable of ``(inputs, labels)`` batches with inputs
            laid out as (batch, time, channel, height, width).
        slowfast_alpha: temporal stride between the fast and slow pathway.
            SlowFast networks take a ``[slow, fast]`` list rather than a
            single tensor, so batches are packed that way by default. Pass
            ``None`` to feed the plain (B, C, T, H, W) tensor to the model.

    Returns:
        Fraction of correctly classified samples, in ``[0, 1]``.

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            # Reorder to (batch, channel, time, height, width).
            inputs = inputs.to(run_device).permute(0, 2, 1, 3, 4)
            labels = labels.to(run_device)
            if slowfast_alpha is not None:
                # Slow pathway: T // alpha (at least one) evenly spaced
                # frames; fast pathway: every frame.
                num_frames = inputs.shape[2]
                slow_idx = torch.linspace(
                    0, num_frames - 1, max(num_frames // slowfast_alpha, 1),
                    device=run_device,
                ).long()
                inputs = [inputs.index_select(2, slow_idx), inputs]
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("test_loader yielded no samples to evaluate")
    return correct / total

# Measure accuracy on the test loader and print it as a percentage.
accuracy = evaluate_model(model=model, test_loader=test_loader)
print(f"Accuracy on benchmark dataset: {accuracy * 100:.2f}%")