## Import

In [1]:
import sklearn
import random
import numpy as np
import os
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms


import torchvision.models as models

from tqdm.auto import tqdm
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import train_test_split

from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings(action='ignore') 
import wandb

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

## Hyperparameter Setting

In [3]:
CFG = {
    'VIDEO_LENGTH':50, # 10프레임 * 5초
    'EPOCHS':100,
    'LEARNING_RATE':3e-4,
    'BATCH_SIZE':4,
    'SEED':42,
    'TRAIN_DIR':'./data_new/train',
    'TEST_DIR':'./data_new/test'
}

In [4]:
wandb.init(project="thermal_fall_new", config=CFG)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mis-jang[0m ([33mis-jang-pusan-national-university[0m). Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: 500 encountered ({"errors":[{"message":"context deadline exceeded","path":["project"]}],"data":{"project":null}}), retrying request
[34m[1mwandb[0m: Network error resolved after 0:01:03.020841, resuming normal operation.


## Fixed RandomSeed

In [5]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

## Data Load

## Train / Validation Split

## CustomDataset

In [6]:

class CustomDataset(Dataset):
    def __init__(self, root_dir):
        self.root_dir = root_dir
        self.classes = os.listdir(root_dir)
        self.num_frames = 50

        self.video_paths = []
        self.labels = []

        for label, cls in enumerate(self.classes):
            cls_path = os.path.join(root_dir, cls)
            video_files = os.listdir(cls_path)
            for video_file in video_files:
                video_path = os.path.join(cls_path, video_file)
                self.video_paths.append(video_path)
                # # print(video_path + str(label)) # no 0 good 1 fall 2
                # if label == 0:
                #     print("bin")
                #     label = 1
                self.labels.append(label)

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        video_path = self.video_paths[idx]
        frames = sorted(os.listdir(video_path))[:self.num_frames]

        video_frames = []
        for frame in frames:
            img_path = os.path.join(video_path, frame)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, (64, 64))
            img = img / 255.0

            angle = random.uniform(-10, 10)
            M = cv2.getRotationMatrix2D((img.shape[1] / 2, img.shape[0] / 2), angle, 1)
            img = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))
            
            video_frames.append(img)
        
        video_frames = np.stack(video_frames)
        video_frames = np.expand_dims(video_frames, axis=0)
        video_frames = torch.FloatTensor(video_frames)

        label = self.labels[idx]
        label = torch.tensor(label, dtype=torch.long)

        return video_frames, label


In [7]:

# Dataset 및 타겟 정의
train_dataset = CustomDataset(CFG['TRAIN_DIR'])
targets = train_dataset.labels

# StratifiedShuffleSplit 사용
stratified_split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=CFG['SEED'])

# StratifiedShuffleSplit은 인덱스를 반환하므로 이를 활용해 train/val 인덱스 분리
for train_idx, val_idx in stratified_split.split(train_dataset, targets):
    train_dataset_split = Subset(train_dataset, train_idx)
    val_dataset_split = Subset(train_dataset, val_idx)

# DataLoader 설정
train_loader = DataLoader(train_dataset_split, batch_size=CFG['BATCH_SIZE'], shuffle=True, num_workers=0)
valid_loader = DataLoader(val_dataset_split, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)


## Model Define

## Train

In [8]:
class EarlyStopping:
    def __init__(self, patience=3, delta=0.0, mode='min', verbose=True):
        """
        patience (int): loss or score가 개선된 후 기다리는 기간. default: 3
        delta  (float): 개선시 인정되는 최소 변화 수치. default: 0.0
        mode     (str): 개선시 최소/최대값 기준 선정('min' or 'max'). default: 'min'.
        verbose (bool): 메시지 출력. default: True
        """
        self.early_stop = False
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        
        self.best_score = np.Inf if mode == 'min' else 0
        self.mode = mode
        self.delta = delta
        

    def __call__(self, score):

        if self.best_score is None:
            self.best_score = score
            self.counter = 0
        elif self.mode == 'min':
            if score < (self.best_score - self.delta):
                self.counter = 0
                self.best_score = score
                if self.verbose:
                    print(f'[EarlyStopping] (Update) Best Score: {self.best_score:.5f}')
            else:
                self.counter += 1
                if self.verbose:
                    print(f'[EarlyStopping] (Patience) {self.counter}/{self.patience}, ' \
                          f'Best: {self.best_score:.5f}' \
                          f', Current: {score:.5f}, Delta: {np.abs(self.best_score - score):.5f}')
                
        elif self.mode == 'max':
            if score > (self.best_score + self.delta):
                self.counter = 0
                self.best_score = score
                if self.verbose:
                    print(f'[EarlyStopping] (Update) Best Score: {self.best_score:.5f}')
            else:
                self.counter += 1
                if self.verbose:
                    print(f'[EarlyStopping] (Patience) {self.counter}/{self.patience}, ' \
                          f'Best: {self.best_score:.5f}' \
                          f', Current: {score:.5f}, Delta: {np.abs(self.best_score - score):.5f}')
                
            
        if self.counter >= self.patience:
            if self.verbose:
                print(f'[EarlyStop Triggered] Best Score: {self.best_score:.5f}')
            # Early Stop
            self.early_stop = True
        else:
            # Continue
            self.early_stop = False

In [9]:
def train(model, optimizer, train_loader, val_loader, device):
    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for videos, labels in tqdm(iter(train_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            
            output = model(videos)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')
        
        wandb.log({
            'epoch': epoch,
            'train_loss': _train_loss,
            'val_loss': _val_loss,
            'val_f1': _val_score
        })

        es(_val_loss)

        if es.early_stop:
            print("Early Stopping")
            break
    torch.save(model.state_dict(),  'r3d.pt')
        

            

In [10]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    preds, trues = [], []
    
    with torch.no_grad():
        for videos, labels in tqdm(iter(val_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            
            logit = model(videos)
            
            loss = criterion(logit, labels)
            
            val_loss.append(loss.item())
            
            preds += logit.argmax(1).detach().cpu().numpy().tolist()
            trues += labels.detach().cpu().numpy().tolist()
        
        _val_loss = np.mean(val_loss)
    
    _val_score = f1_score(trues, preds, average='weighted')
    return _val_loss, _val_score

In [11]:
class VideoResNet(nn.Module):
    def __init__(self, num_classes=2):
        super(VideoResNet, self).__init__()
        self.model = models.video.r3d_18(weights='KINETICS400_V1')
        self.model.stem[0] = nn.Conv3d(1, 64, kernel_size=(3, 7, 7), stride=(1, 2, 2), padding=(1, 3, 3), bias=False)
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)

    def forward(self, x):
        return self.model(x)

## Run!!

In [12]:
model = VideoResNet()
es = EarlyStopping(patience=10, delta=0.0, mode='min', verbose=True)

# model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
# torch.quantization.prepare_qat(model, inplace=True)
optimizer = torch.optim.AdamW(params = model.parameters(), lr = CFG["LEARNING_RATE"])


In [13]:
train(model, optimizer, train_loader, valid_loader, device)

100%|██████████| 17/17 [00:10<00:00,  1.65it/s]
100%|██████████| 5/5 [00:01<00:00,  4.41it/s]


Epoch [1], Train Loss : [0.72205] Val Loss : [0.75237] Val F1 : [0.68056]
[EarlyStopping] (Update) Best Score: 0.75237


100%|██████████| 17/17 [00:04<00:00,  3.78it/s]
100%|██████████| 5/5 [00:00<00:00, 14.85it/s]


Epoch [2], Train Loss : [0.52299] Val Loss : [0.75346] Val F1 : [0.68056]
[EarlyStopping] (Patience) 1/10, Best: 0.75237, Current: 0.75346, Delta: 0.00109


100%|██████████| 17/17 [00:04<00:00,  3.78it/s]
100%|██████████| 5/5 [00:00<00:00, 14.73it/s]


Epoch [3], Train Loss : [0.26954] Val Loss : [0.31783] Val F1 : [0.79140]
[EarlyStopping] (Update) Best Score: 0.31783


100%|██████████| 17/17 [00:04<00:00,  3.78it/s]
100%|██████████| 5/5 [00:00<00:00, 14.79it/s]


Epoch [4], Train Loss : [0.19673] Val Loss : [0.16251] Val F1 : [0.94143]
[EarlyStopping] (Update) Best Score: 0.16251


100%|██████████| 17/17 [00:04<00:00,  3.79it/s]
100%|██████████| 5/5 [00:00<00:00, 14.84it/s]


Epoch [5], Train Loss : [0.30022] Val Loss : [0.78225] Val F1 : [0.53818]
[EarlyStopping] (Patience) 1/10, Best: 0.16251, Current: 0.78225, Delta: 0.61973


100%|██████████| 17/17 [00:04<00:00,  3.77it/s]
100%|██████████| 5/5 [00:00<00:00, 14.82it/s]


Epoch [6], Train Loss : [0.16831] Val Loss : [0.48475] Val F1 : [0.79630]
[EarlyStopping] (Patience) 2/10, Best: 0.16251, Current: 0.48475, Delta: 0.32224


100%|██████████| 17/17 [00:04<00:00,  3.79it/s]
100%|██████████| 5/5 [00:00<00:00, 15.07it/s]


Epoch [7], Train Loss : [0.17835] Val Loss : [0.66290] Val F1 : [0.83951]
[EarlyStopping] (Patience) 3/10, Best: 0.16251, Current: 0.66290, Delta: 0.50039


100%|██████████| 17/17 [00:04<00:00,  3.83it/s]
100%|██████████| 5/5 [00:00<00:00, 14.88it/s]


Epoch [8], Train Loss : [0.13968] Val Loss : [1.55036] Val F1 : [0.74545]
[EarlyStopping] (Patience) 4/10, Best: 0.16251, Current: 1.55036, Delta: 1.38785


100%|██████████| 17/17 [00:04<00:00,  3.80it/s]
100%|██████████| 5/5 [00:00<00:00, 14.87it/s]


Epoch [9], Train Loss : [0.42564] Val Loss : [0.44729] Val F1 : [0.79140]
[EarlyStopping] (Patience) 5/10, Best: 0.16251, Current: 0.44729, Delta: 0.28478


100%|██████████| 17/17 [00:04<00:00,  3.73it/s]
100%|██████████| 5/5 [00:00<00:00, 14.45it/s]


Epoch [10], Train Loss : [0.17061] Val Loss : [1.27561] Val F1 : [0.59019]
[EarlyStopping] (Patience) 6/10, Best: 0.16251, Current: 1.27561, Delta: 1.11310


100%|██████████| 17/17 [00:04<00:00,  3.67it/s]
100%|██████████| 5/5 [00:00<00:00, 14.48it/s]


Epoch [11], Train Loss : [0.24279] Val Loss : [0.24887] Val F1 : [0.94143]
[EarlyStopping] (Patience) 7/10, Best: 0.16251, Current: 0.24887, Delta: 0.08636


100%|██████████| 17/17 [00:04<00:00,  3.67it/s]
100%|██████████| 5/5 [00:00<00:00, 14.52it/s]


Epoch [12], Train Loss : [0.14638] Val Loss : [0.60295] Val F1 : [0.83951]
[EarlyStopping] (Patience) 8/10, Best: 0.16251, Current: 0.60295, Delta: 0.44043


100%|██████████| 17/17 [00:04<00:00,  3.67it/s]
100%|██████████| 5/5 [00:00<00:00, 14.17it/s]


Epoch [13], Train Loss : [0.03620] Val Loss : [0.74651] Val F1 : [0.83951]
[EarlyStopping] (Patience) 9/10, Best: 0.16251, Current: 0.74651, Delta: 0.58399


100%|██████████| 17/17 [00:04<00:00,  3.67it/s]
100%|██████████| 5/5 [00:00<00:00, 14.50it/s]


Epoch [14], Train Loss : [0.02330] Val Loss : [1.23076] Val F1 : [0.74343]
[EarlyStopping] (Patience) 10/10, Best: 0.16251, Current: 1.23076, Delta: 1.06825
[EarlyStop Triggered] Best Score: 0.16251
Early Stopping


In [14]:
model.load_state_dict(torch.load('r3d.pt'))
model.eval()

VideoResNet(
  (model): VideoResNet(
    (stem): BasicStem(
      (0): Conv3d(1, 64, kernel_size=(3, 7, 7), stride=(1, 2, 2), padding=(1, 3, 3), bias=False)
      (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Sequential(
          (0): Conv3DSimple(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
          (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (conv2): Sequential(
          (0): Conv3DSimple(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
          (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (relu): ReLU(inplace=True)
      )
      (1): BasicBlock(
        (conv1): Sequential(
          (0): Conv3DSimple(64, 64, kernel_size=(3, 3,

## Inference

In [15]:
test_dataset = CustomDataset(CFG["TEST_DIR"])
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [16]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    preds = []
    true_labels = []
    with torch.no_grad():
        for videos, labels in tqdm(iter(test_loader)):
            videos = videos.to(device)
            
            logit = model(videos)

            preds += logit.argmax(1).detach().cpu().numpy().tolist()
            true_labels += labels.detach().cpu().numpy().tolist()

    # print(true_labels)
    # print(preds)

    f1 = f1_score(true_labels, preds, average='weighted')  # You can choose 'micro', 'macro', or 'weighted' based on your needs
    print(f"f1 score = [{f1}]")

    return preds, f1

In [17]:
preds, f1 = inference(model, test_loader, device)

100%|██████████| 6/6 [00:00<00:00,  6.58it/s]

f1 score = [0.8613322161709258]





wandb: ERROR Error while calling W&B API: context deadline exceeded (<Response [500]>)


## Submission