## Import

In [1]:
import sklearn

import random
import numpy as np
import os
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms


import torchvision.models as models

from typing import Any, Callable, Optional
from torchvision.ops.misc import Conv3dNormActivation


from tqdm.auto import tqdm
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import train_test_split

from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings(action='ignore') 
import wandb

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

## Hyperparameter Setting

In [3]:
CFG = {
    'VIDEO_LENGTH':50,
    'EPOCHS':100,
    'LEARNING_RATE':3e-4,
    'BATCH_SIZE':4,
    'SEED':42,
    'TRAIN_DIR':'./data_new/train',
    'TEST_DIR':'./data_new/test'
}

In [4]:
wandb.init(project="thermal_fall_new", config=CFG)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mis-jang[0m ([33mis-jang-pusan-national-university[0m). Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: 500 encountered ({"errors":[{"message":"context deadline exceeded","path":["project"]}],"data":{"project":null}}), retrying request
[34m[1mwandb[0m: Network error resolved after 0:00:55.857242, resuming normal operation.


## Fixed RandomSeed

In [5]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

## Data Load

## Train / Validation Split

## CustomDataset

In [6]:

class CustomDataset(Dataset):
    def __init__(self, root_dir):
        self.root_dir = root_dir
        self.classes = os.listdir(root_dir)
        self.num_frames = 50

        self.video_paths = []
        self.labels = []

        for label, cls in enumerate(self.classes):
            cls_path = os.path.join(root_dir, cls)
            video_files = os.listdir(cls_path)
            for video_file in video_files:
                video_path = os.path.join(cls_path, video_file)
                self.video_paths.append(video_path)
                # # print(video_path + str(label)) # no 0 good 1 fall 2
                # if label == 0:
                #     print("bin")
                #     label = 1
                self.labels.append(label)

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        video_path = self.video_paths[idx]
        frames = sorted(os.listdir(video_path))[:self.num_frames]

        video_frames = []
        for frame in frames:
            img_path = os.path.join(video_path, frame)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, (64, 64))
            img = img / 255.0

            angle = random.uniform(-10, 10)
            M = cv2.getRotationMatrix2D((img.shape[1] / 2, img.shape[0] / 2), angle, 1)
            img = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))
            
            video_frames.append(img)
        
        video_frames = np.stack(video_frames)
        video_frames = np.expand_dims(video_frames, axis=0)
        video_frames = torch.FloatTensor(video_frames)

        label = self.labels[idx]
        label = torch.tensor(label, dtype=torch.long)

        return video_frames, label


In [7]:

# Dataset 및 타겟 정의
train_dataset = CustomDataset(CFG['TRAIN_DIR'])
targets = train_dataset.labels

# StratifiedShuffleSplit 사용
stratified_split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=CFG['SEED'])

# StratifiedShuffleSplit은 인덱스를 반환하므로 이를 활용해 train/val 인덱스 분리
for train_idx, val_idx in stratified_split.split(train_dataset, targets):
    train_dataset_split = Subset(train_dataset, train_idx)
    val_dataset_split = Subset(train_dataset, val_idx)

# DataLoader 설정
train_loader = DataLoader(train_dataset_split, batch_size=CFG['BATCH_SIZE'], shuffle=True, num_workers=0)
valid_loader = DataLoader(val_dataset_split, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)


In [8]:
class EarlyStopping:
    def __init__(self, patience=3, delta=0.0, mode='min', verbose=True):
        """
        patience (int): loss or score가 개선된 후 기다리는 기간. default: 3
        delta  (float): 개선시 인정되는 최소 변화 수치. default: 0.0
        mode     (str): 개선시 최소/최대값 기준 선정('min' or 'max'). default: 'min'.
        verbose (bool): 메시지 출력. default: True
        """
        self.early_stop = False
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        
        self.best_score = np.Inf if mode == 'min' else 0
        self.mode = mode
        self.delta = delta
        

    def __call__(self, score):

        if self.best_score is None:
            self.best_score = score
            self.counter = 0
        elif self.mode == 'min':
            if score < (self.best_score - self.delta):
                self.counter = 0
                self.best_score = score
                if self.verbose:
                    print(f'[EarlyStopping] (Update) Best Score: {self.best_score:.5f}')
            else:
                self.counter += 1
                if self.verbose:
                    print(f'[EarlyStopping] (Patience) {self.counter}/{self.patience}, ' \
                          f'Best: {self.best_score:.5f}' \
                          f', Current: {score:.5f}, Delta: {np.abs(self.best_score - score):.5f}')
                
        elif self.mode == 'max':
            if score > (self.best_score + self.delta):
                self.counter = 0
                self.best_score = score
                if self.verbose:
                    print(f'[EarlyStopping] (Update) Best Score: {self.best_score:.5f}')
            else:
                self.counter += 1
                if self.verbose:
                    print(f'[EarlyStopping] (Patience) {self.counter}/{self.patience}, ' \
                          f'Best: {self.best_score:.5f}' \
                          f', Current: {score:.5f}, Delta: {np.abs(self.best_score - score):.5f}')
                
            
        if self.counter >= self.patience:
            if self.verbose:
                print(f'[EarlyStop Triggered] Best Score: {self.best_score:.5f}')
            # Early Stop
            self.early_stop = True
        else:
            # Continue
            self.early_stop = False

In [9]:
def train(model, optimizer, train_loader, val_loader, device):
    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for videos, labels in tqdm(iter(train_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            
            output = model(videos)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')
        
        wandb.log({
            'epoch': epoch,
            'train_loss': _train_loss,
            'val_loss': _val_loss,
            'val_f1': _val_score
        })

        es(_val_loss)

        if es.early_stop:
            print("Early Stopping")
            break
    torch.save(model.state_dict(),  's3d.pt')
        

            

In [10]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    preds, trues = [], []
    
    with torch.no_grad():
        for videos, labels in tqdm(iter(val_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            
            logit = model(videos)
            
            loss = criterion(logit, labels)
            
            val_loss.append(loss.item())
            
            preds += logit.argmax(1).detach().cpu().numpy().tolist()
            trues += labels.detach().cpu().numpy().tolist()
        
        _val_loss = np.mean(val_loss)
    
    _val_score = f1_score(trues, preds, average='weighted')
    return _val_loss, _val_score

In [11]:
class TemporalSeparableConv(nn.Sequential):
    def __init__(
        self,
        in_planes: int,
        out_planes: int,
        kernel_size: int,
        stride: int,
        padding: int,
        norm_layer: Callable[..., nn.Module],
    ):
        super().__init__(
            Conv3dNormActivation(
                in_planes,
                out_planes,
                kernel_size=(1, kernel_size, kernel_size),
                stride=(1, stride, stride),
                padding=(0, padding, padding),
                bias=False,
                norm_layer=norm_layer,
            ),
            Conv3dNormActivation(
                out_planes,
                out_planes,
                kernel_size=(kernel_size, 1, 1),
                stride=(stride, 1, 1),
                padding=(padding, 0, 0),
                bias=False,
                norm_layer=norm_layer,
            ),
        )

In [12]:
class CustomS3D(nn.Module):
    def __init__(self, original_model):
        super(CustomS3D, self).__init__()
        self.features = original_model.features
        
        # 첫 번째 TemporalSeparableConv 레이어 수정
        temporal_layer = self.features[0]

        first_conv = temporal_layer[0]  # TemporalSeparableConv의 첫 번째 레이어
        out_channels = first_conv[0].out_channels

        in_channels = 1  # 입력 채널을 1로 설정

        norm_layer = type(first_conv[1])  # normalization 레이어의 클래스 타입 가져오기

        # 새로운 TemporalSeparableConv 생성
        new_temporal_layer = TemporalSeparableConv(
            in_channels, out_channels, 
            kernel_size=7, stride=2, padding=3, 
            norm_layer=norm_layer
        )

        # 수정된 첫 번째 레이어 할당
        self.features[0] = new_temporal_layer

        # 이진 분류를 위한 분류기 레이어 수정 (출력 유닛 2개)
        # self.classifier = nn.Sequential(
        #     nn.AdaptiveAvgPool3d((1, 1, 1)),  # 크기를 1x1x1로 축소
        #     nn.Flatten(),  # 텐서를 평평하게 만들어줌
        #     nn.Linear(out_channels, 2)  # 여기서 out_channels가 이전 레이어의 출력 채널과 일치해야 합니다.
        # )

        # self.classifier = nn.Sequential(
        #     nn.Dropout(p=0.2),
        #     nn.Conv3d(1024, 2, kernel_size=1, stride=1, bias=True),
        # )

        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool3d((1, 1, 1)),  # 크기를 1x1x1로 축소
            nn.Flatten(),  # 텐서를 평평하게 만들어줌
            nn.Linear(1024, 2)  # 여기서 out_channels가 이전 레이어의 출력 채널과 일치해야 합니다.
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

# 기존 S3D 모델을 불러와서 수정
original_s3d_model = models.video.s3d(weights='KINETICS400_V1')
custom_s3d_model = CustomS3D(original_s3d_model)

# 모델 출력 확인
print(custom_s3d_model)

CustomS3D(
  (features): Sequential(
    (0): TemporalSeparableConv(
      (0): Conv3dNormActivation(
        (0): Conv3d(1, 64, kernel_size=(1, 7, 7), stride=(1, 2, 2), padding=(0, 3, 3), bias=False)
        (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
      (1): Conv3dNormActivation(
        (0): Conv3d(64, 64, kernel_size=(7, 1, 1), stride=(2, 1, 1), padding=(3, 0, 0), bias=False)
        (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
    )
    (1): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1), dilation=1, ceil_mode=False)
    (2): Conv3dNormActivation(
      (0): Conv3d(64, 64, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
      (1): BatchNorm3d(64, eps=0.001, momentum=0.001, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (3): TemporalSeparableConv(
      (0): Conv

## Run!!

In [13]:
model = custom_s3d_model
es = EarlyStopping(patience=10, delta=0.0, mode='min', verbose=True)
# model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
# torch.quantization.prepare_qat(model, inplace=True)
optimizer = torch.optim.AdamW(params = model.parameters(), lr = CFG["LEARNING_RATE"])



In [14]:
train(model, optimizer, train_loader, valid_loader, device)

100%|██████████| 17/17 [00:14<00:00,  1.20it/s]
100%|██████████| 5/5 [00:04<00:00,  1.03it/s]


Epoch [1], Train Loss : [0.50078] Val Loss : [0.53381] Val F1 : [0.68056]
[EarlyStopping] (Update) Best Score: 0.53381


100%|██████████| 17/17 [00:01<00:00, 12.13it/s]
100%|██████████| 5/5 [00:00<00:00, 26.64it/s]


Epoch [2], Train Loss : [0.35011] Val Loss : [0.44372] Val F1 : [0.68056]
[EarlyStopping] (Update) Best Score: 0.44372


100%|██████████| 17/17 [00:01<00:00, 12.10it/s]
100%|██████████| 5/5 [00:00<00:00, 27.24it/s]


Epoch [3], Train Loss : [0.37311] Val Loss : [0.63023] Val F1 : [0.58291]
[EarlyStopping] (Patience) 1/10, Best: 0.44372, Current: 0.63023, Delta: 0.18651


100%|██████████| 17/17 [00:01<00:00, 12.15it/s]
100%|██████████| 5/5 [00:00<00:00, 27.66it/s]


Epoch [4], Train Loss : [0.31274] Val Loss : [1.37692] Val F1 : [0.51393]
[EarlyStopping] (Patience) 2/10, Best: 0.44372, Current: 1.37692, Delta: 0.93321


100%|██████████| 17/17 [00:01<00:00, 12.22it/s]
100%|██████████| 5/5 [00:00<00:00, 27.70it/s]


Epoch [5], Train Loss : [0.25277] Val Loss : [4.10820] Val F1 : [0.18836]
[EarlyStopping] (Patience) 3/10, Best: 0.44372, Current: 4.10820, Delta: 3.66448


100%|██████████| 17/17 [00:01<00:00, 12.35it/s]
100%|██████████| 5/5 [00:00<00:00, 27.16it/s]


Epoch [6], Train Loss : [0.25552] Val Loss : [2.29280] Val F1 : [0.57778]
[EarlyStopping] (Patience) 4/10, Best: 0.44372, Current: 2.29280, Delta: 1.84908


100%|██████████| 17/17 [00:01<00:00, 12.16it/s]
100%|██████████| 5/5 [00:00<00:00, 27.52it/s]


Epoch [7], Train Loss : [0.41464] Val Loss : [0.67420] Val F1 : [0.74545]
[EarlyStopping] (Patience) 5/10, Best: 0.44372, Current: 0.67420, Delta: 0.23049


100%|██████████| 17/17 [00:01<00:00, 12.23it/s]
100%|██████████| 5/5 [00:00<00:00, 27.59it/s]


Epoch [8], Train Loss : [0.33706] Val Loss : [1.67488] Val F1 : [0.63704]
[EarlyStopping] (Patience) 6/10, Best: 0.44372, Current: 1.67488, Delta: 1.23117


100%|██████████| 17/17 [00:01<00:00, 12.17it/s]
100%|██████████| 5/5 [00:00<00:00, 27.55it/s]


Epoch [9], Train Loss : [0.32515] Val Loss : [0.98069] Val F1 : [0.69264]
[EarlyStopping] (Patience) 7/10, Best: 0.44372, Current: 0.98069, Delta: 0.53698


100%|██████████| 17/17 [00:01<00:00, 12.09it/s]
100%|██████████| 5/5 [00:00<00:00, 26.96it/s]


Epoch [10], Train Loss : [0.20148] Val Loss : [1.40959] Val F1 : [0.79630]
[EarlyStopping] (Patience) 8/10, Best: 0.44372, Current: 1.40959, Delta: 0.96588


100%|██████████| 17/17 [00:01<00:00, 12.11it/s]
100%|██████████| 5/5 [00:00<00:00, 27.31it/s]


Epoch [11], Train Loss : [0.12088] Val Loss : [1.60955] Val F1 : [0.63704]
[EarlyStopping] (Patience) 9/10, Best: 0.44372, Current: 1.60955, Delta: 1.16583


100%|██████████| 17/17 [00:01<00:00, 12.08it/s]
100%|██████████| 5/5 [00:00<00:00, 25.87it/s]


Epoch [12], Train Loss : [0.17136] Val Loss : [1.04709] Val F1 : [0.69264]
[EarlyStopping] (Patience) 10/10, Best: 0.44372, Current: 1.04709, Delta: 0.60337
[EarlyStop Triggered] Best Score: 0.44372
Early Stopping


In [15]:
model.load_state_dict(torch.load('s3d.pt'))
model.eval()

CustomS3D(
  (features): Sequential(
    (0): TemporalSeparableConv(
      (0): Conv3dNormActivation(
        (0): Conv3d(1, 64, kernel_size=(1, 7, 7), stride=(1, 2, 2), padding=(0, 3, 3), bias=False)
        (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
      (1): Conv3dNormActivation(
        (0): Conv3d(64, 64, kernel_size=(7, 1, 1), stride=(2, 1, 1), padding=(3, 0, 0), bias=False)
        (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
    )
    (1): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1), dilation=1, ceil_mode=False)
    (2): Conv3dNormActivation(
      (0): Conv3d(64, 64, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
      (1): BatchNorm3d(64, eps=0.001, momentum=0.001, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (3): TemporalSeparableConv(
      (0): Conv

## Inference

In [16]:
test_dataset = CustomDataset(CFG["TEST_DIR"])
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [17]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    preds = []
    true_labels = []
    with torch.no_grad():
        for videos, labels in tqdm(iter(test_loader)):
            videos = videos.to(device)
            
            logit = model(videos)

            preds += logit.argmax(1).detach().cpu().numpy().tolist()
            true_labels += labels.detach().cpu().numpy().tolist()

    # print(true_labels)
    # print(preds)

    f1 = f1_score(true_labels, preds, average='weighted')  # You can choose 'micro', 'macro', or 'weighted' based on your needs
    print(f"f1 score = [{f1}]")

    return preds, f1

In [18]:
preds, f1 = inference(model, test_loader, device)

100%|██████████| 6/6 [00:04<00:00,  1.38it/s]

f1 score = [0.8613322161709258]





wandb: ERROR Error while calling W&B API: context deadline exceeded (<Response [500]>)
wandb: ERROR Error while calling W&B API: context deadline exceeded (<Response [500]>)
wandb: ERROR Error while calling W&B API: context deadline exceeded (<Response [500]>)


## Submission