## 라이브러리 불러오기

In [None]:
# Mount Google Drive at /content/drive (Colab-only import); the data root PATH used below lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
from copy import deepcopy

import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score
import cv2
from tqdm.auto import tqdm

import torch
from torch import nn
import torch.nn.functional as F
import torchvision.models as models 
from torch.utils.data import Dataset, DataLoader
import torchvision

In [None]:

# !unzip -qq '/content/drive/MyDrive/dacon/car_crash.zip' -d '/content/drive/MyDrive/dacon/car_crash'

In [None]:
# Root directory on the mounted Drive; later cells build file names as PATH + '<name>' (train.csv, checkpoints, videos).
PATH = '/content/drive/MyDrive/dacon/car_crash/'

In [None]:
# Number of frames read from the start of each video (used as a frame count in CustomDataset.video_frames, not as a frame rate).
FPS = 50
# Upper bound of the epoch range iterated by train_model.
EPOCHS = 300
# Each frame is resized to IMAGE_SIZE x IMAGE_SIZE pixels.
IMAGE_SIZE = 128
# Seed passed to fix_seed and to the train/validation split.
SEED = 22
# Learning rate handed to the Adam optimizers.
LEARNING_RATE = 1e-4

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


## Seed 고정

In [None]:
def fix_seed(seed):
  """Seed every random number generator used by the notebook.

  Seeds Python's `random`, NumPy and PyTorch (CPU and all CUDA devices) and
  switches cuDNN into its deterministic configuration.

  Args:
    seed: Integer seed shared by all generators.
  """
  random.seed(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  torch.cuda.manual_seed(seed)
  # Cover every visible GPU, not only the current one.
  torch.cuda.manual_seed_all(seed)
  torch.backends.cudnn.deterministic = True
  # cuDNN auto-tuning picks kernels by timing them, which can differ from run
  # to run, so it has to be disabled when reproducibility is the goal.
  torch.backends.cudnn.benchmark = False

fix_seed(SEED)

## 데이터 불러오기

In [None]:
# Load the training annotations and preview the last rows.
train = pd.read_csv(PATH + 'train.csv')
train.tail()

Unnamed: 0,sample_id,video_path,label
2693,TRAIN_2693,./train/TRAIN_2693.mp4,3
2694,TRAIN_2694,./train/TRAIN_2694.mp4,5
2695,TRAIN_2695,./train/TRAIN_2695.mp4,0
2696,TRAIN_2696,./train/TRAIN_2696.mp4,0
2697,TRAIN_2697,./train/TRAIN_2697.mp4,0


In [None]:
# Number of samples per raw label value.
train['label'].value_counts()

0     1783
1      318
7      317
3       78
2       51
9       34
11      33
8       30
5       28
4       13
12       6
10       4
6        3
Name: label, dtype: int64

In [None]:
# Columns are picked by position: index 1 becomes the dataset's video paths, index 2 its labels.
# NOTE(review): presumably these are the video_path and label columns shown in the preview above — confirm against train.csv.
X = train.iloc[:, 1].values
Y = train.iloc[:, 2].values

In [None]:
# 80/20 train/validation split, stratified on the label and seeded with SEED.
train_x, val_x, train_y, val_y = train_test_split(X, Y, test_size=0.2, random_state=SEED, stratify=Y)

## Crash 분류를 위한 데이터 생성

In [None]:
# Re-read the annotations so the relabelling below does not touch `train`.
crash = pd.read_csv(PATH + 'train.csv')
# Collapse to a binary target: any label greater than 0 becomes 1, label 0 stays 0.
crash['label'] = np.where(crash['label'] > 0, 1, 0)

crash['label'].value_counts()

In [None]:
# Positional selection, same layout as X / Y above: column 1 -> inputs, column 2 -> binary labels.
crash_x = crash.iloc[:, 1].values
crash_y = crash.iloc[:, 2].values

In [None]:
# 80/20 split stratified on the binary label; random_state is the literal 22 (same value as SEED).
train_crash_x, val_crash_x, train_crash_y, val_crash_y = train_test_split(crash_x, crash_y, random_state=22, test_size=0.2, stratify=crash_y)

## ego-involve 분류를 위한 데이터 생성

In [None]:
ego = pd.read_csv(PATH + 'train.csv')
# Keep only rows whose label is non-zero; .copy() yields an independent frame
# that can be modified without touching (or warning about) the parent frame.
ego = ego[ego['label'] != 0].copy()

# Build both masks from the untouched labels so the two writes cannot interfere.
ego_labels_1_to_6 = ego['label'].isin([1,2,3,4,5,6])
ego_labels_7_to_12 = ego['label'].isin([7,8,9,10,11,12])
# Write through a single .loc[rows, column] call. The chained form
# ego['label'].loc[mask] = v assigns into a temporary Series, which raises
# SettingWithCopyWarning and is a silent no-op under pandas Copy-on-Write.
ego.loc[ego_labels_1_to_6, 'label'] = 1
ego.loc[ego_labels_7_to_12, 'label'] = 0

# Positional selection: column 1 -> inputs, column 2 -> remapped labels.
ego_x = ego.iloc[:, 1].values
ego_y = ego.iloc[:, 2].values

# 80/20 stratified split, seeded with the notebook-wide SEED.
train_ego_x, val_ego_x, train_ego_y, val_ego_y = train_test_split(ego_x, ego_y, random_state=SEED, test_size=0.2, stratify=ego_y)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


## weather 분류를 위한 데이터 생성

In [None]:
weather = pd.read_csv(PATH + 'train.csv')
# Keep only rows whose label is non-zero; .copy() yields an independent frame.
weather = weather[weather['label'] != 0].copy()

# Target class -> raw labels that are folded into it.
weather_label_groups = {
    0: [1,2,7,8],
    1: [3,4,9,10],
    2: [5,6,11,12],
}
# Compute every mask from the untouched labels first, so a freshly written
# 0/1/2 can never be matched again by a later group.
weather_masks = {
    target_class: weather['label'].isin(raw_labels)
    for target_class, raw_labels in weather_label_groups.items()
}
# Write through .loc[rows, column]; the chained weather['label'].loc[...] = v
# form raises SettingWithCopyWarning and does nothing under Copy-on-Write.
for target_class, row_mask in weather_masks.items():
  weather.loc[row_mask, 'label'] = target_class

# Positional selection: column 1 -> inputs, column 2 -> remapped labels.
weather_x = weather.iloc[:, 1].values
weather_y = weather.iloc[:, 2].values

# 80/20 stratified split, seeded with the notebook-wide SEED.
train_weather_x, val_weather_x, train_weather_y, val_weather_y = train_test_split(weather_x, weather_y, random_state=SEED, test_size=0.2, stratify=weather_y)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


## timing 분류를 위한 데이터 생성

In [None]:
time = pd.read_csv(PATH + 'train.csv')
# Keep only rows whose label is non-zero; .copy() yields an independent frame.
time = time[time['label'] != 0].copy()

# Build both masks from the untouched labels so the two writes cannot interfere
# (after the first write, class 0 must not be confused with a raw label).
time_odd_labels = time['label'].isin([1,3,5,7,9,11])
time_even_labels = time['label'].isin([2,4,6,8,10,12])
# Write through .loc[rows, column]; the chained time['label'].loc[...] = v
# form raises SettingWithCopyWarning and does nothing under Copy-on-Write.
time.loc[time_odd_labels, 'label'] = 0
time.loc[time_even_labels, 'label'] = 1

# Positional selection: column 1 -> inputs, column 2 -> remapped labels.
time_x = time.iloc[:, 1].values
time_y = time.iloc[:, 2].values

# 80/20 stratified split, seeded with the notebook-wide SEED.
train_time_x, val_time_x, train_time_y, val_time_y = train_test_split(time_x, time_y, random_state=SEED, test_size=0.2, stratify=time_y)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


## crash_ego 데이터 분류를 위한 데이터 생성

In [None]:
crash_ego = pd.read_csv(PATH+'train.csv')

# Three-way target: raw label 0 stays 0, raw labels 1-6 become 2, raw labels 7-12 become 1.
# Both masks are taken from the untouched labels so the writes cannot interfere.
crash_ego_labels_1_to_6 = crash_ego['label'].isin([1,2,3,4,5,6])
crash_ego_labels_7_to_12 = crash_ego['label'].isin([7,8,9,10,11,12])
# Write through .loc[rows, column]; the chained crash_ego['label'].loc[...] = v
# form raises SettingWithCopyWarning and does nothing under Copy-on-Write.
crash_ego.loc[crash_ego_labels_1_to_6, 'label'] = 2
crash_ego.loc[crash_ego_labels_7_to_12, 'label'] = 1

# Positional selection: column 1 -> inputs, column 2 -> remapped labels.
crash_ego_x = crash_ego.iloc[:, 1].values
crash_ego_y = crash_ego.iloc[:, 2].values

# 80/20 stratified split, seeded with the notebook-wide SEED.
train_crash_ego_x, val_crash_ego_x, train_crash_ego_y, val_crash_ego_y = train_test_split(crash_ego_x, crash_ego_y, test_size=0.2, stratify=crash_ego_y, random_state=SEED)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


## Dataset, DataLoader 정의

In [None]:
class CustomDataset(Dataset):
  """Dataset that decodes the first FPS frames of each video into a tensor.

  Args:
    path: Root directory that every relative video path is joined onto.
    video_path: Sequence of relative video paths. The first two characters of
      each entry are dropped before joining with `path` (the entries in
      train.csv start with './').
    labels: Sequence of labels aligned with `video_path`, or None to yield
      clips only.
    transform: Optional callable applied to the decoded clip tensor before it
      is returned. None (the default) leaves the clip unchanged.
  """

  def __init__(self, path, video_path, labels, transform=None):
    self.path = path
    self.video_path = video_path
    self.labels = labels
    self.transform = transform

  def __len__(self):
    """Return the number of videos."""
    return len(self.video_path)

  def __getitem__(self, idx):
    """Return `(clip, label)` when labels were given, otherwise just `clip`."""
    video_path = self.path + self.video_path[idx][2:]
    frames = self.video_frames(video_path)
    if self.transform is not None:
      frames = self.transform(frames)
    if self.labels is not None:
      label = self.labels[idx]
      return frames, label
    else:
      return frames

  def video_frames(self, video_path):
    """Decode the first FPS frames of `video_path`.

    Returns:
      A float32 tensor laid out as (channels, FPS, IMAGE_SIZE, IMAGE_SIZE) with
      8-bit pixel values scaled to [0, 1]. Channel order is whatever cv2
      delivers; no colour conversion is applied.

    Raises:
      RuntimeError: if the video yields fewer than FPS readable frames.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
      for i in range(FPS):
        ok, img = cap.read()
        # A failed read returns (False, None); report it with the file name
        # instead of letting cv2.resize fail on None.
        if not ok or img is None:
          raise RuntimeError(f'Could not read frame {i + 1} of {FPS} from {video_path}')
        frames.append(cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE)))
    finally:
      # Always free the decoder handle, including when a read fails.
      cap.release()
    # Stack to (FPS, H, W, C) and divide by 255, the maximum 8-bit pixel value.
    clip = np.stack(frames).astype(np.float32) / 255.0
    return torch.from_numpy(clip).permute(3, 0, 1, 2)

In [None]:
# All train
# Loaders over the full multi-class split (train_x / val_x); only the training loader shuffles.
train_dataset = CustomDataset(path=PATH, video_path=train_x, labels=train_y)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=10, shuffle=True)

val_dataset = CustomDataset(path=PATH, video_path=val_x, labels=val_y)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=10, shuffle=False)

In [None]:
# crash
# Loaders for the binary crash split; only the training loader shuffles.
train_crash_dataset = CustomDataset(path=PATH, video_path=train_crash_x, labels=train_crash_y)
train_crash_dataloader = DataLoader(dataset=train_crash_dataset, batch_size=10, shuffle=True)

val_crash_dataset = CustomDataset(path=PATH, video_path=val_crash_x, labels=val_crash_y)
val_crash_dataloader = DataLoader(dataset=val_crash_dataset, batch_size=10, shuffle=False)

In [None]:
# ego-involve
# Loaders for the ego-involve split; only the training loader shuffles.
train_ego_dataset = CustomDataset(path=PATH, video_path=train_ego_x, labels=train_ego_y)
train_ego_dataloader = DataLoader(dataset=train_ego_dataset, batch_size=10, shuffle=True)

val_ego_dataset = CustomDataset(path=PATH, video_path=val_ego_x, labels=val_ego_y)
val_ego_dataloader = DataLoader(dataset=val_ego_dataset, batch_size=10, shuffle=False)

In [None]:
# time
# Loaders for the time split; only the training loader shuffles.
train_time_dataset = CustomDataset(path=PATH, video_path=train_time_x, labels=train_time_y)
train_time_dataloader = DataLoader(dataset=train_time_dataset, batch_size=10, shuffle=True)

val_time_dataset = CustomDataset(path=PATH, video_path=val_time_x, labels=val_time_y)
val_time_dataloader = DataLoader(dataset=val_time_dataset, batch_size=10, shuffle=False)

In [None]:
# weather
# Loaders for the weather split; these two are the only loaders that use worker processes (num_workers=2).
train_weather_dataset = CustomDataset(path=PATH, video_path=train_weather_x, labels=train_weather_y)
train_weather_dataloader = DataLoader(dataset=train_weather_dataset, batch_size=10, shuffle=True, num_workers=2)

val_weather_dataset = CustomDataset(path=PATH, video_path=val_weather_x, labels=val_weather_y)
val_weather_dataloader = DataLoader(dataset=val_weather_dataset, batch_size=10, shuffle=False, num_workers=2)

In [None]:
# crash_ego
# Loaders for the three-class crash_ego split; only the training loader shuffles.
train_crash_ego_dataset = CustomDataset(path=PATH, video_path=train_crash_ego_x, labels=train_crash_ego_y)
train_crash_ego_dataloader = DataLoader(dataset=train_crash_ego_dataset, batch_size=10, shuffle=True)

val_crash_ego_dataset = CustomDataset(path=PATH, video_path=val_crash_ego_x, labels=val_crash_ego_y)
val_crash_ego_dataloader = DataLoader(dataset=val_crash_ego_dataset, batch_size=10, shuffle=False)

## 모델 작성

In [None]:
class TrainedModel(nn.Module):
  """Pretrained r3d_18 video backbone followed by a linear classification head.

  Args:
    num_classes: Size of the output produced by the final linear layer.
  """

  def __init__(self, num_classes):
    super().__init__()
    # Backbone loaded with the 'KINETICS400_V1' weights; its 400-wide output feeds the head.
    self.backbone = models.video.r3d_18(weights='KINETICS400_V1')
    self.fc = nn.Linear(in_features=400, out_features=num_classes)

  def forward(self, x):
    """Run the backbone, then map its 400 outputs to `num_classes` scores."""
    backbone_out = self.backbone(x)
    return self.fc(backbone_out)

In [None]:
class BaseModel(nn.Module):
    """Small 3D-convolutional classifier built from four conv/pool stages.

    Args:
        num_classes: Size of the output produced by the final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        # (in_channels, out_channels, conv kernel, pool kernel) for each stage.
        stage_specs = [
            (3, 8, (1, 3, 3), 2),
            (8, 32, (1, 2, 2), 2),
            (32, 64, (1, 2, 2), 2),
            (64, 128, (1, 2, 2), (3, 7, 7)),
        ]
        layers = []
        for in_ch, out_ch, conv_kernel, pool_kernel in stage_specs:
            # Every stage is Conv3d -> ReLU -> BatchNorm3d -> MaxPool3d, in that order.
            layers.extend([
                nn.Conv3d(in_ch, out_ch, conv_kernel),
                nn.ReLU(),
                nn.BatchNorm3d(out_ch),
                nn.MaxPool3d(pool_kernel),
            ])
        self.feature_extract = nn.Sequential(*layers)
        # The head expects exactly 1024 flattened features per sample.
        self.classifier = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Extract features, flatten them per sample and classify."""
        features = self.feature_extract(x)
        flattened = features.view(x.size(0), -1)
        return self.classifier(flattened)

## 모델 학습

In [None]:
class FocalLoss(nn.Module):
  """Focal loss for multi-class classification on raw logits.

  Each sample's cross-entropy is scaled by `(1 - p_t) ** gamma`, where `p_t`
  is the softmax probability of that sample's target class. The factor is
  computed per sample and only then reduced; reducing the cross-entropy to a
  scalar first would apply the factor to the batch average instead.

  Args:
    weight: Optional per-class weight (tensor or sequence) forwarded to
      `F.cross_entropy`. Registered as a buffer so it follows `.to(device)`.
    gamma: Focusing exponent; 0 reduces to (weighted) cross-entropy.
    reduction: 'mean' (default) or 'sum' for a scalar, 'none' for the
      per-sample losses. 'mean' is the plain mean over samples.

  Raises:
    ValueError: if `reduction` is not 'mean', 'sum' or 'none'.
  """

  def __init__(self, weight=None, gamma=2, reduction='mean'):
    super().__init__()
    if reduction not in ('mean', 'sum', 'none'):
      raise ValueError(f"reduction must be 'mean', 'sum' or 'none', got {reduction!r}")
    if weight is not None:
      weight = torch.as_tensor(weight, dtype=torch.float)
    self.register_buffer('weight', weight)
    self.gamma = gamma
    self.reduction = reduction

  def forward(self, inputs, targets):
    """Return the focal loss of logits `inputs` against class indices `targets`."""
    # The unweighted per-sample cross-entropy equals -log(p_t), so exp(-ce) is p_t.
    plain_ce = F.cross_entropy(inputs, targets, reduction='none')
    pt = torch.exp(-plain_ce)
    if self.weight is None:
      ce_loss = plain_ce
    else:
      # Class weights scale the loss term only; p_t must stay a true probability.
      ce_loss = F.cross_entropy(inputs, targets, weight=self.weight, reduction='none')
    focal_loss = (1 - pt) ** self.gamma * ce_loss
    if self.reduction == 'mean':
      return focal_loss.mean()
    if self.reduction == 'sum':
      return focal_loss.sum()
    return focal_loss

In [None]:
def train_model(model, optimizer, train_dataloader, val_dataloader, scheduler, device, start_epoch=1, start_score=0,
                checkpoint_path=None, best_model_path=None):
  """Train `model` with focal loss, checkpointing every epoch.

  After each epoch the model is validated, a resumable checkpoint is written,
  the scheduler (if any) is stepped on the validation macro-F1, and the weights
  are saved separately whenever the macro-F1 beats the best value so far.

  Args:
    model: Network to train; moved to `device`.
    optimizer: Optimizer over `model`'s parameters.
    train_dataloader: Yields `(videos, labels)` training batches.
    val_dataloader: Yields `(videos, labels)` validation batches.
    scheduler: Scheduler stepped with the validation F1, or None.
    device: Device the model and batches are moved to.
    start_epoch: First epoch number to run (epochs are numbered from 1).
    start_score: Validation F1 a new model has to beat to count as best.
    checkpoint_path: File for the per-epoch checkpoint. Defaults to
      PATH + 'weather_model_batchsize9_dict.pt'.
    best_model_path: File for the best weights. Defaults to
      PATH + 'best_weather_model6.pt'.

  Returns:
    `model`, holding the best-scoring weights seen during this call, or the
    last-epoch weights when no epoch beat `start_score`.
  """
  if checkpoint_path is None:
    checkpoint_path = PATH + 'weather_model_batchsize9_dict.pt'
  if best_model_path is None:
    best_model_path = PATH + 'best_weather_model6.pt'

  model.to(device)
  criterion = FocalLoss().to(device)

  best_val_f1 = start_score
  best_state = None

  # Epochs are numbered 1..EPOCHS, so the upper bound must be inclusive.
  for epoch in range(start_epoch, EPOCHS + 1):
    model.train()
    train_loss = []
    for videos, labels in tqdm(train_dataloader):
      videos = videos.to(device)
      labels = labels.to(device)

      optimizer.zero_grad()

      output = model(videos)
      loss = criterion(output, labels)

      loss.backward()
      optimizer.step()

      train_loss.append(loss.item())

    _val_loss, _val_f1, _val_acc = validation(model, criterion, val_dataloader, device)
    _train_loss = np.mean(train_loss)
    print(f'Epoch {epoch}, Train Loss: {_train_loss:.6f}, Val Loss: {_val_loss:.6f}, Val Acc:{_val_acc:.6f}, Val F1: {_val_f1:.6f}')

    torch.save({
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        # The scheduler is optional, so it must not be dereferenced blindly.
        'scheduler': scheduler.state_dict() if scheduler is not None else None,
        'epoch': epoch,
        'score': _val_f1,
        'acc': _val_acc
    }, checkpoint_path)

    if scheduler is not None:
      scheduler.step(_val_f1)

    if best_val_f1 < _val_f1:
      best_val_f1 = _val_f1
      # Snapshot the weights; keeping a reference to `model` would only alias
      # the object that later epochs go on to modify.
      best_state = deepcopy(model.state_dict())
      torch.save(best_state, best_model_path)
      print('Model Update!')

  if best_state is not None:
    model.load_state_dict(best_state)
  return model

In [None]:
def validation(model, criterion, val_dataloader, device):
  """Evaluate `model` on `val_dataloader` without tracking gradients.

  Args:
    model: Network to evaluate; switched to eval mode.
    criterion: Loss applied to each batch's outputs and labels.
    val_dataloader: Yields `(videos, labels)` batches.
    device: Device each batch is moved to.

  Returns:
    Tuple `(mean batch loss, macro-averaged F1, accuracy)`.
  """
  model.eval()
  batch_losses = []
  predicted, expected = [], []

  with torch.no_grad():
    for clips, targets in tqdm(iter(val_dataloader)):
      clips, targets = clips.to(device), targets.to(device)

      logits = model(clips)
      batch_losses.append(criterion(logits, targets).item())

      # The predicted class is the index of the largest output along dim 1.
      predicted.extend(logits.argmax(1).detach().cpu().numpy().tolist())
      expected.extend(targets.detach().cpu().numpy().tolist())

    mean_loss = np.mean(batch_losses)

  macro_f1 = f1_score(expected, predicted, average='macro')
  accuracy = accuracy_score(expected, predicted)
  return mean_loss, macro_f1, accuracy

In [None]:
# weather model

weather_classificaiton_model = TrainedModel(3)
# Move the model first: optimizer.load_state_dict places the restored state on
# the device the parameters live on at that moment, so the parameters must
# already be on `device` before a checkpoint is loaded.
weather_classificaiton_model.to(device)
optimizer = torch.optim.Adam(params=weather_classificaiton_model.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5, threshold_mode='abs', verbose=True)

# Check for and load the SAME file; testing one name and loading another fails
# as soon as only the tested file exists.
# NOTE(review): train_model writes its per-epoch checkpoint under a fixed file name — confirm it matches the name resumed from here.
weather_checkpoint_path = PATH + 'weather_model_dict.pt'
start_epoch = 1
if os.path.isfile(weather_checkpoint_path):
  checkpoint = torch.load(weather_checkpoint_path, map_location=device)
  weather_classificaiton_model.load_state_dict(checkpoint['model'])
  optimizer.load_state_dict(checkpoint['optimizer'])
  scheduler.load_state_dict(checkpoint['scheduler'])
  start_epoch = checkpoint['epoch'] + 1

# Train on the weather loaders: their labels are the 3 weather classes this
# model outputs, whereas train_dataloader carries the raw multi-class labels.
infer_model = train_model(weather_classificaiton_model, optimizer, train_weather_dataloader, val_weather_dataloader, scheduler, device, start_epoch)

In [None]:
# ego-invole model

ego_involve_classificaiton_model = TrainedModel(2)
# Move the model first: optimizer.load_state_dict places the restored state on
# the device the parameters live on at that moment, so the parameters must
# already be on `device` before a checkpoint is loaded.
ego_involve_classificaiton_model.to(device)
optimizer = torch.optim.Adam(params=ego_involve_classificaiton_model.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5, threshold_mode='abs', verbose=True)

# Resume from the saved training state when it exists, otherwise start at epoch 1.
# NOTE(review): train_model writes its per-epoch checkpoint under a fixed file name — confirm it matches the name resumed from here.
ego_checkpoint_path = PATH + 'ego_involve_model_dict.pt'
start_epoch = 1
if os.path.isfile(ego_checkpoint_path):
  checkpoint = torch.load(ego_checkpoint_path, map_location=device)
  ego_involve_classificaiton_model.load_state_dict(checkpoint['model'])
  optimizer.load_state_dict(checkpoint['optimizer'])
  scheduler.load_state_dict(checkpoint['scheduler'])
  start_epoch = checkpoint['epoch'] + 1

# Train on the ego loaders: their labels are the 2 classes this model outputs,
# whereas train_dataloader carries the raw multi-class labels.
infer_model = train_model(ego_involve_classificaiton_model, optimizer, train_ego_dataloader, val_ego_dataloader, scheduler, device, start_epoch)

In [None]:
# crash model

crash_classificaiton_model = TrainedModel(2)
# Move the model first: optimizer.load_state_dict places the restored state on
# the device the parameters live on at that moment, so the parameters must
# already be on `device` before a checkpoint is loaded.
crash_classificaiton_model.to(device)
optimizer = torch.optim.Adam(params=crash_classificaiton_model.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5, threshold_mode='abs', verbose=True)

# Resume from the saved training state when it exists, otherwise start at epoch 1.
# NOTE(review): train_model writes its per-epoch checkpoint under a fixed file name — confirm it matches the name resumed from here.
crash_checkpoint_path = PATH + 'crash_model_dict.pt'
start_epoch = 1
if os.path.isfile(crash_checkpoint_path):
  checkpoint = torch.load(crash_checkpoint_path, map_location=device)
  crash_classificaiton_model.load_state_dict(checkpoint['model'])
  optimizer.load_state_dict(checkpoint['optimizer'])
  scheduler.load_state_dict(checkpoint['scheduler'])
  start_epoch = checkpoint['epoch'] + 1

# Train on the crash loaders: their labels are the 2 classes this model
# outputs, whereas train_dataloader carries the raw multi-class labels.
infer_model = train_model(crash_classificaiton_model, optimizer, train_crash_dataloader, val_crash_dataloader, scheduler, device, start_epoch)

In [None]:
# time model

time_classificaiton_model = TrainedModel(2)
# Move the model first: optimizer.load_state_dict places the restored state on
# the device the parameters live on at that moment, so the parameters must
# already be on `device` before a checkpoint is loaded.
time_classificaiton_model.to(device)
optimizer = torch.optim.Adam(params=time_classificaiton_model.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5, threshold_mode='abs', verbose=True)

# Resume from the saved training state when it exists, otherwise start at epoch 1.
# NOTE(review): train_model writes its per-epoch checkpoint under a fixed file name — confirm it matches the name resumed from here.
time_checkpoint_path = PATH + 'time_model_dict.pt'
start_epoch = 1
if os.path.isfile(time_checkpoint_path):
  checkpoint = torch.load(time_checkpoint_path, map_location=device)
  time_classificaiton_model.load_state_dict(checkpoint['model'])
  optimizer.load_state_dict(checkpoint['optimizer'])
  scheduler.load_state_dict(checkpoint['scheduler'])
  start_epoch = checkpoint['epoch'] + 1

# Train on the time loaders: their labels are the 2 classes this model outputs,
# whereas train_dataloader carries the raw multi-class labels.
infer_model = train_model(time_classificaiton_model, optimizer, train_time_dataloader, val_time_dataloader, scheduler, device, start_epoch)