In [None]:
import cv2
import pandas as pd
import numpy as np
import os
from google.colab.patches import cv2_imshow
import random

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings(action='ignore') 

In [None]:
# Root folder of the dataset; the CSV files and video paths below are resolved against it.
# NOTE(review): assumes Google Drive is already mounted at /content/drive — confirm.
colab_path = '/content/drive/MyDrive/ColabNotebooks/carcrash'

In [None]:
# Load the train/test metadata tables that live directly under the dataset root.
train_csv_path = os.path.join(colab_path, 'train.csv')
test_csv_path = os.path.join(colab_path, 'test.csv')

df = pd.read_csv(train_csv_path)
test = pd.read_csv(test_csv_path)
df

Unnamed: 0,sample_id,video_path,label
0,TRAIN_0000,./train/TRAIN_0000.mp4,7
1,TRAIN_0001,./train/TRAIN_0001.mp4,7
2,TRAIN_0002,./train/TRAIN_0002.mp4,0
3,TRAIN_0003,./train/TRAIN_0003.mp4,0
4,TRAIN_0004,./train/TRAIN_0004.mp4,1
...,...,...,...
2693,TRAIN_2693,./train/TRAIN_2693.mp4,3
2694,TRAIN_2694,./train/TRAIN_2694.mp4,5
2695,TRAIN_2695,./train/TRAIN_2695.mp4,0
2696,TRAIN_2696,./train/TRAIN_2696.mp4,0


In [None]:
def resolve_video_paths(paths, root):
    """Turn './'-relative video paths into absolute paths under ``root``.

    Only entries that still start with './' are rewritten, so running this
    cell more than once leaves already-resolved paths untouched (the plain
    ``root + path[1:]`` form would corrupt them on a second run).

    Args:
        paths: pandas Series of path strings such as './train/TRAIN_0000.mp4'.
        root: Directory prefix (no trailing slash) that replaces the leading '.'.

    Returns:
        A new Series with the same index; the input is not modified.
    """
    is_relative = paths.str.startswith('./', na=False)
    # Series.where keeps values where the condition holds and substitutes elsewhere.
    return paths.where(~is_relative, root + paths.str[1:])


df['video_path'] = resolve_video_paths(df['video_path'], colab_path)
test['video_path'] = resolve_video_paths(test['video_path'], colab_path)

In [None]:
# Peek at the video path stored under index label 0.
df.loc[0, 'video_path']

'/content/drive/MyDrive/ColabNotebooks/carcrash/train/TRAIN_0000.mp4'

In [None]:
# Central configuration shared by the cells of this notebook.
CFG = {
    # Number of frames read from every clip.
    # NOTE(review): the original note said "10 frames * 5 seconds", which does not
    # equal 150 — confirm the intended frame rate / clip duration.
    'VIDEO_LENGTH': 150,
    # Each frame is resized to IMG_SIZE x IMG_SIZE pixels.
    'IMG_SIZE': 128,
    'EPOCHS': 10,
    'LEARNING_RATE': 3e-4,
    # Batch size handed to the DataLoaders.
    'BATCH_SIZE': 4,
    # Seed used for the RNGs and for the train/validation split.
    'SEED': 41,
}

In [None]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED`` and configures cuDNN for
    reproducible runs.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers every visible GPU, not just the current one; harmless without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick different convolution
    # algorithms from run to run, which defeats the deterministic setting above.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Fix the random seeds using the configured value

In [None]:
# Hold out 20% of the rows for validation. Only the DataFrame is split:
# the labels travel with it as the 'label' column, so no separate label split is needed.
train, val = train_test_split(df, test_size=0.2, random_state=CFG['SEED'])


In [None]:
class CustomDataset(Dataset):
    """Dataset that decodes a fixed number of frames from each video file.

    Args:
        video_path_list: Indexable collection of video file paths.
        label_list: Indexable collection of labels aligned with
            ``video_path_list``, or ``None`` when no labels are available.
    """

    def __init__(self, video_path_list, label_list):
        self.video_path_list = video_path_list
        self.label_list = label_list

    def __getitem__(self, index):
        """Return ``(frames, label)``, or only ``frames`` when there are no labels."""
        frames = self.get_video(self.video_path_list[index])

        if self.label_list is not None:
            label = self.label_list[index]
            return frames, label
        else:
            return frames

    def __len__(self):
        """Number of videos in the dataset."""
        return len(self.video_path_list)

    def get_video(self, path):
        """Decode one video into a float tensor of ``CFG['VIDEO_LENGTH']`` frames.

        Every frame is resized to ``CFG['IMG_SIZE']`` x ``CFG['IMG_SIZE']`` and
        scaled to the 0-1 range. Clips with fewer frames than requested are
        padded by repeating their last decoded frame, so all samples share one
        shape and can be batched.

        Args:
            path: Path of the video file to read.

        Returns:
            ``torch.FloatTensor`` laid out as (channels, frames, height, width).

        Raises:
            IOError: If the video cannot be opened or yields no frames at all.
        """
        num_frames = CFG['VIDEO_LENGTH']
        frame_size = (CFG['IMG_SIZE'], CFG['IMG_SIZE'])

        frames = []
        cap = cv2.VideoCapture(path)
        try:
            if not cap.isOpened():
                raise IOError(f'Cannot open video: {path}')
            for _ in range(num_frames):
                ok, img = cap.read()
                if not ok:
                    # End of stream or decode failure: img is None here, so stop
                    # instead of handing it to cv2.resize.
                    break
                # Channel order is left exactly as OpenCV decoded it.
                img = cv2.resize(img, frame_size)
                frames.append(img / 255.)
        finally:
            # Always free the decoder handle, even when reading fails.
            cap.release()

        if not frames:
            raise IOError(f'No frames could be read from video: {path}')

        # Pad short clips with copies of the last frame up to the requested length.
        frames.extend([frames[-1]] * (num_frames - len(frames)))

        # Stacked frames are (frames, height, width, channels); move channels first.
        return torch.FloatTensor(np.array(frames)).permute(3, 0, 1, 2)

**DataLoader로 학습용 데이터 준비하기**
1. 배치 형태로 변환
2. batch_size : 배치 크기 설정
3. num_workers : 데이터를 불러올 때 subprocess를 몇 개 사용할 것인지 (에러가 나면 0으로 설정)


In [None]:
# Training split: wrap the clip paths and labels in a Dataset and reshuffle every epoch.
train_dataset = CustomDataset(
    video_path_list=train['video_path'].values,
    label_list=train['label'].values,
)
train_loader = DataLoader(
    train_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=True,
    num_workers=0,
)

# Validation split: same batching, but the order is kept fixed.
val_dataset = CustomDataset(
    video_path_list=val['video_path'].values,
    label_list=val['label'].values,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
)

In [None]:
# Check the length of the loader.
# len() of a DataLoader counts batches, not samples: the training split is served
# in batches of 4, which gives the 540 batches shown in the recorded output.
len(train_loader)

540

In [None]:
# Inspect the actual values served by train_loader.
# Batches are pulled from this iterator one at a time.
# Frame batches are laid out as torch.Size([batch size, channels, frames, image height, image width])
# (height and width are both CFG['IMG_SIZE']).

dataiter = iter(train_loader)

error: ignored

In [None]:
%pip install ultralytics
%pip install timm
%pip install cmapy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.6.12-py3-none-any.whl (549 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m549.1/549.1 KB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub
  Downloading huggingface_hub-0.12.1-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: huggingface-hub, timm
Successfully installed huggingface-hub-0.12.1 timm-0.6.12
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting cmapy
  Downloading cmapy-0.6.6.tar.gz (4.2 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: cmapy
  Building wheel for cmapy (

In [None]:
from ultralytics import YOLO
from PIL import Image
from IPython.display import display

In [None]:
# YOLOv8 segmentation checkpoint (large variant).
model = YOLO('yolov8l-seg.pt')

# MiDaS depth-estimation model, loaded through torch.hub.
model_type = "DPT_Large"
midas = torch.hub.load("intel-isl/MiDaS", model_type)
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
midas.to(device)
# The model is only used for inference here, so switch it to evaluation mode
# (as the MiDaS hub usage example does) before any prediction is made.
midas.eval()

# Pick the preprocessing transform that matches the selected MiDaS variant.
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
if model_type in ("DPT_Large", "DPT_Hybrid"):
    transform = midas_transforms.dpt_transform
else:
    transform = midas_transforms.small_transform

def midas_predict(frame):
    """Run the module-level MiDaS model on one image and return its prediction.

    The image is preprocessed with the module-level ``transform``, passed
    through ``midas`` on ``device`` without gradient tracking, and the result
    is bicubically resized to the image's first two dimensions.

    Args:
        frame: Image array; ``frame.shape[:2]`` is used as the output size.
            NOTE(review): presumably an RGB image, as the MiDaS transforms
            expect — confirm at the call site.

    Returns:
        NumPy array holding the resized prediction with singleton
        dimensions squeezed out.
    """
    target_size = frame.shape[:2]
    model_input = transform(frame).to(device)

    with torch.no_grad():
        raw_prediction = midas(model_input)
        # interpolate needs an explicit channel axis, hence the unsqueeze.
        resized_prediction = torch.nn.functional.interpolate(
            raw_prediction.unsqueeze(1),
            size=target_size,
            mode="bicubic",
            align_corners=False,
        ).squeeze()

    return resized_prediction.cpu().numpy()

Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master
Downloading: "https://github.com/isl-org/MiDaS/releases/download/v3/dpt_large_384.pt" to /root/.cache/torch/hub/checkpoints/dpt_large_384.pt


  0%|          | 0.00/1.28G [00:00<?, ?B/s]

Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


In [None]:
# Preview the frames of one training clip inline.
video_path = colab_path +'/train/TRAIN_0000.mp4'

# Show every N-th frame; 1 shows all frames. Raise it to keep the cell output small.
DISPLAY_EVERY_N_FRAMES = 1

cap = cv2.VideoCapture(video_path)
try:
    if not cap.isOpened():
        # Without this check a wrong path just ends the loop with no output at all.
        raise IOError(f'Cannot open video: {video_path}')

    frame_index = 0
    while True:
        # Read frame from video
        ret, frame = cap.read()
        if not ret:
            break

        # Perform object detection using YOLO v8
        # ...

        # Visualize results. cv2_imshow renders inline in the notebook, so there is
        # no OpenCV window to poll with cv2.waitKey or to close with
        # cv2.destroyAllWindows.
        if frame_index % DISPLAY_EVERY_N_FRAMES == 0:
            cv2_imshow(frame)
        frame_index += 1
finally:
    # Release the video capture even if reading or displaying fails.
    cap.release()

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Echo the path of the clip selected for the frame preview.
video_path

'/content/drive/MyDrive/ColabNotebooks/carcrash/train/TRAIN_0000.mp4'