In [1]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import PIL

import torch
import torchvision

from timm import timm

import warnings
pd.set_option('mode.chained_assignment',  None) # 경고 off
warnings.filterwarnings("ignore", message="The pts_unit 'pts' gives wrong results. Please use pts_unit 'sec'.")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
y_train = pd.read_csv('datasets/train.csv')
y_train.label.value_counts()[0]
y_train.label.value_counts()[1:].sum()

crashed = y_train.iloc[np.where(y_train.label != 0)[0]] 
crashed['involve'] = np.where(crashed.label.isin([1,2,3,4,5,6]), 'yes', 'no')
crashed['weather'] = np.where(crashed.label.isin([1,2,7,8]), 'normal', np.where(crashed.label.isin([3,4,9,10]), 'snowy', 'rainy'))
crashed['timing'] = np.where(crashed.label.isin([1,3,5,7,9,11]), 'day', 'night')
crashed = crashed.reset_index(drop=True)
crashed.head()

Unnamed: 0,sample_id,video_path,label,involve,weather,timing
0,TRAIN_0000,./train/TRAIN_0000.mp4,7,no,normal,day
1,TRAIN_0001,./train/TRAIN_0001.mp4,7,no,normal,day
2,TRAIN_0004,./train/TRAIN_0004.mp4,1,yes,normal,day
3,TRAIN_0006,./train/TRAIN_0006.mp4,3,yes,snowy,day
4,TRAIN_0007,./train/TRAIN_0007.mp4,7,no,normal,day


In [3]:
for key in crashed.columns[3:6].values.tolist():
    print(key)
    print(crashed[key].value_counts())
    print()

involve
yes    491
no     424
Name: involve, dtype: int64

weather
normal    716
snowy     129
rainy      70
Name: weather, dtype: int64

timing
day      808
night    107
Name: timing, dtype: int64



# TODO
    1. 512*512 downsampling
    2. balance 맞게 weather, timing 프레임 짜르기
       1. 0~3초 사이 사진에서 random sampling

In [4]:
def get_ratio(weather, timing):
    if timing == 'day':
        if weather == 'normal':
            return 1
        elif weather == 'snowy':
            return 2
        elif weather == 'rainy':
            return 3
        
    elif timing == 'night':
        if weather == 'normal':
            return 2    
        elif weather == 'snowy':
            return 10
        elif weather == 'rainy':
            return 15

In [5]:
import torchvision.transforms as transforms
import torchvision.transforms.functional as F
from PIL import Image

def my_transform(tensor):
    image_size = 512
    top = 100
    tensor = F.crop(tensor, top, 0, 720-2*top, 1280)
    
    transform = transforms.Compose([
        transforms.Resize((image_size,image_size)),
        transforms.ToPILImage() 
    ])
    
    return transform(tensor)

In [6]:
labels = pd.DataFrame(columns = ['index','sample_id', 'weather', 'timing'])

file_idx = 0

for file_name in timm(crashed.sample_id.values.tolist()):
    raw_video_tensor = torchvision.io.read_video(f'datasets/train/{file_name}.mp4')[0] # 비디오 하나 가져옴
    
    weather_label = crashed.weather.iloc[np.where(crashed.sample_id == file_name)].values[0] # 날씨 
    timing_label = crashed.timing.iloc[np.where(crashed.sample_id == file_name)].values[0] # 낮/밤
    ratio = get_ratio(weather_label, timing_label)
    
    indices = np.arange(0,30,int(30/ratio)) # 앞에 3초를 ratio만큼 등분해서 뽑기
    for idx in indices:
        image_transformed = my_transform(raw_video_tensor[idx].permute(2,0,1))
        image_transformed.save(f'datasets/balancing/{str(file_idx).zfill(4)}.png', format='PNG')
        file_idx += 1
        labels.loc[len(labels)] = [str(file_idx).zfill(4), f'{file_name}', weather_label, timing_label]

labels

Unnamed: 0,index,sample_id,weather,timing
0,0001,TRAIN_0000,normal,day
1,0002,TRAIN_0001,normal,day
2,0003,TRAIN_0004,normal,day
3,0004,TRAIN_0006,snowy,day
4,0005,TRAIN_0006,snowy,day
...,...,...,...,...
1504,1505,TRAIN_2693,snowy,day
1505,1506,TRAIN_2693,snowy,day
1506,1507,TRAIN_2694,rainy,day
1507,1508,TRAIN_2694,rainy,day


In [7]:
labels.to_csv('datasets/balancing_labels.csv', index=False)

In [10]:
for col in labels.columns[2:]:
    print(col)
    print(labels[col].value_counts())
    print()

weather
normal    797
snowy     394
rainy     318
Name: weather, dtype: int64

timing
day      1042
night     467
Name: timing, dtype: int64

