# # 데이터 분할
---

In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# Load the clip list and keep only the columns used for splitting.
# Column glosses (translation only): 클립명 = clip name, 수집일 = collection date,
# 최종선정객체 = finally selected object, 프레임 구분 = frame division.
CLIP_LIST_CSV = '/data/NIA48/raw/sync_cliplist_static-split-nas_20230421-123546.csv'
KEEP_COLUMNS = ['클립명', '수집일', 'weather', 'road_type', '최종선정객체', '프레임 구분']

raw_clips = pd.read_csv(CLIP_LIST_CSV, encoding='utf-8', index_col=0)
df = raw_clips[KEEP_COLUMNS]
df

Unnamed: 0,클립명,수집일,weather,road_type,최종선정객체,프레임 구분
0,A_Clip_00073_11,2022-06-28,Fog,Highway,중앙분리대,100
1,A_Clip_00083_11,2022-06-28,Fog,Highway,중앙분리대,100
2,A_Clip_00084_11,2022-06-28,Fog,Highway,중앙분리대,100
3,A_Clip_00105_11,2022-06-28,Fog,Highway,중앙분리대,100
4,A_Clip_00109_11,2022-06-28,Fog,Highway,중앙분리대,100
...,...,...,...,...,...,...
18063,S_Clip_41456_17,2022-11-10,Clear,Route,표지판,34
18064,S_Clip_41486_17,2022-11-10,Clear,Route,표지판,100
18065,S_Clip_41503_17,2022-11-10,Clear,Route,표지판,65
18066,S_Clip_42487_17,2022-11-11,Clear,Expressway,표지판,31


In [3]:
# Partition the clips by weather and index each partition by the keys that
# the train/val/test split groups on.
group_keys = ['road_type', '수집일', '최종선정객체']
is_clear = df['weather'] == 'Clear'

# normal: rows whose weather is 'Clear'
df_n = df[is_clear].reset_index(drop=True)
df_ngr = df_n.set_index(group_keys)[['클립명']]

# abnormal: rows with any other weather value
df_an = df[~is_clear].reset_index(drop=True)
df_angr = df_an.set_index(group_keys)[['클립명']]

display(df_n.head(), df_an.head())

Unnamed: 0,클립명,수집일,weather,road_type,최종선정객체,프레임 구분
0,A_Clip_00373_11,2022-07-04,Clear,Highway,중앙분리대,100
1,A_Clip_00374_11,2022-07-04,Clear,Highway,중앙분리대,100
2,A_Clip_01496_11,2022-07-15,Clear,Expressway,중앙분리대,100
3,A_Clip_02904_11,2022-09-03,Clear,Highway,중앙분리대,100
4,A_Clip_02907_11,2022-09-03,Clear,Highway,중앙분리대,100


Unnamed: 0,클립명,수집일,weather,road_type,최종선정객체,프레임 구분
0,A_Clip_00073_11,2022-06-28,Fog,Highway,중앙분리대,100
1,A_Clip_00083_11,2022-06-28,Fog,Highway,중앙분리대,100
2,A_Clip_00084_11,2022-06-28,Fog,Highway,중앙분리대,100
3,A_Clip_00105_11,2022-06-28,Fog,Highway,중앙분리대,100
4,A_Clip_00109_11,2022-06-28,Fog,Highway,중앙분리대,100


In [4]:
# train, val, test split (normal)
#
# Clips are grouped by the df_ngr index (road_type, 수집일, 최종선정객체).
# A group with more than SMALL_GROUP_MAX clips gives 1/10 of its clips to val
# and 1/9 of the remainder to test; smaller groups go to train in full.

SMALL_GROUP_MAX = 7
SPLIT_SEED = 44

train_ngr = []
val_ngr = []
test_ngr = []

# One groupby pass instead of iterating set(df_ngr.index) with two .loc
# lookups per key: groups are visited in sorted key order, so the order of the
# result lists is reproducible across kernel sessions (the iteration order of
# a set of string tuples is not), and the index is scanned only once.
# Row order inside each group is preserved, so the seeded splits pick the same
# clips as the per-key lookups did.
# dropna=False keeps clips whose key contains NaN from being silently skipped.
clip_groups = df_ngr['클립명'].groupby(
    level=list(df_ngr.index.names), sort=True, dropna=False
)
for _, group_clips in clip_groups:
    scenes = group_clips.values

    if len(scenes) > SMALL_GROUP_MAX:
        # Distinct local names so the notebook-level `train` / `val` variables
        # (used elsewhere for the pickled infos) are not overwritten here.
        rest_part, val_part = train_test_split(
            scenes, test_size=1/10, random_state=SPLIT_SEED
        )
        train_part, test_part = train_test_split(
            rest_part, test_size=1/9, random_state=SPLIT_SEED
        )

        train_ngr.extend(train_part)
        val_ngr.extend(val_part)
        test_ngr.extend(test_part)

    else:
        # Too few clips to split: keep the whole group for training.
        train_ngr.extend(scenes)


# Persisting the split lists is disabled; uncomment to write them out.
# with open('train_normal.txt', 'w') as f:
#     f.write('\n'.join(sorted(train_ngr)))

# with open('val_normal.txt', 'w') as f:
#     f.write('\n'.join(sorted(val_ngr)))

# with open('test_normal.txt', 'w') as f:
#     f.write('\n'.join(sorted(test_ngr)))

len(train_ngr), len(val_ngr), len(test_ngr)

(6918, 874, 849)

In [14]:
# Count the normal test-split clips per value of the 최종선정객체 column.
test_rows = df_n.set_index('클립명').loc[test_ngr]
test_rows['최종선정객체'].value_counts()

최종선정객체
표지판      213
터널 입구    198
방음벽       92
램프        92
육교        90
중앙분리대     90
가로수       74
Name: count, dtype: int64

In [15]:
# train, val, test split (abnormal)
#
# Clips are grouped by the df_angr index (road_type, 수집일, 최종선정객체).
# A group with more than SMALL_GROUP_MAX clips gives 1/10 of its clips to val
# and 1/9 of the remainder to test; smaller groups go to train in full.

SMALL_GROUP_MAX = 7
SPLIT_SEED = 44

train_angr = []
val_angr = []
test_angr = []

# One groupby pass instead of iterating set(df_angr.index) with two .loc
# lookups per key: groups are visited in sorted key order, so the order of the
# result lists is reproducible across kernel sessions (the iteration order of
# a set of string tuples is not), and the index is scanned only once.
# Row order inside each group is preserved, so the seeded splits pick the same
# clips as the per-key lookups did.
# dropna=False keeps clips whose key contains NaN from being silently skipped.
clip_groups = df_angr['클립명'].groupby(
    level=list(df_angr.index.names), sort=True, dropna=False
)
for _, group_clips in clip_groups:
    scenes = group_clips.values

    if len(scenes) > SMALL_GROUP_MAX:
        # Distinct local names so the notebook-level `train` / `val` variables
        # (used elsewhere for the pickled infos) are not overwritten here.
        rest_part, val_part = train_test_split(
            scenes, test_size=1/10, random_state=SPLIT_SEED
        )
        train_part, test_part = train_test_split(
            rest_part, test_size=1/9, random_state=SPLIT_SEED
        )

        train_angr.extend(train_part)
        val_angr.extend(val_part)
        test_angr.extend(test_part)

    else:
        # Too few clips to split: keep the whole group for training.
        train_angr.extend(scenes)

# Persisting the split lists is disabled; uncomment to write them out.
# with open('train_abnormal.txt', 'w') as f:
#     f.write('\n'.join(sorted(train_angr)))

# with open('val_abnormal.txt', 'w') as f:
#     f.write('\n'.join(sorted(val_angr)))

# with open('test_abnormal.txt', 'w') as f:
#     f.write('\n'.join(sorted(test_angr)))

len(train_angr), len(val_angr), len(test_angr)

(7533, 955, 939)

In [194]:
# Check frame counts: sum of the '프레임 구분' column over each abnormal split.

# Index the column by clip name once, instead of running a full boolean scan
# of df for every clip in every split.
frames_by_clip = df.set_index('클립명')['프레임 구분']

# .loc with the list of clip names raises KeyError for a name missing from df,
# and .sum() yields one scalar per split. The previous running `+=` of
# `.values` arrays produced 1-element arrays and broke on any clip name that
# did not match exactly one row.
train_num = int(frames_by_clip.loc[train_angr].sum())
val_num = int(frames_by_clip.loc[val_angr].sum())
test_num = int(frames_by_clip.loc[test_angr].sum())

train_num, val_num, test_num

# # 데이터 비율 맞추기
---

In [2]:
import pickle


def _read_pickle(path):
    """Return the object unpickled from the file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Load the pickled train and val info files of the selectsub5 dataset.
train = _read_pickle('/data/kimgh/CenterPoint-custom/CenterPoint-static/data/selectsub5/infos_train_filter_True_radar.pkl')
val = _read_pickle('/data/kimgh/CenterPoint-custom/CenterPoint-static/data/selectsub5/infos_val_filter_True_radar.pkl')

In [4]:
import pickle
from collections import Counter

with open('/data/kimgh/CenterPoint-custom/CenterPoint-static/data/selectsub5/infos_train_filter_True_radar.pkl', 'rb') as f:
    train = pickle.load(f)

# Flatten the 'gt_names' entries of every train info into one list.
# (The former bare `i['gt_names']` statement had no effect and is dropped.)
names = [name for info in train for name in info['gt_names']]

# Number of occurrences of each name across the train infos.
Counter(names)

Counter({'sound_barrier': 46449,
         'median_strip': 46006,
         'street_trees': 43900,
         'road_sign': 14144,
         'overpass': 8811,
         'ramp_sect': 6511,
         'tunnel': 4539})

In [26]:
import re

# Pull the first '<letter>_Clip_<digits>_<digits>' token out of `lidar`.
# NOTE(review): `lidar` is defined outside this cell; presumably a
# 'lidar_path' string from the train infos. Confirm.
clip_pattern = re.compile('[A-Z]_Clip_[0-9]+_[0-9]+')
clip_name = clip_pattern.findall(lidar)[0]
clip_name

'A_Clip_00351_11'

In [27]:
# Display the loaded train infos to inspect their structure.
train

[{'lidar_path': '/data/kimgh/CenterPoint-custom/CenterPoint-static/data/selectsub5/train/source/normal/11/A_Clip_00351_11/Radar/RadarFront/2-048_00351_RF_012.pcd',
  'cam_front_path': '/data/kimgh/CenterPoint-custom/CenterPoint-static/data/selectsub5/train/source/normal/11/A_Clip_00351_11/Camera/CameraFront/blur/2-048_00351_CF_012.png',
  'anno_path': '/data/kimgh/CenterPoint-custom/CenterPoint-static/data/selectsub5/train/label/normal/11/A_Clip_00351_11/result/2-048_00351_FC_012.json',
  'calib_path': '/data/kimgh/CenterPoint-custom/CenterPoint-static/data/selectsub5/train/source/normal/11/A_Clip_00351_11/calib/Lidar_radar_calib/2-048_00351_LRC_RF.txt',
  'token': '/data/kimgh/CenterPoint-custom/CenterPoint-static/data/selectsub5/train/source/normal/11/A_Clip_00351_11/Radar/RadarFront/2-048_00351_RF_012',
  'sweeps': [],
  'gt_boxes': array([[17.21344004, 15.76794624, -0.872     ,  1.672     ,  1.376     ,
           2.656     ,  0.        ,  0.        , -1.57079633],
         [22.635

In [39]:
import json

# Read one label file to inspect its structure.
# The encoding is passed explicitly so that decoding does not depend on the
# locale's default encoding (the CSV is likewise read with encoding='utf-8').
with open('/data/NIA48/raw/val/label/normal/16/A_Clip_13796_16/result/2-048_13796_FC_001.json', encoding='utf-8') as f:
    data = json.load(f)

data

{'frame_no': 1,
 'annotation': [{'id': 1,
   'category': 'ROAD_SIGN',
   'obj_type': 1,
   'atypical_yn': 'n',
   '3d_box': [{'sub_id': -1,
     'location': [57.64591972623812, 2.5837684236978746, 4.510000000000003],
     'dimension': [3.8400000000000083, 3.42, 0.24000000000002394],
     'rotation_y': -0.08,
     'radar_point_count': 0,
     'lidar_point_count': 105,
     '2d_box': [898.1742127667358,
      507.80603390921624,
      71.19195119944197,
      63.5757227868628],
     '2d_area': 4526.079754111587}],
   'camera_visibility': 'y',
   '2d_polygon': []},
  {'id': 2,
   'category': 'ROAD_SIGN',
   'obj_type': 1,
   'atypical_yn': 'n',
   '3d_box': [{'sub_id': -1,
     'location': [57.19523611488736, -2.707827140332836, 5.250000000000002],
     'dimension': [3.720000000000002, 3.900000000000002, 0.3600000000000004],
     'rotation_y': -0.08,
     'radar_point_count': 5,
     'lidar_point_count': 135,
     '2d_box': [996.5034780863277,
      493.31917349595966,
      69.8418131822

In [69]:
# Take the list stored under the 'annotation' key of the loaded label JSON.
annotations = data['annotation']

In [70]:
# Keep only the annotations whose category is 'ROAD_SIGN'.
# Plain list indexing selects the same dict objects as the former
# np.asarray(...)[choice_idx].tolist() round trip, without needing NumPy
# to be imported in the session.
choice_idx = [idx for idx, ann in enumerate(annotations) if ann['category'] == 'ROAD_SIGN']
annotations = [annotations[idx] for idx in choice_idx]
annotations

[{'id': 1,
  'category': 'ROAD_SIGN',
  'obj_type': 1,
  'atypical_yn': 'n',
  '3d_box': [{'sub_id': -1,
    'location': [57.64591972623812, 2.5837684236978746, 4.510000000000003],
    'dimension': [3.8400000000000083, 3.42, 0.24000000000002394],
    'rotation_y': -0.08,
    'radar_point_count': 0,
    'lidar_point_count': 105,
    '2d_box': [898.1742127667358,
     507.80603390921624,
     71.19195119944197,
     63.5757227868628],
    '2d_area': 4526.079754111587}],
  'camera_visibility': 'y',
  '2d_polygon': []},
 {'id': 2,
  'category': 'ROAD_SIGN',
  'obj_type': 1,
  'atypical_yn': 'n',
  '3d_box': [{'sub_id': -1,
    'location': [57.19523611488736, -2.707827140332836, 5.250000000000002],
    'dimension': [3.720000000000002, 3.900000000000002, 0.3600000000000004],
    'rotation_y': -0.08,
    'radar_point_count': 5,
    'lidar_point_count': 135,
    '2d_box': [996.5034780863277,
     493.31917349595966,
     69.84181318225365,
     73.0490778902689],
    '2d_area': 5101.8800511480

In [55]:
# Number of entries currently in `annotations`.
len(annotations)

3