# Longtype
* 차량 쌍(TV, LV) 기준으로 정리하기
* 왜냐하면, 각 쌍별로 SSM이 계산되기 때문임

# Import

In [2]:
import pandas as pd
import numpy as np

from tqdm import tqdm
import warnings
import os

from scipy import stats # Z-score를 이용한 이상값 제거

import math # arctangent; math.atan 사용 목적
import statistics

import pyarrow.parquet as pq

# Load Dataset

In [3]:
# Root folder that holds the dataset sub-folders used below.
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
working_dir = 'D:/OneDrive/Projects/2023_SSM_Feasibility/Dataset'

In [4]:
# Sub-folder and file name of the input parquet table, relative to working_dir.
data_folder = '02_processed'
data_name = 'LV_merging_type1_03frm_0.10sec.parquet'
# Full path of the input file.
data_path = os.path.join(working_dir, data_folder, data_name)

In [5]:
# Load the input table into pandas and normalise its column names.
df_1 = pq.read_pandas(data_path).to_pandas()

# The LV0/LVL/LVR lane columns are all renamed to the capitalised `*_Lane_*`
# spelling. 'LV0_lane_record' used to map onto itself (a no-op), which left it
# as the only lane column with a lowercase 'l'; it now follows the same
# convention as its LVL/LVR counterparts.
df_1.rename({'Vehicle ID' : 'veh_id', 'frame' : 'frm',
             'Local X (m)_before' : 'local_x_before',
             'Local Y(m)_before' : 'local_y_before',
             'Lane Identification' : 'lane',
             'Lane Identification Past' : 'lane_past',
             'LV0_lane_record' : 'LV0_Lane_record',
             'LV0_lane_record_split' : 'LV0_Lane_record_split',
             'LV0_lane_change_direction' : 'LV0_Lane_change_direction',
             'LVL_lane_record' : 'LVL_Lane_record',
             'LVL_lane_record_split' : 'LVL_Lane_record_split',
             'LVL_lane_change_direction' : 'LVL_Lane_change_direction',
             'LVR_lane_record' : 'LVR_Lane_record',
             'LVR_lane_record_split' : 'LVR_Lane_record_split',
             'LVR_lane_change_direction' : 'LVR_Lane_change_direction',
            }, axis = 1, inplace = True)

In [6]:
df_1.head(3)

Unnamed: 0_level_0,veh_id,frm,Total Frames,Global Time (Epoch Time),local_x,local_y,V_len,V_wid,Vehicle Class,velocity,...,LVR_delta_velocity_y,LVR_acc_x,LVR_acc_y,LVR_Lane_record,LVR_Lane_record_split,LVR_Lane_change_direction,LV0_D,LVL_D,LVR_D,LC_CF
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1,339,39,46811300,51.0625,10.617188,5.289062,2.193359,2,87.625,...,,,,,,,,,,CF
1,1,342,42,46811400,53.53125,10.640625,5.289062,2.193359,2,89.0625,...,,,,,,,,,,CF
2,1,345,45,46811500,56.0,10.648438,5.289062,2.193359,2,88.25,...,,,,,,,,,,CF


# Longtype
* pairs 리스트의 튜플(TV, LV)에 따라서 각 차량의 위치, 속도, 가속도 등을 df로부터 프레임별로 뽑아오기

In [7]:
# Stack the three paired-vehicle ID columns (LV0 / LVL / LVR) of each dataset
# into one long table with the columns veh_id, LV_ID, frm and LV_type.
for df, i in tqdm(zip([df_1], [1])):

    slot_frames = []
    for slot in ('LV0', 'LVL', 'LVR'):
        slot_df = df[['veh_id', slot + '_ID', 'frm']].copy()
        slot_df = slot_df.rename({slot + '_ID': 'LV_ID'}, axis=1)
        slot_df['LV_type'] = slot  # remember which ID column the row came from
        slot_frames.append(slot_df)

    total_long = pd.concat(slot_frames)

    # Drop rows whose LV_ID is missing or equal to 0.
    has_partner = total_long['LV_ID'].notna() & total_long['LV_ID'].ne(0)
    total_long = total_long[has_partner]
    total_long['LV_ID'] = total_long['LV_ID'].astype('int')
    total_long = total_long.reset_index(drop=True)

    # Publish the table as a module-level name built from the dataset number
    # (long_1 for i == 1); later cells refer to it by that name.
    globals()[f'long_{i}'] = total_long

1it [00:00, 67.74it/s]


In [8]:
# Distinct (veh_id, LV_ID) combinations present in long_1. drop_duplicates keeps
# the first occurrence, so the index holds the row label where each pair first appears.
veh_pair = long_1[['veh_id', 'LV_ID']].drop_duplicates()

In [9]:
veh_pair

Unnamed: 0,veh_id,LV_ID
0,2,1
13,4,2
25,5,4
84,8,2
102,10,6
...,...,...
65251,1616,1614
65299,1618,1617
65326,1618,1620
65373,1626,1629


In [12]:
def make_int(x):
    """Return ``int(x)``, or ``None`` when *x* is a missing value.

    Missingness is decided by ``pd.isna`` (NaN, None, NaT, pd.NA).
    """
    return None if pd.isna(x) else int(x)

In [27]:
def RL0(lane, LV_lane):
    """Label the paired vehicle's lane relative to the target vehicle's lane.

    Each label is parsed by dropping its first character and converting the
    remainder with ``int()`` (so something like ``'L3'`` -> 3; the exact label
    format is an assumption to confirm against the dataset).

    Args:
        lane: Lane label of the target vehicle, or a missing value.
        LV_lane: Lane label of the paired vehicle, or a missing value.

    Returns:
        ``'LV0'`` when both lane numbers are equal, ``'LVR'`` when the target's
        lane number is smaller than the paired vehicle's, ``'LVL'`` when it is
        larger, and ``None`` when either label is missing (per ``pd.isna``).

    Raises:
        ValueError: If the remainder of a label is not an integer literal.
    """
    if pd.isna(lane) or pd.isna(LV_lane):
        return None

    lane_num = int(lane[1:])
    LV_lane_num = int(LV_lane[1:])

    if lane_num == LV_lane_num:
        return 'LV0'
    if lane_num < LV_lane_num:
        return 'LVR'
    # Only lane_num > LV_lane_num remains; two ints have no fourth outcome,
    # so the former catch-all branch was unreachable and has been removed.
    return 'LVL'

In [28]:
# NOTE(review): this silences every warning for the rest of the session, so any
# pandas deprecation / dtype warning raised below will not be displayed.
warnings.filterwarnings('ignore')

# Per-vehicle columns that are carried for the target vehicle (TV) and, with an
# 'LV_' prefix, for the paired vehicle (LV).
target_df_cols = ['local_x', 'local_y', 'V_len', 'V_wid', 'velocity', 'acc', 'lane', 'Time',
                  'local_x_before', 'local_y_before', 'delta_local_x', 'delta_local_y',
                  'velocity_x', 'velocity_y', 'velocity_x_before', 'velocity_y_before', 'delta_velocity_x', 'delta_velocity_y',
                  'acc_x', 'acc_y', 'lane_past', 'Lane_record', 'Lane_record_split', 'Lane_00', 'Lane_99',
                  'Lane_change', 'Lane_leave', 'Lane_change_direction', 'LC_CF']

# Column layout of the saved long-format table.
column_order = ['pair', 'veh_id', 'LV_ID', 'frm', 'LV_type', 'local_x', 'local_y', 'V_len', 'V_wid', 'velocity',
       'acc', 'lane', 'Time', 'local_x_before', 'local_y_before',
       'delta_local_x', 'delta_local_y', 'velocity_x', 'velocity_y',
       'velocity_x_before', 'velocity_y_before', 'delta_velocity_x',
       'delta_velocity_y', 'acc_x', 'acc_y', 'lane_past', 'Lane_record',
       'Lane_record_split', 'Lane_00', 'Lane_99', 'Lane_change', 'Lane_leave',
       'Lane_change_direction', 'LC_CF', 'LV_local_x',
       'LV_local_y', 'LV_len', 'LV_wid', 'LV_velocity', 'LV_acc', 'LV_lane',
       'LV_Time', 'LV_local_x_before', 'LV_local_y_before', 'LV_delta_local_x',
       'LV_delta_local_y', 'LV_velocity_x', 'LV_velocity_y',
       'LV_velocity_x_before', 'LV_velocity_y_before', 'LV_delta_velocity_x',
       'LV_delta_velocity_y', 'LV_acc_x', 'LV_acc_y', 'LV_lane_past',
       'LV_Lane_record', 'LV_Lane_record_split', 'LV_Lane_00', 'LV_Lane_99',
       'LV_Lane_change', 'LV_Lane_leave', 'LV_Lane_change_direction',
       'LV_LC_CF', 'D_x', 'D_y', 'D', 'D_gap']

save_folder = '02_processed_long'
save_files = ['merging_type1_03frm_0.10sec.parquet']


# One pass per dataset: (long table, dataset number, output file name).
for long, i, save_file in zip([long_1], [1], save_files):

    # Reference frame that both sides of every pair are looked up in.
    reference_df = globals()[f'df_{i}'][['veh_id', 'frm'] + target_df_cols].copy()

    # LV-side view of the same rows, renamed once instead of once per pair:
    # veh_id becomes the join key LV_ID and every target column gets an 'LV_' prefix.
    lv_reference = reference_df.rename(
        {'veh_id': 'LV_ID', **{col: 'LV_' + col for col in target_df_cols}}, axis=1)

    # Distinct (TV, LV) pairs of the dataset that is being processed.
    pairs = long[['veh_id', 'LV_ID']].drop_duplicates()

    # Collect the per-pair frames and concatenate once after the loop; growing a
    # DataFrame with pd.concat on every iteration copies all previous rows each time.
    pair_frames = []
    for veh_id, LV_ID in tqdm(zip(pairs['veh_id'], pairs['LV_ID']), total=len(pairs)):
        veh_TV = reference_df[reference_df['veh_id'] == veh_id].copy()
        veh_LV = lv_reference[lv_reference['LV_ID'] == LV_ID]

        # Every TV frame is kept; LV columns stay NaN for frames in which the
        # LV has no row (left join on vehicle ID and frame).
        veh_TV['LV_ID'] = LV_ID
        veh_tot = pd.merge(veh_TV, veh_LV, how='left', on=['LV_ID', 'frm'])
        veh_tot['pair'] = str(veh_id) + '_' + str(LV_ID)

        # LV_type has to be derived per frame from the two lane labels; it
        # cannot be copied from the pair list.
        veh_tot['LV_type'] = [RL0(tv_lane, lv_lane)
                              for tv_lane, lv_lane in zip(veh_tot['lane'], veh_tot['LV_lane'])]

        pair_frames.append(veh_tot)

    long_tot = pd.concat(pair_frames, ignore_index=True)
    long_tot = (long_tot
                .sort_values(by=['veh_id', 'LV_ID', 'frm'])
                .rename({'LV_V_len': 'LV_len', 'LV_V_wid': 'LV_wid'}, axis=1)
                .reset_index(drop=True))

    # Distance variables (LV position minus TV position, and their Euclidean norm).
    long_tot['D_x'] = long_tot['LV_local_x'] - long_tot['local_x']
    long_tot['D_y'] = long_tot['LV_local_y'] - long_tot['local_y']
    long_tot['D'] = (long_tot['D_x']**2 + long_tot['D_y']**2) ** (1/2)

    long_tot['veh_id'] = long_tot['veh_id'].astype('int')
    long_tot['LV_ID'] = long_tot['LV_ID'].apply(make_int)
    long_tot['frm'] = long_tot['frm'].astype('int')

    # D_gap is the change of D with respect to the previous row (negative means
    # the distance is shrinking). When the previous row belongs to a different
    # (veh_id, LV_ID) pair a new series starts, so D_gap is left as NaN there.
    same_pair_as_previous = (long_tot['veh_id'].eq(long_tot['veh_id'].shift(1))
                             & long_tot['LV_ID'].eq(long_tot['LV_ID'].shift(1)))
    long_tot['D_gap'] = (long_tot['D'] - long_tot['D'].shift(1)).where(same_pair_as_previous)

    long_tot = long_tot[column_order]
    # Sort by pair and then by frame: sorting on 'pair' alone uses a non-stable
    # algorithm by default, which left the frame order inside a pair unspecified.
    long_tot = long_tot.sort_values(by=['pair', 'frm']).reset_index(drop=True)

    # Save the result (the output folder is created when it does not exist yet).
    os.makedirs(os.path.join(working_dir, save_folder), exist_ok=True)
    save_path = os.path.join(working_dir, save_folder, save_file)
    long_tot.to_parquet(save_path, engine = 'fastparquet', compression = 'gzip')

100%|██████████████████████████████████████████████████████████████████████████████| 2005/2005 [00:59<00:00, 33.55it/s]
