In [1]:
import json

import numpy as np
import polars as pl

In [2]:
train = pl.read_csv("/kaggle/input/atmaCup#18_dataset/train_features.csv")
train = (
    train.with_columns(
        pl.col("ID").str.split_exact("_", n=1).struct.rename_fields(["sceneID", "offset"]).alias("fields")
    )
    .unnest("fields")
    .with_columns(pl.col("offset").cast(pl.Float32))
)
print(train.shape)
train.head()

(43371, 32)


ID,vEgo,aEgo,steeringAngleDeg,steeringTorque,brake,brakePressed,gas,gasPressed,gearShifter,leftBlinker,rightBlinker,x_0,y_0,z_0,x_1,y_1,z_1,x_2,y_2,z_2,x_3,y_3,z_3,x_4,y_4,z_4,x_5,y_5,z_5,sceneID,offset
str,f64,f64,f64,f64,f64,bool,f64,bool,str,bool,bool,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f32
"""00066be8e20318869c38c66be46663…",5.701526,1.538456,-2.165777,-139.0,0.0,False,0.25,True,"""drive""",False,False,2.82959,0.032226,0.045187,6.231999,0.065895,0.107974,9.785009,0.124972,0.203649,13.485472,0.163448,0.302818,17.574227,0.174289,0.406331,21.951269,0.199503,0.485079,"""00066be8e20318869c38c66be46663…",320.0
"""00066be8e20318869c38c66be46663…",11.176292,0.279881,-11.625697,-44.0,0.0,False,0.0,False,"""drive""",False,True,4.970268,-0.007936,0.005028,10.350489,-0.032374,-0.020701,15.770054,0.084073,0.008645,21.132415,0.391343,0.036335,26.316489,0.843124,0.065,31.383814,1.42507,0.073083,"""00066be8e20318869c38c66be46663…",420.0
"""00066be8e20318869c38c66be46663…",10.472548,0.231099,-2.985105,-132.0,0.0,False,0.18,True,"""drive""",False,False,4.815701,-0.000813,0.017577,10.153522,-0.0278,0.026165,15.446539,-0.155987,0.040397,20.61816,-0.356932,0.058765,25.677387,-0.576985,0.102859,30.460033,-0.841894,0.152889,"""00066be8e20318869c38c66be46663…",520.0
"""000fb056f97572d384bae4f5fc1e0f…",6.055565,-0.117775,7.632668,173.0,0.0,False,0.0,False,"""drive""",False,False,2.812608,0.033731,0.0059,5.975378,0.137848,0.01621,9.186793,0.322997,0.031626,12.37311,0.603145,0.031858,15.703514,0.960717,0.043479,19.311182,1.374655,0.058754,"""000fb056f97572d384bae4f5fc1e0f…",120.0
"""000fb056f97572d384bae4f5fc1e0f…",3.316744,1.276733,-31.725477,-114.0,0.0,False,0.255,True,"""drive""",False,False,1.55186,-0.041849,-0.008847,3.675162,-0.125189,-0.013725,6.113567,-0.239161,-0.012887,8.770783,-0.381813,-0.003898,11.619313,-0.554488,0.011393,14.657048,-0.7788,0.044243,"""000fb056f97572d384bae4f5fc1e0f…",20.0


In [3]:
from tqdm import tqdm

df = []
for ID in tqdm(train["ID"].unique()):
    path = "/kaggle/input/atmaCup#18_dataset/traffic_lights/" + ID + ".json"
    with open(path) as f:
        data = json.load(f)

    # bboxの中央が一番中心に近いものを選択
    min_distance = 1000
    min_bbox = None
    for bbox in data:
        left, top, right, bottom = bbox["bbox"]
        x = (left + right) / 2
        distance = abs(64 - x)
        if distance < min_distance:
            min_distance = distance
            min_bbox = bbox

    if min_bbox is not None:
        dic = {
            "ID": ID,
            "traffic_class": min_bbox["class"],
            "traffic_left": min_bbox["bbox"][0],
            "traffic_top": min_bbox["bbox"][1],
            "traffic_right": min_bbox["bbox"][2],
            "traffic_bottom": min_bbox["bbox"][3],
            "traffic_x": (min_bbox["bbox"][0] + min_bbox["bbox"][2]) / 2,
            "traffic_y": (min_bbox["bbox"][1] + min_bbox["bbox"][3]) / 2,
            "traffic_width": min_bbox["bbox"][2] - min_bbox["bbox"][0],
            "traffic_height": min_bbox["bbox"][3] - min_bbox["bbox"][1],
            "traffic_aspect_ratio": (min_bbox["bbox"][2] - min_bbox["bbox"][0])
            / (min_bbox["bbox"][3] - min_bbox["bbox"][1]),
            "traffic_size": (min_bbox["bbox"][2] - min_bbox["bbox"][0]) * (min_bbox["bbox"][3] - min_bbox["bbox"][1]),
            "traffic_count": len(data),
        }
        df.append(dic)
df = pl.DataFrame(df)

  0%|          | 0/43371 [00:00<?, ?it/s]

100%|██████████| 43371/43371 [00:05<00:00, 7381.96it/s]


In [4]:
df

ID,traffic_class,traffic_left,traffic_top,traffic_right,traffic_bottom,traffic_x,traffic_y,traffic_width,traffic_height,traffic_aspect_ratio,traffic_size,traffic_count
str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64
"""151ad8af335e13aea1b78457e8981a…","""red""",61.312279,11.054235,62.241833,11.549029,61.777056,11.301632,0.929554,0.494794,1.878669,0.459938,1
"""68d18d5eac76dc9ebb53abeac90fe4…","""red""",98.159027,0.364187,101.745415,1.787217,99.952221,1.075702,3.586388,1.42303,2.520247,5.103537,1
"""9f96b054204fc3e920c3bdbb79524c…","""red""",59.883232,0.064853,63.007294,0.981673,61.445263,0.523263,3.124062,0.916819,3.4075,2.8642,2
"""62e0f968295c01cd961afb508476aa…","""green""",62.902729,2.260682,66.143303,3.58838,64.523016,2.924531,3.240574,1.327697,2.440748,4.302501,3
"""cc5dcd9c428a8ea7f7e1bd18167279…","""green""",27.388376,2.244328,29.750156,3.332453,28.569266,2.788391,2.36178,1.088126,2.170503,2.569914,4
…,…,…,…,…,…,…,…,…,…,…,…,…
"""bd3bec73628aecdfde2423fba116c5…","""empty""",95.072571,2.871063,101.936646,5.449689,98.504608,4.160376,6.864075,2.578627,2.661911,17.699886,1
"""b8593dd7e1fa3c6eb67f47fa23ce54…","""red""",72.196838,2.09381,74.722435,3.359355,73.459637,2.726583,2.525597,1.265546,1.995658,3.196258,1
"""97f01fd21b484819f03eae93451e57…","""green""",63.668262,4.259882,66.181068,5.514286,64.924665,4.887084,2.512806,1.254404,2.003188,3.152073,2
"""55e4c6312f53cf8a4a35adc004bdbc…","""empty""",65.605865,2.881765,68.124924,3.8708,66.865395,3.376283,2.519058,0.989036,2.546984,2.491439,1


In [5]:
df.write_csv("traffic_lights.csv")