In [2]:
import pandas as pd
import numpy as np
from collections import defaultdict
import ruptures as rpt  # our package
import pickle


def determine_drive_state_changed(timeseries: list, n_bkps: int = 3) -> list:
    """Flag every sample located at or after the earliest detected breakpoint.

    The input is converted to an array, divided by its overall norm, and
    segmented with the ruptures kernel change-point detector (RBF kernel).
    A position is marked True as soon as it is greater than or equal to at
    least one returned breakpoint, i.e. once it reaches the smallest one.

    Args:
        timeseries: Observations to segment; converted with ``np.array``.
        n_bkps: Number of breakpoints requested from the detector.

    Returns:
        A list of booleans with one entry per element along the first axis
        of the input.
    """
    signal = np.array(timeseries)
    scale = np.linalg.norm(signal)
    signal = signal / scale
    detector = rpt.KernelCPD(kernel="rbf").fit(signal)

    # Detect breakpoints
    change_points = detector.predict(n_bkps=n_bkps)

    # "i >= some breakpoint" holds exactly when i has reached the smallest
    # breakpoint; with no breakpoints at all nothing is flagged.
    earliest = min(change_points, default=None)
    flags = [
        earliest is not None and bool(position >= earliest)
        for position in range(len(signal))
    ]
    assert len(flags) == len(signal)
    return flags

## Driver change ensemble

In [9]:
# Create template dataframe for submission
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only open annotation files that come from a trusted source.
with open("resources/annotations_public.pkl", "rb") as f:
    anns = pickle.load(f)
template_rows = [
    {"ID": f"{video}_{frame}", "video": video, "frame": frame}
    for video, video_data in anns.items()
    for frame in video_data.keys()
]
# Explicit columns keep the column selection further down valid even when
# the annotation file contains no frames.
df = pd.DataFrame(template_rows, columns=["ID", "video", "frame"])


# Load predictions
# IDs look like "<video>_<frame>" where the video name itself contains one
# underscore: the last token is the frame, the first two tokens the video.
df1 = pd.read_csv("./submissions/results_driverstate_alltracks_bboxsizes.csv")
df1["frame"] = df1["ID"].apply(lambda x: int(x.split("_")[-1]))
df1["video"] = df1["ID"].apply(lambda x: "_".join(x.split("_")[:2]))

df2 = pd.read_csv("./submissions/results_driverstate_alltracks_opticalflow.csv")
# .copy() so the column assignments below act on an independent frame
# instead of a slice of the CSV contents.
df2 = df2[["ID", "Driver_State_Changed"]].copy()
df2["frame"] = df2["ID"].apply(lambda x: int(x.split("_")[-1]))
df2["video"] = df2["ID"].apply(lambda x: "_".join(x.split("_")[:2]))


# Create dict with the frame of the first True value in Driver_State_Changed
data = defaultdict(list)
for predictions in (df1, df2):
    # Group by the column name rather than a one-element list: with a list
    # grouper pandas warns and, in newer versions, yields 1-tuples as keys,
    # which would no longer match the plain video names used in the merge.
    for video, group in predictions.groupby("video"):
        changed_frames = group.loc[group["Driver_State_Changed"].eq(True), "frame"]
        # A video without any True prediction contributes nothing. Taking
        # idxmax() of an all-False column would report position 0 and mark
        # the whole video as changed.
        if not changed_frames.empty:
            data[video].append(changed_frames.min())


# Mean is the new True
# Average over whichever predictions detected a change for the video; the
# per-video lists may have different lengths, so build the Series directly.
idx = (
    pd.Series(
        {video: np.mean(frames) for video, frames in data.items()}, dtype=float
    )
    .round()
    .astype(int)
)
df_idx = idx.reset_index()
df_idx.columns = ["video", "frame"]
df_idx["change"] = True

df_new = pd.merge(
    df[["ID", "video", "frame"]], df_idx, how="left", on=["video", "frame"]
)
# Rows without a match hold NaN after the left merge; eq(True) turns them
# into False and yields a proper boolean column.
df_new["change"] = df_new["change"].eq(True)
# Within each video, every row from the change frame onward becomes True
# (relies on the template rows being ordered by frame within a video).
df_new["Driver_State_Changed"] = df_new.groupby("video")["change"].cumsum().astype(bool)

df_new = df_new[["ID", "Driver_State_Changed"]]
# The row index is written as an unnamed first column (to_csv default).
df_new.to_csv("./submissions/results_driverchange_ensemble.csv")

  for i, group in df1.groupby(['video']):
  for i, group in df2.groupby(['video']):


In [10]:
# Display df1 to inspect the loaded predictions together with the derived frame/video columns.
df1

Unnamed: 0,ID,Driver_State_Changed,Hazard_Track_0,Hazard_Name_0,Hazard_Track_1,Hazard_Name_1,Hazard_Track_2,Hazard_Name_2,Hazard_Track_3,Hazard_Name_3,...,Hazard_Track_19,Hazard_Name_19,Hazard_Track_20,Hazard_Name_20,Hazard_Track_21,Hazard_Name_21,Hazard_Track_22,Hazard_Name_22,frame,video
0,video_0001_0,False,2,,3,,4,,5,,...,,,,,,,,,0,video_0001
1,video_0001_1,False,2,,3,,4,,5,,...,,,,,,,,,1,video_0001
2,video_0001_2,False,2,,3,,4,,5,,...,,,,,,,,,2,video_0001
3,video_0001_3,False,2,,3,,4,,5,,...,,,,,,,,,3,video_0001
4,video_0001_4,False,2,,3,,4,,5,,...,,,,,,,,,4,video_0001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55765,video_0200_365,True,1,,0,,,,,,...,,,,,,,,,365,video_0200
55766,video_0200_366,True,1,,0,,,,,,...,,,,,,,,,366,video_0200
55767,video_0200_367,True,1,,0,,,,,,...,,,,,,,,,367,video_0200
55768,video_0200_368,True,1,,0,,,,,,...,,,,,,,,,368,video_0200
