In [None]:
# Install the two extra dependencies only when they are not already importable.
# pip's stdout is redirected to /dev/null, so only stderr shows up in the cell output.
# The packages are not re-imported here after installation; later cells import zarr.
try:
    import zarr
except ModuleNotFoundError:
    # zarr is installed without a version pin, so the resolved version may change between runs.
    # NOTE(review): `--use-feature=2020-resolver` is an opt-in flag of older pip releases;
    # confirm that the pip version in the target environment still accepts it.
    !pip install --use-feature=2020-resolver zarr > /dev/null
try:
    import pytorch_lightning
except ModuleNotFoundError:
    # pytorch-lightning is pinned to 0.8.4 (presumably the version the checkpoints were
    # trained with -- TODO confirm).
    !pip install  --use-feature=2020-resolver pytorch-lightning==0.8.4  > /dev/null

In [None]:
import os,sys
sys.path.insert(0, "../input/best-submission/src")

In [None]:
import models.pointnet as pointnet
# from data.dataset import CustomLyftDataset_V5 as CustomLyftDataset
from training.trainer import get_last_checkpoint, BaseLightningModule
from training.configs.base import GenericConfig
from data.dataset import collate_V5 as collate, CustomLyftDataset_V7 as CustomLyftDataset

In [None]:
from pathlib import Path
import zarr
from tqdm.notebook import tqdm
import numpy as np, pandas as pd
import torch, torch.nn as nn
from torch.utils.data import DataLoader

In [None]:
import bisect, yaml
import itertools as it

In [None]:
from joblib import Parallel, delayed
# Shared joblib runner reused by get_agents_data for every batch.
# The positional argument is joblib's `n_jobs`, i.e. 3 concurrent workers.
pool = Parallel(3)

In [None]:
# Input locations: the competition data set and the hyper-parameter file
# shipped alongside the checkpoints.
DATA_ROOT = Path("../input/lyft-motion-prediction-autonomous-vehicles")
ZARR_PATH = Path("scenes/test.zarr")  # relative to DATA_ROOT
# MODEL_PATH = r"../input/kkiller-lyft-checkpoints/model_epoch_17.ckpt"
HPARAM_PATH = "../input/best-submission/hparams.yaml"
print(f"DATA_ROOT: {DATA_ROOT}\nZARR_PATH: {ZARR_PATH}")

In [None]:
%%time

mask = np.load(DATA_ROOT/"scenes/mask.npz")['arr_0']
mask

In [None]:
agent_ids = np.where(mask)[0]
agent_ids

In [None]:
# Read the training hyper-parameters; the safe loader only builds plain
# Python objects from the YAML document.
with open(HPARAM_PATH) as hparam_file:
    hparams = yaml.safe_load(hparam_file)
hparams

In [None]:
hparams["data_options"]["train"]["kwargs"].keys()

In [None]:
# Reuse the data settings stored under the training configuration so that
# inference reads frames the same way the checkpoints were configured.
_train_kwargs = hparams["data_options"]["train"]["kwargs"]
HBACKWARD, FRAME_STRIDE, AGENT_FEATURE_DIM, MAX_AGENTS = (
    _train_kwargs[key]
    for key in ("hbackward", "frame_stride", "agent_feature_dim", "max_agents")
)
HBACKWARD, FRAME_STRIDE, AGENT_FEATURE_DIM, MAX_AGENTS

In [None]:
hparams["model_options"]["kwargs"]

In [None]:
HFORWARD = 0
NFRAMES = 1

In [None]:
z = zarr.open(DATA_ROOT.joinpath(ZARR_PATH).as_posix())

In [None]:
frames_ij = z.scenes["frame_index_interval"]

In [None]:
agents_ij = z.frames["agent_index_interval"]

In [None]:
def get_scene(agent_id):
    """Fetch the scene, frame and agent records that contain ``agent_id``.

    The frame index is found by bisecting the first column of ``agents_ij``
    (per-frame agent interval starts); the scene index is found the same way
    from the first column of ``frames_ij``.

    Returns
    -------
    tuple
        ``(scene, (frame, frame_id + 1), agent)``. Note that the integer
        paired with the frame record is the frame index plus one.
    """
    agent_starts = agents_ij[:, 0]
    frame_id = bisect.bisect_right(agent_starts, agent_id) - 1

    frame_starts = frames_ij[:, 0]
    scene_id = bisect.bisect_right(frame_starts, frame_id) - 1

    scene, frame, agent = z.scenes[scene_id], z.frames[frame_id], z.agents[agent_id]
    return scene, (frame, frame_id + 1), agent

In [None]:
# from multiprocessing import Pool
# pool = Pool(2)

In [None]:
# %%time

# scene_data = pool.map(get_scene, agent_ids)
# len(scene_data)

In [None]:
agent_id = agent_ids[np.random.choice(len(agent_ids))]
scene,(frame,frame_id), agent = get_scene(agent_id)

In [None]:
scene

In [None]:
agent["track_id"]

In [None]:
scene.dtype

In [None]:
frame.dtype

In [None]:
agent.dtype

In [None]:
# Dataset wrapper around the opened zarr store, configured with the history
# and agent settings read from hparams plus the inference-only overrides
# (HFORWARD / NFRAMES) defined in this notebook.
dataset_kwargs = dict(
    nframes=NFRAMES,
    frame_stride=FRAME_STRIDE,
    hbackward=HBACKWARD,
    hforward=HFORWARD,
    max_agents=MAX_AGENTS,
    agent_feature_dim=AGENT_FEATURE_DIM,
)
dt = CustomLyftDataset(z, **dataset_kwargs)
dt.nread_frames

In [None]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Describe the network to instantiate: the class is looked up by name in the
# pointnet module and receives the kwargs stored in the hparams file.
model_name = "ListStackedPointNetWithProjection_V3"
model_kwargs = hparams["model_options"]["kwargs"]
model_cls = getattr(pointnet, model_name)
model_options = GenericConfig(
    "model",
    instance_name=model_name,
    kwargs=model_kwargs,
    context={"model": model_cls},
)
model_options

In [None]:
def load_model(path):
    """Build a model from ``model_options`` and load checkpoint weights into it.

    Parameters
    ----------
    path : str
        Checkpoint file; its ``"state_dict"`` entry is loaded into the model.
        ``torch.load`` unpickles the file, so only trusted checkpoints
        should be passed.

    Returns
    -------
    The model, moved to ``DEVICE`` and switched to eval mode. The result of
    ``load_state_dict`` is printed so key mismatches are visible.
    """
    model = BaseLightningModule(model_options).to(DEVICE)
    checkpoint = torch.load(path, map_location=DEVICE)
    load_report = model.load_state_dict(checkpoint["state_dict"])
    print(load_report)
    model.eval()
    return model

In [None]:
# Checkpoints that make up the ensemble, all stored in the same directory.
CHECKPOINT_DIR = "../input/best-submission"
checkpoint_files = [
    "model_epoch15.ckpt",
    "model_epoch45.ckpt",
    "model_epoch46.ckpt",
    "model_epoch47.ckpt",
    "model_epoch50.ckpt",
    "model_epoch51.ckpt",
    "model_epoch52.ckpt",
    "model_epoch53.ckpt",
    "model_epoch54.ckpt",
    "model_epoch55 (1).ckpt",
    "model_epoch56.ckpt",
    "model_epoch57.ckpt",
    "model_epoch58.ckpt",
    "model_epoch63.ckpt",
    "model_epoch73.ckpt",
    "model_epoch89.ckpt",
    "model_epoch95.ckpt",
    "model_epoch99.ckpt",
]
paths = [f"{CHECKPOINT_DIR}/{name}" for name in checkpoint_files]

# Relative ensemble weights, one per entry of `paths` in the same order
# (later checkpoints weigh more); normalised below so they sum to 1.
weights = np.array([1,1,1,2,2,3,3,3,4,4,4,4,9,9,9,27,27,27], dtype=np.float32)

# Fail fast: a paths/weights mismatch must surface before the expensive
# checkpoint loading, not after it.
assert len(paths) == len(weights), (
    f"{len(paths)} checkpoints but {len(weights)} ensemble weights"
)

weights /= weights.sum()
print(weights)

nets = [load_model(path) for path in paths]
assert len(paths) == len(weights) == len(nets)
nets[0]

In [None]:
def make_colnames():
    """Return the 305 column names of the submission frame.

    Layout: ``timestamp``, ``track_id``, the three ``conf_*`` columns, then
    for each of the 50 steps the x/y coordinate names of modes 0, 1 and 2
    (``coord_x0{i}``, ``coord_y0{i}``, ``coord_x1{i}``, ...).
    """
    header = ["timestamp", "track_id"]
    confidences = ["conf_0", "conf_1", "conf_2"]
    coords = [
        f"coord_{axis}{mode}{step}"
        for step in range(50)
        for mode in range(3)
        for axis in ("x", "y")
    ]
    return header + confidences + coords

In [None]:
def get_agent_data(agent_id):
    """Assemble the model inputs and bookkeeping arrays for one agent.

    The agent's scene and frame are located with ``get_scene``; frames are
    then read from the dataset starting ``HBACKWARD`` before the frame number
    that ``get_scene`` returns, restricted to this agent's track.

    Returns
    -------
    tuple
        ``(X, X_light, X_track, X_track_id, X_timestamp)`` where the last
        three are length-1 int64 arrays: the encoded label of the track
        (``le.labels[track_id]``), the raw track id and the frame timestamp.
    """
    scene, (frame, frame_id), agent = get_scene(agent_id)
    track_id = agent["track_id"]

    X, X_light, _, _, le = dt.read_frames(
        scene=scene,
        start=frame_id - HBACKWARD,
        white_tracks=[track_id],
        encoder=True,
    )

    def as_int64(value):
        # Wrap a scalar in a 1-element array so batches can be concatenated.
        return np.array([value], dtype=np.int64)

    X_track_id = as_int64(track_id)
    X_track = as_int64(le.labels[track_id])
    X_timestamp = as_int64(frame["timestamp"])
    return X, X_light, X_track, X_track_id, X_timestamp
    
    
def collate(inp):
    """Merge per-sample tuples of arrays into one tuple of batched tensors.

    ``inp`` is an iterable of equally long tuples of numpy arrays. Arrays in
    the same tuple position are concatenated along axis 0 and converted with
    ``torch.from_numpy``.

    NOTE: this definition replaces the ``collate`` name imported from
    ``data.dataset`` earlier in the notebook.
    """
    return tuple(
        torch.from_numpy(np.concatenate(field))
        for field in zip(*inp)
    )


def get_agents_data(agent_ids):
    """Load the inputs of several agents through the joblib pool and batch them.

    One ``get_agent_data`` job is created per agent id; the jobs are run by
    the shared ``pool`` (with a transient progress bar) and the per-agent
    tuples are merged with ``collate``.
    """
    jobs = list(map(delayed(get_agent_data), agent_ids))
    per_agent = pool(tqdm(jobs, leave=False))
    return collate(per_agent)

In [None]:
# get_agent_data(agent_ids[0])

In [None]:
x, x_light, x_track,x_track_id, x_timestamp = get_agents_data(agent_ids[:10])
x.shape, x_light.shape, x_track.shape, x_track_id.shape

In [None]:
@torch.no_grad()
def predict_one_shot(x, x_light, x_track):
    """Run every network of the ensemble on one batch and average the results.

    Parameters
    ----------
    x, x_light : torch.Tensor
        Batched model inputs; passed to each net as the tuple ``(x, x_light)``.
    x_track : torch.Tensor
        Per-sample integer index selecting, for each sample, which entry
        along the second axis of the network outputs to keep.

    Returns
    -------
    (c, y) : tuple of numpy.ndarray
        Weighted sums over the ensemble (using the module-level ``weights``)
        of the softmaxed confidences and of the selected trajectories.
        Presumably shaped ``(batch, 3)`` and ``(batch, 300)`` -- TODO confirm
        against the model definition.

    Raises
    ------
    ValueError
        If any step of the forward pass or the aggregation fails; the
        original exception is attached as ``__cause__``.
    """
    assert len(x) == len(x_light) == len(x_track)

    x, x_light, x_track = x.to(DEVICE), x_light.to(DEVICE), x_track.to(DEVICE)

    try:
        confidences, trajectories = [], []
        for net in nets:
            _c, _y = net((x, x_light))

            # Build the row index on the output's device so both index
            # tensors live next to the data they index.
            rows = torch.arange(len(_c), device=_c.device)

            confidences.append(torch.softmax(_c[rows, x_track], dim=1).cpu().numpy())
            trajectories.append(_y[rows, x_track].cpu().numpy())

        # (n_nets, 1, 1) so each network's weight broadcasts over its batch.
        w = weights[:, None, None]
        c = np.sum(np.stack(confidences) * w, axis=0)
        y = np.sum(np.stack(trajectories) * w, axis=0)
    except Exception as e:
        # No placeholder prediction is produced on failure: the error is
        # always propagated, now with a message naming the original problem.
        raise ValueError(
            f"ensemble prediction failed for a batch of {len(x)} samples: {e!r}"
        ) from e
    return c, y



@torch.no_grad()
def predict(agent_ids, batch_size=200):
    """Predict for all ``agent_ids`` in consecutive chunks of ``batch_size``.

    Each chunk is loaded with ``get_agents_data`` and scored with
    ``predict_one_shot``; the per-chunk outputs are stacked in order.

    Returns
    -------
    tuple
        ``(timestamps, track_ids, c, y)`` covering every requested agent.
    """
    collected = {"c": [], "y": [], "track_id": [], "timestamp": []}

    chunk_starts = list(range(0, len(agent_ids), batch_size))
    for start in tqdm(chunk_starts):
        chunk = agent_ids[start:start + batch_size]
        x, x_light, x_track, chunk_track_id, chunk_timestamp = get_agents_data(chunk)

        chunk_c, chunk_y = predict_one_shot(x, x_light, x_track)
        collected["c"].append(chunk_c)
        collected["y"].append(chunk_y)
        collected["track_id"].append(chunk_track_id)
        collected["timestamp"].append(chunk_timestamp)

    c = np.vstack(collected["c"])
    y = np.vstack(collected["y"])
    x_track_id = np.concatenate(collected["track_id"])
    x_timestamp = np.concatenate(collected["timestamp"])

    return x_timestamp, x_track_id, c, y

In [None]:
timestamp, track_id, c, y = predict(agent_ids, batch_size=2000)
timestamp.shape, track_id.shape, c.shape, y.shape

In [None]:
pd.options.display.max_columns=305

In [None]:
y.shape

In [None]:
c.shape

In [None]:
%%time

cols = make_colnames()
df = pd.DataFrame(columns=cols)
df[cols[0]] = timestamp
df[cols[1]] = track_id
df[cols[2:5]] = c
df[cols[5:]] = y
sel = ~(df[cols[5:]]==0).all(1)
df = df[sel]
df.shape

In [None]:
df.head(20)

In [None]:
xcols = list(it.chain(*[[f"coord_x0{i}", f"coord_x1{i}", f"coord_x2{i}"] for i in range(50)]))
df[["timestamp", "track_id", 'conf_0', 'conf_1', 'conf_2',]+xcols].sample(20)

In [None]:
df.to_csv("submission.csv", index=False)

In [None]:
xpred = y[:, ::2].reshape(y.shape[0], -1, 3)
xpred[sel][:3, :10].round(3)

In [None]:
import shutil
shutil.copy(HPARAM_PATH, "hparams.yaml")