In [None]:
from glob import glob
import numpy as np
import pandas as pd
import math
import scipy.interpolate
import scipy.sparse
from scipy.sparse import linalg
from tqdm import tqdm
from joblib import Parallel,delayed
import yaml

from scipy.spatial import distance
import matplotlib.pyplot as plt
import seaborn as sns

# Do not truncate long cell values when DataFrames are displayed.
pd.set_option('display.max_colwidth', None)

In [None]:
def compute_l2dist(sub1, sub2):
    """Return the mean Euclidean distance between the (x, y) points of two frames.

    Rows are compared positionally: the raw ``.values`` arrays of the ``x`` and
    ``y`` columns are subtracted, so index labels are ignored. Both frames are
    expected to have the same number of rows.
    """
    dx = sub1.x.values - sub2.x.values
    dy = sub1.y.values - sub2.y.values
    per_row = np.sqrt(dx**2 + dy**2)
    return np.mean(per_row)

***

In [None]:
#submission = pd.read_csv("../input/iln-cnn1d-dset2/cnn1d-ds2-30f-5lt-30seq/submission.csv")
# Load one submission file per model (CNN1D, TabNet, LightGBM); they are blended below.
sub_cnn1d = pd.read_csv("../input/iln-cnn1d-dset2/cnn1d-ds2-30f-5lt-30seq-pl/submission.csv")
sub_tabnet = pd.read_csv("../input/iln-tabnet-dset2/tabnet-ds2-30f-5lt-30seq-pl/submission.csv")
sub_lgbm = pd.read_csv("../input/iln-lgbm-dset2/submission.csv")

In [None]:
# Blend the three model submissions with fixed weights (order: cnn1d, tabnet, lgbm).
submission = sub_cnn1d.copy(deep=True)
weights = [0.195101, 0.25560842, 0.54929058]
print(np.sum(weights))
# The weights must form a convex combination, otherwise the blended
# coordinates are scaled away from the individual predictions.
assert np.isclose(np.sum(weights), 1.0), "blend weights must sum to 1"

# The weighted sums below are aligned by row index only, so every file must
# list the same site_path_timestamp keys in the same order; a mismatch would
# silently mix predictions that belong to different points.
for _other_sub in (sub_tabnet, sub_lgbm):
    assert sub_cnn1d["site_path_timestamp"].equals(_other_sub["site_path_timestamp"]), \
        "submissions to blend do not share the same row order / keys"

submission["x"] = weights[0]*sub_cnn1d["x"] + weights[1]*sub_tabnet["x"] + weights[2]*sub_lgbm["x"]
submission["y"] = weights[0]*sub_cnn1d["y"] + weights[1]*sub_tabnet["y"] + weights[2]*sub_lgbm["y"]

assert submission[["x","y"]].isna().sum(axis=0).sum() == 0

submission.to_csv("./submission.csv", index=False)
submission

In [None]:
# Break the composite "site_path_timestamp" key into its three
# underscore-separated fields; the timestamp is stored as an integer.
_key_parts = submission["site_path_timestamp"].str.split("_")
submission["site"] = _key_parts.str[0]
submission["path"] = _key_parts.str[1]
submission["timestamp"] = _key_parts.str[2].astype(int)
submission

In [None]:
# Distinct site ids present in the submission.
target_sites = submission.site.unique()
# Per-path displacement predictions (presumably IMU-based, judging by the dataset name).
# Later cells filter this frame by site/path and read its timestamp, dx and dy columns.
deltas = pd.read_csv("../input/iln-imu-predictions/delta_preds_test.csv")

In [None]:
# Floor label -> integer level. Basements are negative and both spellings of
# the first floor ("F1" and "1F") map to level 0.
floor_map = {"B2": -2, "B1": -1}
floor_map.update({f"F{n}": n - 1 for n in range(1, 10)})
floor_map.update({f"{n}F": n - 1 for n in range(1, 10)})

# Known waypoints, reduced to the unique (site, floor, x, y) combinations.
# Floor labels that are not keys of floor_map become NaN after the mapping.
waypoints = (
    pd.read_csv("../input/iln-waypoints/waypoints.csv")
    .assign(floor=lambda frame: frame["floor"].map(floor_map))
    .loc[:, ["site", "floor", "x", "y"]]
    .drop_duplicates(ignore_index=True)
)

In [None]:
#waypoints_aug = pd.read_csv("../input/indoor-navigation-hand-labeled-waypoints/waypoint_by_hand.csv")
#waypoints = pd.concat([waypoints, waypoints_aug], axis=0, ignore_index=True)
#waypoints

In [None]:
# Store the floor under "_floor" in both frames (presumably so the column does
# not collide with the local variable `floor` used in later query strings).
submission = submission.rename(columns={"floor": "_floor"})
waypoints = waypoints.rename(columns={"floor": "_floor"})

In [None]:
# Known start/end points per path. Later cells select rows by `path` and read
# the st_point_x / st_point_y / en_point_x / en_point_y columns.
leaks = pd.read_csv("../input/ilnleaksbackup/leaks.csv")
leaks

In [None]:
# Sanity check: the floor stored in the submission must agree with the
# corrected floor prediction of every (site, path) found in both frames.
floors = pd.read_csv("../input/iln-floor-corrections/floor_predictions.csv")
floors = floors.rename(columns={"floor": "floor_corr"})
mrg = submission.merge(floors, how="inner", on=["site", "path"])
# NOTE(review): with an inner join, paths missing from the floors file are
# skipped and therefore not checked.
assert mrg["_floor"].eq(mrg["floor_corr"]).all()

***
### iterative correction

In [None]:
def plot_subs(sub_raw, sub_corr, waypoints):
    """Plot one path before and after correction, side by side.

    Parameters
    ----------
    sub_raw : DataFrame
        Predictions before correction. Must provide ``site``, ``_floor``, ``x``
        and ``y``; a ``path`` column, when present, supplies the printed label.
    sub_corr : DataFrame
        Predictions after correction (``x`` and ``y`` are plotted).
    waypoints : DataFrame
        Waypoints with ``site``, ``_floor``, ``x`` and ``y`` columns.

    Both panels use the same tick range: the bounding box of ``sub_raw`` padded
    by 15 units. Waypoints of the same site and floor inside that box are drawn
    as red stars.
    """
    site = sub_raw.site.values[0]
    floor = sub_raw._floor.values[0]
    # Read the label from the data itself; the function used to print a global
    # `path` variable that only existed while the main loop was running.
    if "path" in sub_raw.columns:
        path_label = sub_raw["path"].values[0]
    else:
        path_label = "<unknown path>"

    x_min = math.ceil(sub_raw.x.min() - 15)
    x_max = math.floor(sub_raw.x.max() + 15)
    y_min = math.ceil(sub_raw.y.min() - 15)
    y_max = math.floor(sub_raw.y.max() + 15)
    near_wps = waypoints.query("site==@site & _floor==@floor").query("@x_min <= x <= @x_max").query("@y_min <= y <= @y_max")

    print(f"\n{path_label}")
    plt.figure(figsize=(20,9))
    # Left panel: raw predictions, right panel: corrected predictions.
    for panel_no, frame in enumerate((sub_raw, sub_corr), start=1):
        plt.subplot(1,2,panel_no)
        plt.plot(frame.x, frame.y, "o--", label="pred")
        plt.grid()
        plt.legend(loc="best")
        plt.scatter(near_wps.x, near_wps.y, marker="*", s=150, c="r")
        plt.xlabel("x")
        plt.ylabel("y")
        plt.xticks(np.arange(x_min, x_max+1, 5.0))
        plt.yticks(np.arange(y_min, y_max+1, 5.0))

    plt.show()

In [None]:
def add_closest_waypoint(preds, waypoints):
    """Attach the nearest waypoint and its distance to every prediction.

    Returns a copy of ``preds`` with refreshed ``x_wp``, ``y_wp`` and ``dist``
    columns. ``preds`` needs ``x``, ``y`` and a boolean ``snap`` column and is
    expected to carry a 0..n-1 index, because row positions are compared with
    index labels.

    Rows flagged ``snap`` simply get their nearest waypoint. A non-snapped row
    is not allowed to pick the waypoint that is nearest to a directly adjacent
    snapped row (previous or next); that waypoint's distance is set to infinity
    before the minimum is taken.
    """
    out = preds.copy().drop(columns=["x_wp", "y_wp"], errors="ignore")

    wp_xy = waypoints[["x", "y"]].values
    dist_matrix = distance.cdist(out[["x", "y"]], wp_xy)
    snapped = set(out.query("snap").index.values)

    nearest_idx = []
    nearest_dist = []
    for row in range(len(out)):
        if row not in snapped:
            # Rows of snapped predictions are never modified, so the argmin
            # below is the waypoint that snapped neighbour actually uses.
            for neighbour in (row - 1, row + 1):
                if neighbour in snapped:
                    taken = np.argmin(dist_matrix[neighbour, :])
                    dist_matrix[row, taken] = np.inf
        best = np.argmin(dist_matrix[row, :])
        nearest_idx.append(best)
        nearest_dist.append(dist_matrix[row, best])

    chosen_wp = wp_xy[nearest_idx]
    out["x_wp"] = chosen_wp[:, 0]
    out["y_wp"] = chosen_wp[:, 1]
    out["dist"] = np.array(nearest_dist)

    return out

def s2g_by_idx(pred, idx):
    """Flag row ``idx`` as snapped and, when allowed, move it onto its waypoint.

    Works on a copy of ``pred``, which must already hold ``x_wp`` / ``y_wp``.
    The row is always marked ``snap=True``. Its ``x`` / ``y`` are replaced by
    the waypoint coordinates unless a directly adjacent row (``idx-1`` or
    ``idx+1``) is already snapped to that same waypoint, so two consecutive
    predictions never collapse onto one waypoint.
    """
    out = pred.copy()
    out.loc[idx, "snap"] = True

    target_x = out.loc[idx, "x_wp"]
    target_y = out.loc[idx, "y_wp"]
    same_wp = (out["x_wp"] == target_x) & (out["y_wp"] == target_y) & out["snap"]
    holders = out.index[same_wp]

    neighbour_holds_it = (idx - 1 in holders) or (idx + 1 in holders)
    if not neighbour_holds_it:
        out.loc[idx, "x"] = target_x
        out.loc[idx, "y"] = target_y
    return out

def correct_path(wp_preds, delta_preds, fixed_idx, lambda1=0.1, lambda2=0.5, lambda3=2):
    """Smooth a predicted path using relative displacement predictions.

    Solves the sparse linear system ``(A + D^T B D) X = A P + D^T B d`` where
    ``P`` are the predicted positions, ``d`` the displacements between
    consecutive prediction timestamps, ``D`` the forward-difference operator,
    ``A = diag(alpha)`` and ``B = diag(beta)``. These are the normal equations
    of ``sum_i alpha_i |X_i - P_i|^2 + sum_i beta_i |(X_{i+1} - X_i) - d_i|^2``.

    Parameters
    ----------
    wp_preds : DataFrame with ``timestamp``, ``x`` and ``y`` columns.
    delta_preds : DataFrame with ``timestamp``, ``dx`` and ``dy`` columns.
    fixed_idx : positions whose alpha is set to 1e16, which pins those points
        to their current coordinates; ``None`` pins nothing.
    lambda1 : weight of the absolute-position term (alpha).
    lambda2, lambda3 : shape the decay of the displacement weight with the
        time gap: ``beta = (1 - lambda1) / (1 + lambda2 * dt * 1e-3) ** lambda3``.

    Returns a copy of ``wp_preds`` whose ``x`` / ``y`` hold the solution.
    """
    ts = wp_preds['timestamp'].values
    xy_obs = wp_preds[['x', 'y']].values
    rel = delta_preds.loc[:, ["timestamp", "dx", "dy"]].values

    # Pad the displacement series with a zero row at t=0 and, when the last
    # prediction lies beyond the last delta, with a zero row at that timestamp,
    # so the interpolation below covers every prediction timestamp.
    pieces = [np.array([[0, 0, 0]]), rel]
    if ts[-1] > rel[-1, 0]:
        pieces.append(np.array([[ts[-1], 0, 0]]))
    rel = np.concatenate(pieces)

    # Cumulative displacement sampled at the prediction timestamps, then
    # differenced to obtain the displacement between consecutive predictions.
    rel_t = rel[:, 0]
    cumulative = np.cumsum(rel[:, 1:3], axis=0)
    cumulative_at_ts = scipy.interpolate.interp1d(rel_t, cumulative, axis=0)(ts)
    step_xy = np.diff(cumulative_at_ts, axis=0)

    n_pts = xy_obs.shape[0]
    dt = np.diff(ts)

    alpha = lambda1 * np.ones(n_pts)
    beta = (1-lambda1) * np.ones(n_pts-1) / (1 + lambda2 * dt * 1e-3)**lambda3

    if fixed_idx is not None:
        alpha[fixed_idx] = 1e16

    A = scipy.sparse.spdiags(alpha, [0], n_pts, n_pts)
    B = scipy.sparse.spdiags(beta, [0], n_pts-1, n_pts-1)
    # Forward difference: row i holds -1 at column i and +1 at column i+1.
    D = scipy.sparse.spdiags(np.stack([-np.ones(n_pts), np.ones(n_pts)]), [0, 1], n_pts-1, n_pts)

    Q = A + (D.T @ B @ D)
    c = (A @ xy_obs) + (D.T @ (B @ step_xy))
    solved = scipy.sparse.linalg.spsolve(Q, c)

    output = wp_preds.copy()
    output["x"] = solved[:, 0]
    output["y"] = solved[:, 1]

    return output

def iterative_correction(preds, near_waypoints, deltas, influence=1, max_threshold=5, cm_params=None):
    """Greedily snap predictions to waypoints, re-smoothing the path after each snap.

    On every iteration the non-snapped prediction closest to a waypoint is
    snapped (see ``s2g_by_idx``), the path is re-solved by ``correct_path``
    with all snapped rows pinned, and the non-snapped rows within
    ``influence`` positions of the snapped row take the re-solved coordinates.
    The loop ends when every row is snapped or the closest remaining distance
    exceeds ``max_threshold``.

    Parameters
    ----------
    preds : DataFrame of one path (``x``, ``y``, ``timestamp``); it is
        re-indexed to 0..n-1 on a copy, the input is left untouched.
    near_waypoints : DataFrame with candidate waypoint ``x`` / ``y``.
    deltas : DataFrame passed through to ``correct_path``.
    influence : number of neighbouring rows on each side to update.
    max_threshold : largest prediction-to-waypoint distance that is snapped.
    cm_params : keyword arguments for ``correct_path``; ``None`` means use
        that function's defaults.

    Returns the corrected frame including the ``snap`` flag and, once at least
    one waypoint search ran, the ``x_wp`` / ``y_wp`` / ``dist`` columns.
    """
    # The default None cannot be unpacked with ** (TypeError), so fall back
    # to an empty mapping.
    cm_params = {} if cm_params is None else cm_params

    df_corr = preds.reset_index(drop=True).copy()
    df_corr["snap"] = False

    # Without candidate waypoints nothing can be snapped; the nearest-waypoint
    # search would otherwise fail on an empty distance row.
    if len(near_waypoints) == 0:
        return df_corr

    while True:
        if df_corr.snap.all():
            break
        df_corr = add_closest_waypoint(df_corr, near_waypoints)
        free_rows = df_corr.query("not snap")
        target_idx = free_rows.dist.idxmin()
        threshold = free_rows.dist.min()
        if threshold > max_threshold:
            break

        df_corr = s2g_by_idx(df_corr, target_idx)
        fixed_idx = df_corr.query("snap").index
        imu_corrected = correct_path(df_corr, deltas, fixed_idx, **cm_params)

        # Only the not-yet-snapped neighbours of the snapped row are updated.
        idx_to_correct = np.arange(max(target_idx-influence,0), min(target_idx+influence+1,len(preds)))
        idx_to_correct = set(idx_to_correct) - set(fixed_idx)
        idx_to_correct = list(idx_to_correct)

        df_corr.loc[idx_to_correct,"x"] = imu_corrected.loc[idx_to_correct,"x"]
        df_corr.loc[idx_to_correct,"y"] = imu_corrected.loc[idx_to_correct,"y"]

    return df_corr

def _copy_unsnapped_neighbours(frame, corrected, target_idx, radius):
    """Copy x/y from ``corrected`` into the non-snapped rows of ``frame`` within ``radius`` of ``target_idx`` and flag them ``moved`` (in place)."""
    first = max(target_idx - radius, 0)
    stop = min(target_idx + radius + 1, len(frame))
    rows = list(set(np.arange(first, stop)) - set(frame.query("snap").index))
    frame.loc[rows, "x"] = corrected.loc[rows, "x"]
    frame.loc[rows, "y"] = corrected.loc[rows, "y"]
    frame.loc[rows, "moved"] = True


def leak_correction(pred, leak, near_waypoints, deltas, influence=2, max_threshold=2, cm_params=None):
    """Pin a path to its known start/end points and propagate that correction.

    Stage 1: for every row of ``leak``, a start point with non-negative
    ``st_point_x`` / ``st_point_y`` overwrites the first prediction and an end
    point with non-negative ``en_point_x`` / ``en_point_y`` overwrites the last
    one; those rows are flagged ``snap``. The path is re-solved by
    ``correct_path`` with the pinned rows fixed, and the non-snapped rows
    within ``influence`` positions of each pinned row take the re-solved
    coordinates and are flagged ``moved``.

    Stage 2: among rows that are ``moved`` but not yet snapped, the one closest
    to a waypoint is snapped while its distance is below ``max_threshold``;
    after every snap the path is re-solved and the direct neighbours (one row
    on each side) are updated and flagged ``moved`` in turn.

    Parameters
    ----------
    pred : DataFrame of one path (``x``, ``y``, ``timestamp``); re-indexed to
        0..n-1 on a new frame, the input is left untouched.
    leak : DataFrame with the start/end point columns named above.
    near_waypoints : DataFrame with candidate waypoint ``x`` / ``y``.
    deltas : DataFrame passed through to ``correct_path``.
    influence : neighbourhood radius used in stage 1.
    max_threshold : stage 2 stops at the first distance >= this value.
    cm_params : keyword arguments for ``correct_path``; ``None`` means use
        that function's defaults.

    Returns the corrected frame with ``snap`` and ``moved`` flags.
    """
    # The default None cannot be unpacked with ** (TypeError), so fall back
    # to an empty mapping.
    cm_params = {} if cm_params is None else cm_params

    pred_corr = pred.reset_index(drop=True)
    pred_corr["snap"] = False
    pred_corr["moved"] = False
    target_indexes = list()
    last_idx = len(pred_corr) - 1

    for _, row in leak.iterrows():
        # Comparisons with NaN are False, so missing coordinates are skipped
        # just like negative ones.
        if (row.st_point_x >= 0) and (row.st_point_y >= 0):
            pred_corr.loc[0, "x"] = row.st_point_x
            pred_corr.loc[0, "y"] = row.st_point_y
            pred_corr.loc[0, "snap"] = True
            target_indexes.append(0)

        if (row.en_point_x >= 0) and (row.en_point_y >= 0):
            pred_corr.loc[last_idx, "x"] = row.en_point_x
            pred_corr.loc[last_idx, "y"] = row.en_point_y
            pred_corr.loc[last_idx, "snap"] = True
            target_indexes.append(last_idx)

    fixed_idx = pred_corr.query("snap").index.values
    imu_corrected = correct_path(pred_corr, deltas, fixed_idx, **cm_params)

    for target_idx in target_indexes:
        _copy_unsnapped_neighbours(pred_corr, imu_corrected, target_idx, influence)

    # Without candidate waypoints stage 2 has nothing to snap to; the
    # nearest-waypoint search would otherwise fail on an empty distance row.
    if len(near_waypoints) == 0:
        return pred_corr

    # Stage 2 always works with the direct neighbours only, independent of the
    # `influence` argument used above.
    snap_influence = 1

    while True:
        pred_corr = add_closest_waypoint(pred_corr, near_waypoints)
        candidates = pred_corr.query("moved and not snap")
        if len(candidates) == 0:
            break
        target_idx = candidates.dist.idxmin()
        threshold = candidates.dist.min()
        if threshold >= max_threshold:
            break

        pred_corr = s2g_by_idx(pred_corr, target_idx)

        fixed_idx = pred_corr.query("snap").index
        imu_corrected = correct_path(pred_corr, deltas, fixed_idx, **cm_params)
        _copy_unsnapped_neighbours(pred_corr, imu_corrected, target_idx, snap_influence)

    return pred_corr

In [None]:
# Keyword arguments forwarded to correct_path by the correction routines.
cm_params = dict(
    lambda1=0.11931087505938183,
    lambda2=0.9788240879660587,
    lambda3=1.1486681729781796,
)

# When True, the main loop draws a before/after figure for every path.
MAKE_PLOTS = False

In [None]:
corrected_preds = []

# Correct every path on its own: gather the waypoints of the same site and
# floor inside the path's bounding box (padded by 20 units), apply the leak
# correction when start/end points are known, then snap iteratively.
for path, pred in submission.groupby("path", sort=False):

    site = pred.site.values[0]
    floor = pred._floor.values[0]

    x_min = math.ceil(pred.x.min() - 20)
    x_max = math.floor(pred.x.max() + 20)
    y_min = math.ceil(pred.y.min() - 20)
    y_max = math.floor(pred.y.max() + 20)
    in_window = (
        (waypoints["site"] == site)
        & (waypoints["_floor"] == floor)
        & waypoints["x"].between(x_min, x_max)
        & waypoints["y"].between(y_min, y_max)
    )
    near_wps = waypoints[in_window]
    path_deltas = deltas[(deltas["site"] == site) & (deltas["path"] == path)]
    leak = leaks[leaks["path"] == path].reset_index(drop=True)

    if not leak.empty:
        pred = leak_correction(pred, leak, near_wps, path_deltas, influence=2, max_threshold=2, cm_params=cm_params)
    pred_corr = iterative_correction(pred, near_wps, path_deltas, influence=1, max_threshold=5, cm_params=cm_params)
    corrected_preds.append(pred_corr)

    if MAKE_PLOTS:
        plot_subs(pred, pred_corr, waypoints)

In [None]:
# Stack the per-path results into a single frame with a fresh 0..n-1 index.
submission = pd.concat(corrected_preds, ignore_index=True)

In [None]:
# Strip the helper columns added during correction and restore the "floor" name.
cols_to_drop = ["x_wp","y_wp","snap","moved","dist", "site","path","timestamp"]
# errors="ignore": "moved" only exists when at least one path went through
# leak_correction, and the cell stays safe to re-run once the columns are gone.
submission = submission.drop(columns=cols_to_drop, errors="ignore")
# NOTE(review): no code visible in this notebook creates x_hat / y_hat, so
# those two entries are a no-op unless such columns exist.
submission = submission.rename(columns={"_floor":"floor","x_hat":"x", "y_hat":"y"})

In [None]:
# Re-order the corrected predictions to match the sample submission and save them.
sub_raw = pd.read_csv("../input/indoor-location-navigation/sample_submission.csv")
submission_ic = pd.merge(sub_raw.loc[:,["site_path_timestamp"]], submission,
                         how="inner", on="site_path_timestamp", validate="one_to_one")
# An inner merge silently drops keys that are missing on either side, so make
# sure every requested row is present before the file is written.
assert len(submission_ic) == len(sub_raw), "corrected submission is missing rows of the sample submission"
assert submission_ic[["x","y"]].notna().all().all(), "corrected submission contains NaN coordinates"
submission_ic.to_csv("submission_ic.csv", index=False)
submission_ic

***