In [None]:
import sys
import os
import pickle
import multiprocessing
import numpy as np
import pandas as pd
import scipy.sparse
import scipy.sparse.linalg
from tqdm import tqdm

In [None]:
# Kaggle dataset directory that holds the auxiliary inputs for this notebook.
SUPPLEMENTALS_DIR = '/kaggle/input/indoor-supplementals-for-postprocessing'

# Coordinate columns that get blended between submissions.
XY = ['x', 'y']

# Per-path sensor data; `correct_path` looks it up as DELTA[path] and unpacks
# each entry into three items.
# NOTE: pickle.load can execute arbitrary code, so only load trusted datasets.
with open(f'{SUPPLEMENTALS_DIR}/delta_using_device_id_v2.pkl', 'rb') as f:
    DELTA = pickle.load(f)

# Sorted and re-indexed by `site_path_timestamp` so that its rows line up
# positionally with the submissions loaded by `read_sub`.
CONFIDENCE = (
    pd.read_csv(f'{SUPPLEMENTALS_DIR}/wifi_confidence.csv')
    .sort_values('site_path_timestamp')
    .reset_index(drop=True)
)

# Point tables filtered by their `site` / `floor` columns downstream.
WAYPOINTS = pd.read_csv(f'{SUPPLEMENTALS_DIR}/waypoint.csv')
EXTRA_GRID_POINTS = pd.read_csv(
    '/kaggle/input/indoor-extra-grid-points/extra_grid_points_v2.csv'
)
EXTRA_HALLWAY_POINTS = pd.read_csv(
    '/kaggle/input/indoor-extra-hallway-points/extra_hallway_points.csv'
)

In [None]:
from dataclasses import dataclass

@dataclass
class Waypoint:
    """A waypoint coordinate array paired with a boolean ``snapped`` flag.

    NOTE(review): nothing visible in this notebook reads ``snapped``; its
    intended meaning should be confirmed before relying on it.
    """
    # Coordinate array of the waypoint; presumably a length-2 [x, y] vector — TODO confirm.
    arr: np.ndarray
    # Defaults to False; presumably marks that a prediction was snapped onto this waypoint — TODO confirm.
    snapped: bool = False

def find_nearest_waypoints(xy_hat, waypoints):
    """Return, for every query point, the closest candidate point.

    Parameters
    ----------
    xy_hat : np.ndarray, shape (N, 2)
        Query positions.
    waypoints : np.ndarray, shape (M, 2)
        Candidate positions to choose from.

    Returns
    -------
    np.ndarray, shape (N, 2)
        Row ``i`` is the candidate with the smallest squared Euclidean
        distance to ``xy_hat[i]``; on ties the lowest candidate index wins.
    """
    n_points = xy_hat.shape[0]
    nearest = np.zeros((n_points, 2))
    for row, point in enumerate(xy_hat):
        # Squared distances are enough for an argmin; no sqrt needed.
        offsets = waypoints - point
        sq_dist = np.sum(offsets ** 2, axis=1)
        best = np.argmin(sq_dist)
        nearest[row, :] = waypoints[best, :]
    return nearest

def find_k_nearest_waypoints(xy_hat, waypoints, k):
    """Return the ``k`` nearest waypoints of every query point, closest first.

    Parameters
    ----------
    xy_hat : np.ndarray, shape (N, 2)
        Query positions.
    waypoints : np.ndarray, shape (M, 2)
        Candidate positions to choose from.
    k : int
        Number of neighbours to return per query point; must not exceed M.

    Returns
    -------
    np.ndarray, shape (N, k, 2)
        ``result[i, 0]`` is the nearest waypoint to ``xy_hat[i]``,
        ``result[i, 1]`` the second nearest, and so on. Ties are broken by
        the lowest waypoint index.

    Raises
    ------
    ValueError
        If ``k`` is larger than the number of waypoints.
    """
    N = xy_hat.shape[0]
    M = waypoints.shape[0]
    if k > M:
        raise ValueError(
            f'k={k} nearest waypoints requested but only {M} are available'
        )
    xy_nn = np.zeros((N, k, 2))
    for i in range(N):
        r = np.sum((waypoints - xy_hat[i, :])**2, axis=1)
        # A stable argsort gives the k smallest distances in ascending order.
        # np.argpartition(r, k)[:k] leaves those k entries in arbitrary order
        # and rejects k == M, although callers treat index 0 as the nearest.
        j = np.argsort(r, kind='stable')[:k]
        xy_nn[i] = waypoints[j]
    return xy_nn

# def find_k_nearest_waypoints(xy_hat, waypoints, waypoints2, k):
#     N = xy_hat.shape[0]
#     ret = []
#     for i in range(N):
#         r = np.sum((waypoints - xy_hat[i, :])**2, axis=1)
#         indices = np.argpartition(r, k)[0:k]
#         found = []
#         for idx in indices:
#             found.append(waypoints2[idx])
#         ret.append(found)
#     return ret

In [None]:
def correct_path(args, retry=False):
    """Refine the predicted positions of one path using sensor deltas and map points.

    The function solves a sequence of sparse weighted least-squares problems
    that combine three terms:

    * the absolute predictions ``xy_hat`` (weight ``alpha = std**-2``),
    * the relative displacements between consecutive timestamps taken from
      ``DELTA[path]`` (weight ``beta``),
    * attraction towards the nearest waypoint / extra grid point of the
      path's site and floor (weight ``gamma``).

    The share of the map term is raised step by step while the share of the
    absolute predictions is lowered, and finally selected points are snapped
    onto real waypoints.

    Parameters
    ----------
    args : tuple
        ``(path, path_df)`` as produced by ``DataFrame.groupby('path')``.
        ``path_df`` must provide the columns ``site``, ``floor``,
        ``timestamp``, ``x``, ``y``, ``std`` and ``site_path_timestamp``.
    retry : bool, default False
        When true the per-point ``std`` is scaled by 2.0 instead of 0.6,
        which lowers the weight of the absolute predictions.

    Returns
    -------
    pd.DataFrame
        Columns ``site_path_timestamp``, ``floor``, ``x``, ``y`` carrying the
        corrected coordinates, indexed like ``path_df``.
    """
    path, path_df = args
    site  = path_df['site'].iloc[0]
    floor = path_df['floor'].iloc[0]

    #========================================
    # Waypoints
    #========================================
    waypoints = WAYPOINTS[(WAYPOINTS['site']  == site) &
                          (WAYPOINTS['floor'] == floor)]
    waypoints = waypoints[['x', 'y']].values

    extra_grid_points = EXTRA_GRID_POINTS[(EXTRA_GRID_POINTS['site']  == site) &
                                          (EXTRA_GRID_POINTS['floor'] == floor)]
    extra_grid_points = extra_grid_points[['x', 'y']].values

    # Attraction targets for the iterative phase: real waypoints plus the
    # extra grid points. The final snap below uses real waypoints only.
    waypoints_all = np.concatenate([waypoints, extra_grid_points], axis=0)

    #========================================
    # Positions predicted by the machine-learning model
    #========================================
    ref_positions = path_df[['timestamp', 'x', 'y']].values
    T_ref   = ref_positions[:, 0]
    xy_hat  = ref_positions[:, 1:3]
    # Timestamp differences scaled by 1e-3 (presumably ms -> s; confirm).
    delta_t = 1e-3 * np.diff(T_ref)

    #========================================
    # Relative displacement from the acceleration / orientation sensors
    #========================================
    delta_xy_hat, delta_xy_std_hat, _ = DELTA[path]

    #========================================
    # Fuse machine-learning predictions with sensor data
    #========================================
    N = xy_hat.shape[0]
    std_scale = 2.0 if retry else 0.6
    xy_std_hat = std_scale * path_df['std'].values
    alpha = (xy_std_hat)**(-2)
    # Earlier coefficient set, kept for reference:
    # beta  = (0.8722777 + (0.0128752 * delta_t) + (1.0412663 * delta_xy_std_hat))**(-2)
    beta  = (0.8676949 + (0.00874235 * delta_t) + (1.0868441 * delta_xy_std_hat))**(-2)
    A = scipy.sparse.spdiags(alpha, [0], N, N)
    B = scipy.sparse.spdiags( beta, [0], N-1, N-1)
    # (N-1) x N first-difference operator: (D @ x)[i] = x[i+1] - x[i].
    D = scipy.sparse.spdiags(np.stack([-np.ones(N), np.ones(N)]), [0, 1], N-1, N)

    # These products do not depend on the current estimate, so they are
    # computed once and reused by every solve below.
    DtBD       = D.T @ B @ D
    A_xy_hat   = A @ xy_hat
    DtB_delta  = D.T @ (B @ delta_xy_hat)

    # Normal equations of
    #   sum alpha*|x - xy_hat|^2 + sum beta*|D x - delta_xy_hat|^2
    xy_star = scipy.sparse.linalg.spsolve(A + DtBD, A_xy_hat + DtB_delta)

    #========================================
    # Integrate map information
    #========================================
    gamma = (2.5**(-2) * np.ones(N))
    C = scipy.sparse.spdiags(gamma, [0], N, N)

    def blended_step(xy_current, r):
        # One re-solve with weight (1 - r) on the predictions and r on the
        # nearest map point of the current estimate.
        xy_nn = find_nearest_waypoints(xy_current, waypoints_all)
        Q = ((1 - r) * A) + (r * C) + DtBD
        c = ((1 - r) * A_xy_hat) + (r * (C @ xy_nn)) + DtB_delta
        return scipy.sparse.linalg.spsolve(Q, c)

    # Phase 1: ramp the map share up gradually.
    for r in np.arange(0.1, 0.8, 0.05):
        xy_star = blended_step(xy_star, r)
    # Phase 2: ten more iterations at a fixed map share of 0.8.
    for _ in range(10):
        xy_star = blended_step(xy_star, 0.8)
    # Phase 3: drop the absolute predictions entirely; only the map term and
    # the sensor displacements remain, so the system matrix is constant.
    Q_map = C + DtBD
    for _ in range(10):
        xy_nn = find_nearest_waypoints(xy_star, waypoints_all)
        c = (C @ xy_nn) + DtB_delta
        xy_star = scipy.sparse.linalg.spsolve(Q_map, c)

    #========================================
    # Snap onto real waypoints
    #========================================
    # NOTE(review): index 0 is treated as the nearest waypoint and index 1 as
    # the second nearest; this relies on find_k_nearest_waypoints returning
    # neighbours in ascending distance order — confirm.
    xy_nn = find_k_nearest_waypoints(xy_star, waypoints, 2)
    first  = xy_nn[:, 0]
    second = xy_nn[:, 1]

    def calc_r(x1, x2):
        return np.sqrt(np.sum((x1 - x2)**2, axis=-1))

    r01 = calc_r(xy_star, first)
    r02 = calc_r(xy_star, second)
    r12 = calc_r(first, second)
    # Snap when the first waypoint is within 1.5, or within 5.0 while the two
    # waypoints are closer to each other than the point is to the second one.
    snap = (r01 < 1.5) | ((r12 < r02) & (r01 < 5.0))
    xy_star[snap] = first[snap]

    out_df = pd.DataFrame({
        'site_path_timestamp' : path_df['site_path_timestamp'],
        'floor' : path_df['floor'],
        'x' : xy_star[:, 0],
        'y' : xy_star[:, 1],
    })
    return out_df

In [None]:
def detect_error_by_hallway(path, out_df):
    """Flag a corrected path that appears to leave the known hallway points.

    Every pair of consecutive points in ``out_df`` forms a segment. The path
    is flagged when any segment has its start *or* its end more than 3.0 away
    from the nearest hallway point and its midpoint more than 5.0 away.
    Hallway points are the waypoints plus the extra hallway points of the
    path's site and floor.

    Parameters
    ----------
    path : str
        Path identifier; unused here, kept for the caller's interface.
    out_df : pd.DataFrame
        Output of ``correct_path`` with columns ``site_path_timestamp``,
        ``floor``, ``x`` and ``y``.

    Returns
    -------
    bool
        True if at least one segment meets the condition above. A
        single-point path has no segments and is never flagged.
    """
    # The site id is the first '_'-separated field of site_path_timestamp.
    site  = out_df['site_path_timestamp'].iloc[0].split('_')[0]
    floor = out_df['floor'].iloc[0]

    waypoints = WAYPOINTS[(WAYPOINTS['site']  == site) &
                          (WAYPOINTS['floor'] == floor)]
    waypoints = waypoints[['x', 'y']].values

    extra_hallway_points = EXTRA_HALLWAY_POINTS[(EXTRA_HALLWAY_POINTS['site']  == site) &
                                                (EXTRA_HALLWAY_POINTS['floor'] == floor)]
    extra_hallway_points = extra_hallway_points[['x', 'y']].values

    hallway_points_all = np.concatenate([waypoints, extra_hallway_points], axis=0)

    def calc_r(xy_in):
        # Euclidean distance from each point to its nearest hallway point.
        xy_nn = find_nearest_waypoints(xy_in, hallway_points_all)
        r = np.sqrt(np.sum((xy_in - xy_nn)**2, axis=1))
        return r

    xy_hat   = out_df[['x', 'y']].values
    xy_start = xy_hat[0:-1]
    xy_end   = xy_hat[1:]
    xy_mid   = 0.5 * (xy_start + xy_end)

    r_start = calc_r(xy_start)
    r_end   = calc_r(xy_end)
    r_mid   = calc_r(xy_mid)
    # Fixed: the second term used to repeat r_start, so r_end was computed
    # but never checked.
    retval  = (((r_start > 3.0) | (r_end > 3.0)) & (r_mid > 5.0)).any()
    return retval

In [None]:
def correct_path_with_retry(args):
    """Correct one path and redo it in retry mode if the hallway check fails.

    Parameters
    ----------
    args : tuple
        ``(path, path_df)`` pair, forwarded unchanged to ``correct_path``.

    Returns
    -------
    pd.DataFrame
        The first-pass result when ``detect_error_by_hallway`` accepts it,
        otherwise the result of ``correct_path(args, retry=True)``.
    """
    path, _path_df = args
    first_pass = correct_path(args, retry=False)
    if not detect_error_by_hallway(path, first_pass):
        return first_pass
    # Report the offending path, then recompute with retry enabled.
    print('\ndetect error @', path)
    return correct_path(args, retry=True)

In [None]:
def do_blending(sub1, sub2, rate):
    """Blend the coordinate columns of two submissions.

    Parameters
    ----------
    sub1, sub2 : pd.DataFrame
        Submissions containing the columns listed in ``XY``.
    rate : float
        Weight given to ``sub1``; ``sub2`` receives ``1 - rate``.

    Returns
    -------
    pd.DataFrame
        A copy of ``sub1`` whose ``XY`` columns hold the weighted sum; all
        other columns are taken from ``sub1`` unchanged.
    """
    first_part = rate * sub1[XY]
    second_part = (1 - rate) * sub2[XY]
    blended = sub1.copy()
    blended[XY] = first_part + second_part
    return blended

def read_sub(filename):
    """Load a submission CSV and attach the key fields and confidence column.

    The file is sorted by ``site_path_timestamp`` and re-indexed from 0. The
    key is then split on ``'_'`` into ``site``, ``path`` and a float
    ``timestamp``, and the ``std`` column of ``CONFIDENCE`` is attached.

    Parameters
    ----------
    filename : str
        Path of a CSV with a ``site_path_timestamp`` column.

    Returns
    -------
    pd.DataFrame
        The sorted submission with the additional columns ``site``, ``path``,
        ``timestamp`` and ``std``.
    """
    sub = pd.read_csv(filename).sort_values('site_path_timestamp').reset_index(drop=True)
    # Vectorised split; avoids constructing one pd.Series per row via apply().
    parts = sub['site_path_timestamp'].str.split('_', expand=True)
    sub['site'] = parts[0]
    sub['path'] = parts[1]
    sub['timestamp'] = parts[2].astype(float)
    # Assigned by index alignment: both frames were sorted by
    # site_path_timestamp and re-indexed from 0.
    # NOTE(review): this assumes CONFIDENCE holds exactly the same keys as
    # this submission — confirm.
    sub['std'] = CONFIDENCE['std']
    return sub

In [None]:
def main():
    """Blend two submissions, post-process every path in parallel and save the result.

    Floor values are taken from a third submission and must agree with the
    second blend input. The 50/50 blend is grouped by ``path``, each group
    is corrected by ``correct_path_with_retry`` in a process pool, and the
    combined result is written as a CSV sorted by ``site_path_timestamp``.
    """
    blend_a = read_sub('/kaggle/input/indoor-ensemble/no_hand_labeling_pseudo_labeling_from_lb_2.693.csv')
    blend_b = read_sub('/kaggle/input/indoor-submissions/submission_raw_wifi_pseudo_labeling_nohand_from2586.csv')
    floor_source = read_sub('/kaggle/input/indoor-submissions/submission_raw_wifi_pseudo_labeling_nohand_from3112.csv')

    # Use the floors of the third file and make sure both inputs agree on them.
    blend_a['floor'] = floor_source['floor']
    assert (blend_a['floor'] == blend_b['floor']).all()

    blended = do_blending(blend_a, blend_b, 0.5)

    # One worker per CPU; results arrive in completion order, so the final
    # frame is re-sorted below.
    n_workers = multiprocessing.cpu_count()
    with multiprocessing.Pool(processes=n_workers) as pool:
        per_path = list(tqdm(pool.imap_unordered(correct_path_with_retry,
                                                 blended.groupby('path'))))

    submission = pd.concat(per_path).sort_values('site_path_timestamp')
    submission.to_csv('11.no_hand_labeling_pseudo_labeling_from_lb_2.586_with_retry_submission.csv', index=False)

In [None]:
# Run the full post-processing pipeline and write the submission CSV.
main()