In [None]:
import json
import multiprocessing
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.graph_objs as go
import scipy.interpolate
import scipy.sparse
import scipy.sparse.linalg
from PIL import Image 
from scipy.signal import savgol_filter
from tqdm import tqdm

import indoor_location_github_script as compute_f
from indoor_location_github_script import read_data_file
from myutil import load_pickle

In [None]:
# Root directory of the competition dataset; correct_path reads the raw
# per-path sensor files from f'{INPUT_PATH}/test/<path>.txt'.
INPUT_PATH = '../input/indoor-location-navigation'

# functions

In [None]:
def floor_reverse(floor):
    """Turn a numeric floor index into a floor-name string.

    The value is truncated to an integer first. Negative indices become
    basement names ('B1' for -1, 'B2' for -2, ...); zero and positive indices
    become 'F<index + 1>' ('F1' for 0, 'F2' for 1, ...).
    """
    level = int(floor)
    return f'B{-level}' if level < 0 else f'F{level + 1}'
        


def visualize_trajectory(trajectory, floor_plan_filename, width_meter, height_meter, title=None, mode='lines + markers + text', show=False, show_labels=False):
    """Draw a 2-D trajectory on top of a floor-plan image and return the figure.

    Parameters
    ----------
    trajectory : array-like with ``.shape`` and 2-D indexing
        Waypoints; column 0 is plotted as x and column 1 as y. An empty
        trajectory is allowed and yields a figure with only the floor plan.
    floor_plan_filename : str or path-like
        Image file opened with PIL and used as the plot background.
    width_meter, height_meter : number
        Extent of the floor plan in plot units (presumably metres, going by the
        parameter names). They set the axis ranges and the image size.
    title : str, optional
        Figure title; "No title." is used when falsy.
    mode : str
        Plotly scatter mode for the trajectory trace.
    show : bool
        When true, ``fig.show()`` is called before returning.
    show_labels : bool
        When true, each waypoint is labelled with its index (see
        ``_trajectory_labels``). Defaults to False, in which case no text is
        attached to the trace even though ``mode`` may contain ``text``.

    Returns
    -------
    plotly.graph_objs.Figure
    """
    fig = go.Figure()
    n_points = trajectory.shape[0]

    # Waypoints are small translucent green markers; the first and last are
    # enlarged and coloured blue / red. Guarded so an empty trajectory does not
    # raise IndexError.
    size_list = [6] * n_points
    color_list = ['rgba(4, 174, 4, 0.5)'] * n_points
    if n_points:
        size_list[0] = 10
        size_list[-1] = 10
        color_list[0] = 'rgba(12, 5, 235, 1)'
        color_list[-1] = 'rgba(235, 5, 5, 1)'

    # Labels are only built on request; they were previously computed on every
    # call and then discarded.
    text_list = _trajectory_labels(trajectory) if show_labels else None

    fig.add_trace(
        go.Scattergl(
            x=trajectory[:, 0],
            y=trajectory[:, 1],
            mode=mode,
            marker=dict(size=size_list, color=color_list),
            line=dict(shape='linear', color='rgb(100, 10, 100)', width=2, dash='dot'),
            text=text_list,
            textposition="top center",
            name='trajectory',
        ))

    # Add the floor plan. The context manager closes the underlying file once
    # the layout image has been built; plotly serialises a PIL image into the
    # figure when the layout.Image object is constructed, so the file is not
    # needed afterwards.
    with Image.open(floor_plan_filename) as floor_plan:
        fig.update_layout(images=[
            go.layout.Image(
                source=floor_plan,
                xref="x",
                yref="y",
                x=0,
                y=height_meter,
                sizex=width_meter,
                sizey=height_meter,
                sizing="contain",
                opacity=1,
                layer="below",
            )
        ])

    # Fix both axes to the floor-plan extent and keep a 1:1 aspect ratio.
    fig.update_xaxes(autorange=False, range=[0, width_meter])
    fig.update_yaxes(autorange=False, range=[0, height_meter], scaleanchor="x", scaleratio=1)
    fig.update_layout(
        title=go.layout.Title(
            text=title or "No title.",
            xref="paper",
            x=0,
        ),
        autosize=True,
        width=900,
        # Height follows the floor-plan aspect ratio, plus 200 px of extra room.
        height=200 + 900 * height_meter / width_meter,
        template="plotly_white",
    )

    if show:
        fig.show()

    return fig


def _trajectory_labels(trajectory):
    """Return one text label per waypoint: its index, indented once per earlier visit to the same position."""
    visits = {}
    labels = []
    for i in range(trajectory.shape[0]):
        key = str(trajectory[i])
        # First visit -> 0 indents, second visit -> 1 indent, and so on, so
        # labels of coinciding points do not overlap.
        visits[key] = visits.get(key, -1) + 1
        labels.append('        ' * visits[key] + f'{i}')
    if labels:
        labels[0] = 'Start Point: 0'
        labels[-1] = f'End Point: {trajectory.shape[0] - 1}'
    return labels



def compute_rel_positions(acce_datas, ahrs_datas):
    """Derive relative positions from accelerometer and AHRS readings.

    Chains the helpers of ``indoor_location_github_script`` (imported as
    ``compute_f``): step detection on ``acce_datas``, headings from
    ``ahrs_datas``, stride lengths from the per-step acceleration extrema,
    one heading per step, and finally the relative positions, which are
    returned as produced by ``compute_f.compute_rel_positions``.

    NOTE(review): ``correct_path`` reads column 0 of the result as a timestamp
    and columns 1:3 as per-step displacements - confirm against the helper.
    """
    # The step indices returned by compute_steps are not needed here.
    step_times, _step_indices, acce_extrema = compute_f.compute_steps(acce_datas)
    all_headings = compute_f.compute_headings(ahrs_datas)
    strides = compute_f.compute_stride_length(acce_extrema)
    per_step_headings = compute_f.compute_step_heading(step_times, all_headings)
    return compute_f.compute_rel_positions(strides, per_step_headings)



def correct_path(args):
    """Refine the predicted x/y positions of one path using its step-based relative motion.

    Parameters
    ----------
    args : tuple ``(path, path_df)``
        ``path`` is the path id used to locate ``{INPUT_PATH}/test/{path}.txt``;
        ``path_df`` holds the predictions of that path with columns ``floor``,
        ``path`` and ``timestamp``, and the x/y coordinates in column
        positions 1 and 2. A single tuple is taken so the function can be
        mapped directly over ``DataFrame.groupby('path')``.

    Returns
    -------
    pandas.DataFrame
        Columns ``floor``, ``x``, ``y``, ``path``, ``timestamp`` on the index
        of ``path_df``; ``x``/``y`` are the corrected coordinates.

    Method
    ------
    1. The predicted x and y are smoothed with a Savitzky-Golay filter
       (``xy_hat``).
    2. Relative positions from the raw sensor file are accumulated and
       interpolated at the prediction timestamps; their differences give the
       displacement between consecutive predictions (``delta_xy_hat``).
    3. ``xy_star`` solves ``Q x = c`` with ``Q = A + D.T B D`` and
       ``c = A xy_hat + D.T B delta_xy_hat``. That is the stationarity
       condition of the weighted least-squares objective
       ``sum_i alpha_i |x_i - xy_hat_i|^2 + sum_i beta_i |(x_{i+1} - x_i) - delta_i|^2``.

    Paths with fewer than two rows have no consecutive pair to constrain and
    are returned with their coordinates unchanged.
    """
    win = 31  # preferred Savitzky-Golay window length, in samples
    pol = 1   # Savitzky-Golay polynomial order
    path, path_df = args

    T_ref = path_df['timestamp'].values
    trajectory = path_df.iloc[:, 1:3].values
    N = trajectory.shape[0]

    if N < 2:
        # With a single point the system reduces to A x = A xy_hat, so the
        # input is already the solution; this also avoids the filter error
        # on too-short input.
        return pd.DataFrame({
            'floor' : path_df['floor'],
            'x' : trajectory[:, 0],
            'y' : trajectory[:, 1],
            'path' : path_df['path'],
            'timestamp' : path_df['timestamp']
        })

    # savgol_filter (default mode='interp') rejects a window longer than the
    # signal, so clamp the window to the largest odd length that fits.
    window = min(win, N)
    if window % 2 == 0:
        window -= 1
    if window > pol:
        x = savgol_filter(trajectory[:, 0], window, pol)
        y = savgol_filter(trajectory[:, 1], window, pol)
        xy_hat = np.column_stack((x, y))
    else:
        # Too few points for the requested polynomial order: skip smoothing.
        xy_hat = trajectory.astype(float)

    example = read_data_file(f'{INPUT_PATH}/test/{path}.txt')
    rel_positions = compute_rel_positions(example.acce, example.ahrs)
    # Pad with zero-displacement rows at time 0 and, if needed, at the last
    # prediction timestamp, so the interpolation below covers T_ref.
    if T_ref[-1] > rel_positions[-1, 0]:
        rel_positions = [np.array([[0, 0, 0]]), rel_positions, np.array([[T_ref[-1], 0, 0]])]
    else:
        rel_positions = [np.array([[0, 0, 0]]), rel_positions]
    rel_positions = np.concatenate(rel_positions)

    T_rel = rel_positions[:, 0]
    # Cumulative displacement over time, sampled at the prediction timestamps,
    # then differenced to get the displacement between consecutive predictions.
    cumulative_xy = np.cumsum(rel_positions[:, 1:3], axis=0)
    delta_xy_hat = np.diff(scipy.interpolate.interp1d(T_rel, cumulative_xy, axis=0)(T_ref), axis=0)

    delta_t = np.diff(T_ref)
    # Inverse-variance style weights. NOTE(review): 8.1 and 0.2 look like
    # standard deviations in position units, and the 1e-3 factor suggests
    # millisecond timestamps - confirm.
    alpha = (8.1)**(-2) * np.ones(N)
    beta  = (0.2 + 0.2 * 1e-3 * delta_t)**(-2)
    A = scipy.sparse.spdiags(alpha, [0], N, N)
    B = scipy.sparse.spdiags( beta, [0], N-1, N-1)
    # First-difference operator: (D x)_i = x_{i+1} - x_i.
    D = scipy.sparse.spdiags(np.stack([-np.ones(N), np.ones(N)]), [0, 1], N-1, N)

    Q = A + (D.T @ B @ D)
    c = (A @ xy_hat) + (D.T @ (B @ delta_xy_hat))
    xy_star = scipy.sparse.linalg.spsolve(Q, c)

    return pd.DataFrame({
        'floor' : path_df['floor'],
        'x' : xy_star[:, 0],
        'y' : xy_star[:, 1],
        'path' : path_df['path'],
        'timestamp' : path_df['timestamp']
    })

# data

In [None]:
# Sample submission: defines the rows (site_path_timestamp) that must be predicted.
ssub = pd.read_csv(f'{INPUT_PATH}/sample_submission.csv')
# Per-row bookkeeping for the RNN predictions; later cells read its
# 'pos_range' and 'timestamps' entries.
# NOTE(security): load_pickle presumably unpickles the file - only load
# pickles from a trusted source, since unpickling can execute arbitrary code.
meta_data = load_pickle('../input/indoor-location-rnn-test-data-v2/test-meta-data.pickle')

In [None]:
# Ensemble: element-wise mean of every prediction CSV in the directory.
# Sorting makes the summation order (and so the float result) reproducible.
files = sorted(Path('../input/indoor-location-rnn-v2').glob('*.csv'))
if not files:
    # Fail here with a clear message instead of a ZeroDivisionError below.
    raise FileNotFoundError('no prediction CSV files found in ../input/indoor-location-rnn-v2')

preds = pd.read_csv(files[0])
for file in files[1:]:
    # DataFrame addition aligns on index and columns; the files are assumed to
    # share the same shape and column names.
    preds = preds + pd.read_csv(file)
preds = preds / len(files)

In [None]:
# Add the two bookkeeping columns; both are filled in below.
preds['path'] = ''
preds['timestamp'] = 0

# Each entry of meta_data['pos_range'] maps a path id to the [start, end) row
# range it occupies in preds. Column -2 is 'path' (second from last).
pos_range = meta_data['pos_range']
for trace in pos_range:
    start_idx, end_idx = pos_range[trace]
    preds.iloc[start_idx:end_idx, -2] = trace

# One timestamp per prediction row, taken in the iteration order of
# meta_data['timestamps'].
timestamps = meta_data['timestamps']
tss = [int(timestamps[idx]) for idx in timestamps]
preds['timestamp'] = tss

# Final column layout relied upon by the rest of the notebook.
preds.columns = ['floor', 'x', 'y', 'path', 'timestamp']
preds

In [None]:
# Split 'site_path_timestamp' into its three '_'-separated parts. The
# vectorised string accessor replaces a per-row apply that built one Series
# per row; the resulting columns are still labelled 0, 1, 2.
site_path_timestamp = ssub['site_path_timestamp'].str.split('_', expand=True)
ssub['site'] = site_path_timestamp[0]
ssub['path'] = site_path_timestamp[1]
ssub['timestamp'] = site_path_timestamp[2].astype(int)

In [None]:
# Keep a copy of the per-row floor predictions (first column, 'floor') before
# the next cell replaces them with one value per path.
original_floors = preds['floor'].copy()

# use one floor per path (rounded mean of the predicted floors)

In [None]:
print('fix')
# Give every row of a path the same floor: the rounded mean of that path's
# floor predictions. groupby/transform works for any index, whereas indexing
# iloc with index labels is only correct for a default RangeIndex, and it
# avoids rescanning the whole frame once per path.
preds['floor'] = preds.groupby('path')['floor'].transform('mean').round()

# Sanity check: report every path that still has more than one floor value.
floors_per_path = preds.groupby('path')['floor'].nunique(dropna=False)
for _ in floors_per_path[floors_per_path != 1].index:
    print('oof')

# get confidence of predictions

In [None]:
# Absolute distance between the unified per-path floor and the original
# per-row floor prediction.
confidence = preds['floor'].sub(original_floors).abs() # lower is better

# visualize predictions

In [None]:
# Inspect one hand-picked path in three plots: the raw predictions, the
# trajectory after correct_path, and the points the submission asks for.
count = 0
for path in preds['path'].unique():
    if path == 'b406c5c925f3b64d8972b2c0':
        rel_preds = preds[preds['path'] == path]

        # The site id is taken from characters 9..32 of the first line of the
        # raw path file that mentions 'SiteID'. Initialising to None prevents
        # silently reusing a value left over from an earlier run.
        path_file = f'../input/indoor-location-navigation/test/{path}.txt'
        building = None
        with open(path_file, 'r') as f:
            for line in f:
                if 'SiteID' in line:
                    building = line[9:33]
                    break
        if building is None:
            raise ValueError(f'no SiteID line found in {path_file}')

        # Floor directories exist under either spelling (e.g. 'F1' or '1F'),
        # so retry with the reversed name only when the first is missing.
        floor = floor_reverse(rel_preds.iloc[0, 0])
        try:
            with open(f'../input/indoor-location-navigation/metadata/{building}/{floor}/floor_info.json', 'rb') as f:
                floor_info = json.load(f)
        except FileNotFoundError:
            floor = floor[::-1]
            with open(f'../input/indoor-location-navigation/metadata/{building}/{floor}/floor_info.json', 'rb') as f:
                floor_info = json.load(f)
        height = floor_info['map_info']['height']
        width = floor_info['map_info']['width']
        floor_img = f'../input/indoor-location-navigation/metadata/{building}/{floor}/floor_image.png'

        # 1) raw predictions
        trajectory = rel_preds.iloc[:, 1:3].values
        visualize_trajectory(trajectory, floor_img, width, height, show=True)

        # 2) corrected trajectory. There is only one path here, so
        # correct_path is called directly instead of through a process pool.
        rel_preds = correct_path((path, rel_preds)).sort_index()
        trajectory = rel_preds.iloc[:, 1:3].values
        visualize_trajectory(trajectory, floor_img, width, height, show=True)

        # 3) corrected positions at the timestamps requested by the submission,
        # falling back to the nearest timestamp when there is no exact match.
        rel_ssub = ssub[ssub['path'] == path]
        ssub_points = []
        for row in rel_ssub.itertuples():
            exact = rel_preds[rel_preds['timestamp'] == row.timestamp]
            if exact.empty:
                print(f'missing ts at ssub index {row.Index}')
                pred = rel_preds.iloc[(rel_preds['timestamp'] - row.timestamp).abs().argmin(), 1:3]
                print(pred)
            else:
                pred = exact.iloc[0, 1:3]
            ssub_points.append(pred)
        visualize_trajectory(np.array(ssub_points), floor_img, width, height, show=True)
    count += 1

# smooth predictions

In [None]:
# Run correct_path on every path in parallel, one task per (path, rows) group
# and one worker per CPU.
processes = multiprocessing.cpu_count()
with multiprocessing.Pool(processes=processes) as pool:
    dfs = list(tqdm(pool.imap_unordered(correct_path, preds.groupby('path'))))
# imap_unordered yields results in completion order; each result keeps its
# original index, so sorting by index restores the row order of preds.
processed_preds = pd.concat(dfs).sort_index()

In [None]:
# Fill the submission from the corrected predictions: for every requested
# (path, timestamp) take floor/x/y of the prediction with the same timestamp,
# or of the nearest timestamp when there is no exact match.
for path in ssub['path'].unique():
    rel_ssub = ssub[ssub['path'] == path]
    rel_preds = processed_preds[processed_preds['path'] == path]
    trajectory = rel_preds.iloc[:, :3].values      # columns: floor, x, y
    pred_timestamps = rel_preds['timestamp'].values
    for row in rel_ssub.itertuples():
        exact = np.flatnonzero(pred_timestamps == row.timestamp)
        if exact.size:
            nearest = exact[0]
        else:
            print(f'missing ts at ssub index {row.Index}')
            nearest = np.abs(pred_timestamps - row.timestamp).argmin()
        ssub.loc[row.Index, 'floor':'y'] = trajectory[nearest]

# Only the first four columns are written; the helper columns added earlier
# (site, path, timestamp) are left out.
ssub.iloc[:, :4].to_csv('submission.csv', index=False)

In [None]:
# Show the submission rows whose x and y are identical.
# NOTE(review): presumably a check for rows still holding placeholder values
# from the sample submission - confirm.
ssub.loc[ssub['x'] == ssub['y']]

# closest location

In [None]:
%%time
# Fill the submission from the predictions in `preds` (not the corrected
# `processed_preds`), matching on timestamp and falling back to the nearest
# timestamp.
# NOTE(review): this writes to the same 'submission.csv' as the smoothing cell
# above and therefore replaces its output - confirm which one is intended.
for path in ssub['path'].unique():
    rel_ssub = ssub[ssub['path'] == path]
    rel_preds = preds[preds['path'] == path]
    for row in rel_ssub.itertuples():
        exact = rel_preds[rel_preds['timestamp'] == row.timestamp]
        if exact.empty:
            print(f'missing ts at ssub index {row.Index}')
            nearest = (rel_preds['timestamp'] - row.timestamp).abs().argmin()
            pred = rel_preds.iloc[nearest, :3]
        else:
            pred = exact.iloc[0, :3]
        ssub.loc[row.Index, 'floor':'y'] = pred.tolist()

ssub.iloc[:, :4].to_csv('submission.csv', index=False)

# ------------------------------------------------