In this notebook, I'm going to explore the coverage of the site-surveyor training data, then train a simple LightGBM model to make predictions and validate them by visualization.

### Libraries 📚

In [None]:
!pip install pytorch-tabnet

In [None]:
import os
import glob
import math
import json

from dataclasses import dataclass

import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from PIL import Image

## Reading in the data

In [None]:
!cp -r /kaggle/input/github-dataset/* ./

In [None]:
# Import the trace-file parser shipped with the copied GitHub repository
from io_f import read_data_file

# How 1 path looks: <base>/train/<site>/<floor>/<path id>.txt
base = '../input/indoor-location-navigation'
path = f'{base}/train/5a0546857ecc773753327266/B1/5e15730aa280850006f3d005.txt'

# Read in 1 example trace
sample_file = read_data_file(path)

# Attributes of the parsed file whose array shapes are reported below.
# The printed labels are generated from these names, so a label can no
# longer drift away from the attribute it describes (the original printed
# "acacce_uncalice" for `acce_uncali`).
sensor_names = (
    "acce",
    "acce_uncali",
    "ahrs",
    "gyro",
    "gyro_uncali",
    "ibeacon",
    "magn",
    "magn_uncali",
    "waypoint",
    "wifi",
)

# You can access the information for each variable:
print("~~~ Example ~~~")
# sep=" \n" reproduces the original layout: one "<name>: <shape>" per line.
print(
    *(f"{name}: {getattr(sample_file, name).shape}" for name in sensor_names),
    sep=" \n",
)

# All waypoints on site 0

Let's find out the data coverage on one of the buildings.

In [None]:
# Every directory two levels below train/ (<site>/<floor>), in sorted order.
floorplans = glob.glob(f"{base}/train/*/*")
floorplans.sort()
print("Number of floor plans:", len(floorplans))
# Preview the first few entries (displayed as the cell output).
floorplans[:5]

In [None]:
# Map each floor directory to the list of *.txt trace files found inside it.
txt_files_per_floor = (glob.glob(f"{fp}/*.txt") for fp in floorplans)
paths = dict(zip(floorplans, txt_files_per_floor))

In [None]:
def visualize_trajectories(trajectories, floor_plan_filename, width_meter, height_meter, title=None, mode='lines + markers + text', show=False):
    """Plot several waypoint trajectories on top of a floor-plan image.

    Args:
        trajectories: Iterable of 2-D arrays, one per trajectory. Column 0 is
            plotted as x and column 1 as y, rows in visiting order.
            Trajectories with zero rows are skipped.
        floor_plan_filename: Path of the floor-plan image (opened with PIL).
        width_meter: Extent used for the x-axis range and the image width.
        height_meter: Extent used for the y-axis range and the image height.
        title: Figure title; "No title." is used when this is falsy.
        mode: Plotly scatter ``mode`` string applied to every trace.
        show: When true, ``fig.show()`` is called before returning.

    Returns:
        The plotly figure holding one trace per trajectory plus the floor
        plan as a background image.
    """
    fig = go.Figure()

    # add trajectory
    for trajectory in trajectories:
        n_points = trajectory.shape[0]
        if n_points == 0:
            # Nothing to draw, and the first/last-point styling below would
            # raise IndexError on empty lists.
            continue

        # Larger markers for the first and last point.
        size_list = [6] * n_points
        size_list[0] = 10
        size_list[-1] = 10

        # Green in between, blue for the start, red for the end.
        color_list = ['rgba(4, 174, 4, 0.5)'] * n_points
        color_list[0] = 'rgba(12, 5, 235, 1)'
        color_list[-1] = 'rgba(235, 5, 5, 1)'

        # Label every point with its index. When the same position is visited
        # again, the label is pushed right by one 8-space step per earlier
        # visit so the labels do not sit on top of each other.
        visit_count = {}
        text_list = []
        for i, point in enumerate(trajectory):
            key = str(point)  # computed once per point
            repeats = visit_count.get(key, -1) + 1
            visit_count[key] = repeats
            text_list.append('        ' * repeats + f'{i}')
        text_list[0] = 'Start Point: 0'
        text_list[-1] = f'End Point: {n_points - 1}'

        fig.add_trace(
            go.Scattergl(
                x=trajectory[:, 0],
                y=trajectory[:, 1],
                mode=mode,
                marker=dict(size=size_list, color=color_list),
                line=dict(shape='linear', color='rgb(100, 10, 100)', width=2, dash='dot'),
                text=text_list,
                textposition="top center",
                name='trajectory',
            ))

    # add floor plan
    # The image is attached inside the `with` block so the file handle is
    # released afterwards instead of staying open for every plotted floor.
    # NOTE(review): relies on plotly encoding the PIL image when the layout
    # image object is built - confirm against the installed plotly version.
    with Image.open(floor_plan_filename) as floor_plan:
        fig.update_layout(images=[
            go.layout.Image(
                source=floor_plan,
                xref="x",
                yref="y",
                x=0,
                y=height_meter,
                sizex=width_meter,
                sizey=height_meter,
                sizing="contain",
                opacity=1,
                layer="below",
            )
        ])

    # configure
    fig.update_xaxes(autorange=False, range=[0, width_meter])
    # scaleanchor/scaleratio keep one x unit the same on-screen length as one y unit.
    fig.update_yaxes(autorange=False, range=[0, height_meter], scaleanchor="x", scaleratio=1)
    fig.update_layout(
        title=go.layout.Title(
            text=title or "No title.",
            xref="paper",
            x=0,
        ),
        autosize=True,
        width=900,
        # Figure height follows the floor plan's aspect ratio, plus 200 px.
        height=200 + 900 * height_meter / width_meter,
        template="plotly_white",
    )

    if show:
        fig.show()

    return fig

In [None]:
import glob

# One entry per "<site>_train.csv" feature file, in sorted file order:
# the site id is the file name up to its first underscore.
SITES = [
    csv_path.split('/')[-1].split('_')[0]
    for csv_path in sorted(glob.glob('../input/generate-wifi-features-5-times-faster/*_train.csv'))
]

In [None]:
siteNo = 4
site = SITES[siteNo]
# Floor paths look like "<base>/train/<site>/<floor>"; keep only the floors
# whose second-to-last component is the chosen site.
for floor in (fp for fp in floorplans if fp.split('/')[-2] == site):
    floorNo = floor.split('/')[-1]

    # Gather the waypoints of every trace recorded on this floor.
    trajectories = []
    for path_filename in glob.glob(f'{base}/train/{site}/{floorNo}/*.txt'):
        example = read_data_file(path_filename)
        # Columns 1 and 2 of the waypoint array are kept
        # (x and y; column 0 is the timestamp, which is not needed here).
        trajectory = example.waypoint[:, 1:3]
        trajectories.append(trajectory)

    # Floor-plan image and its metadata for the same site/floor.
    floor_plan_filename = f'{base}/metadata/{site}/{floorNo}/floor_image.png'
    json_plan_filename = f'{base}/metadata/{site}/{floorNo}/floor_info.json'

    # Width and height of the map come from the floor's .json file.
    with open(json_plan_filename) as json_file:
        json_data = json.load(json_file)
    map_info = json_data["map_info"]
    width_meter, height_meter = map_info["width"], map_info["height"]

    title = f"All Waypoints {floorNo}"

    # One figure per floor, shown immediately.
    visualize_trajectories(
        trajectories=trajectories,
        floor_plan_filename=floor_plan_filename,
        width_meter=width_meter,
        height_meter=height_meter,
        title=title,
        show=True,
    )

## Compare predicted waypoints with ground truth on site 0

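First, I'm going to use a simple LightGBM regressor to predict the position and floor. (Note: the code cell below loads a pre-trained TabNet regressor rather than training a LightGBM model.)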

> 📌 **Note**: Preprocessed data is from [this dataset](https://www.kaggle.com/devinanzelmo/indoor-navigation-and-location-wifi-features) by [Devin Anzelmo](https://www.kaggle.com/devinanzelmo).

In [None]:
def mean_position_error(x_pred, y_pred, f_pred, x_true, y_true, f_true, p=15):
    """Return the mean position error between predictions and ground truth.

    For every waypoint the error is the Euclidean distance between the
    predicted and the true (x, y) position plus ``p`` times the absolute
    floor difference; the result is the average over all waypoints.

    Args:
        x_pred, y_pred, f_pred: Predicted x, y and floor per waypoint.
        x_true, y_true, f_true: Ground-truth x, y and floor per waypoint.
            All six may be lists, NumPy arrays or pandas Series; they are
            converted to float arrays and compared position by position.
        p: Floor penalty per floor of difference (default 15).

    Returns:
        The mean error as a float, or NaN when the inputs are empty.
    """
    # Plain lists do not support elementwise subtraction, so coerce every
    # input to a float array first.
    x_pred, y_pred, f_pred, x_true, y_true, f_true = (
        np.atleast_1d(np.asarray(values, dtype=float))
        for values in (x_pred, y_pred, f_pred, x_true, y_true, f_true)
    )

    n_points = len(x_true)
    if n_points == 0:
        # Mean of nothing: return NaN explicitly rather than dividing by zero.
        return np.nan

    # 1. Horizontal distance between predicted and true position.
    horizontal_error = np.hypot(x_pred - x_true, y_pred - y_true)
    # 2. Penalty for predicting the wrong floor.
    floor_penalty = p * np.absolute(f_pred - f_true)
    # 3. Average over all waypoints.
    return (horizontal_error + floor_penalty).sum() / n_points

In [None]:
# N_A selects the model folder ("tabnet_<N_A>") in the model container below.
# NOTE(review): presumably TabNet's n_a hyperparameter - confirm.
N_A = 48
siteNo = 13
SITE = SITES[siteNo]
# Import Libraries
from pytorch_tabnet.tab_model import TabNetRegressor  # TabNet regressor

# Pre-computed wifi features for the chosen site.
feature_dir = "/kaggle/input/generate-wifi-features-5-times-faster"
train_file = f"{feature_dir}/{SITE}_train.csv"

train_df = pd.read_csv(train_file)

# Load the saved model for this site instead of training one here.
loaded_clf = TabNetRegressor()
loaded_clf.load_model(f'../input/tabnet-model-container/tabnet_{N_A}/tabnet_model_test_{siteNo}.zip')

# Predict from every column except the targets (x, y, f) and the path id.
# Despite its name, `prediction_dict` is indexed as a 2-D array below.
prediction_dict = loaded_clf.predict(train_df.drop(columns = ['x', 'y', 'f', 'path']).values)

preds_x = prediction_dict[:, 0]
preds_y = prediction_dict[:, 1]
# The floor comes out of the regressor as a real number; round to the nearest floor.
preds_f = prediction_dict[:, 2].round()

# Accuracy: share of rows whose rounded floor matches the ground truth exactly.
# The earlier formula subtracted the summed absolute floor error, which counted
# a miss by k floors k times and could even produce a negative "accuracy".
floor_accuracy = (preds_f == train_df['f'].to_numpy()).mean() * 100
print('Accuracy floor of site {}: {}'.format(siteNo, floor_accuracy))

In [None]:
# Signed floor error per training row: predicted floor minus true floor.
floor_error = preds_f - train_df['f']
# Index labels of the rows whose floor was mispredicted (cell output).
# NOTE(review): the original line carried a bare "#760" marker whose meaning is not evident - confirm.
floor_error.loc[floor_error.ne(0)].index #760

In [None]:
# Valid paths: distinct values of the last column of train_df
paths_valid = train_df.iloc[:, -1].unique()
# Path id of the hard-coded row 703, then its position inside paths_valid.
# NOTE(review): 703 is a magic row label; presumably a row picked by hand - confirm.
target_path = train_df.loc[703, 'path']
pathNo_to_draw = np.where(paths_valid == target_path)[0][0]

## Path in training data

Then, I'm going to trace back the path in the validation data set created earlier, and draw the ground-truth positions with Plotly using the helpers from this [GitHub repo](https://github.com/location-competition/indoor-location-competition-20).

In [None]:
# Path selected in the previous cell, plus the plotting helpers from the GitHub repo
path_to_draw = paths_valid[pathNo_to_draw]
from visualize_f import visualize_trajectory, visualize_heatmap

base = '../input/indoor-location-navigation'
site = SITE
pathNo = path_to_draw

# The floor of the path is not known up front, so glob across all floors of
# the site and take the first match; the floor is the file's parent directory.
path_filename = glob.glob(f'{base}/train/{site}/*/{path_to_draw}.txt')[0]
floorNo = path_filename.split('/')[-2]

# Parse the raw trace and keep columns 1 and 2 of its waypoints
# (x and y; column 0 is the timestamp).
example = read_data_file(path_filename)
trajectory = example.waypoint[:, 1:3]
gt_trajectory = trajectory

# Ground-truth positions of the same path as stored in the feature table.
gt_pos_df = train_df[['x', 'y', 'path']]
gt_pos_df_to_draw = gt_pos_df.loc[gt_pos_df['path'] == path_to_draw]
gt_pos_df_to_draw

In [None]:
# Feature columns only (targets x, y, f and the path id dropped) for the rows of the selected path.
train_df.loc[train_df['path'] == path_to_draw].drop(columns=['x', 'y', 'f', 'path'])

In [None]:
# For the selected path: how many feature columns differ from -999 in each row
# (rows are counted by their position within the filtered subset).
# NOTE(review): -999 is presumably the "no signal" placeholder - confirm.
np.bincount(
    np.where(
        train_df.loc[train_df['path'] == path_to_draw]
        .drop(columns=['x', 'y', 'f', 'path'])
        .to_numpy() != -999
    )[0]
)

In [None]:
# Check for wifi signal: (row, column) positions of every feature value of the
# selected path that is not -999.
np.where(
    train_df.loc[train_df['path'] == path_to_draw]
    .drop(columns=['x', 'y', 'f', 'path'])
    .to_numpy() != -999
)

In [None]:
# Check for wifi signal and beacon signal in the raw trace file
import csv  # not imported at the top of the notebook; required by csv.reader below

# Record types to dump, in the order they are printed
# (all waypoints first, then all wifi rows, then all beacon rows).
record_types = ("TYPE_WAYPOINT", "TYPE_WIFI", "TYPE_BEACON")
rows_by_type = {record_type: [] for record_type in record_types}

# Read the tab-separated file once and bucket the rows by their second field.
# newline="" is the file mode the csv module documentation asks for.
with open(path_filename, newline="") as f:
    for row in csv.reader(f, delimiter="\t", doublequote=True):
        # Rows with fewer than two fields have no record type to test.
        if len(row) > 1 and row[1] in rows_by_type:
            rows_by_type[row[1]].append(row)

for record_type in record_types:
    for row in rows_by_type[record_type]:
        print(row)

In [None]:
# Floor-plan image and metadata file of the floor the selected path lies on
floor_plan_filename = f'{base}/metadata/{site}/{floorNo}/floor_image.png'
json_plan_filename = f'{base}/metadata/{site}/{floorNo}/floor_info.json'

# Map width and height are read from the floor's .json file
with open(json_plan_filename) as json_file:
    json_data = json.load(json_file)

map_info = json_data["map_info"]
width_meter, height_meter = map_info["width"], map_info["height"]

title = f"Training Waypoint {floorNo}"

# Plot the first two columns (x, y) of the ground-truth rows on the floor plan;
# the returned figure is the cell output.
visualize_trajectory(
    trajectory=gt_pos_df_to_draw.iloc[:, :2].to_numpy(),
    floor_plan_filename=floor_plan_filename,
    width_meter=width_meter,
    height_meter=height_meter,
    title=title,
)

## Path in prediction

Finally, I'm going to do the same for the predicted path.

In [None]:
path_to_draw = paths_valid[pathNo_to_draw]
# Build the prediction table column by column so x, y and f keep their numeric
# dtype. Stacking them with the string path ids in one NumPy array (as before)
# forces a single non-numeric dtype onto every column.
# The path ids come from the last column of train_df, the same column that
# paths_valid / path_to_draw were derived from; .to_numpy() drops the index so
# rows line up by position with the prediction arrays.
pred_pos_df = pd.DataFrame({
    'x': preds_x,
    'y': preds_y,
    'f': preds_f,
    'path': train_df.iloc[:, -1].to_numpy(),
})
# Keep only the predictions that belong to the selected path.
pred_pos_df = pred_pos_df[pred_pos_df['path'] == path_to_draw]
pred_pos_df

In [None]:
from visualize_f import visualize_trajectory, visualize_heatmap
from collections import Counter

base = '../input/indoor-location-navigation'
site = SITE
pathNo = path_to_draw
# Floor predicted most often along the path.
floor_int = Counter(pred_pos_df['f']).most_common(1)[0][0]

# Floor number -> directory names it may appear under (sites name floors
# either "F1" or "1F" style). A floor outside this table raises KeyError.
floor_map = {-2: ["B2"], -1: ["B1"], 0: ["F1", '1F'], 1: ['F2', '2F'], 2: ['F3', '3F'], 3: ['F4', '4F'], 4: ['F5', '5F'], 5: ['F6', '6F'], 6: ['F7' ,'7F'],
                                    7: ['F8', '8F'], 8: ['F9', '9F']}

# Pick the first naming variant that exists as a metadata directory for this site.
floorNolist = floor_map[floor_int]
floorNo = next(
    (
        candidate
        for candidate in floorNolist
        if os.path.exists(os.path.join(base, 'metadata', site, candidate))
    ),
    None,
)
if floorNo is None:
    # Fail here with a clear message instead of falling through with a
    # placeholder floor name and failing later on a made-up file path.
    raise FileNotFoundError(
        f"No metadata directory for predicted floor {floor_int} of site {site}; tried {floorNolist}"
    )

# Keep only the first two columns (predicted x, y) as the trajectory to draw
trajectory = pred_pos_df.iloc[:,:2].to_numpy()

# Prepare floor_plan corresponding with our example
floor_plan_filename = f'{base}/metadata/{site}/{floorNo}/floor_image.png'

# Prepare width_meter & height_meter
### (taken from the .json file)
json_plan_filename = f'{base}/metadata/{site}/{floorNo}/floor_info.json'
with open(json_plan_filename) as json_file:
    json_data = json.load(json_file)

width_meter = json_data["map_info"]["width"]
height_meter = json_data["map_info"]["height"]

# Title
title = f"Prediction on Waypoint {floorNo}"

# ~~~~~~~~~

# Finally, let's plot (the returned figure is the cell output)
visualize_trajectory(trajectory = trajectory,
                     floor_plan_filename = floor_plan_filename,
                     width_meter = width_meter,
                     height_meter = height_meter,
                     title = title)