In [None]:
import glob 
import os
import pandas as pd
import json
import plotly.graph_objects as go
import numpy as np
from PIL import Image
import re
import plotly
import matplotlib.pyplot as plt
from collections import Counter
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
import re

from helper import FLOOR_MAP, RELEVANT_BUILDINGS, is_interactive, GEODATA_OFFSETS, LINKS, get_folders
# Entries found under the competition's metadata directory; used further down as the
# list of building ids to summarise.
# NOTE(review): get_folders comes from helper.py -- presumably it returns the folder
# names matching the glob pattern; confirm its return type there.
BUILDING_IDS = get_folders('/kaggle/input/indoor-location-navigation/metadata/*')

In [None]:
class Floor:
    """One floor of a building: rescaled shop outlines plus recorded waypoint trajectories.

    Attributes:
        building_id: Site identifier; used as a folder name in the input paths.
        name: Floor folder name (for example "F1").
        level: Numeric level; taken from FLOOR_MAP[name] unless passed explicitly.
        info: The "map_info" entry of floor_info.json ("width" and "height" are read from it).
        shops: List of shop dicts (see _extract_shops_on_floor) rescaled by _scale_to_map.
        trajectories: {trajectory_id: {"waypoint": DataFrame}} as loaded by _get_trajectories.
    """

    def __init__(self, building_id, name, level=None):
        """Load map info, shop polygons and waypoint trajectories for one floor.

        Raises:
            KeyError: if `level` is None and `name` is not a key of FLOOR_MAP.
        """
        self.building_id = building_id
        self.level = FLOOR_MAP[name] if level is None else level
        self.name = name

        with open(f"../input/indoor-location-navigation/metadata/{self.building_id}/{self.name}/floor_info.json") as f:
            self.info = json.load(f)["map_info"]

        self.shops = self._scale_to_map(self._extract_shops_on_floor())
        # Access: [trajectory_id][trajectory_type] contains the trajectory log
        self.trajectories = self._get_trajectories()

    def _get_trajectories(self):
        """Read <trajectory>/waypoint.csv for every entry below this floor's train folder.

        Returns:
            dict mapping trajectory id (entry name up to the first ".") to
            {"waypoint": DataFrame}.
        """
        trajectories = {}
        for trajectory_path in glob.glob(f"/kaggle/input/waypointwificsv/train/{self.building_id}/{self.name}/**"):
            trajectory_id = os.path.basename(trajectory_path).split(".")[0]
            trajectories[trajectory_id] = {
                info_type: pd.read_csv(f"{trajectory_path}/{info_type}.csv")
                for info_type in ("waypoint",)
            }
        return trajectories

    def _scale_to_map(self, shops):
        """Rescale shop polygons from geojson coordinates to floor-image coordinates.

        The bounding box of all polygons is stretched to info["width"] x info["height"];
        an optional per-floor correction (offset_x, offset_y, factor_x, factor_y) from
        GEODATA_OFFSETS is applied afterwards. The shop dicts are modified in place and
        gain "x_center"/"y_center" (centre of each polygon's bounding box).

        Args:
            shops: list of dicts with numpy arrays under "x_poly" and "y_poly".

        Returns:
            The same list. An empty list is returned unchanged.
        """
        if not shops:
            # A floor without polygon features has nothing to scale (min() of an empty
            # sequence would raise).
            return shops

        all_x = [x for shop in shops for x in shop["x_poly"]]
        all_y = [y for shop in shops for y in shop["y_poly"]]
        min_x, max_x = min(all_x), max(all_x)
        min_y, max_y = min(all_y), max(all_y)
        # Guard against a degenerate bounding box (all vertices share a coordinate),
        # which would otherwise divide by zero.
        x_span = (max_x - min_x) or 1
        y_span = (max_y - min_y) or 1

        x_offset, y_offset, x_factor, y_factor = 0, 0, 1, 1
        floor_key = (self.building_id, self.name)
        if floor_key in GEODATA_OFFSETS:
            correction = GEODATA_OFFSETS[floor_key]
            x_offset, y_offset = correction[0], correction[1]
            x_factor, y_factor = correction[2], correction[3]

        for shop in shops:
            shop["x_poly"] = ((shop["x_poly"] - min_x) / x_span * self.info["width"]) * x_factor + x_offset
            shop["y_poly"] = ((shop["y_poly"] - min_y) / y_span * self.info["height"]) * y_factor + y_offset
            shop["x_center"] = (max(shop["x_poly"]) + min(shop["x_poly"])) / 2
            shop["y_center"] = (max(shop["y_poly"]) + min(shop["y_poly"])) / 2

        return shops

    def _extract_shops_on_floor(self):
        """Collect the Polygon features of this floor's geojson_map.json.

        MultiPolygon features are skipped silently (the original author notes they are
        the overall floor shape); any other non-Polygon feature is reported and skipped.

        Returns:
            list of dicts with "x_poly", "y_poly", "z_poly" (numpy arrays built from the
            first coordinate ring; z is the floor level repeated) and "name" (None when
            the feature has no name).
        """
        with open(f"../input/indoor-location-navigation/metadata/{self.building_id}/{self.name}/geojson_map.json") as f:
            floor_features = json.load(f)
        shops = []
        for feature in floor_features["features"]:
            geometry_type = feature["geometry"]["type"]
            if geometry_type == "MultiPolygon":
                continue  # this is the overall shape
            if ("type" in feature and feature["type"] != "Feature") or geometry_type != "Polygon":
                print("this is not a Polygon, type: ", feature["geometry"]["type"] )
                continue
            if "properties" not in feature:
                continue
            outer_ring = feature["geometry"]["coordinates"][0]
            # "properties" may be JSON null; treat that like a feature without a name.
            properties = feature["properties"] or {}
            shops.append({
                "x_poly": np.array([xy_poly[0] for xy_poly in outer_ring]),
                "y_poly": np.array([xy_poly[1] for xy_poly in outer_ring]),
                "z_poly": np.array([self.level] * len(outer_ring)),
                "name": properties["name"] if "name" in properties else None,
            })
        return shops

    def visualize(self, store=False):
        """Plot shop outlines and waypoint trajectories on top of the floor image.

        Args:
            store: when True, write the figure to
                /kaggle/working/train/<building_id>/<name>/trajectory.png instead of
                showing it inline.
        """
        im = Image.open(f"../input/indoor-location-navigation/metadata/{self.building_id}/{self.name}/floor_image.png")

        fig = go.Figure()
        for shop in self.shops:
            fig.add_trace(go.Scatter(x=shop["x_poly"], y=shop["y_poly"], mode='lines', line={"color": "black", "width": 2}, name=shop["name"], showlegend=False))

        for trajectory_id, trajectory in self.trajectories.items():
            waypoint_trajectory = trajectory["waypoint"]
            fig.add_trace(go.Scatter(x=waypoint_trajectory.x, y=waypoint_trajectory.y, name=trajectory_id))

        # Anchor the image at the top-left corner so it spans the full width x height.
        fig.add_layout_image(dict(source=im, xref="x", yref="y", x=0, y=self.info["height"], sizex=self.info["width"], sizey=self.info["height"], sizing="stretch", opacity=0.9, layer="below"))
        fig.update_layout(
            template="plotly_white",
            sliders=[
                dict(active=0, currentvalue={"prefix": "+x: "},
            )]
        )
        if not store:
            fig.show()
            return

        folder = f"/kaggle/working/train/{self.building_id}/{self.name}/"
        os.makedirs(folder, exist_ok=True)
        try:
            fig.write_image(folder + "trajectory.png")
        except Exception:
            # Presumably the static-image export engine is missing: install plotly-orca
            # into the running environment and retry once. `Exception` (not a bare
            # except) keeps KeyboardInterrupt/SystemExit working.
            import subprocess
            import sys
            subprocess.run(
                ["conda", "install", "--yes", "--prefix", sys.prefix, "-c", "plotly", "plotly-orca"],
                check=False,  # like the former `!conda ...` line: the retry below reports failure
            )
            fig.write_image(folder + "trajectory.png")

In [None]:
# Floor "F1" of RELEVANT_BUILDINGS[10], shown inline (visualize defaults to store=False).
f0 = Floor(name="F1", building_id=RELEVANT_BUILDINGS[10])
f0.visualize()

In [None]:
# Floor "B1" of the first relevant building, shown inline (visualize defaults to store=False).
f0 = Floor(name="B1", building_id=RELEVANT_BUILDINGS[0])
f0.visualize()

In [None]:
# Floor "F1" of the first relevant building, shown inline (visualize defaults to store=False).
f1 = Floor(name="F1", building_id=RELEVANT_BUILDINGS[0])
f1.visualize()

In [None]:
# Floor "F2" of the first relevant building, shown inline (visualize defaults to store=False).
f2 = Floor(name="F2", building_id=RELEVANT_BUILDINGS[0])
f2.visualize()

In [None]:
# Floor "F4" of the first relevant building, shown inline (visualize defaults to store=False).
f4 = Floor(name="F4", building_id=RELEVANT_BUILDINGS[0])
f4.visualize()

In [None]:
class Building:
    """A building with its floors, wifi reference table and a list of pending 3D plot traces.

    Attributes:
        building_id: Site identifier; used in the input file names.
        positions: DataFrame read from "<building_id>-p.csv"; columns "f", "x", "y" are used.
        wifi: DataFrame read from "<building_id>.csv" with its first column dropped and
            re-indexed by (f, x, y) tuples taken row by row from `positions`.
            NOTE(review): this assumes both files list the same rows in the same order.
        floors: {floor_name: Floor} for every metadata folder whose name is in FLOOR_MAP.
        plotly_data: traces collected by the display_*/show_*/visualize_* methods and
            rendered by show().
    """

    def __init__(self, building_id):
        """Load the wifi tables and every floor with a standard name for `building_id`."""
        print(f"creating: {building_id}", end=" ")
        self.plotly_data = []
        self.building_id = building_id
        self.positions = pd.read_csv(f"../input/wifipositions/{self.building_id}-p.csv")

        wifi = pd.read_csv(f"../input/wifipositions/{self.building_id}.csv")
        wifi = wifi.drop(columns=wifi.columns[0])
        # One (floor, x, y) key per row; zipping the columns avoids transposing the frame.
        wifi["index"] = list(zip(self.positions["f"], self.positions["x"], self.positions["y"]))
        self.wifi = wifi.set_index("index")

        self.floors = {}
        for filepath in glob.glob(f"../input/indoor-location-navigation/metadata/{building_id}/**"):
            floor_name = os.path.basename(filepath)
            try:
                level = FLOOR_MAP[floor_name]
            except KeyError:
                print(f"Floor {floor_name} is not standard identifier")
                continue
            self.floors[floor_name] = Floor(building_id=building_id, name=floor_name, level=level)

    def display_shops(self):
        """Queue the shop outlines of all floors as black 3D lines (one legend group)."""
        plotly.offline.init_notebook_mode()
        # Empty placeholder trace: gives the whole group a single legend entry.
        self.plotly_data.append(go.Scatter3d(x=[None], y=[None], z=[None], legendgroup="shops", name="shops",
                                line={"color": "black", "width": 1}, marker={"color": 'black'}, visible=True, showlegend=True))
        self.plotly_data.extend([
            go.Scatter3d(x=shop['x_poly'], y=shop['y_poly'], z=shop['z_poly'], line={"color": "black", "width": 1}, mode="lines", showlegend=False, legendgroup="shops", name=shop["name"])
            for floor in self.floors.values() for shop in floor.shops])

    def display_waypoints(self):
        """Queue every recorded waypoint trajectory as a gray 3D line at its floor level."""
        plotly.offline.init_notebook_mode()
        # Empty placeholder trace: gives the whole group a single legend entry.
        self.plotly_data.append(go.Scatter3d(x=[None], y=[None], z=[None], legendgroup="trajectories", name="trajectories",
                                line={"color": "gray", "width": 1}, marker={"color": 'gray'}, visible=True, showlegend=True))
        for floor in self.floors.values():
            for trajectory_id, trajectory in floor.trajectories.items():
                waypoint_trajectory = trajectory["waypoint"]
                self.plotly_data.append(go.Scatter3d(x=waypoint_trajectory.x, y=waypoint_trajectory.y, z=[floor.level]*len(waypoint_trajectory.x),
                                                     line={"color": "gray", "width": 2}, mode="lines", name=trajectory_id, legendgroup="trajectories", showlegend=False))

    def visualize_bssids(self, no_bssids=5):
        """Queue marker traces showing where individual bssids were recorded.

        Args:
            no_bssids: number of wifi columns to plot, starting at the second column
                (the original code hard-coded columns[1:6]; the default reproduces that).

        Only rows whose value lies strictly between -90 and -20 are drawn; marker colour
        encodes that value. The first trace is visible, the others start as "legendonly".
        """
        cmap = plt.get_cmap('Spectral')
        norm = plt.Normalize(-90, -20)

        def get_color(v):
            c = cmap(norm(v))
            return f"rgb({int(c[0]*255)}, {int(c[1]*255)}, {int(c[2]*255)})"

        for trace_no, bssid in enumerate(self.wifi.columns[1:1 + no_bssids]):
            readings = self.wifi[bssid]
            # Positional boolean mask: `wifi` and `positions` have different indexes,
            # so the mask must not be index-aligned.
            in_range = ((-90 < readings) & (readings < -20)).to_numpy()
            selected = readings[in_range]
            self.plotly_data.append(
                go.Scatter3d(x=self.positions["x"][in_range], y=self.positions["y"][in_range],
                             z=[FLOOR_MAP[f] for f in self.positions["f"][in_range]],
                             mode="markers", showlegend=True, name=bssid[:7],
                             visible=True if trace_no == 0 else "legendonly",
                             text=selected,
                             marker=dict(color=[get_color(value) for value in selected])))

    def show_position(self, this_positions, show="all"):
        """Queue one marker trace per timestamp, coloured by score.

        Args:
            this_positions: {timestamp: {(floor, x, y): score}}; the colour scale is
                normalised to scores between 0 and 1.
            show: "ends" keeps only the first and last timestamp; any other value keeps all.
        """
        cmap = plt.get_cmap('Spectral')
        norm = plt.Normalize(0, 1)

        def get_color(v):
            c = cmap(norm(v))
            return f"rgb({int(c[0]*255)}, {int(c[1]*255)}, {int(c[2]*255)})"

        last_no = len(this_positions) - 1
        for nn, t in enumerate(sorted(this_positions.keys())):
            if show == "ends" and nn not in [0, last_no]:
                continue
            scored = this_positions[t]
            if len(scored) == 0:
                continue
            pp = list(scored.keys())
            scores = list(scored.values())

            self.plotly_data.append(
                go.Scatter3d(x=[p[1] for p in pp], y=[p[2] for p in pp], z=[FLOOR_MAP[p[0]] for p in pp],
                             mode="markers", showlegend=True, name=f"{t}", visible=True if nn == 0 else "legendonly",
                             text=scores,
                             marker=dict(color=[get_color(c) for c in scores],
                                        )))

    def show_trajectory(self, trajectory_dict, name="test_trajectory"):
        """Queue a blue 3D line through the positions of `trajectory_dict`, ordered by key.

        Args:
            trajectory_dict: {timestamp: (floor, x, y)}.
            name: legend name of the trace.
        """
        trajectory_list = sorted(trajectory_dict.items())
        self.plotly_data.append(go.Scatter3d(x=[t[1][1] for t in trajectory_list], y=[t[1][2] for t in trajectory_list], z=[FLOOR_MAP[t[1][0]] for t in trajectory_list],
                                             text=[(mm, t[0]) for mm, t in enumerate(trajectory_list)], name=name,
                                             line={"color": "blue", "width": 2}, mode="lines"))

    def show(self):
        """Render all queued traces in one 3D figure with the axes decoration hidden."""
        clean_axis = dict(autorange=True, showgrid=False, zeroline=False, ticks='', showticklabels=False, showline=False)
        plot_figure = go.Figure(data=self.plotly_data)
        plot_figure.update_layout(scene=dict(xaxis=clean_axis, yaxis=clean_axis, zaxis=clean_axis))
        plotly.offline.iplot(plot_figure)
        

In [None]:
# Build every relevant building; in an interactive session only the single demo
# building is loaded.
all_buildings = {
    building_id: Building(building_id)
    for building_id in RELEVANT_BUILDINGS
    if not (is_interactive() and building_id != "5a0546857ecc773753327266")
}

In [None]:
# 3D overview of the demo building: shops, recorded trajectories and bssid markers.
b1 = all_buildings["5a0546857ecc773753327266"]
b1.plotly_data = []  # start from an empty trace list so re-running the cell does not stack traces
for add_layer in (b1.display_shops, b1.display_waypoints, b1.visualize_bssids):
    add_layer()
b1.show()

In [None]:
def get_building_id(path):
    """Return the site id found after "SiteID:" on the second line of the file at `path`.

    Raises:
        StopIteration: if the file has fewer than two lines.
        IndexError: if the second line contains no "SiteID:" field.
    """
    with open(path) as handle:
        lines = iter(handle)
        next(lines)  # first line can be skipped
        second_line = next(lines)
    site_ids = re.findall(r"SiteID:(\w*)", second_line)
    return site_ids[0]
    
def extract_trajectory(trajectory_path):
    """Parse the TYPE_WIFI lines of one trajectory log into a DataFrame.

    Args:
        trajectory_path: path of the log file; the file name without its last four
            characters (the extension) becomes the trajectory id.

    Returns:
        DataFrame with columns "trajectory_id", "t" (int, first field of the line),
        "bssid" (third captured field) and "rssi" (int, negative). Lines that do not
        match the wifi pattern are ignored.
    """
    trajectory_id = os.path.basename(trajectory_path)[:-4]
    wifi_pattern = r"(\d{13})\tTYPE_WIFI\t(\w{40})\t(\w{40})\t(-\d+)\t(\d+)\t\d+"
    wifi_logs = []
    with open(trajectory_path) as f:
        for line in f:
            first_match = re.search(wifi_pattern, line)
            if first_match is None:
                continue
            timestamp, _, bssid, rssi, _ = first_match.groups()
            wifi_logs.append((trajectory_id, int(timestamp), bssid, int(rssi)))
    return pd.DataFrame(wifi_logs, columns=("trajectory_id", "t", "bssid", "rssi"))

In [None]:
BATCH_SIZE = 100  # number of (trajectory, timestamp) readings scored per numpy batch


def get_positions_for_trajectory(building_wifi, wifi, prefix=""):
    """Score every known building position against every wifi reading.

    Args:
        building_wifi: DataFrame with one row per known position (its index entries
            become the keys of the innermost result dict) and one column per bssid
            holding the reference rssi. NaN entries and values >= -20 are treated as
            "not observed".
        wifi: DataFrame with columns "trajectory_id", "t", "bssid", "rssi"; rows sharing
            (trajectory_id, t) form one reading.
        prefix: text prepended to the progress messages.

    Returns:
        dict results[trajectory_id][t][position] = score. The score is a weighted sum
        of six similarity terms (weights .2/.2/.2/.15/.05/.2, see the end of the loop);
        callers pick the position with the highest score.

    Note:
        In interactive sessions (is_interactive()) only the first few batches are
        processed, so the result is deliberately incomplete there.
    """
    bssids = list(building_wifi.columns)
    # bssid -> column number (first occurrence, like list.index) for O(1) lookups
    bssid_column = {}
    for column_no, bssid in enumerate(bssids):
        bssid_column.setdefault(bssid, column_no)
    all_trajectory_ids = wifi.trajectory_id.unique()
    # Sort on the (trajectory_id, t) key only; the group frames are not comparable.
    readings = sorted(wifi.groupby(["trajectory_id", "t"]), key=lambda reading: reading[0])
    reference = building_wifi.to_numpy(dtype=float)  # (#points, #bssids)

    def get_building_cube(bs):
        """Stack the reference table `bs` times and derive presence masks at -20 and -40."""
        building_cube_ = np.repeat(reference[np.newaxis, :, :], bs, axis=0)
        building_cube_[building_cube_ >= -20] = np.nan
        building_cube_is_ = ~np.isnan(building_cube_)

        building_cube40_ = building_cube_.copy()
        building_cube40_[building_cube40_ >= -40] = np.nan
        building_cube40_is_ = ~np.isnan(building_cube40_)
        return building_cube_, building_cube_is_, building_cube40_, building_cube40_is_

    building_cube, building_cube_is, building_cube40, building_cube40_is = get_building_cube(BATCH_SIZE)

    building_positions = list(building_wifi.index)
    touched_trajectories = set()
    results = {}
    for i in range(0, len(readings), BATCH_SIZE):
        if is_interactive() and i > BATCH_SIZE * 3:
            break
        readings_batch = readings[i:i + BATCH_SIZE]
        touched_trajectories.update([trajectory_id for (trajectory_id, _), _ in readings_batch])

        if len(readings_batch) == BATCH_SIZE:
            print(f"{prefix} - {i+BATCH_SIZE}/{len(readings)} ({len(touched_trajectories)}/ {len(all_trajectory_ids)})")

        else:
            # Last, shorter batch: rebuild the reference cube with a matching first axis.
            print(f"{prefix} - pad remaining {len(readings)}")
            building_cube, building_cube_is, building_cube40, building_cube40_is = get_building_cube(len(readings_batch))

        # wifi_cube[reading, point, bssid]: the reading's rssi, repeated for every point.
        wifi_cube = np.full((len(readings_batch), len(building_positions), len(bssids)), np.nan)
        for reading_no, (_, group) in enumerate(readings_batch):
            for bssid, rssi in zip(group["bssid"], group["rssi"]):
                column_no = bssid_column.get(bssid)
                if column_no is not None and rssi < -20:
                    wifi_cube[reading_no, :, column_no] = rssi
        wifi_cube40 = wifi_cube.copy()
        wifi_cube40[wifi_cube40 >= -40] = np.nan
        wifi_cube_is = ~np.isnan(wifi_cube)
        wifi_cube40_is = ~np.isnan(wifi_cube40)

        # bssids present in both the reading and the reference point
        diff_both = np.nansum(abs(building_cube - wifi_cube), axis=2)
        cnt_both = (~np.isnan(building_cube - wifi_cube)).astype(int).sum(axis=2)
        cnt_both_nan = cnt_both.copy().astype(float)
        cnt_both_nan[cnt_both_nan == 0] = np.nan
        p_diff_both_ = 30-(diff_both / cnt_both_nan)

        # bssids present only in the reading
        only_reading_is = np.clip(wifi_cube_is.astype(int) - building_cube_is.astype(int), 0, 1).astype(bool)
        only_reading_ = wifi_cube.copy()
        only_reading_[~only_reading_is] = 0
        only_reading = abs(only_reading_).sum(axis=2)
        cnt_only_reading40 = np.clip(wifi_cube40_is.astype(int) - building_cube40_is.astype(int), 0, 1).sum(axis=2)

        cnt_only_both_reading40_nan = (cnt_both + cnt_only_reading40).astype(float)
        cnt_only_both_reading40_nan[cnt_only_both_reading40_nan == 0] = np.nan
        p_only_reading_ = only_reading / len(bssids)
        p_both_in_reading40 = cnt_both / cnt_only_both_reading40_nan

        # bssids present only in the reference point
        only_known_is = np.clip(building_cube_is.astype(int) - wifi_cube_is.astype(int), 0, 1).astype(bool)
        only_known = building_cube.copy()
        only_known[~only_known_is] = 0
        only_known = abs(only_known).sum(axis=2)
        cnt_only_known40 = np.clip(building_cube40_is.astype(int) - wifi_cube40_is.astype(int), 0, 1).sum(axis=2)
        p_only_known = only_known / len(bssids)

        cnt_both_only_known40_nan = (cnt_both + cnt_only_known40).astype(float)
        cnt_both_only_known40_nan[cnt_both_only_known40_nan == 0] = np.nan
        p_both_in_known40 = np.nan_to_num(cnt_both / cnt_both_only_known40_nan)
        # bssids present in neither
        cnt_neither = ((~wifi_cube_is) & (~building_cube_is)).astype(int).sum(axis=2)
        p_cnt_neither = cnt_neither / len(bssids)

        # Map the raw magnitudes onto 0..1 (1 = best).
        p_diff_both = np.clip(p_diff_both_, 0, 30) / 30
        p_only_reading = np.clip(10 - p_only_reading_, 0, 10) / 10
        p_only_known = np.clip(30 - p_only_known, 0, 30) / 30

        # nansum: terms that are undefined for a (reading, point) pair contribute 0.
        scores = np.nansum([
            .2 * p_diff_both,
            .2 * p_both_in_reading40,
            .2 * p_both_in_known40,
            .15 * p_only_reading,
            .05 * p_only_known,
            .2 * p_cnt_neither,
        ], axis=0)

        for ((trajectory_id, t), _), t_score in zip(readings_batch, scores):
            results.setdefault(trajectory_id, {})[t] = dict(zip(building_positions, t_score))
    return results

In [None]:
# Timestamps the submission has to contain, taken from the sample submission:
#   result_timestamps_out[building_id][trajectory] -> list of int timestamps
#   result_timestamps_out_list                     -> flat list of (building_id, trajectory, timestamp)
result_timestamps_out = {}
result_timestamps_out_list = []
for site_path_timestamp in pd.read_csv("../input/indoor-location-navigation/sample_submission.csv")["site_path_timestamp"].tolist():
    building_id, trajectory, timestamp = site_path_timestamp.split("_")
    per_building = result_timestamps_out.setdefault(building_id, {})
    per_building.setdefault(trajectory, []).append(int(timestamp))
    result_timestamps_out_list.append((building_id, trajectory, int(timestamp)))
len(result_timestamps_out_list)  # the output has 10133 lines

In [None]:
# Score all test trajectories building by building and assemble the submission.
# Globals filled here and read by later cells:
#   scores_all_positions[building_id][trajectory_id]    -> {timestamp: {position: score}}
#   positions_of_best_score[building_id][trajectory_id] -> {timestamp: best position on the chosen floor}
#   output_positions[building_id][trajectory_id]        -> {requested timestamp: (floor, x, y)}
#   output                                              -> list of submission rows
scores_all_positions = {}
positions_of_best_score = {}
best_score_for_selected = {}
output_positions = {}
output = []

for tp, (building_id, trajectories) in enumerate(result_timestamps_out.items()):
    # Interactive sessions only process the single demo building.
    if is_interactive() and building_id != "5a0546857ecc773753327266": continue
    building = all_buildings[building_id]
    print(f"{tp}/{len(result_timestamps_out)} = {building_id}")
    # All wifi readings of this building's test trajectories in one frame.
    stacked_wifi_positision_from_test = pd.concat([extract_trajectory(f"../input/indoor-location-navigation/test/{trajectory_id}.txt")
                                                   for trajectory_id in trajectories.keys()])
    all_scores = get_positions_for_trajectory(building.wifi, stacked_wifi_positision_from_test, f"{tp}/{len(result_timestamps_out)}")
    # format: [trajectory_id][timestamp][position_id] = score
    
    
    for ti, (trajectory_id, positions_data) in enumerate(all_scores.items()):
        # Per timestamp: the highest-scoring position on any floor.
        this_best_positions_regardless_of_floor = {t: max(positions_data[t].items(), key=lambda obj: obj[1])[0]
                                                   for t in positions_data.keys()}
        # The floor that wins most timestamps is used for the whole trajectory;
        # best_floor is a (floor, count) pair.
        best_floor = Counter([r[0] for r in this_best_positions_regardless_of_floor.values()]).most_common(1)[0]
        # Per timestamp: the highest-scoring position restricted to that floor.
        this_best_positions_on_floor = {t: max([(k, v) for (k, v) in positions_data[t].items() if k[0] == best_floor[0]], 
                                               key=lambda obj: obj[1])[0]
                                        for t in positions_data.keys()}

        # NOTE(review): outside interactive mode the detailed scores are kept for this
        # one hard-coded trajectory only; plotting cells that look up other trajectory
        # ids will not find them after a full (non-interactive) run.
        if is_interactive() or trajectory_id == "046cfa46be49fc10834815c6":
            if building_id not in scores_all_positions:
                scores_all_positions[building_id] = {}
                positions_of_best_score[building_id] = {}

            scores_all_positions[building_id][trajectory_id] = positions_data
            positions_of_best_score[building_id][trajectory_id] = this_best_positions_on_floor

        resulted_timestamps = sorted(this_best_positions_on_floor.keys())
        output_ts = {}
        # c indexes resulted_timestamps and only moves forward; this relies on the
        # requested timestamps being visited in ascending order -- TODO confirm.
        c = 0
        try:
            timestamps = result_timestamps_out[building_id][trajectory_id]
        except KeyError:
            print(f"Could not find {building_id}{trajectory_id}")
            continue
        for timestamp in timestamps:
            # it always holds: c-pointer is always larger than current timestamp
            while c < len(resulted_timestamps) - 1 and timestamp > resulted_timestamps[c]:
                c += 1
            point1 = this_best_positions_on_floor[resulted_timestamps[c]]

            if c == 0 or timestamp >= resulted_timestamps[c]:  # before first result or after last result
                output_ts[timestamp] = point1
                continue
            # Between two wifi readings: plain midpoint of the two neighbouring positions
            # (not weighted by the time distance to either reading).
            point0 = this_best_positions_on_floor[resulted_timestamps[c-1]]         
            output_ts[timestamp] = (best_floor[0], (point0[1] + point1[1])/2, (point0[2] + point1[2])/2)

        if building_id not in output_positions:
            output_positions[building_id] = {}
        output_positions[building_id][trajectory_id] = output_ts

        for t in sorted(output_ts.keys()):
            best_fit = output_ts[t]
            output.append({
                "site_path_timestamp": f"{building_id}_{trajectory_id}_{t:013d}",
                "floor": FLOOR_MAP[best_fit[0]],
                "x": best_fit[1],
                "y": best_fit[2],
            })
    # Checkpoint: everything collected so far (all buildings up to this one).
    pd.DataFrame(output).to_csv(f"submission-{tp}.csv", index=False)
pd.DataFrame(output).to_csv("submission.csv", index=False)
if is_interactive():
    display(pd.DataFrame(output))

In [None]:
# Quick look at the first lines of the submission file written by the previous cell.
!head submission.csv

In [None]:
# Per-building overview: floors, non-standard floor names, number of train/test
# trajectories and the site name read from the first train log.
summary_by_building = {}
for building_id in BUILDING_IDS:
    floors = get_folders(f'/kaggle/input/indoor-location-navigation/metadata/{building_id}/*')
    invalid_floors = [floor for floor in floors if floor not in FLOOR_MAP]
    train_trajectories = glob.glob(f"/kaggle/input/indoor-location-navigation/train/{building_id}/*/*.txt")

    # A building without train logs (or without a SiteName field) gets no name
    # instead of aborting the whole summary.
    building_name = None
    if train_trajectories:
        with open(train_trajectories[0]) as f:
            next(f)  # first line can be skipped
            site_names = re.findall(r"SiteName:(\w*)", next(f))
        if site_names:
            building_name = site_names[0]

    summary_by_building[building_id] = {"floors": floors, "floors_no": len(floors), "invalid_floors": invalid_floors,
                                        "no_train": len(train_trajectories), "no_test": 0, "building_name": building_name}

# Count test trajectories per building via the SiteID in each test log header.
for path in glob.glob("/kaggle/input/indoor-location-navigation/test/*.txt"):
    building_id = get_building_id(path)
    if building_id in summary_by_building:
        summary_by_building[building_id]["no_test"] += 1
    else:
        print(f"Test trajectory {path} belongs to unknown building {building_id}")

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', 20)

summary = pd.DataFrame(summary_by_building).T.sort_values("no_test", ascending=False)
summary.to_csv("summary.csv")

# NOTE(review): this rebinds the RELEVANT_BUILDINGS name imported from helper at the top
# of the notebook; cells above this one still see the helper's value on a fresh run.
RELEVANT_BUILDINGS = list(summary.loc[summary["no_test"] > 0].index)

@interact
def show_articles_more_than(column=['no_test', 'floors_no', 'no_train'], x=2):
    """Interactive filter: the rows of `summary` whose `column` value is at least `x`."""
    at_least_x = summary[column] >= x
    return summary.loc[at_least_x]

In [None]:
# Plot the per-timestamp scores and the chosen trajectory for one test trajectory.
traj_id = "046cfa46be49fc10834815c6"
b1 = all_buildings["5a0546857ecc773753327266"]
b1.plotly_data = []
# The scoring cell only keeps detailed scores for some trajectories (and only for
# buildings it actually processed), so look the id up defensively instead of
# failing with a KeyError.
traj_scores = scores_all_positions.get("5a0546857ecc773753327266", {}).get(traj_id)
traj_best = positions_of_best_score.get("5a0546857ecc773753327266", {}).get(traj_id)
if traj_scores is None or traj_best is None:
    print(f"No stored scores for trajectory {traj_id}; nothing to plot")
else:
    b1.display_shops()
    b1.show_position(traj_scores)
    b1.show_trajectory(traj_best, traj_id)
    b1.show()

In [None]:
# Plot the per-timestamp scores and the chosen trajectory for one test trajectory.
traj_id = "05d052dde78384b0c543d89c"
b1 = all_buildings["5a0546857ecc773753327266"]
b1.plotly_data = []
# Outside interactive mode the scoring cell stores detailed scores for a single
# hard-coded trajectory only, so this id may be absent; skip the plot instead of
# failing with a KeyError.
traj_scores = scores_all_positions.get("5a0546857ecc773753327266", {}).get(traj_id)
traj_best = positions_of_best_score.get("5a0546857ecc773753327266", {}).get(traj_id)
if traj_scores is None or traj_best is None:
    print(f"No stored scores for trajectory {traj_id}; nothing to plot")
else:
    b1.display_shops()
    b1.show_position(traj_scores)
    b1.show_trajectory(traj_best, traj_id)
    b1.show()

In [None]:
# Plot the per-timestamp scores and the chosen trajectory for one test trajectory.
traj_id = "0c06cc9f21d172618d74c6c8"
b1 = all_buildings["5a0546857ecc773753327266"]
b1.plotly_data = []
# Outside interactive mode the scoring cell stores detailed scores for a single
# hard-coded trajectory only, so this id may be absent; skip the plot instead of
# failing with a KeyError.
traj_scores = scores_all_positions.get("5a0546857ecc773753327266", {}).get(traj_id)
traj_best = positions_of_best_score.get("5a0546857ecc773753327266", {}).get(traj_id)
if traj_scores is None or traj_best is None:
    print(f"No stored scores for trajectory {traj_id}; nothing to plot")
else:
    b1.display_shops()
    b1.show_position(traj_scores)
    b1.show_trajectory(traj_best, traj_id)
    b1.show()

In [None]:
# Plot the per-timestamp scores and the chosen trajectory for one test trajectory.
traj_id = "146035943a1482883ed98570"
b1 = all_buildings["5a0546857ecc773753327266"]
b1.plotly_data = []
# Outside interactive mode the scoring cell stores detailed scores for a single
# hard-coded trajectory only, so this id may be absent; skip the plot instead of
# failing with a KeyError.
traj_scores = scores_all_positions.get("5a0546857ecc773753327266", {}).get(traj_id)
traj_best = positions_of_best_score.get("5a0546857ecc773753327266", {}).get(traj_id)
if traj_scores is None or traj_best is None:
    print(f"No stored scores for trajectory {traj_id}; nothing to plot")
else:
    b1.display_shops()
    b1.show_position(traj_scores)
    b1.show_trajectory(traj_best, traj_id)
    b1.show()