In [2]:
import os
import json
import glob
import cv2
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import plotly.graph_objs as go

from PIL import Image, ImageOps
from skimage import io
from skimage.color import rgba2rgb, rgb2xyz
from tqdm import tqdm
from dataclasses import dataclass
from math import floor, ceil
import random

# Train data generation
import collections
import csv
from pathlib import Path
from typing import List, Tuple, Any

import time
import re
from sklearn import preprocessing
import lightgbm as lgb

import multiprocessing
from multiprocessing import Pool, Manager

import pickle
import math
import gc
import psutil
from collections import Counter

pd.set_option("display.max_columns", 100)

In [3]:
# Settings and altering components for GCP

# path settings
# Dataset root; every glob pattern below is built on top of it.
root_path = "../input/indoor-location-navigation/"
# root_path = "../jupyter/input/"
train_paths = glob.glob(f"{root_path}train/*/*/*")
test_paths = glob.glob(f"{root_path}test/*")
metafiles = glob.glob(f"{root_path}metadata/*")

# function imports using github repo in kaggle kernels
# https://www.kaggle.com/getting-started/71642
!cp -r ../input/indoorlocationcompetition20master/indoor-location-competition-20-master/* ./
from io_f import read_data_file
from compute_f import compute_step_positions, compute_steps, \
compute_headings, compute_stride_length, compute_step_heading, compute_rel_positions, split_ts_seq

# import for gcp settings
# import compute_f
# import io_f
# import visualize_f
# import main
# from io_f import read_data_file
# from compute_f import compute_step_positions, compute_steps, \
# compute_headings, compute_stride_length, compute_step_heading, compute_rel_positions, split_ts_seq

In [4]:
# Make directories for saving files.
# exist_ok=True keeps this cell re-runnable: a second run is a no-op instead of
# an "already exists" complaint from the shell.
os.makedirs("train", exist_ok=True)
os.makedirs("test", exist_ok=True)

In [5]:
!ls ./train

In [20]:
# Filter thresholds in milliseconds
IMU_CUT = 250
WPS_CUT = 5000

# Number of training files to use
# TRAIN_NUM = len(train_paths)
# TRAIN_NUM = round(len(train_paths) / 2)
TRAIN_NUM = 10

# Floor label -> integer floor translation
# NOTE(review): "F1"/"1F" map to 0 while "L1" maps to 1 -- confirm this offset is intended.
FLOOR_MAP = {
    "B3": -3, "B2": -2, "B1": -1,
    "F1": 0, "1F": 0, "F2": 1, "2F": 1, "F3": 2, "3F": 2,
    "F4": 3, "4F": 3, "F5": 4, "5F": 4, "F6": 5, "6F": 5,
    "F7": 6, "7F": 6, "F8": 7, "8F": 7, "F9": 8, "9F": 8,
    "F10": 9,
    "B": 0, "BF": 1, "BM": 2,
    "G": 0, "M": 0,
    "P1": 0, "P2": 1,
    "LG2": -2, "LG1": -1, "LG": 0, "LM": 0,
    "L1": 1, "L2": 2, "L3": 3, "L4": 4, "L5": 5, "L6": 6,
    "L7": 7, "L8": 8, "L9": 9, "L10": 10, "L11": 11,
}

# Columns to shift to the beginning of df.
# shift_columns moves them one at a time, so the last entry ends up first.
SHIFT_COLS = [
    "rel_y", "rel_x", "rel_diff",
    "magn_u_z_avg", "magn_u_y_avg", "magn_u_x_avg",
    "gyro_z_avg", "gyro_y_avg", "gyro_x_avg",
    "ahrs_z_avg", "ahrs_y_avg", "ahrs_x_avg",
    "magn_st", "magn_z_avg", "magn_y_avg", "magn_x_avg",
    "acce_z_avg", "acce_y_avg", "acce_x_avg",
    "site_id", "file_id", "floor_int", "floor",
    "y", "x", "wps_diff", "wifi_ts",
]

# Same columns as SHIFT_COLS plus the one extra test-only column (a separate list object).
SHIFT_COLS_TEST = SHIFT_COLS + ["site_path_timestamp"]

INT_COLS = ["wifi_ts"]
CAT_COLS = ["file_id", "site_id", "floor"]

In [7]:
# Preprocess: report how many files of each kind the globs found
file_counts = (
    "No. Files in Train: {:,}".format(len(train_paths)),
    "\n" + "No. Files in Test: {:,}".format(len(test_paths)),
    "\n" + "No. of metadata files: {:,}".format(len(metafiles)),
)
print(*file_counts)

# Reading in 1 file
def pick_example(max_range, paths):
    """Pick one random trace file and load the size of the floor it belongs to.

    Args:
        max_range: Upper bound for the random index. Values at or beyond
            ``len(paths)`` are capped to the last valid index.
        paths: Sequence of trace-file paths laid out as
            ``.../<site>/<floor>/<file>``.

    Returns:
        Tuple ``(path, site, floorNo, floor_plan_filename, json_plan_filename,
        width_meter, height_meter)``; width and height are read from
        ``map_info`` in the floor's ``floor_info.json``.

    Raises:
        ValueError: If ``paths`` is empty.
    """
    if not paths:
        raise ValueError("pick_example needs at least one path")
    # random.randint includes its upper bound, so passing len(paths) could index
    # one past the end; cap it at the last valid position.
    ex = random.randint(0, min(max_range, len(paths) - 1))
    path = f"{paths[ex]}"
    # Read site and floor relative to the end of the path so the result does not
    # depend on how many components root_path has.
    parts = path.split("/")
    site = parts[-3]
    floorNo = parts[-2]
    floor_plan_filename = f"{root_path}metadata/{site}/{floorNo}/floor_image.png"
    json_plan_filename = f"{root_path}metadata/{site}/{floorNo}/floor_info.json"
    with open(json_plan_filename) as json_file:
        json_data = json.load(json_file)
    width_meter = json_data["map_info"]["width"]
    height_meter = json_data["map_info"]["height"]
    return path, site, floorNo, floor_plan_filename, json_plan_filename, width_meter, height_meter

# Draw one random training trace and print what was resolved for it
(path, site, floorNo, floor_plan_filename,
 json_plan_filename, width_meter, height_meter) = pick_example(len(train_paths), train_paths)
for label, value in (
    ("example path: ", path),
    ("site: ", site),
    ("floorNo: ", floorNo),
    ("floor_plan_filename: ", floor_plan_filename),
    ("json_plan_filename: ", json_plan_filename),
):
    print(label, value)
print("width: {}, height: {} ".format(width_meter, height_meter))

# Raw line count of the chosen trace file
with open(path) as p:
    lines = list(p)
print("No. Lines in 1 example: {:,}". format(len(lines)))

No. Files in Train: 26,925 
No. Files in Test: 626 
No. of metadata files: 204
example path:  ../input/indoor-location-navigation/train/5d27075f03f801723c2e360f/F2/5da7e3acaec4b20006154c03.txt
site:  5d27075f03f801723c2e360f
floorNo:  F2
floor_plan_filename:  ../input/indoor-location-navigation/metadata/5d27075f03f801723c2e360f/F2/floor_image.png
json_plan_filename:  ../input/indoor-location-navigation/metadata/5d27075f03f801723c2e360f/F2/floor_info.json
width: 292.37655429070344, height: 222.0824509676249 
No. Lines in 1 example: 5,276


In [8]:
# for line in lines:
#     print(line)

In [9]:
# Redefine the data extraction class

from dataclasses import dataclass

@dataclass
class ReadData:
    """Parsed contents of one trace file, one 2-D array per record type.

    Each array has one row per record. A record type that does not occur in
    the file yields an array with zero rows and the usual number of columns.
    """
    # Sensor arrays: rows of [timestamp, value, value, value]
    acce: np.ndarray
    acce_uncali: np.ndarray
    gyro: np.ndarray
    gyro_uncali: np.ndarray
    magn: np.ndarray
    magn_uncali: np.ndarray
    ahrs: np.ndarray
    # String rows of [sys_ts, ssid, bssid, ssid_bssid, rssi, lastseen_ts]
    wifi: np.ndarray
    # String rows of [ts, uuid_major_minor, txpower, rssi, distance, mac_address, beacon_ts]
    ibeacon: np.ndarray
    # Rows of [timestamp, x, y]
    waypoint: np.ndarray


# Record types sharing the "timestamp, TYPE, v1, v2, v3" layout, mapped to the
# ReadData field that collects them.
_IMU_RECORD_FIELDS = {
    'TYPE_ACCELEROMETER': 'acce',
    'TYPE_ACCELEROMETER_UNCALIBRATED': 'acce_uncali',
    'TYPE_GYROSCOPE': 'gyro',
    'TYPE_GYROSCOPE_UNCALIBRATED': 'gyro_uncali',
    'TYPE_MAGNETIC_FIELD': 'magn',
    'TYPE_MAGNETIC_FIELD_UNCALIBRATED': 'magn_uncali',
    'TYPE_ROTATION_VECTOR': 'ahrs',
}


def _as_table(rows, width):
    """Return ``rows`` as an array; an empty list becomes shape ``(0, width)``."""
    if not rows:
        # np.array([]) would be 1-D and break ``arr[:, i]`` indexing in callers.
        return np.empty((0, width))
    return np.array(rows)


def read_data_file_ed(data_filename):
    """Parse a tab-separated trace file into a ``ReadData`` instance.

    Blank lines and lines starting with ``#`` are skipped. Every other line is
    dispatched on its second field (the record type); unknown types are ignored.

    Args:
        data_filename: Path of the UTF-8 encoded trace file.

    Returns:
        ReadData with one array per record type.

    Raises:
        IndexError, ValueError: If a recognised record has too few fields or a
            non-numeric value where a number is expected.
    """
    imu_rows = {field: [] for field in _IMU_RECORD_FIELDS.values()}
    wifi = []
    ibeacon = []
    waypoint = []

    with open(data_filename, 'r', encoding='utf-8') as file:
        for line_data in file:
            line_data = line_data.strip()
            if not line_data or line_data[0] == '#':
                continue

            line_data = line_data.split('\t')
            record_type = line_data[1]

            imu_field = _IMU_RECORD_FIELDS.get(record_type)
            if imu_field is not None:
                imu_rows[imu_field].append(
                    [int(line_data[0]), float(line_data[2]), float(line_data[3]), float(line_data[4])]
                )
                continue

            if record_type == 'TYPE_WIFI':
                sys_ts = line_data[0]
                ssid = line_data[2]
                bssid = line_data[3]
                rssi = line_data[4]
                lastseen_ts = line_data[6]
                wifi.append([sys_ts, ssid, bssid, '_'.join([ssid, bssid]), rssi, lastseen_ts])
                continue

            if record_type == 'TYPE_BEACON':
                ts = line_data[0]
                uuid = line_data[2]
                major = line_data[3]
                minor = line_data[4]
                txpower = line_data[5]
                rssi = line_data[6]
                distance = line_data[7]
                # The last two fields are addressed from the end of the record.
                mac_address = line_data[-2]
                beacon_ts = line_data[-1]
                ibeacon.append(
                    [ts, '_'.join([uuid, major, minor]), txpower, rssi, distance, mac_address, beacon_ts]
                )
                continue

            if record_type == 'TYPE_WAYPOINT':
                waypoint.append([int(line_data[0]), float(line_data[2]), float(line_data[3])])

    return ReadData(
        _as_table(imu_rows['acce'], 4),
        _as_table(imu_rows['acce_uncali'], 4),
        _as_table(imu_rows['gyro'], 4),
        _as_table(imu_rows['gyro_uncali'], 4),
        _as_table(imu_rows['magn'], 4),
        _as_table(imu_rows['magn_uncali'], 4),
        _as_table(imu_rows['ahrs'], 4),
        _as_table(wifi, 6),
        _as_table(ibeacon, 7),
        _as_table(waypoint, 3),
    )

In [10]:
# Count the waypoint records over all trace files of one randomly chosen floor
train_path_floor = glob.glob(f"{root_path}train/*/*/")
# train_paths = glob.glob(root_path + "train" + "/*/*/*")
ex = random.randint(0, 6)  # NOTE(review): assumes at least 7 floor directories were found
floor_dir = train_path_floor[ex]
floor_files = os.listdir(floor_dir)
print(floor_dir)
print("no. of files of that floor: ", len(floor_files))
count = 0
for f in floor_files:
    file_path = floor_dir + f
    data = read_data_file_ed(file_path)
    count += len(data.waypoint)

print(count)

../input/indoor-location-navigation/train/5cdbc652853bc856e89a8694/F4/
no. of files of that floor:  19
86


In [11]:
# path, site, floorNo, floor_plan_filename, json_plan_filename, width_meter, height_meter = pick_example(len(train_paths), train_paths)
# show_site_png(root_path, site=site)

In [12]:
# Feature candidate
# You can't get the waypoint in test, so use acce and ahrs data to calculate relative positions
def calc_rel_positions(acce_datas, ahrs_datas):
    """Chain the compute_f helpers to turn sensor rows into relative positions.

    Args:
        acce_datas: Accelerometer rows, passed to ``compute_steps``.
        ahrs_datas: Rotation-vector rows, passed to ``compute_headings``.

    Returns:
        Whatever ``compute_rel_positions`` returns for the detected steps.
        NOTE(review): callers index column 0 as a timestamp and columns 1-2 as
        x/y -- confirm against compute_f.
    """
    step_ts, _step_idx, acce_extremes = compute_steps(acce_datas)
    heading_series = compute_headings(ahrs_datas)
    strides = compute_stride_length(acce_extremes)
    per_step_heading = compute_step_heading(step_ts, heading_series)
    # Column 0 is kept on purpose; drop it with np.delete(result, 0, 1) only
    # when the timestamps are not needed.
    return compute_rel_positions(strides, per_step_heading)

# Feature candidate
# Modify extract_magnetic_strength from github for one magnetic data point
def extract_one_magn_strength(magn_datas):
    """Return the magnitude of a magnetic-field reading.

    Args:
        magn_datas: One reading as a sequence of components, e.g. ``(x, y, z)``.
            A 2-D input with one reading per row is also accepted.

    Returns:
        The Euclidean norm of the reading; for 2-D input, the mean of the
        per-row norms. NaN components propagate to the result.
    """
    d = np.array(magn_datas)
    # Sum over the component axis (the last one). For a single 1-D reading this
    # is the same as axis=0; for (N, 3) input it keeps samples separate instead
    # of adding different samples together.
    return np.mean(np.sqrt(np.sum(d ** 2, axis=-1)))

In [13]:
# path, site, floorNo, floor_plan_filename, \
# json_plan_filename, width_meter, height_meter = pick_example(len(train_paths), train_paths)

In [14]:
# Common methods
def extract_imu_rep(imu_data, wifi_ts, cut_ms=None):
    """Average the sensor rows recorded close in time to a wifi scan.

    Args:
        imu_data: 2-D array whose column 0 is a timestamp and whose columns
            1-3 are the readings to average.
        wifi_ts: Timestamp of the wifi scan (anything ``int()`` accepts).
        cut_ms: Rows are kept when their timestamp differs from ``wifi_ts`` by
            strictly less than this many milliseconds. Defaults to ``IMU_CUT``.

    Returns:
        Tuple ``(avg_x, avg_y, avg_z)`` of column means over the kept rows, or
        three NaNs (after printing "no imu") when no row qualifies or
        ``imu_data`` is empty.
    """
    if cut_ms is None:
        cut_ms = IMU_CUT
    imu_data = np.asarray(imu_data)
    if imu_data.size == 0:
        # A sensor missing from the trace arrives as an empty array, which
        # cannot be column-indexed; treat it like "nothing within the window".
        print("no imu")
        return np.nan, np.nan, np.nan

    # One vectorised comparison instead of a Python loop over every sensor row.
    gaps = np.abs(imu_data[:, 0].astype(int) - int(wifi_ts))
    imu_filtered = imu_data[gaps < cut_ms]
    if imu_filtered.shape[0] == 0:
        print("no imu")
        return np.nan, np.nan, np.nan

    imu_avg_x = imu_filtered[:, 1].mean()
    imu_avg_y = imu_filtered[:, 2].mean()
    imu_avg_z = imu_filtered[:, 3].mean()
    return imu_avg_x, imu_avg_y, imu_avg_z

def shift_columns(cols, df):
    """Return ``df`` with the given columns moved to the front.

    Columns are moved one at a time in the order given, each to position 0, so
    the last entry of ``cols`` ends up as the first column.

    Args:
        cols: Iterable of column labels to move.
        df: DataFrame to reorder; it is not modified.

    Returns:
        A DataFrame with the reordered columns.

    Raises:
        ValueError: If a label in ``cols`` is not a column of ``df``.
    """
    # Work out the final order on the plain label list and re-index the frame
    # once, instead of copying a potentially very wide frame once per column.
    order = list(df.columns)
    for col in cols:
        order.insert(0, order.pop(order.index(col)))
    return df[order]

# convert data types of certain columns
def convert_dtypes(df, col_list, dtype):
    """Cast each listed column of ``df`` to ``dtype``.

    The frame is modified in place and nothing is returned.
    """
    for name in col_list:
        converted = df[name].astype(dtype)
        df[name] = converted

---
## Train generator
---

In [22]:
# Train specific methods
def extract_nearest_wps(wps_data, wifi_ts):
    """Find the row whose timestamp (column 0) is closest to ``wifi_ts``.

    Returns:
        Tuple ``(gap, row)``: the absolute timestamp difference and the matching
        row of ``wps_data``. On a tie the earliest row wins.
    """
    gaps = np.abs(int(wifi_ts) - wps_data[:, 0].astype(int))
    nearest = np.argmin(gaps)
    return gaps[nearest], wps_data[nearest]

def extract_train_path(path):
    """Split a training file path into site, file id and floor information.

    Args:
        path: Path laid out as ``.../<site_id>/<floor>/<file_id>.<ext>``.

    Returns:
        Tuple ``(site_id, file_id, floor_int, floor)`` where ``floor_int`` is
        ``FLOOR_MAP[floor]``. Returns ``None`` after printing a message when the
        path has too few components or the floor label is not in ``FLOOR_MAP``.
    """
    ex_paths = f"{path}".split("/")
    try:
        # Read from the end so the depth of the dataset root does not matter.
        site_id = ex_paths[-3]
        floor = ex_paths[-2]
        f = FLOOR_MAP[floor]
        file_id = ex_paths[-1].split(".")[0]
    except (IndexError, KeyError) as err:
        # Only the two expected failures are handled; the message names the
        # path and the cause so the problem can be traced.
        print("extract_path error", path, repr(err))
        return None
    return site_id, file_id, f, floor

def make_wifi_df_train(path):
    """Build one feature row per wifi scan of a training trace file.

    For every distinct wifi timestamp the row holds the rssi per bssid, the
    nearest waypoint, sensor averages around the scan, the nearest relative
    position and the site/file/floor parsed from ``path``.

    Args:
        path: Path of the training trace file.

    Returns:
        List of single-row DataFrames, one per wifi timestamp. Empty when the
        file contains no wifi records.
    """
    datas = read_data_file_ed(path)
    wifi_datas = datas.wifi
    # A trace without wifi records gives an empty array that cannot be
    # column-indexed below; such a file simply contributes no rows.
    if wifi_datas.ndim != 2 or wifi_datas.shape[0] == 0:
        return []

    wps = datas.waypoint
    rel_positions = calc_rel_positions(datas.acce, datas.ahrs)

    # (column prefix, sensor rows) in the order the feature columns are added.
    # Of the uncalibrated sensors only the magnetometer is used, as it seemed
    # more important in the initial modeling result.
    imu_sources = (
        ("acce", datas.acce),
        ("magn", datas.magn),
        ("ahrs", datas.ahrs),
        ("gyro", datas.gyro),
        ("magn_u", datas.magn_uncali),
    )

    # Site, file and floor depend only on the path: resolve them once rather
    # than once per scan.
    site_id, file_id, f, floor = extract_train_path(path)

    dfs = []
    # Columns of this frame: 0 = scan timestamp, 1 = bssid, 2 = rssi
    df = pd.DataFrame(wifi_datas[:, [0, 2, 4]])
    for wifi_ts, g in df.groupby(0):
        g = g.drop_duplicates(subset=1)
        # One row with a column per bssid holding its rssi
        feat = g.iloc[:, 1:].set_index(1).T
        feat["wifi_ts"] = wifi_ts

        # Closest waypoint in time
        closest_wps = extract_nearest_wps(wps, wifi_ts)
        feat["wps_diff"] = closest_wps[0]
        feat["x"] = closest_wps[1][1]
        feat["y"] = closest_wps[1][2]

        # Sensor averages within the window applied by extract_imu_rep
        for prefix, imu_rows in imu_sources:
            imu_avgs = extract_imu_rep(imu_rows, wifi_ts)
            feat[f"{prefix}_x_avg"] = imu_avgs[0]
            feat[f"{prefix}_y_avg"] = imu_avgs[1]
            feat[f"{prefix}_z_avg"] = imu_avgs[2]
            if prefix == "magn":
                # Strength of the averaged calibrated magnetometer reading
                feat["magn_st"] = extract_one_magn_strength(imu_avgs)

        # Closest relative position worked out from acce and ahrs data
        rel_pos = extract_nearest_wps(rel_positions, wifi_ts)
        feat["rel_diff"] = rel_pos[0]
        feat["rel_x"] = rel_pos[1][1]
        feat["rel_y"] = rel_pos[1][2]

        feat["site_id"] = site_id
        feat["file_id"] = file_id
        feat["floor_int"] = f
        feat["floor"] = floor

        dfs.append(feat)

    return dfs


def make_train_df(paths_df, site_list):
    """Extract wifi feature rows per site and write them to ``./train``.

    For each site, every file listed for it in ``paths_df`` is processed with
    ``make_wifi_df_train`` on the module-level ``pool``, and the combined rows
    are saved as ``./train/<site>_train.csv``.

    Args:
        paths_df: DataFrame with at least the columns ``path`` and ``site_id``.
        site_list: Iterable of site ids to process.

    Returns:
        None. A site that yields no rows is reported and skipped.
    """
    for site in site_list:
        site_paths = paths_df.loc[paths_df["site_id"] == site, "path"].unique()
        # imap hands results back one by one (in input order), so the bar
        # advances as files finish; pool.map drains its input up front and the
        # bar would complete immediately.
        per_file = list(tqdm(pool.imap(make_wifi_df_train, site_paths), total=len(site_paths)))
        rows = [row for file_rows in per_file for row in file_rows]
        if not rows:
            # pd.concat raises on an empty list
            print(f"no wifi rows for site {site}, skipping")
            continue
        wifi_df = pd.concat(rows)
        wifi_df = shift_columns(SHIFT_COLS, wifi_df)
        # bssids not seen in a scan are filled with -999
        wifi_df = wifi_df.fillna(-999)
        convert_dtypes(wifi_df, INT_COLS, int)
        convert_dtypes(wifi_df, CAT_COLS, "category")
        # display(wifi_df.head())
        wifi_df.to_csv(f"./train/{site}_train.csv", index=False)
        del wifi_df

In [16]:
# train_path filtering

def extract_path_for_grouplist(path):
    """Return ``[path, site_id, file_id]`` parsed from a training file path.

    The site id is the fifth "/"-separated component and the file id is the
    seventh component up to its first dot.
    """
    parts = f"{path}".split("/")
    file_stem, _, _ = parts[6].partition(".")
    return [path, parts[4], file_stem]

# Table of every training file with the site and file id parsed from its path
path_list = list(map(extract_path_for_grouplist, train_paths))
df_paths = pd.DataFrame(path_list, columns=["path", "site_id", "file_id"])
site_id_path_list = df_paths["site_id"].unique()

# grouped_paths_list -> three randomly sampled records from every site_id
grouped_paths_df = df_paths.groupby("site_id").sample(n=3)
grouped_paths_list = grouped_paths_df["path"].unique().tolist()
display(grouped_paths_df.head())
print(len(df_paths))

Unnamed: 0,path,site_id,file_id
10839,../input/indoor-location-navigation/train/5a05...,5a0546857ecc773753327266,5d79be098c61220006a34672
10612,../input/indoor-location-navigation/train/5a05...,5a0546857ecc773753327266,5e158ed0f4c3420006d52166
10757,../input/indoor-location-navigation/train/5a05...,5a0546857ecc773753327266,5e15b3b2f4c3420006d522fe
25435,../input/indoor-location-navigation/train/5c3c...,5c3c44b80379370013e0fd2b,5d077df34cae4f000a2db81f
25426,../input/indoor-location-navigation/train/5c3c...,5c3c44b80379370013e0fd2b,5d075ecbb53a8d0008dd4bb1


26925


In [21]:
start = time.time()
num_cores = multiprocessing.cpu_count()
# make_train_df reads this module-level name, so it must stay called `pool`.
pool = Pool(num_cores)

try:
    # # Checking purposes
    # # 100 records:  288.2806177139282 sec
    grouped_paths_df = grouped_paths_df.iloc[:9,:]
    # grouped_paths_df = grouped_paths_df.sample(n=100)
    train_sites_list = grouped_paths_df["site_id"].unique()
    make_train_df(grouped_paths_df, train_sites_list)

    # REAL training
    # train_sites_list = df_paths["site_id"].unique()
    # make_train_df(df_paths, train_sites_list)

    print("time to extract data: ", time.time() - start)
except BaseException:
    # Stop the workers right away on failure or interrupt so they do not keep
    # running after the cell has aborted.
    pool.terminate()
    raise
else:
    pool.close()
finally:
    # Wait for the worker processes to exit so none are left behind.
    pool.join()

100%|██████████| 3/3 [00:00<00:00, 1007.60it/s]
100%|██████████| 1/1 [00:00<00:00, 243.77it/s]
100%|██████████| 3/3 [00:00<00:00, 565.07it/s]


1,wifi_ts,wps_diff,x,y,floor,floor_int,file_id,site_id,acce_x_avg,acce_y_avg,acce_z_avg,magn_x_avg,magn_y_avg,magn_z_avg,magn_st,ahrs_x_avg,ahrs_y_avg,ahrs_z_avg,gyro_x_avg,gyro_y_avg,gyro_z_avg,magn_u_x_avg,magn_u_y_avg,magn_u_z_avg,rel_diff,rel_x,rel_y,1467cad993c176879919f34cdc1637ab5e719a89,c819be27db6a2a0fc7b51b86ea5de9913ee8e976,975175309f52a0424020db06c9600218c46f5f74,b3c92bf4736f8a6507b17ea5eaf34630593741ba,f72e76e1198e03e8747cbab7d5eb7c8892236538,99582d0c318672b5ad49259b5edb238340a38c16,b374df87a70770cdb53534a2ac210d2221649780,1977049d6c8b84c76d71864fd05c5f25d61abea8,ea4208f40a36b18be923fcd5adb59b250d9cef96,5a96e1418a8278183021eb04f6f8a5f79c2c5914,1ec5e4ca2c856b82dd0b298e8c4382d79de95e21,5f6868dd5e53b0b7943c7c0e56f1563ac3babeab,c6af4eff151019147f188b80f153ab1f8f8f3533,c94d4ade874c71673a8cfa83db0bd63cf50d58e0,08580eccfc2fff7875287b0d87b5ace7a69112e0,b3986d6a6843e6a8781c257af32dbc8d1d613d3b,513d408c6a35b3b617933278f3aaa679cfaaa9f4,81d42ba66d22db77b862e07e3a84202e58f8c29b,ead4eb320daf267710b8ec2f80b572ef14ca55f6,f962c52f40b06bee79d1f37a996fdd7cc5b81e1d,2ab3f38e73b53269d6883b8be80da9d38a86ef9b,8130c3c8e8c3225d872c2a4df99c7518067632eb,cd9ee63b982cd8b78437a26d9b2f832d6b642e20,...,a7ac588c1f276fe39079b8e91da73889f3f49b5b,1f8f28d3896bdd008b6b4eef1c43e8cc17b17de0,96f14b4fcd1e9214a581fd4dbbd7ed8514a030fe,ebf4acd532f5f8402dbea5892e621229033f1901,ff115962b9756008e3bd8b6a969d53b1ac7a4616,c92fb62ab3444586b1ce18c5091a78ac63589fbc,263b027847fa009356ae35fa5d506f780d812628,365f6991c073ae9d62fbf544d8d1a92ef3cd2dca,6b3ebe7f7ba1f26436905626d6e2347345fa00c5,58f01d7d71a5b84f6be28fa2e0923b3444637280,f9c28549a90c103c64a6ac83d8e3714b7f3e521a,63d0d455f48e2be51297f1e695fadde7c49a7025,6370e7cef50cce9782679d8d5047f25aeb9196ac,8307ffaab5373cd4976b6cdebcb390014a908d29,99a5ceb93babdda20011fb1c603f31c2e1bf34fd,eea579b6113078849c2dc101cafd8c02f329d320,aa713301a7d1dc1400191d40a8f50dc20e06c80e,01506a39edc2da8d9003f0f12ce5dcc2e8c2772e,6bd230e5703a6abb6cc2b58a55894d33c854ca2d,9d101f50b0297785b7c
a6f89cde8a5b774da1b9b,f1bd91a2f6751faf2e12d4148bda5a419188fb90,1c52ffff671209bb2da2468bd6a58bc2d6fcff6b,18caf2fa8f89e464cab9e2485b6550fbcc34d83b,99205a7c6d960ba33079d8c9b63c09ff086d644e,54364be90554b9f8106ceea4abf7effbf76477d5,3edd18bc87808fd55730b140f99ffcb253fef715,d4ad4013d2f94ec159432bf573244ce556abebc8,4fc8d579d18834cc4f0814abb92443c2ddf4af24,14ad50817e780cd17cec0cace1b7dc7b743a77af,dda6deef8c7a90dc352fb288cc1d807b8329f9d0,f6510a435161341bed6f347f5410db4cf789a277,9bdb755b25c5ff426d28ae8cedcf2b84c2731c68,cc59ad953876bf7c868153f1b7d1d4818513aefe,21ae19216b88f02fa3aa810eda12d1350e9b195c,1002016b0ac78fb99c5fdb89e1f816df4aaacc38,1ff95729c0dda7241be6bf9ee86ceb26d5186c5e,26d7b640f425a95ebb78e5709c545c344371ddb7,53c8ee4789dba74be0badebc3acf8fcea6c9c09b,80e0e21ba06136f3b168295fea79ede3a7b43f12,82d03402cffaacc2c1a738f1cd11ef8768c0b922,0b05eb1b63ef6f4491ab8a37d6f1cf250b7f39b6,8464ea586ee5479e1250f938d7c01e9bc68cefe8,7a10fbc53a85f75ecc384b48a9fe5cc3f6b1cc74,6350f07552fda4c9c10ef19e1087da458645da8a,b68ae49e67fb3d6233bcecc31d43d955d8c317eb,b45348dcdb01e76d156a341eb1aec302b4a05931,21c22f0b13afd0b2446a38eac19492afe8f53826,c91755d1795025b7d708fedc990dfa7a8ea97f5f,21d96943ab26e1e405b5bc3d1b49de7fc601f9bc,ed8e8d2a28a421a676fcc831df3d54f5bdd73f4b
2,1568258963315,2001,113.16723,121.61157,F4,3,5d79be098c61220006a34672,5a0546857ecc773753327266,-1.430059,0.26518,9.341049,5.930359,-32.211975,-14.051697,35.640296,-0.014593,0.006092,0.279669,0.147355,-0.015629,0.097985,-106.433838,-120.80603,-354.458497,309,-0.003603,-0.571802,-43,-43,-44,-44,-47,-49,-58,-58,-59,-60,-60,-60,-60,-60,-61,-61,-61,-61,-62,-64,-65,-65,-65,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
2,1568258965340,4026,113.16723,121.61157,F4,3,5d79be098c61220006a34672,5a0546857ecc773753327266,-0.244947,0.573243,10.058566,12.746826,-29.173584,-23.450501,39.541188,-0.000162,0.027284,0.990767,-0.176126,0.032521,0.041077,-99.644714,-117.849792,-363.777466,27,-0.189151,-0.664915,-54,-54,-53,-53,-53,-40,-60,-58,-59,-58,-58,-58,-58,-58,-63,-61,-61,-55,-62,-66,-75,-75,-75,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
2,1568258967391,4629,111.34523,114.018074,F4,3,5d79be098c61220006a34672,5a0546857ecc773753327266,0.913058,0.174776,9.35381,3.274902,-36.017151,-30.213318,47.125415,0.03519,0.008726,0.990645,-0.009666,-0.073409,-0.169209,-109.089295,-124.665955,-370.62012,294,-0.193979,-0.694646,-60,-58,-58,-62,-67,-40,-60,-58,-69,-57,-57,-57,-57,-57,-63,-61,-64,-55,-55,-66,-72,-73,-74,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
2,1568258969426,2594,111.34523,114.018074,F4,3,5d79be098c61220006a34672,5a0546857ecc773753327266,-0.674588,1.168874,10.438334,13.184387,-47.459656,-26.725464,56.040141,-0.017102,0.076602,0.983172,-0.131532,-0.173928,-0.23994,-99.17981,-136.05371,-367.132267,42,-0.238678,-0.682388,-55,-55,-56,-56,-53,-50,-59,-58,-69,-62,-61,-61,-61,-61,-64,-61,-64,-59,-55,-65,-74,-74,-73,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
2,1568258971480,540,111.34523,114.018074,F4,3,5d79be098c61220006a34672,5a0546857ecc773753327266,-0.113478,1.228876,10.12569,31.266081,-34.172469,-26.992739,53.609079,0.054399,0.065685,0.896856,-0.010462,0.306371,0.153905,-81.098117,-122.766523,-367.399542,130,-0.456663,-0.344332,-63,-63,-62,-63,-62,-49,-66,-999,-65,-58,-58,-58,-58,-58,-66,-61,-62,-64,-51,-65,-68,-69,-69,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999


100%|██████████| 3/3 [00:00<00:00, 5358.99it/s]
100%|██████████| 1/1 [00:00<00:00, 198.24it/s]
100%|██████████| 3/3 [00:00<00:00, 536.26it/s]


1,wifi_ts,wps_diff,x,y,floor,floor_int,file_id,site_id,acce_x_avg,acce_y_avg,acce_z_avg,magn_x_avg,magn_y_avg,magn_z_avg,magn_st,ahrs_x_avg,ahrs_y_avg,ahrs_z_avg,gyro_x_avg,gyro_y_avg,gyro_z_avg,magn_u_x_avg,magn_u_y_avg,magn_u_z_avg,rel_diff,rel_x,rel_y,5043ce55f702178a3d7d086c74098b02246c6a66,2d8421c14405c3c4c939f5a6e12deb72c21047b4,680dd547a35ff184834658ce65132ed9b28c7484,497b541a666e60032fb2fd36a7bc7f4a49aa5f3c,1a2ba063ae4b43a798140ef7410afa892c9e97a5,e3450cd4ed3ee60c3ebfaf88124fadaef0cae85f,4d7e7d320dc076c3659672cffe0c52edaa99f3b0,9cf9b17ac232639482295627c9b5c9e0ac30a58c,b1fe7be9076e8e8a502156988b5182f2e0cfc8ea,8d0bbc73c477179d0b34606938d51063973a08d6,f981562af5c3e59b4b6fe973e595a972150c4754,f11ce4136c58148694ec8fec04b9485cb79816e4,cb5b042170ccaa86bc9596fa819425e7575812ce,fe931cb19ff4ecd2001edeccda55f33dbe795ee9,c18a2f98b08cf69468e3f94b553eabf0b53dd115,fb2c6fe04b0e79a7e1c39585fecbd07d1ec0cce5,63dfb2186414f41edc9b42e83301fd121c7f0d5d,bb4221f46e281c979fe4ae6b4531ee2abcba0cba,57dff60b2933fd182e2b08435846e2fc1b3f4055,1a20d8b24f7eb2c112838b7863a44d409430d614,e23c05d5cc5c0169b2711291af8881f95e326cf1,1b658216894683bfd2a2555bc4cf631782d22ca9,5ce24de88195c36c6e96325717da64de330e9b8c,...,9b7ea8aaf0d1529be158945c01024724dd8b6db4,ce0362e528a54a3b59a327088e2b94b039525dc8,4024eef9a09bdbeb786f3479a052655631b101bd,95a30faf276a1ff112ca55e8d8794c47d1a4a153,e6b88bc4cf598c2935dfc0bc9574098b2a51eac0,b8ec39c641b7aeaadc670b6b3ca8a95ecb42c26b,1a044ff1e880647de2da1b1a3c6fe5c4607edcdf,16278337ecbd763a2a2839417ffd202d3b225f2b,bbfd571696d1451674bf1a439852e8747fcb3d5b,5f5de075724ff2f9d9e2bc7b5015a25431749639,57ca2f1a6b28bc20b5103b90f2573a19c5ad8300,ec4513cc7efdb1bd90cc7e076038f8ca5b6de4b7,c60d8ce2e915333dbff15818da0aa57dda4694b6,070c20d2796947108bf7105e614075d679c27423,88ba0cc7a77b2814f05d1e1f294b89bc9e281065,d4ae069255c373ddd75412e49b17dc673facc735,6a40f97a03fc4867848604549702b06971fbff2b,f79084ca5d2ab4e7efdb678487ea3e7eb5db52ba,638be9701af91e90c9adc8d9eba2dcb0020b4928,7a63206bbeb56c3e1f5
2a8e38ad50e557656aca8,df00a96a3c231be508ca21de51a86f4f5e28196d,f5baf6320da314834e5890db163950e6a7d1ed17,74e571ee97de116bee6efcc9e3763b16fe40b905,5966a215a3d47b44c2e5acaabde9d3e0e74853fc,4076d5ba344f37758fcf31f69a5fec006c5f1253,0dead6b9b952f2af56970a9cbcb569022283894c,7dd63cbd3a170cf01644e6f67586c6184bf8cff8,b3179af33f3280b5cd43000141715686cad4cbe8,6e3f7525c3b56b30c4dd859a2a18302e95eae630,3c6ad8291a69f37f08998673aadc10f07eac25c3,b271089d7a623e0240292e90e6a5492d2e672676,3171708b32f06a711eddb09720768ef0ec3a91a5,4a260ddbd05ee667b5969c05472cd213b1441cf0,d2bc7929956407b88ada0ae46cb9c30870dd7d3a,67595423314e9e7b0f0245a0eeb04dc2aa956278,3af5e4d8e72de8b37b04541c9f17197c510c5adb,a4dbe48735b3c9a8db8e157f3e6f3c8cb860f937,4ba976c89d7bfb72315309057f1c0b712dfb00e5,6d860d67bb071210bbb5b62ee3074fb044c70e39,d69c34a0c6294600cccc0b97c460f809aa969765,9e972751e08b93ce10862f9c073ba43138bda6d5,0f3241f26aef5264dbe44c6e7986ad5eb7d82640,d4d29e9a023779ee3024762799f9a7fc62746d60,9928be9eff17b39eb09164fc5b07bfc1128f5619,692c038cc7ab814f038cd7bdc35386a2f3e3fae1,bb722d1a5ddbf9171a198ab0f49cde92d2e8a585,f4f8a32b53fef8f783c6c18bc5f7e6e886563ab6,ababc923eac28d3cb9f91e39a989cb352ebf08d8,9a065ab05d3fe92365882f5f558a5d1a2ed9c082,24a647b45e086461114d29f9f9ecb18ca0d24a24
2,1560770133142,2047,163.71089,36.19737,F1,0,5d077df34cae4f000a2db81f,5c3c44b80379370013e0fd2b,-0.593713,-0.035576,9.824581,-17.769287,3.233154,-37.518799,41.639658,0.034825,-0.019033,-0.692841,-0.051913,-0.040404,-0.189944,-66.148803,-97.621337,-365.273013,245,0.634833,-0.00394,-54,-55,-56,-56,-56,-56,-56,-57,-57,-58,-58,-59,-59,-59,-59,-59,-60,-60,-60,-60,-61,-61,-61,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
2,1560770135258,4163,163.71089,36.19737,F1,0,5d077df34cae4f000a2db81f,5c3c44b80379370013e0fd2b,0.789145,0.433488,10.017966,-32.133423,-14.405457,-27.335022,44.578891,0.011565,-0.059669,-0.796714,0.033563,-0.205541,-0.103721,-80.51294,-115.259948,-355.089236,118,0.526889,-0.145886,-56,-55,-51,-57,-62,-56,-52,-60,-57,-65,-52,-59,-61,-59,-59,-60,-62,-59,-60,-60,-61,-61,-61,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
2,1560770137329,6234,163.71089,36.19737,F1,0,5d077df34cae4f000a2db81f,5c3c44b80379370013e0fd2b,0.563518,-0.031361,9.903969,-26.204773,-11.716309,-27.473755,39.733729,0.015344,-0.025976,-0.825421,-0.136689,0.092776,-0.04988,-74.58429,-112.570801,-355.227967,97,0.685247,-0.295356,-58,-55,-64,-59,-72,-56,-52,-56,-57,-63,-52,-58,-61,-59,-62,-57,-62,-59,-60,-60,-61,-61,-61,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
2,1560770139420,7172,183.15898,28.152115,F1,0,5d077df34cae4f000a2db81f,5c3c44b80379370013e0fd2b,0.068237,-0.031985,9.700419,-19.992371,-14.009033,-29.693787,38.440459,0.010705,0.000395,-0.864361,-0.061439,0.039279,-0.229955,-68.371888,-114.863525,-357.448001,25,0.588929,-0.356033,-54,-62,-63,-61,-72,-57,-52,-53,-57,-53,-52,-65,-68,-59,-62,-57,-51,-59,-60,-60,-61,-61,-61,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
2,1560770141485,5107,183.15898,28.152115,F1,0,5d077df34cae4f000a2db81f,5c3c44b80379370013e0fd2b,0.697866,0.305717,9.671306,-14.861939,-12.876953,-34.965943,40.11621,-0.001448,-0.022081,-0.895282,-0.112914,-0.018098,-0.100035,-63.241455,-113.731444,-362.720158,80,0.531569,-0.40558,-54,-67,-63,-62,-72,-47,-52,-48,-61,-63,-52,-64,-68,-59,-62,-57,-63,-55,-60,-60,-61,-61,-61,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999


100%|██████████| 3/3 [00:00<00:00, 5803.93it/s]
100%|██████████| 1/1 [00:00<00:00, 370.55it/s]
100%|██████████| 3/3 [00:00<00:00, 790.88it/s]


1,wifi_ts,wps_diff,x,y,floor,floor_int,file_id,site_id,acce_x_avg,acce_y_avg,acce_z_avg,magn_x_avg,magn_y_avg,magn_z_avg,magn_st,ahrs_x_avg,ahrs_y_avg,ahrs_z_avg,gyro_x_avg,gyro_y_avg,gyro_z_avg,magn_u_x_avg,magn_u_y_avg,magn_u_z_avg,rel_diff,rel_x,rel_y,2c0bdf82f642204b1c78bb891522ab2006d829be,378d2bbdf0e1abd0112fdff22bdce37f2ae8b25c,26856370473039eac75eaa6d0ee96baa469649ab,860d5dd33f753508a3ac6a63c8bdbfb7446e540b,ac24e24738edd89e1798e41af35a75543a75eaaa,1aa9dd327c1825412dcc2a99ae0b50979111a801,054ccd5a878d90c29725a08d3977e790ef00f29b,da3996fa30edaa48852b91f6cbc023585c1d10fd,7c439064509faa8fc8959ab607855abe293d7efd,049e270ced73981229b9e04020f778b43e00a03a,ede38a16f157a6ffc17e70f0d8901110cc8b0471,af8d979caf3395d1a45ca39ee1abbea790c5b69a,75242b62c0d010cdf36b9c33b969163c059dd10b,e78676338c30ba7e23b016c6f9b37bf492cd9c5b,2945d979f905ff549840caa7fd8abd38aef6294c,4f3c014b2a513cc644c6517300b1d6c9f2d4482a,58de2497beff53ae8be9e088f988361a12aba248,14ef68b2f36a9860a4f2f3b9bdb5556183bcebba,3b48ab13ee51d6e201baaca02b3046481beb41b0,bd26a64f7a65776d8c4a071dcf2c00109bd75b31,3e968f9c1e87e6c1353354841fe2b9f3fd1a8fc9,7cdd239c6217af1476dfbcdb292356acbbbd9df3,d71f9a39cbe1eed379d4b7ff1488d0096b7c63c6,...,1fc7097b47bf0ce65097b6733d55fad1e26eb42d,ff68d8be4e5401ee5960afd0a29de7dea83004d7,71ef8b13d7a06f408c2816d6fe04d6da46e9ea3f,1cd634a5eb800004a8b405b38dc5e7177b30964c,8458d419848ca0e7435e999ad91092eae2dafbf7,aea5725690716a4a37eadd1c0ef14504572bfbbf,e76bcd4172e72a8d5ba4334318bb649fceecb99d,457dadca3387d70f151192e90427b0e52bfb160d,c4953b3e5350242c11cd7a0350d4972cb06ec43a,a9d9b6eaf74895ab406b25599915843d67d91ad2,447c7eff99dd198de39e07394b319f7212b9988f,19453fd39a6573c8175a0fbd7582b2a072465a90,9a6077d7d6dc46dcd8bf9b142a29b34d60f11040,f234e550c5ef9711f6e79a93dce84b6cffe54675,503b6da6906ebfd54ddbef008fcc7aa8a72b7555,7c2fbb7153c0e404115c031cebd7d4181a5b63cd,52ca8231678ceddcc4bb65f88a16e0adf1474a29,8d201a051157df1bbc0dc81dffb3c60f539dc01d,9448deb05406e487d8ac0d661d3c03d95048e6f5,dd658a5683309cf8492
ad01104f5feae0d51c1e1,14141de207d783094428024a3e931937afd58519,6093338a183084b1a37f5e112f7cc0282fcdf372,bc179c7df86297109f04f8ede6af1068f9dc12fd,aa011e0efdd65491e60793c57addee8feb3b74c2,92ddd7c082ffca40eed163ffb564dcc66bc990fc,bb80b1e1f051aabdf16d214dfd9b2f27594466e0,7706108bd0c0b5a74c25c0a386ae99ab0a28c2a5,d106a88d03756d49786e2f7f5e7126e43e32dd7e,68da1421e1738cb556c9b33f53a881583cad4f16,02cdfc531f12d5a98f820ffcf5f5b0298beb77b1,48d59d818652276967346830ddef54c64e6bcd77,9118debf0d42b5930b9da28ce0d95be8a2120928,588d9b969705287c78b51c6369accbe41ea15daa,a27fe1a1de66e00e5c49606c1558d80d563e09d8,1ec037e9d43a666c7f5f0ddec69665a73e9d5791,647c5107346995697dd355f1a27f3dccc52c43a4,947585ff20e460db3427805ead6f932d24545481,1f1ea2f3615f3de5026b86f231eb3ce4ebe80396,9e82ea109030e0cd99964df32be56444f6c79c19,5f5dd066653ac86f6ac9ebe30a5f227aa7c37b7e,a65f6f04691fe0286e3a2a9a7278c090bac513ae,0b65b3f923fee57d59cea533a5d4a0a041b687e8,d88d95f828cda900af1b2a9a3539bffefa82ec72,e81039dc9056d5186e31eda9bb309c5bf1c5c35f,fb50a0bb158218aa553069d0bc7e8af53f38afe1,b74b0b7381962f2093dbb5e0a3bbb154f1d9778e,e19abd63b589e1e647736afbcde911bf257bb360,c8e755820657cbf0f26f2444671db29f88facad5,20c63d91c5af8b7398ca5816373e2d85a65a5a4e,de18492b75976fe0ec5002474b1bf61655acf0be
2,1560137727741,706,73.254776,42.315132,F3,2,5cfdd04effdc200008fddee2,5cd56865eb294480de7167b6,-0.049654,2.540031,9.344255,20.044615,-11.573077,-25.850769,34.698486,0.03547,0.103047,0.779515,0.068356,-0.128367,-0.359954,26.044615,-76.253077,-172.850769,146,-0.390138,-0.114244,-56,-56,-65,-44,-71,-71,-72,-71,-71,-56,-55,-90,-90,-89,-88,-65,-50,-70,-57,-69,-88,-78,-76,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
2,1560137728460,1425,73.254776,42.315132,F3,2,5cfdd04effdc200008fddee2,5cd56865eb294480de7167b6,-0.400607,2.820138,9.44036,20.5152,-13.0008,-27.535199,36.716228,0.047933,0.117185,0.759424,0.024035,0.051274,0.062145,26.5152,-77.6808,-174.5352,159,-0.602448,-0.110087,-56,-62,-63,-45,-72,-71,-72,-64,-64,-56,-55,-90,-90,-89,-88,-57,-50,-65,-72,-69,-88,-77,-75,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
2,1560137729215,2180,73.254776,42.315132,F3,2,5cfdd04effdc200008fddee2,5cd56865eb294480de7167b6,-0.754588,2.650105,9.496272,19.2528,-17.1936,-24.1584,35.354186,0.046538,0.126775,0.773497,-0.098114,-0.119147,0.215005,25.2528,-81.8736,-171.1584,124,-0.577975,-0.115137,-63,-60,-61,-54,-73,-73,-73,-63,-63,-56,-55,-92,-91,-89,-92,-60,-50,-71,-72,-69,-88,-77,-75,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
2,1560137729900,2865,73.254776,42.315132,F3,2,5cfdd04effdc200008fddee2,5cd56865eb294480de7167b6,-0.738736,2.607782,9.263917,17.616,-18.5472,-22.7568,34.237319,0.062935,0.132737,0.773051,0.072291,0.07784,0.045092,23.628,-83.2128,-169.7952,96,-0.560516,-0.127314,-62,-51,-51,-50,-75,-75,-75,-72,-72,-66,-55,-92,-91,-89,-92,-60,-46,-71,-64,-69,-88,-68,-75,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
2,1560137730555,3520,73.254776,42.315132,F3,2,5cfdd04effdc200008fddee2,5cd56865eb294480de7167b6,0.010858,3.099768,9.139758,16.966153,-18.246923,-22.280769,33.425039,0.069548,0.136194,0.793037,0.102048,-0.052248,-0.029881,22.966153,-82.926923,-169.280769,66,-0.513409,-0.154058,-62,-56,-58,-49,-77,-77,-78,-71,-70,-67,-55,-91,-90,-91,-91,-66,-40,-57,-64,-69,-88,-66,-75,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999


time to extract data:  22.007187604904175


---
## Test generator
---

In [24]:
# Test specific methods
def extract_nearest_wifi(wifi_datas, timestamp):
    """Keep only the wifi rows whose timestamp is closest to ``timestamp``.

    Parameters
    ----------
    wifi_datas : np.ndarray
        2-D array of wifi records. Column 0 holds the record timestamp and
        must be convertible with ``int()``.
    timestamp : int or str
        Target timestamp, convertible with ``int()``.

    Returns
    -------
    np.ndarray
        The rows of ``wifi_datas`` (in their original order) whose column-0
        value has the smallest absolute difference to ``timestamp``. Every
        row that ties for the minimum is kept. An empty input is returned
        unchanged.
    """
    # Nothing to search: hand the empty input back instead of failing in min().
    if len(wifi_datas) == 0:
        return wifi_datas

    target = int(timestamp)  # loop-invariant, convert once
    # int() is applied per element so string and numeric timestamps are
    # converted exactly as before.
    diffs = np.fromiter(
        (abs(target - int(ts)) for ts in wifi_datas[:, 0]),
        dtype=np.int64,
        count=len(wifi_datas),
    )
    # Boolean mask keeps all rows that share the minimal distance.
    return wifi_datas[diffs == diffs.min()]

def make_wifi_df_test(zipped_paths):
    """Build the wifi/IMU feature rows for one submission record.

    Parameters
    ----------
    zipped_paths : tuple
        ``(site_id, file_id, timestamp, site_path_timestamp)`` describing one
        row of the sample submission.

    Returns
    -------
    list of pd.DataFrame
        One single-row frame per distinct wifi timestamp kept by
        ``extract_nearest_wifi``. Each frame has one column per value found in
        wifi column 2 (presumably the BSSID -- confirm against
        ``read_data_file_ed``) plus the timestamp, IMU-average,
        relative-position and path-identifier columns added below. The target
        columns ``x``, ``y``, ``floor`` and ``floor_int`` are filled with NaN
        because they are unknown for test data.
    """
    site_id, file_id, timestamp, site_path_timestamp = zipped_paths
    # Resolve the trace against the notebook-level data root so that switching
    # `root_path` (e.g. for the GCP layout) also redirects the test files.
    file_path = root_path + "test/" + file_id + ".txt"
    datas = read_data_file_ed(file_path)
    acce_datas = datas.acce
    ahrs_datas = datas.ahrs
    # (column prefix, sensor samples) in the order the feature columns are
    # created. Only magn is taken from the uncalibrated streams, as it seemed
    # more important in the initial modeling result; acce_uncali and
    # gyro_uncali are intentionally not used. Waypoints are not used either.
    sensor_streams = [
        ("acce", acce_datas),
        ("magn", datas.magn),
        ("ahrs", ahrs_datas),
        ("gyro", datas.gyro),
        ("magn_u", datas.magn_uncali),
    ]
    rel_positions = calc_rel_positions(acce_datas, ahrs_datas)

    target_ts = int(timestamp)  # loop-invariant

    # Keep only the wifi records closest in time to the requested timestamp
    wifi_datas = extract_nearest_wifi(datas.wifi, timestamp)

    dfs = []
    # Columns 0/2/4 of the wifi records are relabelled 0/1/2 by the DataFrame
    # constructor: 0 = wifi timestamp, 1 = feature column name, 2 = value.
    df = pd.DataFrame(wifi_datas[:, [0, 2, 4]])
    for wifi_ts, g in df.groupby(0):
        # One value per access point: the first occurrence wins
        g = g.drop_duplicates(subset=1)
        # Pivot to a single row whose columns are the access-point identifiers
        feat = g.iloc[:, 1:].set_index(1).T
        feat["wifi_ts"] = wifi_ts

        # Distance in time between this wifi scan and the requested timestamp
        feat["wps_diff"] = abs(int(wifi_ts) - target_ts)
        feat["x"] = np.nan
        feat["y"] = np.nan

        # Representative (averaged) x/y/z value of each sensor around wifi_ts.
        # NOTE(review): the averaging window is decided inside extract_imu_rep
        # (presumably IMU_CUT ms) -- confirm there.
        for prefix, sensor_data in sensor_streams:
            avgs = extract_imu_rep(sensor_data, wifi_ts)
            feat[f"{prefix}_x_avg"] = avgs[0]
            feat[f"{prefix}_y_avg"] = avgs[1]
            feat[f"{prefix}_z_avg"] = avgs[2]
            if prefix == "magn":
                # Magnetic strength derived from the averaged magn vector
                feat["magn_st"] = extract_one_magn_strength(avgs)

        # Closest relative position that was worked out with acce and ahrs data
        rel_pos = extract_nearest_wps(rel_positions, wifi_ts)
        feat["rel_diff"] = rel_pos[0]
        feat["rel_x"] = rel_pos[1][1]
        feat["rel_y"] = rel_pos[1][2]

        # Path identifiers; the floor is unknown for test data
        feat["site_path_timestamp"] = site_path_timestamp
        feat["site_id"] = site_id
        feat["file_id"] = file_id
        feat["floor_int"] = np.nan
        feat["floor"] = np.nan

        dfs.append(feat)

    return dfs

def make_test_df(zipped_path, site):
    """Generate and save the test feature table of one site.

    Runs ``make_wifi_df_test`` for every record of ``zipped_path`` on the
    module-level multiprocessing ``pool``, concatenates the resulting rows,
    reorders columns, fills missing values with -999, converts dtypes, shows
    the head of the table and writes it to ``./test/{site}_test.csv``.

    Parameters
    ----------
    zipped_path : iterable of tuple
        ``(site_id, file_id, timestamp, site_path_timestamp)`` records.
    site : str
        Site identifier, used in the output file name.

    Raises
    ------
    ValueError
        If no feature row was produced for the site.
    """
    tasks = list(zipped_path)
    # imap yields results in task order as the workers deliver them, so the
    # bar follows finished records rather than how fast tasks are handed to
    # the pool.
    dfs_all = list(tqdm(pool.imap(make_wifi_df_test, tasks), total=len(tasks)))
    dfs_unpack = [row for df in dfs_all for row in df]
    if not dfs_unpack:
        # pd.concat would raise an opaque "No objects to concatenate" here
        raise ValueError(f"No wifi feature rows were generated for site {site}")
    wifi_df = pd.concat(dfs_unpack)
    wifi_df = shift_columns(SHIFT_COLS_TEST, wifi_df)
    wifi_df = wifi_df.fillna(-999)
    convert_dtypes(wifi_df, tqdm(INT_COLS), int)
    convert_dtypes(wifi_df, tqdm(CAT_COLS), "category")
    display(wifi_df.head())
    # print(wifi_df.iloc[:, :30].info())
    # Do not rely on the earlier `!mkdir test` cell having been run
    os.makedirs("./test", exist_ok=True)
    wifi_df.to_csv(f"./test/{site}_test.csv", index=False)

In [25]:
# Get submission file (resolved against the configured data root)
sub_df = pd.read_csv(root_path + "sample_submission.csv")
# Split "site_path_timestamp" on "_" into its three parts with the vectorised
# string accessor instead of building one pd.Series per row.
sub_df[["site_id", "file_id", "timestamp"]] = sub_df["site_path_timestamp"].str.split("_", expand=True)
sub_df = sub_df.drop(columns=["floor", "x", "y"])
# sub_df_site_list = sub_df["site_id"].unique()

In [None]:
start = time.time()
num_cores = multiprocessing.cpu_count()
# `pool` has to stay a module-level name: make_test_df looks it up as a global.
pool = Pool(num_cores)

try:
    # 100 records:  33.47870922088623 sec
    # Debug subset: comment out the active `iloc` line below to process every
    # row of the submission file.
    # sub_df = sub_df.sample(n=100)
    sub_df = sub_df.iloc[:9, :]
    test_sites = sub_df["site_id"].unique()

    # Run generator for each building
    for site in test_sites:
        sub_df_filtered = sub_df[sub_df["site_id"] == site]
        site_file_zip = list(zip(sub_df_filtered["site_id"],
                                 sub_df_filtered["file_id"],
                                 sub_df_filtered["timestamp"],
                                 sub_df_filtered["site_path_timestamp"]))
        make_test_df(site_file_zip, site)

    # display(wifi_df.head())

    print("time to extract data: ", time.time() - start)
    pool.close()
except BaseException:
    # Stop the workers right away if any site fails or the run is interrupted
    pool.terminate()
    raise
finally:
    # Always reap the worker processes so they do not outlive the cell
    pool.join()

In [None]:
# start = time.time()

# num_cores = multiprocessing.cpu_count()
# print(f"num_cores={num_cores}")
# pool = Pool(num_cores)

# # 10 paths:  6.070369720458984
# # 100 paths:  87.05400061607361
# # dfs_all = pool.map(make_wifi_df, tqdm(train_paths[:TRAIN_NUM]))
# dfs_all = pool.map(make_wifi_df, tqdm(grouped_paths_list[:10]))

# # time to process:  11.514546155929565
# # dfs_all = []
# # for path in train_paths[:TRAIN_NUM]:
# #     dfs_all.append(make_wifi_df(path))

# print(len(dfs_all))
# print("time to extract data: ", time.time() - start)
# pool.close()

In [None]:
# start = time.time()

# num_cores = multiprocessing.cpu_count()
# pool = Pool(num_cores)

# # Do this for each building

# # 10 paths:  8.992910146713257
# # 100 paths:  2454.589078426361
# dfs_unpack = [row for df in dfs_all for row in df]
# wifi_df = pd.concat(dfs_unpack)

# print("time for df conversion: ", time.time() - start)
# print(len(wifi_df.columns))
# print(len(wifi_df))
# display(wifi_df.head())
# pool.close()

In [None]:
# wifi_df.iloc[:,:50].info()

In [None]:
# start = time.time()

# # move columns
# cols = ["acce_z_avg", "acce_y_avg", "acce_x_avg", \
#         "site_id", "file_id", "floor_int", "floor", \
#         "y", "x", "wps_diff", "wifi_ts"]

# for col in cols:
#     df_cols = list(wifi_df.columns)
#     df_cols.insert(0, df_cols.pop(df_cols.index(col)))
#     wifi_df = wifi_df[df_cols]
  
# # Fillna
# wifi_df = wifi_df.fillna(-999)

# display(wifi_df.head())
# print(len(wifi_df))

# print("time to shift columns: ", time.time() - start)
# print(wifi_df.iloc[:,:50].info())

In [None]:
# print("available RAM:", psutil.virtual_memory())

# train_file_name = "indoor_train_5.pkl"

# with open(train_file_name, "wb") as file:
#     pickle.dump(wifi_df, file)

# del wifi_df
# del dfs_unpack
# del dfs_all
# gc.collect()

# print("available RAM after cleanup:", psutil.virtual_memory())

In [None]:
# # Load the data back in
# train_file_name = "indoor_train_5.pkl"

# with open(train_file_name, "rb") as file:
#     df_train = pickle.load(file)

In [None]:
# print("df len: ", len(df_train), "\n")
# print("site_id nunique: ", df_train["site_id"].nunique(), "\n")
# print("site_id value_counts: ", df_train["site_id"].value_counts(), "\n")
# print("file_id nunique: ", df_train["file_id"].nunique(), "\n")
# print("x value_counts: ", df_train["x"].value_counts(), "\n")
# print("y value_counts: ", df_train["y"].value_counts(), "\n")
# print("wifi_ts nunique: ", df_train["wifi_ts"].nunique(), "\n")
# print("wps_diff nunique: ", df_train["wps_diff"].nunique(), "\n")
# display(df_train.head())

In [None]:
# df_train_pp = df_train.loc[:, ["site_id", "x", "y", "acce_x_avg", "acce_y_avg", "acce_z_avg"]]
# display(df_train_pp.head())
# sns.pairplot(df_train_pp, hue="site_id")

In [None]:
# # Check the wps_diff distribution
# # Need to filter out those wps that are above 5000ms difference from wifi_ts
# f, ax = plt.subplots(figsize=(8, 8))
# f.patch.set_facecolor("white")
# sns.distplot(df_train["wps_diff"])
# plt.show()

In [None]:
# df_train_slim = df_train[df_train["wps_diff"] < WPS_CUT]
# perc = round(len(df_train_slim)/len(df_train)*100, 2)

# print("no of records: ", len(df_train))
# print(f"Filter df_train with {WPS_CUT}, it retains {perc} % of data")

In [None]:
# # Visualizing timestamp distribution

# # LabelEncode site_id, file_id, floor_converted, ssid, bssid
# # def col_encode(df, cols):
# #     for col in cols:
# #         le = preprocessing.LabelEncoder()
# #         df["%s_le"%col] = le.fit_transform(df[col])

# # col_enc = ["site_id", "file_id", "wifi_ssid", "wifi_bssid", "beacon_ssid"]
# # col_encode(df_train, tqdm(col_enc))

# # convert data types of certain columns
# def convert_dtypes(df, col_list, dtype):
#     for col in col_list:
#         df[col] = df[col].astype(dtype)

# convert_dtypes(df_train, tqdm(["wifi_ts"]), int)
# convert_dtypes(df_train, tqdm(["file_id", "site_id", "floor"]), "category")

# # Check
# display(df_train.head())

In [None]:
# # Methods for preprocessing train data: Timestamp handling
# def find_diff_ts(ts, data):
#     data_ts = data[0]
#     diff_ts = int(data_ts) - int(ts)
#     return diff_ts

# def find_start_ts(path):
#     with open(path, 'r', encoding='utf-8') as file:
#         lines = file.readlines()

#     for line_data in lines:
#         line_data = line_data.strip()
#         m = re.search(r"(?<=startTime.)(.*)", line_data)
#         start_ts = m.groups(0)
#         if m:
#             return (start_ts[0])

# def find_smallest_diff(t, data):
#     if data.size == 0:
#         return np.array([])
#     else:
#         data_ts = data[:, [0]]
#         diff = []
#         for ts in data_ts:
#             diff.append(abs(int(t) - int(ts)))
#         closest_index = np.argmin(diff) # if multiple records have the same value..?
#         return data[closest_index]

In [None]:
# # Method for preprocessing train data: splitting acce/ahrs/gyro/magn
# def split_axis(data, start_ts):
#     if data.size == 0:
#         # print("no axis data")
#         return [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]
#     else:
#         data_ts = data[0]
#         diff_ts = int(data[0]) - int(start_ts)
#         x_axis = data[1]
#         y_axis = data[2]
#         z_axis = data[3]
#         try:
#             accuracy = data[4]
#         except IndexError:
#             accuracy = np.nan
#         return [data_ts, diff_ts, x_axis, y_axis, z_axis, accuracy]

# # Method for preprocessing train data: splitting wifi
# def split_wifi(data, start_ts):
#     if data.size == 0:
#         # print("no wifi data")
#         return [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]
#     else:
#         data_ts = data[0]
#         diff_ts = int(data[0]) - int(start_ts)
#         ssid = data[1]
#         bssid = data[2]
#         rssi = data[3]
#         if len(data) > 5:
#             freq = data[4]
#             last_seen_ts = data[5]
#         else:
#             freq = np.nan
#             last_seen_ts = data[-1]
#         return [data_ts, diff_ts, ssid, bssid, rssi, freq, last_seen_ts]

# # Method for preprocessing train data: splitting ibeacon
# def split_beacon(data, start_ts):
#     if data.size == 0:
#         # print("no beacon data")
#         return [np.nan, np.nan, np.nan, np.nan]
#     else:
#         data_ts = data[0]
#         diff_ts = int(data[0]) - int(start_ts)
#         ssid = data[1]
#         rssi = data[2]
#         return [data_ts, diff_ts, ssid, rssi]

# # Method for preprocessing train data: calc rel pos
# def split_rel_pos(data, start_ts):
#     if data.size == 0:
#         # print("no rel_pos data")
#         return [np.nan, np.nan, np.nan, np.nan]
#     else:
#         data_ts = data[0]
#         diff_ts = int(data[0]) - int(start_ts)
#         x_axis = data[1]
#         y_axis = data[2]
#         return [data_ts, diff_ts, x_axis, y_axis]

In [None]:
# # Extract path and other data
# def extract_path(path, floor_map):
#     # split path
#     try:
#         ex_path = f"{path}"
#         ex_paths = ex_path.split("/")
#         site_id = ex_paths[4]
#         floor = ex_paths[5]
#         f = floor_map[floor]
#         file_id = ex_paths[6].split(".")[0]
#         return [site_id, file_id, f, floor]
#     except:
#         print("extract_path error")

# # Definitely needs to be refactored
# def extract_data(path):
#     start_ts = find_start_ts(path)
#     path_datas = read_data_file(path)
#     acce = path_datas.acce
#     ahrs = path_datas.ahrs
#     magn = path_datas.magn
#     gyro = path_datas.gyro
#     acce_uncali = path_datas.acce_uncali
#     magn_uncali = path_datas.magn_uncali
#     gyro_uncali = path_datas.gyro_uncali
#     wifi = path_datas.wifi
#     wps = path_datas.waypoint
#     ibeacon = path_datas.ibeacon
#     rel_positions = calc_rel_positions(acce, ahrs)

#     # Changed from: just extracting wps time stamps -> take all acce uncalib timestamps
#     # ts = np.unique(wps[:, [0]])
#     if acce_uncali.any():
#         # print("acce_uncali")
#         ts = np.unique(acce_uncali[:, [0]]) # take uncalibrated acce, as acce sometimes has less data
#     elif acce.any():
#         # print("acce")
#         ts = np.unique(acce[:, [0]])
#     else:
#         print("no acce or acce_uncali")

#     # extract data for each timestamp of waypoints
#     res = []
#     for t in ts:
#         try:
#             wp_closest = find_smallest_diff(t, wps)
#             closest_wp_ts = wp_closest[0]
#             diff_ts_wp_ts = abs(int(t) - int(closest_wp_ts))
#             # time_stamp_cut = 2000, only the records within 2 sec of waypoint are kept
#             if diff_ts_wp_ts < time_stamp_cut:
#                 # flag to indicate how close the data point is to the wps
#                 # print("diff_ts_wp_ts", diff_ts_wp_ts)
#                 within_100ms = True if abs(diff_ts_wp_ts) <= 100 else False
#                 within_200ms = True if abs(diff_ts_wp_ts) <= 200 else False
#                 x = wp_closest[1]
#                 y = wp_closest[2]
#                 # print("x, y: ", x, y)
#                 diff_start_ts = int(t) - int(start_ts)
#                 diff_start_wp_ts = int(closest_wp_ts) - int(start_ts)
#                 # print("diff_start_ts, diff_start_wp_ts: ", diff_start_ts, diff_start_wp_ts)
#                 acce_closest = split_axis(find_smallest_diff(t, acce), start_ts)
#                 ahrs_closest = split_axis(find_smallest_diff(t, ahrs), start_ts)
#                 magn_closest = split_axis(find_smallest_diff(t, magn), start_ts)
#                 magn_closest.append(extract_one_magn_strength(magn_closest)) # append magnetic strength only for the magn data
#                 gyro_closest = split_axis(find_smallest_diff(t, gyro), start_ts)
#                 # print("acce: ", acce_closest)
#                 # print("ahrs: ", ahrs_closest)
#                 # print("magn: ", magn_closest)
#                 # print("gyro: ", gyro_closest)
#                 acce_u_closest = split_axis(find_smallest_diff(t, acce_uncali), start_ts)
#                 magn_u_closest = split_axis(find_smallest_diff(t, magn_uncali), start_ts)
#                 gyro_u_closest = split_axis(find_smallest_diff(t, gyro_uncali), start_ts)
#                 # print("acce_u_closest: ", acce_u_closest)
#                 # print("magn_u_closest: ", magn_u_closest)
#                 # print("gyro_u_closest: ", gyro_u_closest)
#                 wifi_closest = split_wifi(find_smallest_diff(t, wifi), start_ts)
#                 if len(ibeacon) > 0:
#                     beacon_closest = split_beacon(find_smallest_diff(t, ibeacon), start_ts)
#                 else:
#                     beacon_closest = [np.nan, np.nan, np.nan, np.nan]
#                 rel_pos = split_rel_pos(find_smallest_diff(t, rel_positions), start_ts)
#                 # print([t, x, y, int(closest_wp_ts), acce_closest, acce_u_closest])
#                 res.append([int(t), start_ts, diff_start_ts, x, y, int(closest_wp_ts), diff_start_wp_ts, diff_ts_wp_ts, within_100ms, within_200ms] + \
#                            acce_closest + ahrs_closest + magn_closest + gyro_closest + \
#                            acce_u_closest + magn_u_closest + gyro_u_closest + \
#                            wifi_closest + beacon_closest + rel_pos
#                           )
#             else:
#                 # print("no wp made it through timestamp cut")
#                 continue
#         except Exception as exc:
#             pass
#             # print("Error message: ", exc)
#             # print("extract_test_data error")
#     return res

In [None]:
# # %%timeit

# # 5.55 ms ± 1.76 ms per loop
# path, site, floorNo, floor_plan_filename, \
# json_plan_filename, width_meter, height_meter = pick_example(len(train_paths), train_paths)

# def one_trace_to_rows(path, floor_map):
#     try:
#         path_info = extract_path(path, floor_map)
#         data = extract_data(path)
#         # rows = list(itertools.chain(path_info, *data))
#         rows = []
#         for d in data:
#             row = path_info + d
#             rows.append(row)
#             # print("row: ", row)
#         return rows
#     except:
#         print("one_trace_to_rows error at: ", path)

# # path -> train/5cd56bdbe2acfd2d33b663c0/L3/5dfc8108241c3600064049b9.txt
# # time w/ for loop with 1 train_path -> 11.6
# # time w/ itertools.chain for 1 train_path -> 11.8
# start = time.time()
# path_info = extract_path(path, floor_map)
# print("path: ", path_info)
# rows = one_trace_to_rows(path, floor_map)
# print("time to process one train_path", time.time() - start)
# #print("col count: ", len(rows[0]))
# print("rows: ", rows)

In [None]:
# # Run row making function for all training paths
# # print(train_paths[:10])
# import time
# start = time.time()

# all_rows = []
# for train_path in train_paths[:10]:
#     rows = one_trace_to_rows(train_path, floor_map)
#     all_rows.extend(rows)

# one_trace_df = pd.DataFrame(all_rows)
# display(len(one_trace_df))

# # Data below are the time it took to create the old version of training data (only waypoints)
# # without Pool
# # 10 -> 1.64 sec
# # 100 -> 28.12 sec
# # 1000 -> 286.67 sec
# # to process training (~26,000 files) -> ~7500 sec (~2hours)
# print(time.time() - start)

# with Pool
# no need for wrapper with pool.starmap -> https://qiita.com/okiyuki99/items/a54797cb44eb4ae571f6

# Memo about Pool
# with Pool
# 10 -> 1.09 sec
# 100 -> 12.35 sec
# 1000 -> 113.87 sec
# to process training (~26,000 files) -> ~3000 sec (~50min)

In [None]:
# # Check if we can make df

# # column names
# col_names = ["site_id", "file_id", "floor_converted", "floor", \
#              "ts", "start_ts", "diff_start_ts", "x", "y", \
#              "closest_wp_ts", "diff_start_wp_ts", "diff_ts_wp_ts", "within_100ms", "within_200ms", \
#              "acce_ts", "diff_acce_ts", "acce_x", "acce_y", "acce_z", "acce_acc", \
#              "ahrs_ts", "diff_ahrs_ts", "ahrs_x", "ahrs_y", "ahrs_z", "ahrs_acc", \
#              "magn_ts", "diff_magn_ts", "magn_x", "magn_y", "magn_z", "magn_acc", "magn_strength",\
#              "gyro_ts", "diff_gyro_ts", "gyro_x", "gyro_y", "gyro_z", "gyro_acc", \
#              "acce_u_ts", "diff_acce_u_ts", "acce_u_x", "acce_u_y", "acce_u_z", "acce_u_acc", \
#              "magn_u_ts", "diff_magn_u_ts", "magn_u_x", "magn_u_y", "magn_u_z", "magn_u_acc", \
#              "gyro_u_ts", "diff_gyro_u_ts", "gyro_u_x", "gyro_u_y", "gyro_u_z", "gyro_u_acc", \
#              "wifi_ts", "diff_wifi_ts", "wifi_ssid", "wifi_bssid", "wifi_rssi", "wifi_freq", "wifi_last_seen_ts", \
#              "beacon_ts", "diff_beacon_ts", "beacon_ssid", "beacon_rssi", \
#              "rel_ts", "diff_rel_ts", "rel_x", "rel_y"
#             ]

# print(len(col_names))

# df = pd.DataFrame(rows, columns=col_names)
# print("df len: ", len(df))
# print("site_id nunique: ", df["site_id"].nunique())
# print("file_id nunique: ", df["file_id"].nunique())
# print("x value_counts: ", df["x"].value_counts())
# print("y value_counts: ", df["y"].value_counts())
# print("event ts nunique: ", df["ts"].nunique())
# print("start ts nunique: ", df["start_ts"].nunique()) # should be one
# print("diff_ts_wp_ts value_counts: ", df["diff_ts_wp_ts"].value_counts())
# print("diff_ts_wp_ts nunique: ", df["diff_ts_wp_ts"].nunique())
# print("within_100ms value_counts: ", df["within_100ms"].value_counts())
# print("within_100ms nunique: ", df["within_100ms"].nunique())
# print("within_100ms count: ", df["within_100ms"].count())
# print("within_200ms value_counts: ", df["within_200ms"].value_counts())
# print("within_200ms nunique: ", df["within_200ms"].nunique())
# print("within_200ms count: ", df["within_200ms"].count())
# display(df.head())

In [None]:
# # Set pool
# num_cores = multiprocessing.cpu_count()
# print(f"num_cores={num_cores}")
# # args = [(p, floor_map) for p in train_paths[:train_num]]
# args = [(p, floor_map) for p in grouped_paths_list]
# pool = Pool(num_cores)

# start = time.time()
# # w/ 250ms settings, 3 random samples from each site_id
# # 2 paths -> 18.7 sec
# # 10 paths -> 315 sec (df len is 1994)
# # 100 paths -> 708 sec (df len is 7183)
# # all ~ 600 paths -> 

# # errors
# # grouped_paths_list -> 100 paths -> site_id: 8 errors, 27 correct
# # grouped_paths_list -> 100 paths -> file_id: 23 errors, 77 correct

# # all in one go -> xxx sec
# # array_split -> 5891.8 sec

# # all in one go
# # res = pool.starmap(one_trace_to_rows, args)

# # split the args
# res = []
# for arg in tqdm(np.array_split(args, 50)):
#     res.extend(pool.starmap(one_trace_to_rows, arg))

In [None]:
############################## KEEP THIS CELL FOR LATER REF ##############################

# Error in ~20% of the train paths -> caused by not having acce_uncali data to create the event timestamps

# error files
# /5cd56b5ae2acfd2d33b58548/1F/5cf20b29718b08000848aa0a.txt
# /5cd56b5ae2acfd2d33b58548/2F/5cf214bbc852a70008c01607.txt
# /5cd56b5ae2acfd2d33b58548/2F/5cf214bda50dc300099d34cc.txt
# /5cd56b61e2acfd2d33b58d20/F2/5d085df529994a0008202661.txt
# /5cd56b61e2acfd2d33b58d20/F2/5d085dea4a2bd40008d47468.txt
# /5cd56b61e2acfd2d33b58d20/F4/5d086c44d85da00008644fce.txt
# /5cd56b5ae2acfd2d33b5854a/F3/5d078bab0e86b60008036348.txt
# /5cd56b5ae2acfd2d33b5854a/B1/5d073ba64a19c000086c559b.txt
# /5cd56b5ae2acfd2d33b5854a/F1/5d07603e4cae4f000a2db525.txt
# /5cd56b63e2acfd2d33b591c2/F2/5d0b0668912a980009fe91f2.txt
# /5cd56b63e2acfd2d33b591c2/F1/5d0afbfb2f8a26000805b9cb.txt
# /5cd56b63e2acfd2d33b591c2/F1/5d0afbf92f8a26000805b9c9.txt
# /5cd56b64e2acfd2d33b592b3/F2/5d0c9321c99c56000836df18.txt
# /5cd56b64e2acfd2d33b592b3/F3/5d0c9952ea565d0008e34e8b.txt
# /5cd56b64e2acfd2d33b592b3/F4/5d0c9d65ea565d0008e34ea2.txt
# /5cd56b5ae2acfd2d33b58549/5F/5d0613514a19c000086c432a.txt
# /5cd56b5ae2acfd2d33b58549/2F/5d11a6089c50c70008fe89bc.txt
# /5cd56b79e2acfd2d33b5b74e/F3/5d0b01522f8a26000805ba3e.txt
# /5cd56b79e2acfd2d33b5b74e/F3/5d0b015e2f8a26000805ba44.txt
# /5cd56b79e2acfd2d33b5b74e/F1/5d0af3452f8a26000805b830.txt
# /5cd56b6be2acfd2d33b59d1f/F1/5d08a1545125450008037d87.txt
# /5cd56b6be2acfd2d33b59d1f/F1/5d08a14e3f461f0008dac56c.txt
# /5cd56b6be2acfd2d33b59d1f/F3/5d0896415125450008037c76.txt

# base_path = "../input/indoor-location-navigation/train"
# error_files = [
#     "/5cd56b5ae2acfd2d33b58548/1F/5cf20b29718b08000848aa0a.txt",
#     "/5cd56b61e2acfd2d33b58d20/F2/5d085dea4a2bd40008d47468.txt",
#     "/5cd56b61e2acfd2d33b58d20/F4/5d086c44d85da00008644fce.txt",
#     "/5cd56b5ae2acfd2d33b5854a/F3/5d078bab0e86b60008036348.txt",
#     "/5cd56b63e2acfd2d33b591c2/F1/5d0afbfb2f8a26000805b9cb.txt",
#     "/5cd56b63e2acfd2d33b591c2/F1/5d0afbf92f8a26000805b9c9.txt",
#     "/5cd56b5ae2acfd2d33b58549/2F/5d11a6089c50c70008fe89bc.txt",
#     "/5cd56b79e2acfd2d33b5b74e/F3/5d0b01522f8a26000805ba3e.txt",
#     "/5cd56b6be2acfd2d33b59d1f/F1/5d08a1545125450008037d87.txt",
#     "/5cd56b6be2acfd2d33b59d1f/F1/5d08a14e3f461f0008dac56c.txt"
# ]

# working_path = "../input/indoor-location-navigation/train/5d2709c303f801723c3299ee/1F/5dad7d6daa1d300006faa80c.txt"
# error_paths = [base_path + e for e in error_files]
# rows = one_trace_to_rows(error_paths[1], floor_map)
# print(rows)

In [None]:
# start = time.time()

# df_train = pd.DataFrame(res[0], columns=col_names)
# for r in res[1:]:
#     df = pd.DataFrame(r, columns=col_names)
#     df_train = df_train.append(df, ignore_index=True)

# print("time to process", time.time() - start)
# print("length of df made", len(df_train))
# display(df_train.head(10))

In [None]:
# def list_to_df(row_list):
#     df_train = pd.DataFrame(row_list[0], columns=col_names)
#     for r in row_list[1:]:
#         df = pd.DataFrame(r, columns=col_names)
#         df_train = df_train.append(df)
#     return df_train

# start = time.time()
# pool = Pool(num_cores)

# df_train = pool.map(list_to_df, tqdm(res))

# # print("train_path count", len(train_paths[:train_num]))
# print("time to process", time.time() - start)
# print("length of df made", len(df_train))
# display(df_train.head(10))
# pool.close()

In [None]:
# Calculate moving averages
# Differencing with respect to time (as the timesteps are unevenly spaced)

In [None]:
# # Save the file as a pickle (the links below compare pickle/feather/parquet)
# # https://www.kaggle.com/pedrocouto39/fast-reading-w-pickle-feather-parquet-jay
# # https://www.kaggle.com/prmohanty/python-how-to-save-and-load-ml-models

# # Saving train data
# train_file_name = "indoor_train_4.pkl"

# with open(train_file_name, "wb") as file:
#     pickle.dump(df_train, file)

# # Save them to output
# # df_train.to_csv('df_train_2.csv',index=False)
# # df_test.to_csv('df_test.csv',index=False)

In [None]:
# # Load the data back in
# with open(train_file_name, "rb") as file:
#     df_train = pickle.load(file)

In [None]:
# print("df len: ", len(df_train), "\n")
# print("file_id unique: ", (df_train["file_id"].nunique()), "\n")
# print("site_id unique: ", (df_train["site_id"].nunique()), "\n")
# print("site_id value_counts: ", (df_train["site_id"].value_counts()))
# display(df_train.head())

In [None]:
# # Get submission file
# sub_df = pd.read_csv("/kaggle/input/indoor-location-navigation/sample_submission.csv")
# sub_df[["site", "file", "timestamp"]] = sub_df["site_path_timestamp"].apply(lambda x: pd.Series(x.split("_")))
# sub_df = sub_df.drop(columns=["floor", "x", "y"])
# # grouped_df = sub_df.groupby("file").sample(n=2)
# # all_file_id = grouped_df["file"].unique()
# # print(len(grouped_df))
# # print(len(all_file_id))
# # display(grouped_df.head())
# display(sub_df.head())

# test_site_id = sub_df["site"].unique()
# train_site_id = df_train["site_id"].unique()
# print(test_site_id, "\n")
# print(train_site_id, "\n")
# a = list(set(test_site_id) & set(train_site_id))
# print(a)