This notebook creates a time lag table for each path in the test data.  
The time lag is obtained by subtracting the timestamp (column No.0) from one of the values below:
<br/>
<br/>
- Paths with any iBeacon data: column No.9 of "TYPE_BEACON"  
  After the timestamp is subtracted, the result is the same for every row of the path.  
  https://www.kaggle.com/jiweiliu/fix-the-timestamps-of-test-data-using-dask
  <br/>  
  <br/>  
- Paths without iBeacon data: column No.6 of "TYPE_WIFI" ("lastseen_ts")  
  "lastseen_ts minus timestamp" varies from row to row, so the maximum value (the one closest to zero) is adopted as the time lag for the whole path. An offset of 200 milliseconds is also added.

In [None]:
import sys
import numpy as np
import pandas as pd
import gc, glob, time, pickle
from tqdm import tqdm

import matplotlib.pyplot as plt
# Apply two matplotlib style sheets one after the other.
# NOTE(review): matplotlib 3.6 deprecated the bare 'seaborn-*' style names in
# favour of 'seaborn-v0_8-*' — confirm these names exist in the installed version.
plt.style.use('seaborn-deep')
plt.style.use('seaborn-darkgrid')

"test_site_dict.pkl" in the cell below is a dictionary consisting of the WiFi BSSIDs and the iBeacon MAC addresses found in the test data; it was created with [this notebook](https://www.kaggle.com/horsek/ilnpre1-create-testsitedict).

In [None]:
# Root directory of the competition dataset.
input_dir = '../input/indoor-location-navigation'

# NOTE(review): pickle.load can execute arbitrary code embedded in the file —
# only load this pickle if its source is trusted.
with open('../input/iln-dataset/test_site_dict.pkl', 'rb') as f:
    test_site_dict = pickle.load(f)

# Floor label -> integer floor index. Both spellings of a floor ("F1" and "1F")
# map to the same index, starting at 0; basement floors are negative.
floor_map = {"B2":-2, "B1":-1,
             "F1":0, "F2":1, "F3":2, "F4":3, "F5":4,
             "F6":5, "F7":6, "F8":7, "F9":8,
             "1F":0, "2F":1, "3F":2, "4F":3, "5F":4,
             "6F":5, "7F":6, "8F":7, "9F":8}

In [None]:
'''
Modified version of the host's "read_data_file" function from "io_f.py",
changed to cope with malformed data etc.
'''

from dataclasses import dataclass

@dataclass
class ReadData:
    """Per-record-type arrays parsed from one path trace file.

    Instances are built by ``read_data_file``; each field is the ``np.array``
    of the rows collected for one record type (empty when the file has none).
    """
    # Rows of [timestamp, x, y, z] from 'TYPE_ACCELEROMETER' records.
    acce: np.ndarray
    # Rows of [timestamp, x, y, z] from 'TYPE_ACCELEROMETER_UNCALIBRATED' records.
    acce_uncali: np.ndarray
    # Rows of [timestamp, x, y, z] from 'TYPE_GYROSCOPE' records.
    gyro: np.ndarray
    # Rows of [timestamp, x, y, z] from 'TYPE_GYROSCOPE_UNCALIBRATED' records.
    gyro_uncali: np.ndarray
    # Rows of [timestamp, x, y, z] from 'TYPE_MAGNETIC_FIELD' records.
    magn: np.ndarray
    # Rows of [timestamp, x, y, z] from 'TYPE_MAGNETIC_FIELD_UNCALIBRATED' records.
    magn_uncali: np.ndarray
    # Rows of [timestamp, x, y, z] from 'TYPE_ROTATION_VECTOR' records.
    ahrs: np.ndarray
    # Rows of raw strings [sys_ts, ssid, bssid, rssi, lastseen_ts, frequency]
    # from 'TYPE_WIFI' records.
    wifi: np.ndarray
    # Rows of raw strings [ts, uuid_major_minor, rssi, txpow, distance, mac, ts_copy]
    # from 'TYPE_BEACON' records.
    ibeacon: np.ndarray
    # Rows of [timestamp, x, y] from 'TYPE_WAYPOINT' records.
    waypoint: np.ndarray

def split_list_as_req(line_data):
    """Split a field list that contains two (or more) fused records.

    A record is fused when a second 'TYPE_' field shows up in the same list.
    The field just before that second 'TYPE_' field is then cut in two: its
    last 13 characters become the first field of the next record, and the
    remainder stays as the last field of the first record.

    Parameters
    ----------
    line_data : list of str
        Tab-separated fields of one line. When a split happens, the fused
        field is trimmed in place in this list.

    Returns
    -------
    tuple
        ``(redo, first_record, remainder)``. ``redo`` is True when a split
        happened; ``remainder`` then holds the fields of the following
        record(s) and should be passed through this function again.
        Otherwise ``(False, line_data, [])``.
    """
    type_positions = [pos for pos, field in enumerate(line_data)
                      if 'TYPE_' in field]
    if len(type_positions) <= 1:
        # Zero or one record on the line: nothing to separate.
        return False, line_data, []

    next_type = type_positions[1]
    fused = next_type - 1
    fused_field = line_data[fused]

    # The trailing 13 characters are taken as the next record's timestamp.
    remainder = [fused_field[-13:]] + line_data[next_type:]
    line_data[fused] = fused_field[:-13]
    return True, line_data[:next_type], remainder

def read_data_file(data_filename):
    """Parse one path trace file into a ``ReadData`` of per-type arrays.

    Blank lines and lines starting with '#' are skipped. Every other line is
    split on tabs and passed through ``split_list_as_req`` so that records
    fused onto one line are processed one after another. Records whose type
    (second field) is not handled below are ignored.

    Parameters
    ----------
    data_filename : str
        Path of the UTF-8 text file to read.

    Returns
    -------
    ReadData
        One ``np.array`` per record type. Sensor and waypoint rows are
        numeric; wifi and beacon rows keep the raw string fields.
    """
    # Record types that all share the "ts, TYPE, x, y, z" layout and are
    # parsed without any error handling.
    triaxial = {
        'TYPE_ACCELEROMETER_UNCALIBRATED': [],
        'TYPE_GYROSCOPE': [],
        'TYPE_GYROSCOPE_UNCALIBRATED': [],
        'TYPE_MAGNETIC_FIELD': [],
        'TYPE_MAGNETIC_FIELD_UNCALIBRATED': [],
        'TYPE_ROTATION_VECTOR': [],
    }
    acce, wifi, ibeacon, waypoint = [], [], [], []

    with open(data_filename, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    for raw_line in lines:
        stripped = raw_line.strip()
        if not stripped or stripped[0] == '#':
            continue

        pending = stripped.split('\t')
        redo = True
        while redo:
            # Peel off one record; anything left over is handled next round.
            redo, fields, pending = split_list_as_req(pending)
            kind = fields[1]

            if kind == 'TYPE_ACCELEROMETER':
                # Only this record type reports unparsable values instead of raising.
                try:
                    acce.append([int(fields[0]), float(fields[2]),
                                 float(fields[3]), float(fields[4])])
                except ValueError:
                    print(data_filename)
                    print(fields)

            elif kind in triaxial:
                triaxial[kind].append([int(fields[0]), float(fields[2]),
                                       float(fields[3]), float(fields[4])])

            elif kind == 'TYPE_WIFI':
                # Stored as [sys_ts, ssid, bssid, rssi, lastseen_ts, frequency];
                # lastseen_ts is field 6 and frequency is field 5 of the record.
                wifi.append([fields[0], fields[2], fields[3], fields[4],
                             fields[6], fields[5]])

            elif kind == 'TYPE_BEACON':
                ts, uuid, major, minor = fields[0], fields[2], fields[3], fields[4]
                # Stored order puts rssi (field 6) before txpow (field 5).
                rssi, txpow = fields[6], fields[5]
                distance, mac = fields[7], fields[8]
                # Field 9 is optional; fall back to the record's own timestamp.
                ts_copy = fields[9] if len(fields) > 9 else ts
                ibeacon.append([ts, '_'.join([uuid, major, minor]), rssi,
                                txpow, distance, mac, ts_copy])

            elif kind == 'TYPE_WAYPOINT':
                waypoint.append([int(fields[0]), float(fields[2]), float(fields[3])])

    return ReadData(
        np.array(acce),
        np.array(triaxial['TYPE_ACCELEROMETER_UNCALIBRATED']),
        np.array(triaxial['TYPE_GYROSCOPE']),
        np.array(triaxial['TYPE_GYROSCOPE_UNCALIBRATED']),
        np.array(triaxial['TYPE_MAGNETIC_FIELD']),
        np.array(triaxial['TYPE_MAGNETIC_FIELD_UNCALIBRATED']),
        np.array(triaxial['TYPE_ROTATION_VECTOR']),
        np.array(wifi),
        np.array(ibeacon),
        np.array(waypoint),
    )

In [None]:
''' Retrieve the Site ID from txt file '''
def SiteID(txt):
    """Extract the site ID from the header lines of a path trace file.

    Parameters
    ----------
    txt : sequence of str
        Lines of the file. Only the second line (``txt[1]``) is inspected.

    Returns
    -------
    str
        The text between 'SiteID:' and the tab-prefixed 'SiteName:' marker
        on the second line.

    Raises
    ------
    AssertionError
        If either marker is missing from the second line. It is raised
        explicitly (not via ``assert``) so the check also runs under
        ``python -O``.
    """
    header = txt[1]
    key = 'SiteID:'
    start = header.find(key)
    end = header.find('\tSiteName:')
    if start == -1 or end == -1:
        raise AssertionError('SiteID not found')
    return header[start + len(key):end]

In [None]:
print('\n=== Read Path Data ===')
# Every test trace file, in sorted order.
test_files = sorted(glob.glob(f'{input_dir}/test/*.txt'))
# Both dicts are keyed by path name (the file name without '.txt'):
# TestPathData holds the parsed ReadData, TestSiteName the string returned by SiteID().
TestPathData,TestSiteName = {},{}
time.sleep(1)  # presumably lets the print above appear before tqdm draws its bar — TODO confirm
for path_file_ in tqdm(test_files):
    # Assumes '/'-separated paths; every occurrence of '.txt' in the name is removed.
    path_name = path_file_.split('/')[-1].replace('.txt','')
    TestPathData[path_name] = read_data_file(path_file_)
    
    # The file is read a second time here, only to get the header line holding the SiteID.
    with open(path_file_, 'r', encoding="utf-8") as f:
        txt = f.readlines()
    TestSiteName[path_name] = SiteID(txt)

In [None]:
''' Create a time lag table (1) '''
# Maps path name -> [wifi lag, list of distinct beacon lags,
#                    wifi lag minus the first beacon lag].
time_lag_summary = {}
time.sleep(1)
for PathName, PathData in tqdm(TestPathData.items()):
    site = TestSiteName[PathName]  # not used further in this cell

    ''' wifi '''
    if PathData.wifi.shape[0]==0:
        # No TYPE_WIFI rows: building the 6-column frame from an empty array
        # would fail, so record NaN just as the beacon branch does.
        time_lag_wifi = np.nan
    else:
        df_wifi = pd.DataFrame(PathData.wifi,
                               columns=['sys_ts','ssid','bssid','rssi',
                                        'lastseen_ts','frequency'])
        df_wifi['sys_ts']=df_wifi['sys_ts'].astype(np.int64)
        df_wifi['lastseen_ts']=df_wifi['lastseen_ts'].astype(np.int64)
        # Largest "lastseen_ts - sys_ts" over the path, with the 200 ms offset added.
        time_lag_wifi = (df_wifi['lastseen_ts']-df_wifi['sys_ts']+200).max()

    ''' beacon '''
    if PathData.ibeacon.shape[0]==0:
        time_lag_beac = [np.nan]
    else:
        df_beac = pd.DataFrame(PathData.ibeacon,
                               columns=['ts','uuid_maj_min','rssi',
                                        'txpow','distance','mac','ts_copy'])
        df_beac['ts']=df_beac['ts'].astype(np.int64)
        df_beac['ts_copy']=df_beac['ts_copy'].astype(np.int64)
        # All distinct "ts_copy - ts" values seen on this path.
        time_lag_beac = (df_beac['ts_copy']-df_beac['ts']).unique().tolist()

    time_lag_summary[PathName] = [time_lag_wifi, time_lag_beac,
                                  time_lag_wifi-time_lag_beac[0]]

In [None]:
''' Create a time lag table (2) '''
# One row per path; the three columns follow the list order stored in time_lag_summary.
df_summary = pd.DataFrame(time_lag_summary).T
df_summary.columns = ['time_lag_wifi','time_lag_beac','diff']
df_summary = df_summary.astype({'diff':float})
# Number of candidate beacon lags per path (a beacon-less path holds [nan], so it counts as 1).
df_summary['beac_unique'] = df_summary['time_lag_beac'].apply(len)
# Prefer the beacon lag; fall back to the wifi lag when the path has no beacon data.
# The first element is tested with pd.notna rather than comparing the list to
# [np.nan], which only worked through object identity of the np.nan singleton.
df_summary['time_lag']=df_summary.apply(lambda x:
                                        x['time_lag_beac'][0]
                                        if pd.notna(x['time_lag_beac'][0])
                                        else x['time_lag_wifi'], axis=1)
df_summary

In [None]:
# Show the distinct values of the per-path count of candidate beacon lags.
df_summary['beac_unique'].unique()

The number of candidate time lag values obtained from the beacon data is 1 for every path.

In [None]:
# Summary statistics of "wifi lag minus beacon lag" across paths.
df_summary['diff'].describe()

The median of the "diff" values is almost zero, so the 200 ms offset appears to be appropriate.

In [None]:
# Save only the chosen per-path time lag; the frame's index (the path name) is written as the first CSV column.
df_summary[['time_lag']].to_csv('test_ts_lag.csv')