In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input tree and print the full path of every file found.
input_root = '/kaggle/input'
for dirname, _, filenames in os.walk(input_root):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Prepare paths:
import glob
from pathlib import Path
inpath = '../input/indoor-location-navigation/'
metapath = inpath + 'metadata/'
trainpath = inpath + 'train/'
testpath = inpath + 'test/'

# Extract testing files, buildings and sites:
# grep writes one tab-separated line per match: "<file>:<first field>\t<SiteID field>\t<site field>".
grep_status = os.system(f'grep SiteID {testpath}/* > test_buildings.txt' )
if grep_status:
    # A non-zero status means grep matched nothing or hit an error; say so here rather than
    # letting the failure surface later as a confusing parsing error.
    print('Error: grep on the test files exited with status', grep_status)
test_buildings = pd.read_csv('test_buildings.txt',sep='\t',header=None,names=['file','building','site'])
# Drop the last two characters that grep leaves after the file name (presumably ":#" — TODO confirm).
test_buildings['file'] = test_buildings['file'].apply(lambda x: x[:-2])
# Drop the first seven characters of the building field (presumably the "SiteID:" prefix — TODO confirm).
test_buildings['building'] = test_buildings['building'].apply(lambda x: x[7:])

# How many buildings in the testing set?
buildings = np.unique(test_buildings['building'])
print('There are',len(buildings),'buildings in the testing set.')

test_buildings.head()

Floor dictionary, as defined by Devin in his nice work: https://www.kaggle.com/devinanzelmo/wifi-features

In [None]:
# Main floor-name -> floor-index mapping: basements are negative, and both the
# "F<n>" and "<n>F" spellings of floors 1-9 map to n-1; "F10" has no "10F" twin.
floors = {f"B{depth}": -depth for depth in (3, 2, 1)}
floors.update({name: level - 1
               for level in range(1, 10)
               for name in (f"F{level}", f"{level}F")})
floors["F10"] = 9

# Other floors:
floorsB = dict(B=0, BF=1, BM=2)
floorsG = dict(G=0)
floorsM = dict(M=0)
floorsP = dict(P1=0, P2=1)
floorsL = {"LG2": -2, "LG1": -1, "LG": 0, "LM": 0,
           **{f"L{level}": level for level in range(1, 12)}}

Custom C++ code I wrote to fix the data quality issues reported by the hosts and to reformat the dataset without losing any information:

In [None]:
# Compile C++ pre-processing code:
er = os.system("g++ /kaggle/input/indoor-cpp/1_preprocess.cpp -std=c++11 -o preprocess")
if er:
    print("Error")

# Fix data quality issues and reformat dataset:
# makedirs(exist_ok=True) keeps the cell re-runnable when the output directory already exists.
os.makedirs('train', exist_ok=True)
for building in buildings:
    for floor in os.listdir(trainpath+building):
        # Only floors named in the main `floors` dictionary are converted; other floor
        # names are skipped without running the pre-processor.
        if floor not in floors:
            continue
        path_filenames = list(Path(trainpath+building+f'/{floor}/').resolve().glob("*.txt"))
        for path_filename in path_filenames:
            er = os.system(f'./preprocess {path_filename} train {building} {floors[floor]}')
            # Check the status of THIS invocation only, so a skipped floor can never
            # re-report a stale failure from an earlier command.
            if er:
                print("Error:", path_filename)

Visualization example:

In [None]:
# Column names in the C++ ordering:
cols = ['t_start','xyz_time','x_acce','y_acce','z_acce','a_acce',
        'x_acce_uncali','y_acce_uncali','z_acce_uncali',
        'x2_acce_uncali','y2_acce_uncali','z2_acce_uncali','a_acce_uncali',
        'x_gyro','y_gyro','z_gyro','a_gyro',
        'x_gyro_uncali','y_gyro_uncali','z_gyro_uncali',
        'x2_gyro_uncali','y2_gyro_uncali','z2_gyro_uncali','a_gyro_uncali',
        'x_magn','y_magn','z_magn','a_magn',
        'x_magn_uncali','y_magn_uncali','z_magn_uncali',
        'x2_magn_uncali','y2_magn_uncali','z2_magn_uncali','a_magn_uncali',
        'x_ahrs','y_ahrs','z_ahrs','a_ahrs',
        't1_wifi','ssid_wifi','bssid_wifi','rssid_wifi','freq_wifi','t2_wifi',
        't1_beac','id_beac','power_beac','rssi_beac','dist_beac','mac_beac','t2_beac',
        't_waypoint','floor','x_waypoint','y_waypoint']

# Column names by data type. Each name in `cols` appears in exactly one of the five
# lists below: a label listed twice would be selected twice by `frame[list]`, which
# breaks the column-wise assignment used when parsing the cells.
int8col = ['floor','a_acce','a_gyro','a_magn','a_ahrs',
           'a_acce_uncali','a_gyro_uncali','a_magn_uncali']
int16col = ['rssid_wifi','freq_wifi','power_beac','rssi_beac']
int32col = ['t_start','xyz_time','t1_wifi','t2_wifi','t1_beac','t2_beac','t_waypoint']
floatcol = ['x_acce','y_acce','z_acce','x_acce_uncali','y_acce_uncali','z_acce_uncali',
            'x2_acce_uncali','y2_acce_uncali','z2_acce_uncali',
            'x_gyro','y_gyro','z_gyro','x_gyro_uncali','y_gyro_uncali','z_gyro_uncali',
            'x2_gyro_uncali','y2_gyro_uncali','z2_gyro_uncali',
            'x_magn','y_magn','z_magn','x_magn_uncali','y_magn_uncali','z_magn_uncali',
            'x2_magn_uncali','y2_magn_uncali','z2_magn_uncali',
            'x_ahrs','y_ahrs','z_ahrs','dist_beac','x_waypoint','y_waypoint']
stringcol = ['ssid_wifi','bssid_wifi','id_beac','mac_beac']
# Target (waypoint) columns:
tarcol = ['t_waypoint','floor','x_waypoint','y_waypoint']

# Choose any building:
building = '5a0546857ecc773753327266'

# Read every cell as text first; missing cells become empty strings so that
# splitting them below yields empty arrays.
xtrain = pd.read_csv(f'train/{building}.txt',index_col=0,header=None,names=cols,dtype='object').fillna('')

# Each cell holds whitespace-separated values; turn it into a typed numpy array.
for typed_columns, cell_dtype in ((int8col, np.int8), (int16col, np.int16),
                                  (int32col, np.int32), (floatcol, np.float32)):
    xtrain[typed_columns] = xtrain[typed_columns].apply(
        lambda column, cell_dtype=cell_dtype: column.apply(
            lambda cell: np.array(cell.split(), dtype=cell_dtype)))
# String columns keep numpy's default (string) dtype.
xtrain[stringcol] = xtrain[stringcol].apply(
    lambda column: column.apply(lambda cell: np.array(cell.split())))
xtrain.head()

In [None]:
import matplotlib.pyplot as plt

# Choose any path:
pathid = '5e157315a280850006f3d00d'


def _plot_lines(position, columns, title, time_col='xyz_time', fmt=(), legend_loc="upper right"):
    """Select subplot `position` and draw each of `columns` against `time_col` for path `pathid`.

    `fmt` is an optional tuple holding a matplotlib format string (empty tuple = default style).
    """
    plt.subplot(position)
    for col in columns:
        plt.plot(xtrain.loc[pathid, time_col], xtrain.loc[pathid, col], *fmt, label=col)
        plt.legend(loc=legend_loc)
        plt.xlabel(time_col)
        plt.title(title)


def _plot_grouped(position, value_col, group_col, time_col):
    """Select subplot `position` and draw `value_col` over `time_col`, one line per `group_col` value.

    Returns the axes the lines were drawn on.
    """
    panel = plt.subplot(position)
    frame = pd.DataFrame({value_col: xtrain.loc[pathid, value_col], group_col: xtrain.loc[pathid, group_col]},
                         index=xtrain.loc[pathid, time_col])
    frame.groupby(group_col)[value_col].plot(ax=panel, title=value_col, legend=False)
    return panel


# XYZ:
plt.figure(figsize=(25,5))
# https://developer.android.com/guide/topics/sensors/sensors_overview#sensors-coords
# y_acce is the acceleration in the walking direction, z_acce is in the vertical direction (i.e. including gravity) ?
_plot_lines(141, ['x_acce','y_acce','z_acce','a_acce'], 'Acce')
# z_gyro is the angular velocity around the axis pointing to the sky (i.e. the change in walking direction) ?
_plot_lines(142, ['x_gyro','y_gyro','z_gyro','a_gyro'], 'Gyro')
# maybe need to use magnitude? (x_magn and y_magn will depend on the orientation, z_magn should be independent if the phone is held correctly) ?
_plot_lines(143, ['x_magn','y_magn','z_magn','a_magn'], 'Magn')
# https://developer.android.com/guide/topics/sensors/sensors_motion#sensors-motion-rotate
# z_ahrs is the rotation around the axis pointing to the sky (i.e. the walking direction) ?
_plot_lines(144, ['x_ahrs','y_ahrs','z_ahrs','a_ahrs'], 'Ahrs')
plt.show()

# XYZ Uncali (plus the waypoints in the last panel):
plt.figure(figsize=(25,5))
_plot_lines(141, ['x_acce_uncali','y_acce_uncali','z_acce_uncali','a_acce_uncali'], 'Acce_uncali')
_plot_lines(142, ['x_gyro_uncali','y_gyro_uncali','z_gyro_uncali','a_gyro_uncali'], 'Gyro_uncali')
_plot_lines(143, ['x_magn_uncali','y_magn_uncali','z_magn_uncali','a_magn_uncali'], 'Magn_uncali')
_plot_lines(144, ['x_waypoint','y_waypoint'], 'Waypoint', time_col='t_waypoint', fmt=('-+',), legend_loc="right")
plt.show()

# XYZ Uncali 2:
plt.figure(figsize=(25,5))
_plot_lines(131, ['x2_acce_uncali','y2_acce_uncali','z2_acce_uncali'], 'Acce_uncali_2')
_plot_lines(132, ['x2_gyro_uncali','y2_gyro_uncali','z2_gyro_uncali'], 'Gyro_uncali_2')
_plot_lines(133, ['x2_magn_uncali','y2_magn_uncali','z2_magn_uncali'], 'Magn_uncali_2')
plt.show()

# Wifi:
plt.figure(figsize=(25,5))
for position, value_col in ((131, 'rssid_wifi'), (132, 'freq_wifi'), (133, 't2_wifi')):
    ax = _plot_grouped(position, value_col, 'bssid_wifi', 't1_wifi')
plt.show()

# Beacon. Note: use mac to split ids, but use ids to train model (i.e. multiple sequences for each id, one for each mac) ?
plt.figure(figsize=(25,5))
for position, value_col, group_col in ((131, 'rssi_beac', 'mac_beac'),
                                       (132, 'dist_beac', 'mac_beac'),
                                       (133, 'power_beac', 'id_beac')):
    ax = _plot_grouped(position, value_col, group_col, 't1_beac')
plt.show()

# Beacon again, this time every panel grouped by beacon id:
plt.figure(figsize=(25,5))
for position, value_col in ((131, 'rssi_beac'), (132, 'dist_beac'), (133, 'power_beac')):
    ax = _plot_grouped(position, value_col, 'id_beac', 't1_beac')
plt.show()

Run the GitHub scripts shared by the competition hosts on the training set. Source: https://github.com/location-competition/indoor-location-competition-20 (Copyright (c) 2017-2020 XYZ10, Inc. https://dangwu.com/).

In [None]:
import numpy as np
import scipy.signal as signal


def split_ts_seq(ts_seq, sep_ts):
    """
    Cut a timestamped sequence into consecutive chunks at the given separator times.

    :param ts_seq: 2-D numpy array whose first column holds the timestamps
    :param sep_ts: separator timestamps (duplicates are ignored)
    :return: list of copies of the non-empty row chunks; a chunk ends with the last
             row whose timestamp is <= the separator, and rows after the last
             separator form a final chunk
    """
    timestamps = ts_seq[:, 0].astype(float)
    chunks = []
    lower = 0
    for boundary in np.unique(sep_ts):
        upper = np.searchsorted(timestamps, boundary, side='right')
        if upper != lower:
            chunks.append(ts_seq[lower:upper, :].copy())
            lower = upper

    # Whatever lies beyond the last separator is kept as a trailing chunk.
    if lower < ts_seq.shape[0]:
        chunks.append(ts_seq[lower:, :].copy())

    return chunks


def correct_trajectory(original_xys, end_xy):
    """
    Rotate and scale a trajectory about its first point so that its last point lands on `end_xy`.

    :param original_xys: numpy ndarray, shape(N, 2)
    :param end_xy: numpy ndarray holding the target end point; read as end_xy[0] (x) and end_xy[1] (y)
    :return: numpy ndarray, shape(N, 2); the first row is the unchanged start point
    """
    origin = original_xys[0, :]
    target = end_xy
    drifted_end = original_xys[-1, :]

    # Bearings (measured from the x axis) of the wanted and of the actual end point.
    target_bearing = np.arctan2(target[1] - origin[1], target[0] - origin[0])
    drifted_bearing = np.arctan2(drifted_end[1] - origin[1], drifted_end[0] - origin[0])
    rotation = drifted_bearing - target_bearing
    target_dist = np.sqrt(np.sum((target - origin) ** 2))
    drifted_dist = np.sqrt(np.sum((drifted_end - origin) ** 2))

    rows = [origin]
    for point in original_xys[1:]:
        # Same polar coordinates about the origin, turned back by `rotation` and
        # rescaled by target_dist / drifted_dist.
        bearing = np.arctan2(point[1] - origin[1], point[0] - origin[0]) - rotation
        radius = np.sqrt(np.sum((point - origin) ** 2)) * target_dist / drifted_dist
        offset = np.array([radius * np.cos(bearing), radius * np.sin(bearing)])
        rows.append(offset + origin)

    return np.append(np.zeros((0, 2)), rows, 0)


def correct_positions(rel_positions, reference_positions):
    """
    Turn relative step displacements into absolute positions anchored on the reference positions.

    :param rel_positions: numpy ndarray with rows [timestamp, dx, dy]
    :param reference_positions: numpy ndarray with rows [timestamp, x, y]
    :return: numpy ndarray with rows [timestamp, x, y]; each stretch between two
             consecutive reference positions starts and ends on them
    """
    segments = split_ts_seq(rel_positions, reference_positions[:, 0])
    n_expected = reference_positions.shape[0] - 1
    if len(segments) != n_expected:
        # Drop the last chunk when the chunk count does not match the number of reference intervals.
        del segments[-1]
    assert len(segments) == n_expected

    pieces = [np.zeros((0, 3))]
    for seg_idx, rel_ps in enumerate(segments):
        start_position = reference_positions[seg_idx]
        end_position = reference_positions[seg_idx + 1]

        # Integrate the displacements, starting from the reference position that opens the segment.
        abs_ps = np.zeros(rel_ps.shape)
        abs_ps[:, 0] = rel_ps[:, 0]
        abs_ps[0, 1:3] = rel_ps[0, 1:3] + start_position[1:3]
        abs_ps[1:, 1:3] = rel_ps[1:, 1:3]
        abs_ps[:, 1:3] = np.cumsum(abs_ps[:, 1:3], axis=0)

        abs_ps = np.insert(abs_ps, 0, start_position, axis=0)
        corrected_xys = correct_trajectory(abs_ps[:, 1:3], end_position[1:3])
        corrected_ps = np.column_stack((abs_ps[:, 0], corrected_xys))
        # After the first segment, skip the first row: it repeats the previous segment's end point.
        pieces.append(corrected_ps if seg_idx == 0 else corrected_ps[1:])

    return np.concatenate(pieces, axis=0)


def init_parameters_filter(sample_freq, warmup_data, cut_off_freq=2):
    """
    Design a 4th-order low-pass Butterworth filter and warm up its internal state.

    :param sample_freq: sampling frequency of the signal to be filtered
    :param warmup_data: samples fed through the filter (twice) to initialise its state
    :param cut_off_freq: cut-off frequency, in the same unit as sample_freq
    :return: (b coefficients, a coefficients, filter state after the warm-up)
    """
    order = 4
    nyquist = sample_freq / 2
    coeff_b, coeff_a = signal.butter(order, cut_off_freq / nyquist, 'low', False)

    # Start from the step-response steady state, then run the warm-up data through twice.
    state = signal.lfilter_zi(coeff_b, coeff_a)
    for _ in range(2):
        _, state = signal.lfilter(coeff_b, coeff_a, warmup_data, zi=state)

    return coeff_b, coeff_a, state


def get_rotation_matrix_from_vector(rotation_vector):
    """
    Convert a rotation vector (unit-quaternion components) into a 3x3 rotation matrix.

    :param rotation_vector: numpy array [q1, q2, q3] or [q1, q2, q3, q0]; when the
                            fourth component is absent it is reconstructed as
                            sqrt(1 - q1^2 - q2^2 - q3^2), or 0 if that term is not positive
    :return: numpy ndarray, shape(3, 3), dtype float64
    """
    q1 = rotation_vector[0]
    q2 = rotation_vector[1]
    q3 = rotation_vector[2]

    if rotation_vector.size >= 4:
        q0 = rotation_vector[3]
    else:
        q0_squared = 1 - q1*q1 - q2*q2 - q3*q3
        q0 = np.sqrt(q0_squared) if q0_squared > 0 else 0

    sq_q1 = 2 * q1 * q1
    sq_q2 = 2 * q2 * q2
    sq_q3 = 2 * q3 * q3
    q1_q2 = 2 * q1 * q2
    q3_q0 = 2 * q3 * q0
    q1_q3 = 2 * q1 * q3
    q2_q0 = 2 * q2 * q0
    q2_q3 = 2 * q2 * q3
    q1_q0 = 2 * q1 * q0

    # Only the 3x3 form is ever produced, so the matrix is built directly
    # (the former 4x4 branch could never be reached).
    return np.array([
        [1 - sq_q2 - sq_q3, q1_q2 - q3_q0, q1_q3 + q2_q0],
        [q1_q2 + q3_q0, 1 - sq_q1 - sq_q3, q2_q3 - q1_q0],
        [q1_q3 - q2_q0, q2_q3 + q1_q0, 1 - sq_q1 - sq_q2],
    ], dtype=np.float64)


def get_orientation(R):
    flat_R = R.flatten()
    values = np.zeros((3,))
    if np.size(flat_R) == 9:
        values[0] = np.arctan2(flat_R[1], flat_R[4])
        values[1] = np.arcsin(-flat_R[7])
        values[2] = np.arctan2(-flat_R[6], flat_R[8])
    else:
        values[0] = np.arctan2(flat_R[1], flat_R[5])
        values[1] = np.arcsin(-flat_R[9])
        values[2] = np.arctan2(-flat_R[8], flat_R[10])

    return values


def compute_steps(acce_datas):
    """
    Detect steps in an accelerometer recording from peaks and valleys of the filtered magnitude.

    :param acce_datas: numpy ndarray; column 0 is the timestamp, the remaining columns
                       are the acceleration components whose magnitude is analysed
    :return: tuple (step_timestamps, step_indexs, step_acce_max_mins):
             step_timestamps - timestamp of the sample at which each step was detected,
             step_indexs - row index of that sample in acce_datas,
             step_acce_max_mins - one row per step: [timestamp, max, min, variance]
    """
    step_timestamps = np.array([])
    step_indexs = np.array([], dtype=int)
    step_acce_max_mins = np.zeros((0, 4))
    # Tuning constants. NOTE(review): sample_freq is presumably in Hz and
    # interval_threshold in the timestamp unit (compute_stride_length treats
    # timestamps as milliseconds) - confirm.
    sample_freq = 50
    window_size = 22
    low_acce_mag = 0.6
    step_criterion = 1  # fixed at 1, so only the final `else` criterion below is ever used
    interval_threshold = 250

    # acce_max / acce_min hold [timestamp, filtered magnitude] of the latest peak / valley.
    acce_max = np.zeros((2,))
    acce_min = np.zeros((2,))
    # Sliding record of the last window_size classifications: 1 peak, -1 valley, 0 neither.
    acce_binarys = np.zeros((window_size,), dtype=int)
    acce_mag_pre = 0
    # state_flag: 0 = no peak seen yet, 1 = peak recorded, 2 = valley (step) recorded.
    state_flag = 0

    # The low-pass filter state is warmed up with a constant 9.81 signal.
    warmup_data = np.ones((window_size,)) * 9.81
    filter_b, filter_a, filter_zf = init_parameters_filter(sample_freq, warmup_data)
    acce_mag_window = np.zeros((window_size, 1))

    # detect steps according to acceleration magnitudes
    for i in np.arange(0, np.size(acce_datas, 0)):
        acce_data = acce_datas[i, :]
        acce_mag = np.sqrt(np.sum(acce_data[1:] ** 2))

        # Filter one sample at a time, carrying the filter state across iterations.
        acce_mag_filt, filter_zf = signal.lfilter(filter_b, filter_a, [acce_mag], zi=filter_zf)
        acce_mag_filt = acce_mag_filt[0]

        # Slide the window of filtered magnitudes: push the new value, drop the oldest.
        acce_mag_window = np.append(acce_mag_window, [acce_mag_filt])
        acce_mag_window = np.delete(acce_mag_window, 0)
        mean_gravity = np.mean(acce_mag_window)
        acce_std = np.std(acce_mag_window)
        mag_threshold = np.max([low_acce_mag, 0.4 * acce_std])

        # detect valid peak or valley of acceleration magnitudes
        acce_mag_filt_detrend = acce_mag_filt - mean_gravity
        if acce_mag_filt_detrend > np.max([acce_mag_pre, mag_threshold]):
            # peak
            acce_binarys = np.append(acce_binarys, [1])
            acce_binarys = np.delete(acce_binarys, 0)
        elif acce_mag_filt_detrend < np.min([acce_mag_pre, -mag_threshold]):
            # valley
            acce_binarys = np.append(acce_binarys, [-1])
            acce_binarys = np.delete(acce_binarys, 0)
        else:
            # between peak and valley
            acce_binarys = np.append(acce_binarys, [0])
            acce_binarys = np.delete(acce_binarys, 0)

        # A peak has just ended (previous sample was a peak, current one is not):
        # record it, or replace the stored peak, depending on the current state.
        if (acce_binarys[-1] == 0) and (acce_binarys[-2] == 1):
            if state_flag == 0:
                acce_max[:] = acce_data[0], acce_mag_filt
                state_flag = 1
            elif (state_flag == 1) and ((acce_data[0] - acce_max[0]) <= interval_threshold) and (
                    acce_mag_filt > acce_max[1]):
                acce_max[:] = acce_data[0], acce_mag_filt
            elif (state_flag == 2) and ((acce_data[0] - acce_max[0]) > interval_threshold):
                acce_max[:] = acce_data[0], acce_mag_filt
                state_flag = 1

        # choose reasonable step criterion and check if there is a valid step
        # save step acceleration data: step_acce_max_mins = [timestamp, max, min, variance]
        step_flag = False
        if step_criterion == 2:
            if (acce_binarys[-1] == -1) and ((acce_binarys[-2] == 1) or (acce_binarys[-2] == 0)):
                step_flag = True
        elif step_criterion == 3:
            if (acce_binarys[-1] == -1) and (acce_binarys[-2] == 0) and (np.sum(acce_binarys[:-2]) > 1):
                step_flag = True
        else:
            # A valley has just ended: it counts as a step only if a peak was recorded
            # and the previous valley is more than interval_threshold in the past.
            if (acce_binarys[-1] == 0) and acce_binarys[-2] == -1:
                if (state_flag == 1) and ((acce_data[0] - acce_min[0]) > interval_threshold):
                    acce_min[:] = acce_data[0], acce_mag_filt
                    state_flag = 2
                    step_flag = True
                elif (state_flag == 2) and ((acce_data[0] - acce_min[0]) <= interval_threshold) and (
                        acce_mag_filt < acce_min[1]):
                    # A deeper valley shortly after the last one only updates the stored minimum.
                    acce_min[:] = acce_data[0], acce_mag_filt
        if step_flag:
            step_timestamps = np.append(step_timestamps, acce_data[0])
            step_indexs = np.append(step_indexs, [i])
            step_acce_max_mins = np.append(step_acce_max_mins,
                                           [[acce_data[0], acce_max[1], acce_min[1], acce_std ** 2]], axis=0)
        # Remember the detrended value: the next sample must exceed it to count as a peak/valley.
        acce_mag_pre = acce_mag_filt_detrend

    return step_timestamps, step_indexs, step_acce_max_mins


def compute_stride_length(step_acce_max_mins):
    """
    Estimate the length of every detected step.

    :param step_acce_max_mins: numpy ndarray, shape(N, 4), rows
                               [timestamp, max, min, variance] as produced by compute_steps
    :return: numpy ndarray, shape(N, 2), rows [timestamp, stride length];
             an empty (0, 2) array when no step was detected
    """
    K = 0.4
    K_max = 0.8
    K_min = 0.4
    para_a0 = 0.21468084
    para_a1 = 0.09154517
    para_a2 = 0.02301998
    window_size = 2

    n_steps = step_acce_max_mins.shape[0]
    stride_lengths = np.zeros((n_steps, 2))
    if n_steps == 0:
        # No steps detected: return the empty result instead of failing on a
        # negative array dimension further down.
        return stride_lengths
    stride_lengths[:, 0] = step_acce_max_mins[:, 0]

    # calculate every step period - unit: second (timestamp differences are divided by 1000),
    # then smooth each one by averaging it with up to window_size - 1 preceding periods
    periods = np.diff(step_acce_max_mins[:, 0]) / 1000
    step_timeperiod = np.array([
        periods[max(0, i - window_size + 1):i + 1].mean() for i in range(periods.shape[0])
    ])

    # calculate parameters by step period and acceleration magnitude variance;
    # the first step has no preceding period and uses the default K
    k_real = np.full((n_steps,), K)
    k_real[1:] = np.clip(para_a0 + para_a1 / step_timeperiod + para_a2 * step_acce_max_mins[:-1, 3],
                         K_min, K_max) * (K / K_min)

    # calculate every stride length by parameters and max and min data of acceleration magnitude
    # (the max-min swing is floored at 1 before taking the fourth root)
    swing = np.maximum(step_acce_max_mins[:, 1] - step_acce_max_mins[:, 2], 1.0)
    stride_lengths[:, 1] = swing ** (1 / 4) * k_real

    return stride_lengths


def compute_headings(ahrs_datas):
    """
    Compute a heading angle for every rotation-vector sample.

    :param ahrs_datas: numpy ndarray; column 0 is the timestamp, the remaining
                       columns are the rotation vector components
    :return: numpy ndarray, shape(N, 2), rows [timestamp, heading], where the heading
             is the negated azimuth wrapped into [0, 2*pi)
    """
    n_samples = np.size(ahrs_datas, 0)
    headings = np.zeros((n_samples, 2))
    full_turn = 2 * np.pi
    for row_idx in range(n_samples):
        sample = ahrs_datas[row_idx, :]
        rotation = get_rotation_matrix_from_vector(sample[1:])
        azimuth, _pitch, _roll = get_orientation(rotation)
        headings[row_idx, 0] = sample[0]
        headings[row_idx, 1] = (-azimuth) % full_turn
    return headings


def compute_step_heading(step_timestamps, headings):
    """
    Pick, for every step, the heading sample carrying exactly the same timestamp.

    :param step_timestamps: step timestamps, in the order they occur in `headings`
    :param headings: numpy ndarray with rows [timestamp, heading]
    :return: numpy ndarray, shape(len(step_timestamps), 2), rows [timestamp, heading]
    :raises AssertionError: when some step timestamp has no matching heading sample
    """
    n_steps = len(step_timestamps)
    step_headings = np.zeros((n_steps, 2))
    matched = 0
    for row_idx in range(len(headings)):
        if matched >= n_steps:
            # Every step already has its heading.
            break
        if headings[row_idx, 0] == step_timestamps[matched]:
            step_headings[matched, :] = headings[row_idx, :]
            matched += 1
    assert matched == n_steps

    return step_headings


def compute_rel_positions(stride_lengths, step_headings):
    """
    Convert every step (length + heading) into a relative x/y displacement.

    :param stride_lengths: numpy ndarray with rows [timestamp, stride length]
    :param step_headings: numpy ndarray with rows [timestamp, heading in radians]
    :return: numpy ndarray, shape(N, 3), rows
             [timestamp, -length * sin(heading), length * cos(heading)]
    """
    n_steps = stride_lengths.shape[0]
    rel_positions = np.zeros((n_steps, 3))
    for k in range(n_steps):
        timestamp = stride_lengths[k, 0]
        length = stride_lengths[k, 1]
        angle = step_headings[k, 1]
        rel_positions[k, :] = timestamp, -length * np.sin(angle), length * np.cos(angle)

    return rel_positions


def compute_step_positions(acce_datas, ahrs_datas, posi_datas):
    """
    Estimate a position for every detected step of one path.

    :param acce_datas: accelerometer samples, passed to compute_steps
    :param ahrs_datas: rotation-vector samples, passed to compute_headings
    :param posi_datas: reference positions, passed to correct_positions
    :return: the corrected step positions returned by correct_positions
    """
    # Step detection; the step row indices are not needed here.
    timestamps_of_steps, _, max_mins_of_steps = compute_steps(acce_datas)
    all_headings = compute_headings(ahrs_datas)

    # Length and direction of every step -> relative displacement.
    lengths = compute_stride_length(max_mins_of_steps)
    directions = compute_step_heading(timestamps_of_steps, all_headings)
    displacements = compute_rel_positions(lengths, directions)

    # Anchor the displacements on the known reference positions.
    return correct_positions(displacements, posi_datas)

def extract_magnetic_strength(mwi_datas):
    """
    Average magnetic field magnitude per position.

    :param mwi_datas: dict position_key -> dict with a 'magnetic' numpy ndarray whose
                      columns 1..3 hold the field components
    :return: dict position_key -> mean Euclidean norm of those three columns
    """
    return {
        position_key: np.mean(np.sqrt(np.sum(readings['magnetic'][:, 1:4] ** 2, axis=1)))
        for position_key, readings in mwi_datas.items()
    }
def extract_wifi_rssi(mwi_datas):
    """
    Average wifi RSSI per access point and position.

    :param mwi_datas: dict position_key -> dict with a 'wifi' sequence of records in which
                      element 2 is the bssid and element 3 the rssi (convertible with int())
    :return: dict bssid -> dict position_key -> numpy float array [mean rssi, sample count]
    """
    wifi_rssi = {}
    for position_key in mwi_datas:
        wifi_data = mwi_datas[position_key]['wifi']
        for wifi_d in wifi_data:
            bssid = wifi_d[2]
            rssi = int(wifi_d[3])
            position_rssi = wifi_rssi.setdefault(bssid, {})
            if position_key in position_rssi:
                stats = position_rssi[position_key]
                old_rssi = stats[0]
                old_count = stats[1]
                # Incremental mean over all samples seen so far for this (bssid, position).
                stats[0] = (old_rssi * old_count + rssi) / (old_count + 1)
                stats[1] = old_count + 1
            else:
                # Float storage is required: in an integer array every update of the
                # running mean would be truncated to a whole number.
                position_rssi[position_key] = np.array([rssi, 1], dtype=float)
    return wifi_rssi
def extract_ibeacon_rssi(mwi_datas):
    """
    Average iBeacon RSSI per beacon and position.

    :param mwi_datas: dict position_key -> dict with an 'ibeacon' sequence of records in which
                      element 1 is the beacon id and element 2 the rssi (convertible with int())
    :return: dict beacon id -> dict position_key -> numpy float array [mean rssi, sample count]
    """
    ibeacon_rssi = {}
    for position_key in mwi_datas:
        ibeacon_data = mwi_datas[position_key]['ibeacon']
        for ibeacon_d in ibeacon_data:
            beacon_id = ibeacon_d[1]
            rssi = int(ibeacon_d[2])
            position_rssi = ibeacon_rssi.setdefault(beacon_id, {})
            if position_key in position_rssi:
                stats = position_rssi[position_key]
                old_rssi = stats[0]
                old_count = stats[1]
                # Incremental mean over all samples seen so far for this (beacon, position).
                stats[0] = (old_rssi * old_count + rssi) / (old_count + 1)
                stats[1] = old_count + 1
            else:
                # Float storage is required: in an integer array every update of the
                # running mean would be truncated to a whole number.
                position_rssi[position_key] = np.array([rssi, 1], dtype=float)
    return ibeacon_rssi
def extract_wifi_count(mwi_datas):
    """Count the distinct WiFi BSSIDs recorded at each position.

    ``mwi_datas`` maps a position key to a dict whose ``'wifi'`` entry is a
    2-D array; the unique values of its column 2 are counted. Returns
    ``{position_key: n_unique_bssids}``.
    """
    return {
        position_key: np.unique(sensors['wifi'][:, 2]).size
        for position_key, sensors in mwi_datas.items()
    }

In [None]:
# Per-floor grids are written here. The path is relative to the working
# directory, like the 'train/' inputs read below and the cell that re-reads
# these CSVs afterwards.
outdir = 'indoor_xy_floor'
os.makedirs(outdir, exist_ok=True)  # idempotent; no shell involved

# Column names by type:
colacce = ['xyz_time','x_acce','y_acce','z_acce']
colmagn = ['xyz_time','x_magn','y_magn','z_magn']
colahrs = ['xyz_time','x_ahrs','y_ahrs','z_ahrs']
colwifi = ['t1_wifi','ssid_wifi','bssid_wifi','rssid_wifi','t2_wifi']
colbeac = ['t1_beac','id_beac','rssi_beac']
colwayp = ['t_waypoint','x_waypoint','y_waypoint']


def _bin_by_nearest_step(mwi_datas, sensor_datas, step_positions, sensor_key):
    """Attach each timestamp group of `sensor_datas` to its nearest step position.

    `sensor_datas` is split per unique value of its column 0 (via
    `split_ts_seq`). Each group is stored under
    `mwi_datas[(x, y)][sensor_key]`, where (x, y) are columns 1:3 of the
    `step_positions` row whose column 0 is closest to the group's first
    timestamp. `mwi_datas` is updated in place; empty input is a no-op.
    """
    if sensor_datas.size == 0:
        return
    sep_tss = np.unique(sensor_datas[:, 0].astype(float))
    for sensor_ds in split_ts_seq(sensor_datas, sep_tss):
        diff = np.abs(step_positions[:, 0] - float(sensor_ds[0, 0]))
        target_xy_key = tuple(step_positions[np.argmin(diff), 1:3])
        if target_xy_key in mwi_datas:
            mwi_datas[target_xy_key][sensor_key] = np.append(
                mwi_datas[target_xy_key][sensor_key], sensor_ds, axis=0)
        else:
            # First data seen at this position: start every sensor empty,
            # then store the current group under its own key.
            entry = {'magnetic': np.zeros((0, 4)),'wifi': np.zeros((0, 5)),'ibeacon': np.zeros((0, 3))}
            entry[sensor_key] = sensor_ds
            mwi_datas[target_xy_key] = entry


for building in buildings:
    # `cols` and the *col dtype groups used below are defined in an earlier cell.
    train = pd.read_csv(f'train/{building}.txt',index_col=0,header=None,names=cols,dtype='object')
    os.remove(f'train/{building}.txt') # due to the disk space limit

    for floor in np.unique(train.floor):
        xtrain = train.loc[train.floor==floor]
        print(f'{building},{floor}',xtrain.shape)

        # Every cell holds a whitespace-separated string; parse each into a typed array.
        xtrain = xtrain.fillna('')
        xtrain[int8col] = xtrain[int8col].apply(lambda x: x.apply(lambda y: np.array(y.split(),dtype=np.int8)))
        xtrain[int16col] = xtrain[int16col].apply(lambda x: x.apply(lambda y: np.array(y.split(),dtype=np.int16)))
        xtrain[int32col] = xtrain[int32col].apply(lambda x: x.apply(lambda y: np.array(y.split(),dtype=np.int32)))
        xtrain[floatcol] = xtrain[floatcol].apply(lambda x: x.apply(lambda y: np.array(y.split(),dtype=np.float32)))
        xtrain[stringcol] = xtrain[stringcol].apply(lambda x: x.apply(lambda y: np.array(y.split())))

        # position (x, y) -> {'magnetic': ..., 'wifi': ..., 'ibeacon': ...} for this floor
        mwi_datas = {}
        for path_id in xtrain.index:
            # NOTE: these arrays are deliberately left at notebook scope; the
            # walkthrough cells further down reuse acce_datas, ahrs_datas and
            # posi_datas from the last path processed here.
            acce_datas = np.array(xtrain.loc[path_id,colacce].apply(lambda x: pd.Series(x)).T)
            magn_datas = np.array(xtrain.loc[path_id,colmagn].apply(lambda x: pd.Series(x)).T)
            ahrs_datas = np.array(xtrain.loc[path_id,colahrs].apply(lambda x: pd.Series(x)).T)
            wifi_datas = np.array(xtrain.loc[path_id,colwifi].apply(lambda x: pd.Series(x)).T)
            ibeacon_datas = np.array(xtrain.loc[path_id,colbeac].apply(lambda x: pd.Series(x)).T)
            posi_datas = np.array(xtrain.loc[path_id,colwayp].apply(lambda x: pd.Series(x)).T)
            step_positions = compute_step_positions(acce_datas, ahrs_datas, posi_datas)

            _bin_by_nearest_step(mwi_datas, wifi_datas, step_positions, 'wifi')
            _bin_by_nearest_step(mwi_datas, ibeacon_datas, step_positions, 'ibeacon')
            _bin_by_nearest_step(mwi_datas, magn_datas, step_positions, 'magnetic')

        magnetic_strength = extract_magnetic_strength(mwi_datas)
        wifi_counts = extract_wifi_count(mwi_datas)
        wifi_rssi = extract_wifi_rssi(mwi_datas)
        ibeacon_rssi = extract_ibeacon_rssi(mwi_datas)
        print(f'This floor has {len(wifi_rssi.keys())} wifi aps')
        print(f'This floor has {len(ibeacon_rssi.keys())} ibeacons')

        # Assemble all columns first and build the frame once. Column order
        # matters to the reader cell: x, y, count, magn, wifi APs, then beacons.
        positions = list(mwi_datas.keys())
        grid_columns = {
            'x': [px for (px, py) in positions],
            'y': [py for (px, py) in positions],
            # Positions without any wifi scan get NaN instead of 0.
            'count': [wifi_counts[pos] if wifi_counts[pos]>0 else np.nan for pos in positions],
            'magn': [magnetic_strength[pos] for pos in positions],
        }
        for bssid, position_rssi in wifi_rssi.items():
            grid_columns[bssid] = [position_rssi[pos][0] if pos in position_rssi else np.nan for pos in positions]
        for beacid, position_rssi in ibeacon_rssi.items():
            grid_columns[beacid] = [position_rssi[pos][0] if pos in position_rssi else np.nan for pos in positions]
        xy = pd.DataFrame(grid_columns)
        xy.to_csv(f'{outdir}/{building}_{floor}.csv')


In [None]:
# Visualise one exported floor grid.
building = '5a0546857ecc773753327266'
floor = "0"
xy = pd.read_csv(f'indoor_xy_floor/{building}_{floor}.csv',index_col=0)
# Column layout of the exported CSV: x, y, count, magn, wifi AP columns, beacon columns.
bssid = xy.columns[4]   # first wifi AP column
beaid = xy.columns[-1]  # last column
# Positions that have at least one wifi scan ('count' is NaN otherwise).
xyw = xy.loc[~np.isnan(xy['count'])]

# (frame, colour column, title) for each of the four panels.
panels = [
    (xyw, bssid, f'wifi id {bssid}'),
    # 'count' is the number of distinct APs seen at a position; it is not tied to `bssid`.
    (xyw, 'count', 'wifi count (unique APs per position)'),
    (xy, 'magn', 'magnetic strength'),
    (xy, beaid, f'beacon id {beaid}'),
]

plt.figure(figsize=(20,20))
for panel_no, (frame, colour_col, panel_title) in enumerate(panels, start=1):
    plt.subplot(2, 2, panel_no)
    # Black dots first so that positions whose value is NaN remain visible.
    plt.scatter(frame.x,frame.y,c='black')
    plt.scatter(frame.x,frame.y,c=frame[colour_col],cmap='viridis')
    plt.title(panel_title)
plt.show()

The output is stored at "indoor_xy_floor" @ https://www.kaggle.com/oxzplvifi/indoor-xy-floor. Let's take a look at how the hosts' GitHub scripts created the data inside the "indoor_xy_floor" directory:

Step I. Step detection and stride length:

In [None]:
# Detect the steps and derive a stride length for each one.
# `acce_datas` is whatever the export loop above left behind for its last path.
step_timestamps, step_indexs, step_acce_max_mins = compute_steps(acce_datas)
stride_lengths = compute_stride_length(step_acce_max_mins)

# Step detection finds the timepoints at which the person walked:
plt.figure(figsize=(20,10))
plt.plot(acce_datas[:,0],acce_datas[:,3],label='az')
step_times = step_acce_max_mins[:,0]
for stat_col, stat_label in enumerate(['max','min','variance'], start=1):
    plt.plot(step_times,step_acce_max_mins[:,stat_col],'*-',label=stat_label)
plt.legend()
plt.title('Step detection')
plt.show()

# Stride length relates to the distance travelled in each step:
plt.figure(figsize=(20,10))
plt.plot(stride_lengths[:,0],stride_lengths[:,1],'*-')
plt.title('Stride length')
plt.show()

Step II. Walking direction (angle around z):

In [None]:
# Heading over time, and the heading sampled at each detected step.
headings = compute_headings(ahrs_datas)
step_headings = compute_step_heading(step_timestamps, headings)

# Raw AHRS channels (columns 1-3 against the time column):
plt.figure(figsize=(20,10))
ahrs_time = ahrs_datas[:,0]
for axis_col, axis_label in enumerate(['x_ahrs','y_ahrs','z_ahrs'], start=1):
    plt.plot(ahrs_time,ahrs_datas[:,axis_col],label=axis_label)
plt.legend()
plt.show()

# Continuous heading with the per-step headings overlaid:
plt.figure(figsize=(20,10))
plt.plot(headings[:,0],headings[:,1],label='around_z')
plt.plot(step_headings[:,0],step_headings[:,1],'*-',label='steps around_z')
plt.legend()
plt.show()

Step III. Waypoint interpolation:

In [None]:
# Relative displacement per step, then positions corrected against the waypoints.
rel_positions = compute_rel_positions(stride_lengths, step_headings)
step_positions = correct_positions(rel_positions, posi_datas)

# The two relative-displacement components over time:
plt.figure(figsize=(20,10))
for rel_col, rel_label in ((1,'stride*-sin'),(2,'stride*cos')):
    plt.plot(rel_positions[:,0],rel_positions[:,rel_col],'*-',label=rel_label)
plt.legend()
plt.show()

# One figure per coordinate: recorded waypoints against the corrected step positions.
for coord_col, coord_name in ((1,'x'),(2,'y')):
    plt.figure(figsize=(20,10))
    plt.plot(posi_datas[:,0],posi_datas[:,coord_col],'*-',label=f'{coord_name} waypoint')
    plt.plot(step_positions[:,0],step_positions[:,coord_col],'+-',label=f'{coord_name} position')
    plt.legend()
    plt.show()

The training set thus consists of a 4D grid map of (X, Y, Floor, RSSI) values, whereas the testing set is 2D, consisting of (time, RSSI) values.

Thank you for reading! Let me know if you have any questions or suggestions.