## Metadata

In [None]:
import warnings 
warnings.filterwarnings('ignore')

import os 
import time 
import math 
import pickle as pkl 
from datetime import datetime
import json
import pprint
import glob
import imageio
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', 10000)
import seaborn as sns 
np.set_printoptions(suppress=True)
import matplotlib.pyplot as plt 
import plotly.graph_objs as go
from PIL import Image
from tqdm.notebook import tqdm

# Metadata directory of the building used as the running example by the metadata cells below.
sample_building = "../input/indoor-location-navigation/metadata/5a0546857ecc773753327266/"

def plot_all_floors(path_to_building):
    """Show every ``floor_image.png`` found one level below a building directory.

    The images are laid out on a grid with ``ceil(n / 2)`` columns, one subplot
    per floor, each titled with the name of the directory holding the image.

    Args:
        path_to_building: Directory whose immediate sub-directories contain a
            ``floor_image.png``.
    """
    plt.figure(figsize=(16, 10))
    pattern = os.path.join(path_to_building, "*/floor_image.png")
    floor_paths = glob.glob(pattern)
    n_floors = len(floor_paths)
    if n_floors:
        # The grid shape depends only on the number of floors.
        n_cols = math.ceil(n_floors / 2)
        n_rows = math.ceil(n_floors / n_cols)
        for position, floor_path in enumerate(floor_paths, start=1):
            plt.subplot(n_rows, n_cols, position)
            plt.imshow(imageio.imread(floor_path))
            plt.axis("off")
            plt.title(floor_path.split("/")[-2], fontsize=16)
    plt.show()
    
def read_floor_info(path_to_building):
    """Pretty-print the ``floor_info.json`` of every floor of a building.

    Args:
        path_to_building: Building metadata directory; every immediate
            sub-directory containing a ``floor_info.json`` is reported.
    """
    json_paths = glob.glob(os.path.join(path_to_building, "*/floor_info.json"))
    for json_path in json_paths:
        with open(json_path, "r", encoding="utf-8") as f:
            info = json.load(f)
        floor_name = os.path.basename(os.path.dirname(json_path))
        # Report the building that was actually passed in, not a module-level sample path.
        print('for {} floor in building {}'.format(floor_name, path_to_building))
        pprint.pprint(info)


## Metadata related

# Show the floor plan images and print the floor metadata of the sample building.
plot_all_floors(sample_building)
read_floor_info(sample_building)

## for train/test data

In [None]:
# copy from https://github.com/location-competition/indoor-location-competition-20/blob/master/io_f.py
from dataclasses import dataclass

@dataclass
class ReadData:
    """Container for the per-record-type arrays parsed from one trace file.

    The layouts noted on each field describe what ``read_data_file`` stores;
    a record type that never occurs in the file ends up as an empty array.
    """
    # rows of [timestamp, 3 float values] from TYPE_ACCELEROMETER records
    acce: np.ndarray
    # rows of [timestamp, 3 float values] from TYPE_ACCELEROMETER_UNCALIBRATED records
    acce_uncali: np.ndarray
    # rows of [timestamp, 3 float values] from TYPE_GYROSCOPE records
    gyro: np.ndarray
    # rows of [timestamp, 3 float values] from TYPE_GYROSCOPE_UNCALIBRATED records
    gyro_uncali: np.ndarray
    # rows of [timestamp, 3 float values] from TYPE_MAGNETIC_FIELD records
    magn: np.ndarray
    # rows of [timestamp, 3 float values] from TYPE_MAGNETIC_FIELD_UNCALIBRATED records
    magn_uncali: np.ndarray
    # rows of [timestamp, 3 float values] from TYPE_ROTATION_VECTOR records
    ahrs: np.ndarray
    # rows of [sys_ts, ssid, bssid, rssi, lastseen_ts] from TYPE_WIFI records, kept as strings
    wifi: np.ndarray
    # rows of [ts, "<uuid>_<major>_<minor>", rssi] from TYPE_BEACON records, kept as strings
    ibeacon: np.ndarray
    # rows of [timestamp, 2 float coordinates] from TYPE_WAYPOINT records
    waypoint: np.ndarray


def read_data_file(data_filename):
    """Parse a tab-separated trace file into one array per record type.

    Blank lines and lines starting with ``#`` are skipped. Every other line is
    split on tabs; field 0 is the timestamp and field 1 the record type.
    Lines with an unrecognised record type are ignored.

    Args:
        data_filename: Path of the UTF-8 text file to read.

    Returns:
        A ``ReadData`` whose fields are ``np.array`` conversions of the rows
        collected for each record type.
    """
    (acce, acce_uncali, gyro, gyro_uncali, magn, magn_uncali,
     ahrs, wifi, ibeacon, waypoint) = ([] for _ in range(10))

    # record type -> (destination list, number of float fields following the type tag)
    numeric_sinks = {
        'TYPE_WAYPOINT': (waypoint, 2),
        'TYPE_ACCELEROMETER': (acce, 3),
        'TYPE_ACCELEROMETER_UNCALIBRATED': (acce_uncali, 3),
        'TYPE_GYROSCOPE': (gyro, 3),
        'TYPE_GYROSCOPE_UNCALIBRATED': (gyro_uncali, 3),
        'TYPE_MAGNETIC_FIELD': (magn, 3),
        'TYPE_MAGNETIC_FIELD_UNCALIBRATED': (magn_uncali, 3),
        'TYPE_ROTATION_VECTOR': (ahrs, 3),
    }

    with open(data_filename, 'r', encoding='utf-8') as file:
        raw_lines = file.readlines()

    for raw_line in raw_lines:
        stripped = raw_line.strip()
        if not stripped or stripped[0] == '#':
            continue

        fields = stripped.split('\t')
        kind = fields[1]

        sink_spec = numeric_sinks.get(kind)
        if sink_spec is not None:
            sink, n_values = sink_spec
            row = [int(fields[0])]
            row.extend(float(fields[pos]) for pos in range(2, 2 + n_values))
            sink.append(row)
        elif kind == 'TYPE_WIFI':
            # sys_ts, ssid, bssid, rssi, lastseen_ts (field 5 is not kept)
            wifi.append([fields[0], fields[2], fields[3], fields[4], fields[6]])
        elif kind == 'TYPE_BEACON':
            # ts, uuid_major_minor, rssi (field 5 is not kept)
            ibeacon.append([fields[0], '_'.join([fields[2], fields[3], fields[4]]), fields[6]])

    return ReadData(
        np.array(acce),
        np.array(acce_uncali),
        np.array(gyro),
        np.array(gyro_uncali),
        np.array(magn),
        np.array(magn_uncali),
        np.array(ahrs),
        np.array(wifi),
        np.array(ibeacon),
        np.array(waypoint),
    )

In [None]:
def visualize_trajectory(trajectory, floor_plan_filename, width_meter, height_meter, title=None, mode='lines + markers + text', show=False):
    """
    Plot a trajectory over a floor plan image and return the plotly figure.

    Adapted from https://github.com/location-competition/indoor-location-competition-20/blob/master/visualize_f.py

    Args:
        trajectory: 2-D array; column 0 is plotted as x and column 1 as y.
        floor_plan_filename: Image file opened with PIL and drawn below the trace.
        width_meter: Upper bound of the x axis and width of the background image.
        height_meter: Upper bound of the y axis and height of the background image.
        title: Figure title; "No title." is used when falsy.
        mode: Plotly scatter mode of the trajectory trace.
        show: When true, ``fig.show()`` is called before returning.

    Returns:
        The plotly figure that was built.
    """
    fig = go.Figure()
    n_points = trajectory.shape[0]

    # Interior points: small translucent green markers. First point: large blue
    # marker. Last point: large red marker.
    marker_sizes = [6] * n_points
    marker_sizes[0], marker_sizes[-1] = 10, 10

    marker_colors = ['rgba(4, 174, 4, 0.5)'] * n_points
    marker_colors[0] = 'rgba(12, 5, 235, 1)'
    marker_colors[-1] = 'rgba(235, 5, 5, 1)'

    # Label every point with its index; a point whose coordinates were already
    # seen gets eight leading spaces per earlier visit.
    visits = {}
    labels = []
    for idx in range(n_points):
        key = str(trajectory[idx])
        visits[key] = visits.get(key, -1) + 1
        labels.append('        ' * visits[key] + f'{idx}')
    labels[0] = 'Start 0'
    labels[-1] = f'End {n_points - 1}'

    path_trace = go.Scattergl(
        x=trajectory[:, 0],
        y=trajectory[:, 1],
        mode=mode,
        marker=dict(size=marker_sizes, color=marker_colors),
        line=dict(shape='linear', color='lightgrey', width=3, dash='dash'),
        text=labels,
        textposition="top center",
        name='trajectory',
    )
    fig.add_trace(path_trace)

    # Background: the floor plan stretched over [0, width_meter] x [0, height_meter].
    background = go.layout.Image(
        source=Image.open(floor_plan_filename),
        xref="x",
        yref="y",
        x=0,
        y=height_meter,
        sizex=width_meter,
        sizey=height_meter,
        sizing="contain",
        opacity=1,
        layer="below",
    )
    fig.update_layout(images=[background])

    # Fixed axis ranges with a 1:1 aspect ratio.
    fig.update_xaxes(autorange=False, range=[0, width_meter])
    fig.update_yaxes(autorange=False, range=[0, height_meter], scaleanchor="x", scaleratio=1)
    figure_title = go.layout.Title(
        text=title or "No title.",
        xref="paper",
        x=0,
    )
    fig.update_layout(
        title=figure_title,
        autosize=True,
        width=800,
        height=800 * height_meter / width_meter,
        template="plotly_white",
    )

    if show:
        fig.show()

    return fig

def visualize_train_trajectory(path):
    """
    Draw the waypoints of one train trajectory on its floor plan.

    Edited from
    https://www.kaggle.com/ihelon/indoor-location-exploratory-data-analysis

    Args:
        path: "<site id>/<floor>/<file>" relative to the train directory.

    Returns:
        The figure returned by ``visualize_trajectory``.
    """
    components = path.split("/")
    _id, floor = components[:2]

    trace = read_data_file(f"../input/indoor-location-navigation/train/{path}")
    with open(f"../input/indoor-location-navigation/metadata/{_id}/{floor}/floor_info.json") as f:
        floor_info = json.load(f)

    # Columns 1 and 2 of the waypoint array are the coordinates (column 0 is the timestamp).
    return visualize_trajectory(
        trace.waypoint[:, 1:3],
        f"../input/indoor-location-navigation/metadata/{_id}/{floor}/floor_image.png",
        floor_info["map_info"]["width"],
        floor_info["map_info"]["height"],
        f"Visualization of {path}"
    )

def visualize_approx_trajectory(path, points):
    """
    Draw caller-supplied points on the floor plan of a train trajectory.

    Edited from
    https://www.kaggle.com/ihelon/indoor-location-exploratory-data-analysis

    Args:
        path: "<site id>/<floor>/<file>" relative to the train directory; only
            the first two components are used, to locate the floor metadata.
        points: 2-D array of positions handed to ``visualize_trajectory``
            (column 0 is x, column 1 is y).

    Returns:
        The figure returned by ``visualize_trajectory``.
    """
    _id, floor = path.split("/")[:2]

    # The trajectory file itself is not parsed here: only the floor metadata and
    # the supplied points are drawn, so reading it would be wasted work.
    with open(f"../input/indoor-location-navigation/metadata/{_id}/{floor}/floor_info.json") as f:
        train_floor_info = json.load(f)

    return visualize_trajectory(
        points,
        f"../input/indoor-location-navigation/metadata/{_id}/{floor}/floor_image.png",
        train_floor_info["map_info"]["width"],
        train_floor_info["map_info"]["height"],
        f"Visualization of {path}"
    )

# Plot the recorded waypoints of one sample train trajectory on its floor plan.
visualize_train_trajectory("5a0546857ecc773753327266/F2/5dccf516c04f060006e6e3c9.txt")

In [None]:
# cat /kaggle/input/indoor-location-navigation/train/5a0546857ecc773753327266/B1/5e15730aa280850006f3d005.txt | head -5
# cat /kaggle/input/indoor-location-navigation/train/5a0546857ecc773753327266/B1/5e15730aa280850006f3d005.txt | tail -5

# One train trace and one test trace used as examples.
sample_building_train = "../input/indoor-location-navigation/train/5a0546857ecc773753327266/F2/5dccf516c04f060006e6e3c9.txt"
sample_building_test = "/kaggle/input/indoor-location-navigation/test/046cfa46be49fc10834815c6.txt"

train_sample = read_data_file(sample_building_train)
test_sample = read_data_file(sample_building_test)

# Put all IMU streams of the train sample side by side: the accelerometer block
# keeps its timestamp column, every other block contributes only its value columns.
imu_blocks = (
    train_sample.acce,
    train_sample.acce_uncali[:, 1:],
    train_sample.gyro[:, 1:],
    train_sample.gyro_uncali[:, 1:],
    train_sample.magn[:, 1:],
    train_sample.magn_uncali[:, 1:],
    train_sample.ahrs[:, 1:],
)
imu = np.concatenate(imu_blocks, axis=1)
imu_column_names = ['timestamp', 'acce_x','acce_y', 'acce_z','acce_uncali_x','acce_uncali_y', 'acce_uncali_z',
                    'gyro_x','gyro_y', 'gyro_z','gyro_uncali_x','gyro_uncali_y', 'gyro_uncali_z',
                    'magn_x','magn_y', 'magn_z','magn_uncali_x','magn_uncali_y', 'magn_uncali_z',
                    'ahrs_x','ahrs_y', 'ahrs_z']
imu_df = pd.DataFrame(imu, columns=imu_column_names)
display(imu_df.head().style.set_caption('imu'))

waypoint_df = pd.DataFrame(train_sample.waypoint)
waypoint_df.columns = ['timestamp', 'X', 'Y']
display(waypoint_df.style.set_caption('waypoint'));

In [None]:
# Summary statistics of every sensor column (the timestamp column is left out).
sensor_columns = imu_df.columns[1:]
display(imu_df.iloc[:, 1:].describe())

# check the difference between calibrated and uncalibrated columns
cali_columns = [col for col in sensor_columns if not ('uncali' in col or 'ahrs' in col)]
uncali_columns = [col for col in sensor_columns if 'uncali' in col and 'ahrs' not in col]
print('cali columns', cali_columns)
print('uncali_columns', uncali_columns)
a = imu_df[cali_columns]
b = imu_df[uncali_columns]
# Give both frames the same labels so the subtraction pairs columns by position.
b.columns = a.columns
diff = (a - b).add_suffix('_diff')
display(diff.describe().style.set_caption('difference'))

In [None]:
# x-axis limits applied to every subplot drawn by plot_imu_signals
# (same units as the imu_df.timestamp values plotted on that axis).
start_time = 1573713056850
end_time = 1573713091483

def plot_imu_signals(col):
    """Plot the calibrated and uncalibrated x/y/z signals of one sensor.

    Reads the module-level ``imu_df`` and draws three stacked subplots (x, y, z),
    each showing ``{col}_uncali_<axis>`` in orange and ``{col}_<axis>`` in blue
    against ``imu_df.timestamp``, limited to ``[start_time, end_time]``.

    Args:
        col: Sensor prefix of the columns to plot, e.g. ``'acce'``.
    """
    fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(14, 9))
    time_axis = imu_df.timestamp

    for panel, axis_name in zip(ax, 'xyz'):
        # Uncalibrated first, so the calibrated line is drawn on top of it.
        sns.lineplot(x=time_axis, y=imu_df[f"{col}_uncali_{axis_name}"], ax=panel, label='uncali', color='orange')
        sns.lineplot(x=time_axis, y=imu_df[f"{col}_{axis_name}"], ax=panel, label='cali', color='cornflowerblue')
        panel.set_ylabel(f"{col}_{axis_name} \n(calib./uncalib.)")
        panel.set_xlim([start_time, end_time])

    plt.tight_layout()
    plt.show()
    
# Compare calibrated and uncalibrated accelerometer columns.
plot_imu_signals('acce')

In [None]:
# Compare calibrated and uncalibrated gyroscope columns.
plot_imu_signals('gyro')

In [None]:
# Compare calibrated and uncalibrated magnetometer columns.
plot_imu_signals('magn')

In [None]:
def calc_from_acce(timestamp, acce, p_0):
    """Integrate an acceleration series twice to get velocity and position.

    Velocity is the running sum of ``acceleration * dt`` and position is
    ``p_0`` plus the running sum of ``velocity * dt``, where ``dt`` is the time
    in seconds since the previous sample (0 for the first sample).

    Args:
        timestamp: Sample times in milliseconds since the epoch.
        acce: Acceleration samples along one axis, aligned with ``timestamp``.
        p_0: Position at the first sample.

    Returns:
        DataFrame with columns ``timestamp``, ``timestamp_ms`` (the timestamp
        converted to a datetime), ``timedelta_s``, ``position``, ``velocity``
        and ``acceleration``.
    """
    df = pd.DataFrame({'timestamp' : timestamp, 'acceleration' : acce})
    df['timestamp_ms'] = df['timestamp'].map(lambda millis: datetime.fromtimestamp(millis / 1000.0))
    df['timedelta_ms'] = df['timestamp_ms'].diff()
    # The first sample has no predecessor; treat its step as zero seconds.
    df['timedelta_s'] = df['timedelta_ms'].map(lambda delta: delta.total_seconds()).fillna(0)

    step = df['timedelta_s']
    df['velocity'] = (df['acceleration'] * step).cumsum()
    df['position'] = p_0 + (df['velocity'] * step).cumsum()

    output_columns = ['timestamp', 'timestamp_ms', 'timedelta_s', 'position', 'velocity', 'acceleration']
    return df[output_columns]

# Integrate the x and y accelerometer columns separately, each starting from
# the corresponding coordinate of the first waypoint.
df_from_acce_x = calc_from_acce(imu_df.timestamp, imu_df.acce_x, waypoint_df.X.iloc[0])
df_from_acce_y = calc_from_acce(imu_df.timestamp, imu_df.acce_y, waypoint_df.Y.iloc[0])

# print(os.path.basename(sample_building_train))
# visualize_train_trajectory('/'.join(sample_building_train.split('/')[-3:]))
approx_xy = np.column_stack((df_from_acce_x.position.values, df_from_acce_y.position.values))
sample_relative_path = '/'.join(sample_building_train.split('/')[-3:])
# Plot every 50th integrated position.
visualize_approx_trajectory(sample_relative_path, approx_xy[::50])

In [None]:
import os
import os.path as osp 

# Count the floor directories and trajectory files of every site in the train set.
root = "/kaggle/input/indoor-location-navigation/train"
site_count = 0
site_list, floor_list, traj_list = [], [], []
for site in os.listdir(root):
    site_dir = osp.join(root, site)
    if not osp.isdir(site_dir):
        continue
    floor_names = os.listdir(site_dir)
    num_floors = len(floor_names)
    traj_count = sum(len(os.listdir(osp.join(site_dir, floor))) for floor in floor_names)
    site_count += 1
    site_list.append(site)
    floor_list.append(num_floors)
    traj_list.append(traj_count)
    # print('for site {}, there are {} floors and {} trajectories in total'.format(site, num_floors, traj_count))

print('{} sites in total'.format(site_count))

# One row per site, indexed by site id.
floor_traj = pd.DataFrame({'site': site_list, 'floor': floor_list, 'traj': traj_list}).set_index('site')

display(floor_traj.describe())
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(14, 8))
sns.distplot(floor_traj['floor'], ax=ax1)
sns.distplot(floor_traj['traj'], ax=ax2)

## First we build a mapping from trajectories to sites

In [None]:
sample_submission = pd.read_csv("../input/indoor-location-navigation/sample_submission.csv", index_col=0)
print('{} rows in sample submission'.format(sample_submission.shape[0]))

# Every submission id is split on '_' into site, trajectory and timestamp.
site_path_timestamps = sample_submission.index.values
splits = [item.split('_') for item in site_path_timestamps]
sites, trajs, timestamps = [], [], []
for parts in splits:
    sites.append(parts[0])
    trajs.append(parts[1])
    timestamps.append(parts[2])
usites = np.unique(sites)
print("{} unique sites with {} trajectories".format(len(usites), len(np.unique(trajs))))

# trajectory id -> site id; a trajectory must always map to the same site
traj2site = {}
for traj, site in zip(trajs, sites):
    known_site = traj2site.setdefault(traj, site)
    assert known_site == site, "One trajectory can not correspond to two sites"

print('mapping from trajectories to sites built, with {} items'.format(len(traj2site)))

# only 24 sites in the training set are involved
train_sites = floor_traj.index.values.tolist()
indices = []
for usite in usites:
    assert usite in train_sites, "Site {} can not be found in training set".format(usite)
    indices.append(train_sites.index(usite))

# Restrict the per-site statistics to the sites that appear in the submission.
masked_floor_traj = floor_traj.iloc[indices]
display(masked_floor_traj.describe().style.set_caption('in consideration'))
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(14, 8))
sns.distplot(masked_floor_traj['floor'], ax=ax1)
sns.distplot(masked_floor_traj['traj'], ax=ax2);

## Obtain the floor names (for later one-hot embedding)

In [None]:
import copy 

def _floor_sort_key(canonical_name, was_reversed):
    """Return a natural-sort key so that digit runs compare as integers."""
    import re

    key = []
    # re.split with a capturing group puts the digit runs at the odd positions.
    for position, part in enumerate(re.split(r'(\d+)', canonical_name)):
        if position % 2:
            # A name that was reversed to get the letter in front ("10F" -> "F01")
            # also had its digits reversed; undo that before reading the number.
            digits = part[::-1] if was_reversed else part
            key.append((0, int(digits), ''))
        elif part:
            key.append((1, 0, part))
    return key


def build_floorname2floorindex(floornames):
    """Map floor directory names to consecutive indices ordered bottom to top.

    Names that do not start with an upper-case ASCII letter (e.g. ``"1F"``) are
    reversed first (``"F1"``). Names containing ``'B'`` come first, ordered from
    the highest number down, followed by names containing ``'F'`` in ascending
    order. Numbers are compared numerically, so ``F10`` comes after ``F2`` and
    ``10F`` after ``2F``. Names containing neither letter get no entry.

    The ordered names and the final mapping are printed.

    Args:
        floornames: Iterable of non-empty floor names.

    Returns:
        ``(mapping, min_index)`` where ``mapping`` holds each ordered name and
        its reversed spelling as keys, and ``min_index`` is minus the number of
        ``'B'`` names (adding it to an index makes the first ``'F'`` floor 0).
    """
    canonical_names = []
    sort_keys = {}
    for floorname in floornames:
        was_reversed = not ('A' <= floorname[0] <= 'Z')
        canonical = floorname[::-1] if was_reversed else floorname
        canonical_names.append(canonical)
        sort_keys[canonical] = _floor_sort_key(canonical, was_reversed)

    f_floornames = [name for name in canonical_names if 'F' in name]
    b_floornames = [name for name in canonical_names if 'B' in name]
    min_index = 0 - len(b_floornames)

    ordered = (sorted(b_floornames, key=sort_keys.__getitem__, reverse=True)
               + sorted(f_floornames, key=sort_keys.__getitem__))
    print(ordered)
    mapping = {name: i for i, name in enumerate(ordered)}
    # Also accept the mirrored spelling of every name ("F1" <-> "1F").
    out_mapping = dict(mapping)
    for name, index in mapping.items():
        out_mapping[name[::-1]] = index
    print(out_mapping)
    return out_mapping, min_index

In [None]:
# Seed NumPy's global RNG, which the np.random.shuffle call in the training loop below draws from.
np.random.seed(2021)
import pickle as pkl 
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier, RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Keep every `downsample_factor`-th IMU row of a train trajectory.
downsample_factor = 5
# Per-site containers, keyed by site id.
site_loc_models, site_floor_models, site_shifts = {}, {}, {}

test_root = "/kaggle/input/indoor-location-navigation/test/"

sample_submission = pd.read_csv("/kaggle/input/indoor-location-navigation/sample_submission.csv", index_col=0)

# Split every submission id on '_' once and pull out its three leading fields.
ids = sample_submission.index.values.tolist()
id_fields = [_id.split('_') for _id in ids]
sample_sites = [fields[0] for fields in id_fields]
sample_trajs = [fields[1] for fields in id_fields]
sample_timestamps = [fields[2] for fields in id_fields]


def estimate_loc_every_timestamp(timestamps, waypoint_timestamps, waypoints):
    """Estimate a location for every timestamp by interpolating between waypoints.

    Between two consecutive waypoints the location moves linearly in time, i.e.
    the surveyor is assumed to walk at constant speed on each segment.
    Timestamps before the first waypoint get the first waypoint's location and
    timestamps after the last waypoint get the last one's.

    Args:
        timestamps: Sequence of query times (may be empty).
        waypoint_timestamps: Times of the waypoints; assumed to be sorted in
            ascending order (not checked).
        waypoints: Array with one row per waypoint, aligned with
            ``waypoint_timestamps``.

    Returns:
        Float array with one row per query timestamp and the same trailing
        shape as a single waypoint row.

    Raises:
        ValueError: If no waypoint is given.
    """
    query_ts = np.asarray(timestamps, dtype=float)
    knot_ts = np.asarray(waypoint_timestamps, dtype=float)
    knots = np.asarray(waypoints, dtype=float)
    if knot_ts.size == 0:
        raise ValueError("at least one waypoint is required to estimate locations")

    # np.interp works on one coordinate at a time and clamps queries outside
    # the waypoint time range to the first/last waypoint.
    flat_knots = knots.reshape(knots.shape[0], -1)
    estimated_locs = np.empty((query_ts.size, flat_knots.shape[1]))
    for dim in range(flat_knots.shape[1]):
        estimated_locs[:, dim] = np.interp(query_ts, knot_ts, flat_knots[:, dim])
    return estimated_locs.reshape((query_ts.size,) + knots.shape[1:])
    
# Restore state from a previous run when a checkpoint file sits in the working directory.
resume_id = 0
checkpoint_file = "checkpoint.pkl"
if osp.exists(checkpoint_file):
    # NOTE: unpickling can execute arbitrary code; only load checkpoints you wrote yourself.
    with open(checkpoint_file, "rb") as f:
        checkpoint = pkl.load(f)
    resume_id = checkpoint['resume_id']
    site_loc_models = checkpoint['loc_model']
    site_floor_models = checkpoint['cls_model']
    site_shifts = checkpoint['shifts']
    print("load checkpoint with resume id {}".format(resume_id))
    
# Per-site pipeline: for every site that appears in the submission, build a
# training set from the site's train trajectories, fit a floor classifier and
# a location regressor on the magnetometer columns, print hold-out scores and
# fill in the submission rows that belong to the site.
imu_columns = ['timestamp', 'acce_x','acce_y', 'acce_z','acce_uncali_x','acce_uncali_y', 'acce_uncali_z',
               'gyro_x','gyro_y', 'gyro_z','gyro_uncali_x','gyro_uncali_y', 'gyro_uncali_z',
               'magn_x','magn_y', 'magn_z','magn_uncali_x','magn_uncali_y', 'magn_uncali_z',
               'ahrs_x','ahrs_y', 'ahrs_z']
# One selector for both training and prediction, so the feature sets cannot drift apart.
feature_columns = [col for col in imu_columns if 'magn' in col]
# The site of every submission row does not change between iterations; convert it once.
sample_sites_array = np.array(sample_sites)

for site_id, usite in enumerate(usites):
    # NOTE: resuming from a checkpoint is currently disabled.
    # if site_id < resume_id:
    #     continue
    row_indices = np.nonzero(sample_sites_array == usite)[0]
    print('--- * processing the {} site {} starts * ---'.format(site_id, usite))

    # ---- build the training data of this site ----
    usite_root = osp.join(root, usite)
    num_floors = masked_floor_traj[masked_floor_traj.index == usite]['floor'].iloc[0]
    X_train, Y_train_cls, Y_train_reg = [], [], []
    floor_names = os.listdir(usite_root)
    floorname2floorindex, shift = build_floorname2floorindex(floor_names)
    for floor_name in floor_names:
        floor_label = floorname2floorindex[floor_name]
        skip_count = 0
        for traj in tqdm(os.listdir(osp.join(usite_root, floor_name))):
            traj_path = osp.join(usite_root, floor_name, traj)
            data = read_data_file(traj_path)
            # The sensor blocks are stacked column-wise below; skip trajectories
            # whose calibrated and uncalibrated accelerometer arrays differ in shape.
            if data.acce.shape != data.acce_uncali.shape:
                skip_count += 1
                continue
            imu = np.concatenate((data.acce, data.acce_uncali[:, 1:], data.gyro[:, 1:],
                          data.gyro_uncali[:, 1:], data.magn[:, 1:], data.magn_uncali[:, 1:], data.ahrs[:, 1:]), axis=1)[::downsample_factor]
            imu_df = pd.DataFrame(imu, columns=imu_columns)
            waypoint_df = pd.DataFrame(data.waypoint)
            waypoint_df.columns = ['timestamp', 'waypoint_x', 'waypoint_y']
            waypoints = np.stack((waypoint_df.waypoint_x, waypoint_df.waypoint_y), axis=1)
            # Regression target: a location interpolated from the waypoints for every IMU row.
            estimated_locs = estimate_loc_every_timestamp(imu_df.timestamp.values.tolist(), waypoint_df.timestamp.values.tolist(), waypoints)
            traj_X_train = imu_df[feature_columns].values
            # Classification target: one-hot floor label, identical for all rows of the trajectory.
            traj_Y_train_cls = np.zeros((traj_X_train.shape[0], num_floors))
            traj_Y_train_cls[:, floor_label] = 1
            traj_Y_train_reg = estimated_locs[:, :2]
            X_train.append(traj_X_train); Y_train_cls.append(traj_Y_train_cls); Y_train_reg.append(traj_Y_train_reg)
            # visualize_approx_trajectory(osp.join(usite, floor_name, traj), estimated_locs)
        if skip_count:
            print('skipped {} trajectories on floor {}'.format(skip_count, floor_name))
    X_train = np.concatenate(X_train, axis=0); Y_train_cls = np.concatenate(Y_train_cls, axis=0); Y_train_reg = np.concatenate(Y_train_reg, axis=0)
    shuffle_indices = np.arange(len(X_train))
    np.random.shuffle(shuffle_indices)
    X_train = X_train[shuffle_indices]; Y_train_cls = Y_train_cls[shuffle_indices]; Y_train_reg = Y_train_reg[shuffle_indices]
    print(X_train.shape, Y_train_cls.shape, Y_train_reg.shape)

    # ---- fit and validate: the first 25% of the shuffled rows are held out ----
    start = time.time()
    cls_model = RandomForestClassifier()
    reg_model = RandomForestRegressor()
    val_ratio = 0.25
    val_num = int(len(X_train) * val_ratio)
    x_train, y_train_cls, y_train_reg = X_train[val_num:], Y_train_cls[val_num:], Y_train_reg[val_num:]
    x_val, y_val_cls, y_val_reg = X_train[:val_num], Y_train_cls[:val_num], Y_train_reg[:val_num]
    cls_model.fit(x_train, y_train_cls)
    end = time.time()
    print('training on classifier finished in {}s'.format(end - start))
    cls_score = cls_model.score(x_val, y_val_cls)
    print('score on the validation set for classification is {:.4f}'.format(cls_score))
    start = time.time()
    reg_model.fit(x_train, y_train_reg)
    reg_score = reg_model.score(x_val, y_val_reg)
    y_val_reg_pred = reg_model.predict(x_val)
    # Per-coordinate RMSE averaged over the outputs. Computed by hand because the
    # `squared=False` argument of mean_squared_error is not available in every
    # scikit-learn release.
    rmse = np.sqrt(mean_squared_error(y_val_reg, y_val_reg_pred, multioutput='raw_values')).mean()
    end = time.time()
    print('training on regressor finished in {}s'.format(end - start))
    print('score/rmse on the validation set for regression is {:.4f}({:.4F})'.format(reg_score, rmse))
    # NOTE: keeping the fitted models and writing a checkpoint is currently disabled.
    # site_floor_models[usite] = cls_model
    # site_loc_models[usite] = reg_model
    # site_shifts[usite] = shift
    # with open("checkpoint.pkl".format(site_id), "wb") as f:
    #     pkl.dump({"resume_id": site_id + 1, "cls_model": site_floor_models, "loc_model": site_loc_models, "shifts": site_shifts}, f)
    print('--- * processing site {} ends * ---'.format(usite))

    # ---- predict the submission rows of this site ----
    # Several rows share one test trajectory; parse each trajectory file only once.
    test_cache = {}
    for row_index in row_indices:
        traj = sample_trajs[row_index]
        if traj not in test_cache:
            test_data = read_data_file(osp.join(test_root, traj + ".txt"))
            imu = np.concatenate((test_data.acce, test_data.acce_uncali[:, 1:], test_data.gyro[:, 1:],
                              test_data.gyro_uncali[:, 1:], test_data.magn[:, 1:], test_data.magn_uncali[:, 1:], test_data.ahrs[:, 1:]), axis=1)
            imu_df = pd.DataFrame(imu, columns=imu_columns)
            test_cache[traj] = (test_data.acce[:, 0], imu_df[feature_columns].values)
        ref_timestamps, test_features = test_cache[traj]
        timestamp = sample_timestamps[row_index]
        # Use the IMU row whose accelerometer timestamp is nearest to the requested one.
        index = np.abs(ref_timestamps - int(timestamp)).argmin()
        inputs = test_features[index].reshape(1, -1)
        loc = reg_model.predict(inputs).reshape(-1)
        # argmax over the predicted one-hot row, shifted by the value returned
        # by build_floorname2floorindex.
        floor_pred = cls_model.predict(inputs).argmax() + shift

        sample_submission.iloc[row_index, 0] = int(floor_pred)
        sample_submission.iloc[row_index, 1] = loc[0]
        sample_submission.iloc[row_index, 2] = loc[1]
    # Free the per-site data before the next site is processed.
    del X_train, Y_train_cls, Y_train_reg
    del cls_model, reg_model
    del test_cache

In [None]:
# Show the filled-in submission table and write it to submission_v1.csv.
display(sample_submission.style.set_caption('final submission'))
sample_submission.to_csv("submission_v1.csv") 

In [None]:
# test_root = "/kaggle/input/indoor-location-navigation/test/"

# sample_submission = pd.read_csv("/kaggle/input/indoor-location-navigation/sample_submission.csv", index_col=0)
# print(sample_submission.columns)
# floor_predictions = []
# xs_predictions = []
# ys_predictions = []

# ids = sample_submission.index.values.tolist()
# sites = [_id.split('_')[0] for _id in ids]
# trajs = [_id.split('_')[1] for _id in ids]
# timestamps = [_id.split('_')[2] for _id in ids]

# for i, (site, traj, timestamp) in enumerate(zip(sites, trajs, timestamps)): 
#     loc_model = site_loc_models[site]
#     cls_model = site_floor_models[site]
#     shift = site_shifts[site]
#     # read trajectory 
#     test_data = read_data_file(osp.join(test_root, traj + ".txt"))
#     ref_timestamps = test_data.timestamp.values
#     # find nearest timestamp 
#     index = np.abs(ref_timestamps - timestamp).argsort()[0]
#     # we have already obtained index 
#     test_columns = [col for col in test_data.columns if 'mag' in col]
#     inputs = test_data.iloc[index][test_columns].values.reshape(1, -1)
#     loc = loc_model.predict(inputs).reshape(-1)
#     floor = cls_model.predict(inputs) + shift 
#     sample_submission.iloc[i].floor = floor 
#     sample_submission.iloc[i].x = loc[0]
#     sample_submission.iloc[i].y = loc[1]
    
# display(sample_submission.style.set_caption('final submission'))
# sample_submission.to_csv("submission_v1.csv") 