Main imports

In [None]:
import os
import gc
import json
import numpy as np
import pandas as pd
from PIL import Image
import geoplot
import geopandas as gpd
from tqdm import tqdm
from scipy.fft import fft, ifft
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
pd.options.display.max_columns = None

In [None]:
!pip install detecta

First, let's see what kind of data we have, taking the first entry, `5a0546857ecc773753327266`, as an example:

In [None]:
exp_ls = '../input/indoor-location-navigation/metadata/5a0546857ecc773753327266'
os.listdir(exp_ls)

In [None]:
os.listdir(exp_ls + '/F1')

In [None]:
# Show the floor plan of level B1. The path previously pointed at '/F2' although
# the variable name and the title both say B1.
B1_png = exp_ls + '/B1' + '/floor_image.png'
img = mpimg.imread(B1_png)
plt.figure(figsize=(20, 10))
plt.title('B1')
plt.imshow(img);

In [None]:
floor_dir = exp_ls + '/F1'

# Floor metadata, printed as the raw parsed JSON.
with open(floor_dir + '/floor_info.json') as info_file:
    data_hw = json.load(info_file)
print(data_hw)

# Floor map read as a GeoDataFrame; show its first rows.
df_gpd = gpd.read_file(floor_dir + '/geojson_map.json')
display(df_gpd.head())

In [None]:
# the GeoJSON geometries are the individual details of the big .png floor picture
# geoplot.polyplot(df_gpd, figsize=(8, 4));
df_gpd['geometry'][1]

Now let's look at what the train files consist of:

In [None]:
exp_train = '../input/indoor-location-navigation/train/5a0546857ecc773753327266'
len(os.listdir(exp_train + '/B1'))

In [None]:
# Read every B1 trajectory file and pool its tab-separated records in one list.
b1_dir = exp_train + '/B1'
full_lines = []
for txt_files in tqdm(os.listdir(b1_dir)):
    with open(b1_dir + '/' + txt_files) as f:
        lines = [raw.rstrip('\n').split('\t') for raw in f.readlines()]
    full_lines.extend(lines)

In [None]:
len(full_lines)

In [None]:
full_lines[-80:]

In [None]:
# wifi = []
# ac = []
# for i in tqdm(full_lines):
#     if i[1] == 'TYPE_WIFI':
#         wifi.append(i[6])
#     if i[1]=='TYPE_ACCELEROMETER':
#         ac.append(i[0])

In [None]:
# len(full_lines)

In [None]:
# print(min(ac), max(ac))
# print(min(wifi), max(wifi))

In [None]:
# Keep only the records whose type field marks a waypoint.
coords = [record for record in full_lines if record[1] == 'TYPE_WAYPOINT']

In [None]:
# Waypoint table: one row per TYPE_WAYPOINT record.
coords = pd.DataFrame(coords, columns=['ts', 'type', 'x', 'y'])
# The parsed fields are strings. Cast the millisecond timestamp to an integer
# before converting: combining string input with `unit=` is deprecated in recent pandas.
coords['ts'] = pd.to_datetime(coords['ts'].astype('int64'), unit='ms')
coords['diff_ts'] = coords['ts'].diff()  # gap to the previous row
for xy_col in ('x', 'y'):
    coords[xy_col] = coords[xy_col].astype('float64')

Visualize path:

In [None]:
# Overlay the first `n` waypoints on the B1 floor plan. The waypoints were read
# from the B1 trajectories, so the B1 image (not F2) is the matching background.
B1_png = exp_ls + '/B1' + '/floor_image.png'
img = mpimg.imread(B1_png)
plt.figure(figsize=(30, 20))
n = 11
path_head = coords.head(n)
plt.plot(path_head['x'], path_head['y'])
plt.scatter(path_head['x'], path_head['y'], c='b')
# NOTE(review): the waypoints are drawn in their raw coordinate units on top of
# image pixels - confirm whether they need rescaling to the image size first.
plt.title('B1')
plt.imshow(img);

In [None]:
coords.head()

In [None]:
# Pair consecutive waypoints into segments and count the sensor readings that
# fall between them. Each stored row is:
#   start waypoint (ts, type, x, y) + end waypoint (ts, x, y)
#   + [acc_n, mag_n, gyro_n, rot_n, i] + [max, min of the first accelerometer axis]
# NOTE: full_lines pools every B1 file, so a segment can join the last waypoint
# of one file to the first waypoint of the next.
coords = []
coord_line = ''
acc_n, mag_n, gyro_n, rot_n = 0, 0, 0, 0
acc1, acc2, acc3 = [], [], []
# Wrap the list itself (not the enumerate object) so tqdm knows the total length.
for i, line in enumerate(tqdm(full_lines)):
    record_type = line[1]
    if record_type == 'TYPE_WAYPOINT':
        if coord_line:
            features = [max(acc1), min(acc1)] if acc1 else [0, 0]
            coords.append(coord_line + [line[0], line[2], line[3]]
                          + [acc_n, mag_n, gyro_n, rot_n, i] + features)
        coord_line = line
        acc_n, mag_n, gyro_n, rot_n = 0, 0, 0, 0
        # Keep the samples of the segment that just ended; later cells plot them.
        acc1c, acc2c, acc3c = acc1.copy(), acc2.copy(), acc3.copy()
        acc1, acc2, acc3 = [], [], []
    elif record_type == 'TYPE_ACCELEROMETER':
        acc_n += 1
        acc1.append(float(line[2]))
        acc2.append(float(line[3]))
        acc3.append(float(line[4]))
    elif record_type == 'TYPE_MAGNETIC_FIELD':
        mag_n += 1
    elif record_type == 'TYPE_GYROSCOPE':
        gyro_n += 1
    elif record_type == 'TYPE_ROTATION_VECTOR':
        rot_n += 1

In [None]:
# Segment table: start waypoint, end waypoint, per-sensor reading counts and
# the two accelerometer extremes collected above.
coords = pd.DataFrame(coords, columns=[
    'ts_start', 'type_start', 'x_start', 'y_start',
    'ts_end', 'x_end', 'y_end',
    'acc_n', 'mag_n', 'gyro_n', 'rot_n', 'i', 'acc1_max', 'acc1_min',
])

# The parsed fields are strings. Cast the millisecond timestamps to integers
# before converting: combining string input with `unit=` is deprecated in recent pandas.
for ts_col in ('ts_start', 'ts_end'):
    coords[ts_col] = pd.to_datetime(coords[ts_col].astype('int64'), unit='ms')

for xy_col in ('x_start', 'x_end', 'y_start', 'y_end'):
    coords[xy_col] = coords[xy_col].astype('float64')

# Duration of each segment.
coords['between_ts'] = coords['ts_end'] - coords['ts_start']

In [None]:
coords.tail()

Example of data:

In [None]:
coords[['x_start', 'y_start', 'x_end', 'y_end', 'acc_n']].iloc[-1]

In [None]:
# Draw the last segment on a 300x300 crop of the B1 floor plan. The segments
# come from the B1 trajectories, so the B1 image (not F2) is the matching background.
B1_png = exp_ls + '/B1' + '/floor_image.png'
img = mpimg.imread(B1_png)
plt.figure(figsize=(20, 10))
cor_loc = coords.iloc[-1]
seg_x = [cor_loc['x_start'], cor_loc['x_end']]
seg_y = [cor_loc['y_start'], cor_loc['y_end']]
plt.plot(seg_x, seg_y)
plt.scatter(seg_x, seg_y, c='b')
# NOTE(review): the segment is drawn in raw coordinate units on top of image
# pixels - confirm whether it needs rescaling to the image size first.
plt.title('B1')
plt.imshow(img[:300,:300]);

In [None]:
# One line per accelerometer axis of the segment kept in acc1c/acc2c/acc3c.
plt.figure(figsize=(12,5))
for axis_samples, axis_label in ((acc1c, 'acc1'), (acc2c, 'acc2'), (acc3c, 'acc3')):
    plt.plot(axis_samples, label=axis_label)
plt.legend();

In [None]:
from detecta import detect_peaks
# Run the detector on the first accelerometer axis of the kept segment (acc1c),
# once with valley=True and once with valley=False.
# NOTE(review): mpd is presumably the minimum spacing between detections and
# show=True presumably plots them - confirm against the detecta documentation.
detect_peaks(acc1c, show=True, mpd=30, valley=True)
detect_peaks(acc1c, show=True, mpd=30, valley=False)

In [None]:
# how many rows have no accelerometer readings
coords[coords.acc_n==0].shape

## Look at raw accelerometers data:

In [None]:
coords.tail(4)

In [None]:
# B1_png = exp_ls + '/F2' + '/floor_image.png'
# img = mpimg.imread(B1_png)
# plt.figure(figsize=(30, 20))
# n = 5
# plt.plot(coords.head(n)['x'], coords.head(n)['y'])
# plt.scatter(coords.head(n)['x'], coords.head(n)['y'], c='b')    
# plt.title('B1')
# plt.imshow(img);

## Make features from train dataset

In [None]:
# Quantile levels summarised for every series, in output order.
_QUANTILES = [.1, .15, .2, .25, .3, .4, .6, .65, .7, .75, .8, .9]
# `mpd` settings passed to detect_peaks when counting valleys.
_MPD_VALUES = [30, 40]
# max, min, mean, std, median + the quantiles above = 17 values per series.
_N_STATS = 5 + len(_QUANTILES)


def _series_stats(values):
    """Return the 17 summary statistics of one non-empty series.

    Order: max, min, mean, std, median, then the quantiles in `_QUANTILES`.
    """
    quantiles = np.quantile(values, _QUANTILES)
    return [max(values), min(values), np.mean(values), np.std(values),
            np.median(values)] + list(quantiles)


def _valley_counts(series_list):
    """Count detect_peaks(valley=True) hits per series, for each mpd in turn."""
    return [detect_peaks(series, mpd=mp, valley=True).shape[0]
            for mp in _MPD_VALUES
            for series in series_list]


def get_feats(*acc, flag=3):
    """Build the fixed-width feature vector for one window of samples.

    Parameters
    ----------
    *acc : sequences of float
        With ``flag == 3`` the first three positional arguments are used
        (e.g. the three axes of a sensor); otherwise only the first one.
    flag : int, default 3
        3 selects the three-series layout, any other value the single-series one.

    Returns
    -------
    list
        ``flag == 3`` (58 values): 17 statistics for each of the three series,
        the number of samples in the first series, then six valley counts
        (mpd=30 for each series, then mpd=40 for each series).
        Otherwise (19 values): 17 statistics of the series, then its valley
        counts for mpd=30 and mpd=40.
        When the first series is empty (or None) a list of zeros of the SAME
        width is returned. The zero vectors used to be shorter (51 / 17), which
        produced ragged rows when windows with and without data were mixed.

    Notes
    -----
    Relies on ``detect_peaks`` (imported from detecta earlier in the notebook)
    for non-empty input.
    """
    three_axes = flag == 3
    n_series = 3 if three_axes else 1
    width = n_series * (_N_STATS + len(_MPD_VALUES)) + (1 if three_axes else 0)

    first = acc[0]
    # len() instead of truthiness so that numpy arrays are accepted as well.
    if first is None or len(first) == 0:
        return [0] * width

    series_list = [acc[idx] for idx in range(n_series)]
    feats = []
    for values in series_list:
        feats += _series_stats(values)
    if three_axes:
        feats.append(len(first))  # number of samples in the window
    return feats + _valley_counts(series_list)
    
# get_feats(acc1c, acc2c, acc3c)

In [None]:
# peaks counts
s1 = detect_peaks(acc1c, show=True, mpd=50, valley=True)
s2 = detect_peaks(acc1c, show=True, mpd=50, valley=False)
print(s1.shape[0], s2.shape[0])

In [None]:
# coords = []
# coord_line = ''
# acc_n, mag_n, gyro_n, rot_n = 0, 0, 0, 0
# acc1, acc2, acc3 = [], [], []
# for i, line in tqdm(enumerate(full_lines)):
#     if line[1] == 'TYPE_WAYPOINT':
#         if coord_line:
#             if acc1:
#                 features = [max(acc1), min(acc1)]
#             else:
#                 features = [0, 0]
#             coords.append(coord_line + [line[0], line[2], line[3]] + [acc_n, mag_n, gyro_n, rot_n, i] + features)
#         coord_line = line
#         acc_n, mag_n, gyro_n, rot_n = 0, 0, 0, 0
#         acc1c, acc2c, acc3c = acc1.copy(), acc2.copy(), acc3.copy()
#         acc1, acc2, acc3 = [], [], []
#     elif line[1] == 'TYPE_ACCELEROMETER':
#         acc_n +=1
#         acc1.append(float(line[2])), acc2.append(float(line[3])), acc3.append(float(line[4]))
#     elif line[1] == 'TYPE_MAGNETIC_FIELD':
#         mag_n +=1
#     elif line[1] == 'TYPE_GYROSCOPE':
#         gyro_n +=1
#     elif line[1] == 'TYPE_ROTATION_VECTOR':
#         rot_n +=1

In [None]:
# Walk every train trajectory (site / floor / file) and build
#   coords      - one row per pair of consecutive waypoints:
#                 start waypoint fields + [end ts, end x, end y, floor] + sensor features
#   coords_wifi - one row per waypoint timestamp: [ts] + features of two Wi-Fi fields
# `features`, `feat_wifi_1` and `feat_wifi_2` stay defined afterwards on purpose:
# the next cell reads them.
coords = []
coords_wifi = []
full_feats_df = []
train_path = '../input/indoor-location-navigation/train'

# all paths = 204
for path in tqdm(os.listdir(train_path)):
    site_dir = os.path.join(train_path, path)
    for floor in os.listdir(site_dir):
        floor_dir = os.path.join(site_dir, floor)
        for trajs in os.listdir(floor_dir):
            with open(os.path.join(floor_dir, trajs)) as f:
                full_lines = [row.rstrip('\n').split('\t') for row in f]

            xy_timestamp = []   # waypoint timestamps, reused by the Wi-Fi pass
            coord_line = ''     # previous waypoint record ('' until the first one)
            # Start every file with empty buffers for ALL sensors. Previously only
            # the accelerometer lists existed before the first waypoint, so an
            # earlier magnetometer/gyroscope/rotation record hit an undefined name
            # (or appended to lists left over from the previous file).
            acc1, acc2, acc3 = [], [], []
            mag1, mag2, mag3 = [], [], []
            gyro1, gyro2, gyro3 = [], [], []
            rot1, rot2, rot3 = [], [], []

            # ---- sensor pass ----
            for line in full_lines:
                record_type = line[1]
                if record_type == 'TYPE_WAYPOINT':
                    xy_timestamp.append(line[0])
                    if coord_line:
                        features = (get_feats(acc1, acc2, acc3)
                                    + get_feats(mag1, mag2, mag3)
                                    + get_feats(gyro1, gyro2, gyro3)
                                    + get_feats(rot1, rot2, rot3))
                        coords.append(coord_line + [line[0], line[2], line[3], floor] + features)
                    coord_line = line
                    # A new segment starts at this waypoint.
                    acc1, acc2, acc3 = [], [], []
                    mag1, mag2, mag3 = [], [], []
                    gyro1, gyro2, gyro3 = [], [], []
                    rot1, rot2, rot3 = [], [], []
                elif record_type == 'TYPE_ACCELEROMETER':
                    acc1.append(float(line[2]))
                    acc2.append(float(line[3]))
                    acc3.append(float(line[4]))
                elif record_type == 'TYPE_MAGNETIC_FIELD':
                    mag1.append(float(line[2]))
                    mag2.append(float(line[3]))
                    mag3.append(float(line[4]))
                elif record_type == 'TYPE_GYROSCOPE':
                    gyro1.append(float(line[2]))
                    gyro2.append(float(line[3]))
                    gyro3.append(float(line[4]))
                elif record_type == 'TYPE_ROTATION_VECTOR':
                    # Parse all three values BEFORE appending: a short record is
                    # replaced by zeros as a whole, so the three lists keep equal
                    # lengths (a partial append used to leave them out of step).
                    try:
                        rot_xyz = [float(line[2]), float(line[3]), float(line[4])]
                    except IndexError:
                        rot_xyz = [0, 0, 0]
                    rot1.append(rot_xyz[0])
                    rot2.append(rot_xyz[1])
                    rot3.append(rot_xyz[2])

            # ---- Wi-Fi pass ----
            # NOTE(review): the two Wi-Fi buffers are not cleared after a row is
            # emitted, so each row summarises every reading since the start of the
            # file, whereas the test-time extraction clears its buffers after each
            # window - confirm which behaviour is intended.
            xy_ts = 0
            coord_line_wifi1, coord_line_wifi2 = [], []
            # Without any waypoint there is no timestamp to attach Wi-Fi rows to.
            wifi_lines = [rec for rec in full_lines if rec[1] == 'TYPE_WIFI'] if xy_timestamp else []
            for line in wifi_lines:
                # Compare timestamps numerically rather than as strings.
                if int(line[0]) < int(xy_timestamp[xy_ts]):
                    coord_line_wifi1.append(float(line[4]))
                    coord_line_wifi2.append(float(line[5]))
                else:
                    feat_wifi_1 = get_feats(coord_line_wifi1, flag=1)
                    feat_wifi_2 = get_feats(coord_line_wifi2, flag=1)
                    coords_wifi.append([xy_timestamp[xy_ts]] + feat_wifi_1 + feat_wifi_2)
                    xy_ts += 1
                    if xy_ts >= len(xy_timestamp):
                        break

In [None]:
base_wifi = ['ts_start']
base_targ = ['ts_start', 'type_start', 'x_start', 'y_start', 'ts_end', 'x_end', 'y_end', 'floor']

coords = pd.DataFrame(coords)
coords_wifi = pd.DataFrame(coords_wifi)

# Take the number of feature columns from the frames themselves instead of from
# whatever `features` / `feat_wifi_*` happened to hold after the last loop
# iteration; the names then always match the actual column count.
n_sensor_feats = coords.shape[1] - len(base_targ)
n_wifi_feats = coords_wifi.shape[1] - len(base_wifi)

# Feature columns are named by their running index repeated three times
# ('000', '111', ..., '101010', ...); Wi-Fi names continue after the sensor ones.
nfeats = [str(i)*3 for i in range(n_sensor_feats)]
nfeats_wf = [str(i)*3 for i in range(n_sensor_feats, n_sensor_feats + n_wifi_feats)]

coords.columns = base_targ + nfeats
coords_wifi.columns = base_wifi + nfeats_wf
# merge all features
coords = coords.merge(coords_wifi, on='ts_start', how='inner')

In [None]:
coords_wifi.shape

In [None]:
print(coords_wifi.shape)
coords_wifi.head()

In [None]:
print(coords.shape)
coords.head()

In [None]:
def floor_code(x):
    """Translate a floor folder name into its integer level.

    'F1'/'1F' -> 0, 'F2'/'2F' -> 1, 'B1'/'1B' -> -1, 'B2'/'2B' -> -2.
    Any other value yields the sentinel -100.
    """
    known_levels = (
        (('F2', '2F'), 1),
        (('F1', '1F'), 0),
        (('B1', '1B'), -1),
        (('B2', '2B'), -2),
    )
    for aliases, level in known_levels:
        if x in aliases:
            return level
    return -100

In [None]:
# Replace each floor name by its integer code (-100 for names floor_code does not know).
coords['floor'] = coords['floor'].map(floor_code)

In [None]:
coords['floor'].value_counts()

In [None]:
# The waypoint coordinates were parsed as text; make them numeric.
for xy_col in ['x_start', 'x_end', 'y_start', 'y_end']:
    coords[xy_col] = coords[xy_col].astype('float64')

In [None]:
# coords['target_x'] = coords['x_end'] - coords['x_start']
# coords['target_y'] = coords['y_end'] - coords['y_start']

## Train model

In [None]:
from catboost import CatBoostRegressor, CatBoostClassifier

In [None]:
# coords.drop(['ts_start', 'type_start', 'x_start', 'y_start', 'ts_end', 'x_end', 'y_end'], axis=1, inplace=True)
# Every missing value becomes 0.
coords = coords.fillna(0)

### Predict x coordinate

In [None]:
# Columns 0-7 are the waypoint/floor fields; everything from column 8 on is a feature.
feature_matrix = coords.iloc[:, 8:]
target_x = coords['x_end']
X_train, X_test, y_train, y_test = train_test_split(
    feature_matrix, target_x, test_size=0.2, random_state=42)

In [None]:
X_train.shape

In [None]:
%%time
model_x = CatBoostRegressor(n_estimators=5000, loss_function='RMSE', eval_metric='RMSE',
                            early_stopping_rounds=200, verbose=200);
model_x.fit(X_train, y_train,
            eval_set=(X_test, y_test), use_best_model=True
           );

### Predict y coordinate

In [None]:
# Same feature columns as for x (column 8 onwards); the target is the end y coordinate.
feature_matrix = coords.iloc[:, 8:]
target_y = coords['y_end']
X_train, X_test, y_train, y_test = train_test_split(
    feature_matrix, target_y, test_size=0.2, random_state=42)

In [None]:
%%time
model_y = CatBoostRegressor(n_estimators=5000, loss_function='RMSE', eval_metric='RMSE',
                            early_stopping_rounds=200, verbose=200);
model_y.fit(X_train, y_train,
            eval_set=(X_test, y_test), use_best_model=True
           );

### Predict floor

In [None]:
# Rows whose floor received the -100 sentinel are left out of the floor model.
coords_known_floor = coords[coords.floor != -100]
X_train, X_test, y_train, y_test = train_test_split(
    coords_known_floor.iloc[:, 8:], coords_known_floor['floor'],
    test_size=0.2, random_state=42)

In [None]:
%%time
model_fl = CatBoostClassifier(n_estimators=5000, loss_function='MultiClass',
          eval_metric='MultiClass', early_stopping_rounds=200, verbose=200);
model_fl.fit(X_train, y_train,
            eval_set=(X_test, y_test), use_best_model=True
           );

# Make submission

In [None]:
subs = pd.read_csv('../input/indoor-location-navigation/sample_submission.csv')

In [None]:
# site_path_timestamp is '<site>_<path>_<timestamp>': split it once, then store the parts.
id_parts = [key.split('_') for key in subs['site_path_timestamp']]
subs['site'] = [parts[0] for parts in id_parts]
subs['path'] = [parts[1] for parts in id_parts]
subs['timestamp'] = [parts[2] for parts in id_parts]

In [None]:
subs.head()

## Main features extraction function

In [None]:
# Build the test-time features.
#   full_feats_df   - rows [site_path_timestamp, sensor tag] + get_feats(...) of one sensor window
#   full_wifi_dfraw - rows [site_path_timestamp, 'wifi'] + features of two Wi-Fi fields
# Requires `subs` to already carry the 'site', 'path' and 'timestamp' columns.

# Raw record type -> tag written into the second column of a sensor row.
SENSOR_TAGS = {
    'TYPE_ACCELEROMETER': 'acc',
    'TYPE_MAGNETIC_FIELD': 'mag',
    'TYPE_GYROSCOPE': 'gyro',
    'TYPE_ROTATION_VECTOR': 'rot',
}

full_feats_df = []
full_wifi_dfraw = []
test_path = '../input/indoor-location-navigation/test'

for path in tqdm(os.listdir(test_path)):
    # Read one test trajectory into tab-separated records.
    with open(os.path.join(test_path, path)) as f:
        full_lines = [row.rstrip('\n').split('\t') for row in f]

    # Site id: value after ':' in the second field of the second line;
    # path id: the file name without its extension.
    site_id = full_lines[1][1].split(':')[1]
    path_id = path.split('.')[0]

    # Timestamps the submission asks for on this path; skip files it never mentions.
    wanted = subs[(subs['site'] == site_id) & (subs['path'] == path_id)]
    if wanted.shape[0] == 0:
        continue
    # Sort numerically; the leading 0 keeps the index arithmetic 1-based.
    buff_ts = [0] + sorted(wanted['timestamp'].astype('int64').tolist())
    key_prefix = site_id + '_' + path_id + '_'

    # ---- sensor pass ----
    coords = []
    # Fresh, empty buffers for every file. They used to be inherited from the
    # training cell / the previous test file, which leaked foreign samples into
    # the first window of each path.
    buffers = {tag: ([], [], []) for tag in SENSOR_TAGS.values()}
    ts_idx = 1
    ts1 = buff_ts[ts_idx]  # timestamp the current window is collected for
    for line in full_lines:
        tag = SENSOR_TAGS.get(line[1])
        if tag is None:
            continue
        if int(line[0]) > ts1:
            # First reading of this sensor past the target timestamp: close the
            # sensor's window (the triggering reading itself is not stored).
            site_path_ts = key_prefix + str(ts1).rjust(13, '0')
            coords.append([site_path_ts, tag] + get_feats(*buffers[tag]))
            buffers[tag] = ([], [], [])
            if tag == 'rot':
                # Only a rotation-vector reading moves on to the next target timestamp.
                ts_idx += 1
                if ts_idx >= len(buff_ts):
                    break
                ts1 = buff_ts[ts_idx]
        else:
            values = (float(line[2]), float(line[3]), float(line[4]))
            for axis_buffer, value in zip(buffers[tag], values):
                axis_buffer.append(value)
    # NOTE(review): a window is only written when a later reading arrives, so
    # readings still buffered at the end of the file are never emitted - confirm
    # whether the trailing window should be flushed.

    # ---- Wi-Fi pass ----
    coords_wifi = []
    wfi1, wfi2 = [], []
    ts_idx = 1
    ts1 = buff_ts[ts_idx]
    for line in full_lines:
        if line[1] != 'TYPE_WIFI':
            continue
        if int(line[0]) > ts1:
            wifi_feats1 = get_feats(wfi1, flag=1)
            wifi_feats2 = get_feats(wfi2, flag=1)
            site_path_ts = key_prefix + str(ts1).rjust(13, '0')
            coords_wifi.append([site_path_ts, 'wifi'] + wifi_feats1 + wifi_feats2)

            ts_idx += 1
            if ts_idx >= len(buff_ts):
                break
            ts1 = buff_ts[ts_idx]
            wfi1, wfi2 = [], []
        else:
            wfi1.append(float(line[4]))
            wfi2.append(float(line[5]))

    full_feats_df += coords
    full_wifi_dfraw += coords_wifi

In [None]:
# full_feats_df.fillna(0, inplace=True)
# full_feats_df = pd.DataFrame(full_feats_df)

Collect test features to pandas DataFrame:

In [None]:
full_feats_df = pd.DataFrame(full_feats_df)
full_feats_df.columns = [str(i)*3 for i in range(full_feats_df.shape[1])]

# One output row per site_path_timestamp: the key followed by the feature values
# of all its sensor rows, concatenated in their original order.
# A single groupby pass replaces re-filtering the whole frame once per key;
# sort=False keeps the keys in order of first appearance, as unique() did.
test_lst = []
for sitepath, ww in tqdm(full_feats_df.groupby('000', sort=False)):
    t = ww.iloc[:, 2:].values.tolist()
    test_lst.append([sitepath] + [item for sublist in t for item in sublist])

In [None]:
%%time
test_lst = pd.DataFrame(test_lst)
test_lst.columns=[str(i)*3 for i in range(test_lst.shape[1])]
ress = test_lst.merge(subs, how='right', left_on='000', right_on='site_path_timestamp')
ress.drop(['site_path_timestamp','floor','x','y','site','path','timestamp'], axis=1, inplace=True)

print(subs.shape, test_lst.shape, ress.shape)
print(ress.shape)
ress.head()

In [None]:
full_wifi_df = pd.DataFrame(full_wifi_dfraw)
# Name the feature columns from the frame's real width (key + tag + features)
# instead of a hard-coded count; with 38 feature columns this yields the same
# names as before.
n_wifi_feats = full_wifi_df.shape[1] - 2
full_wifi_df.columns = ['000', 'WIFI'] + [str(i)*3 for i in range(1, n_wifi_feats + 1)]
full_wifi_df = full_wifi_df.drop(['WIFI'], axis=1)
print(full_wifi_df.shape)
full_wifi_df.head()

In [None]:
# Attach the Wi-Fi features, then rename: the key column first, followed by the
# same index-based names the training feature matrix uses.
fulls = pd.merge(ress, full_wifi_df, how='left', on='000')
model_feature_names = [str(i)*3 for i in range(X_train.shape[1])]
fulls.columns = ['time_path_site'] + model_feature_names
print(fulls.shape)

Some rows found no match in the merge, so back-fill the first four feature columns from the following rows and set every remaining gap to zero:

In [None]:
# Back-fill the first four feature columns from the next available row, then
# zero every remaining gap. `.bfill()` replaces the deprecated fillna(method='bfill').
lead_cols = ['000','111','222','333']
fulls[lead_cols] = fulls[lead_cols].bfill()
fulls = fulls.fillna(0)

In [None]:
# Drop the key column and keep exactly as many feature columns as the models were trained on.
n_model_feats = X_train.shape[1]
test_data = fulls.iloc[:, 1:n_model_feats + 1]
test_data.columns = [str(i)*3 for i in range(n_model_feats)]

In [None]:
x_preds = model_x.predict(test_data)
y_preds = model_y.predict(test_data)
# The multi-class classifier may hand its labels back as an (n, 1) column;
# flatten so they can be stored as an ordinary 1-D column (a no-op if already 1-D).
flr = np.ravel(model_fl.predict(test_data))

In [None]:
subs = pd.read_csv('../input/indoor-location-navigation/sample_submission.csv')

In [None]:
subs['x'], subs['y'], subs['floor'] = x_preds, y_preds, flr

In [None]:
subs['site'] = subs['site_path_timestamp'].apply(lambda x: x.split('_')[0])
# subs['path'] = subs['site_path_timestamp'].apply(lambda x: x.split('_')[1])

In [None]:
# Give every row of a site the floor predicted most often for that site.
subs['site'] = [key.split('_')[0] for key in subs['site_path_timestamp']]
flr_merge = (subs[['site', 'floor']]
             .groupby(['site'])
             .agg(lambda votes: votes.value_counts().index[0])
             .reset_index())
flr_merge.columns = ['site', 'flr']

subs = flr_merge.merge(subs, how='inner', on='site')
# Keep the submission columns, with the per-site floor under the name 'floor'.
subs = subs[['site_path_timestamp', 'flr', 'x', 'y']].rename(columns={'flr': 'floor'})

In [None]:
subs.to_csv("submission_204un_floor.csv", index=False)