<a href="https://colab.research.google.com/github/kyochanpy/Kaggle_Indoor_Location_Navigation/blob/main/note_books/02_indoor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -U kaggle

from googleapiclient.discovery import build
import io, os
from googleapiclient.http import MediaIoBaseDownload
from google.colab import auth


# Authenticate the Colab user, locate `kaggle.json` in their Google Drive and
# copy it to the path stored in `filename` so the `kaggle` CLI calls below can
# use it.
auth.authenticate_user()

drive_service = build('drive', 'v3')
results = drive_service.files().list(
        q="name = 'kaggle.json'", fields="files(id)").execute()
kaggle_api_key = results.get('files', [])
if not kaggle_api_key:
    # Fail with an actionable message instead of an IndexError further down.
    raise FileNotFoundError("No file named 'kaggle.json' was found in Google Drive")

filename = "/root/.kaggle/kaggle.json"
os.makedirs(os.path.dirname(filename), exist_ok=True)

# Only the first search hit is downloaded.
request = drive_service.files().get_media(fileId=kaggle_api_key[0]['id'])
# The context manager guarantees the credential file is flushed and closed
# before its permissions are changed and before the CLI reads it.
with io.FileIO(filename, 'wb') as fh:
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
# Octal literal: owner read/write only. The decimal value 600 would be
# interpreted as mode 0o1130, which is not the intended permission set.
os.chmod(filename, 0o600)


# Download the Kaggle dataset archive with the CLI, then unpack it into a
# directory named after the dataset (relative to the current working directory).
!kaggle datasets download -d hiro5299834/indoor-navigation-and-location-wifi-features
!unzip indoor-navigation-and-location-wifi-features.zip -d indoor-navigation-and-location-wifi-features

Collecting kaggle
[?25l  Downloading https://files.pythonhosted.org/packages/3a/e7/3bac01547d2ed3d308ac92a0878fbdb0ed0f3d41fb1906c319ccbba1bfbc/kaggle-1.5.12.tar.gz (58kB)
[K     |█████▋                          | 10kB 19.4MB/s eta 0:00:01[K     |███████████▏                    | 20kB 18.7MB/s eta 0:00:01[K     |████████████████▊               | 30kB 14.5MB/s eta 0:00:01[K     |██████████████████████▎         | 40kB 13.5MB/s eta 0:00:01[K     |███████████████████████████▉    | 51kB 10.4MB/s eta 0:00:01[K     |████████████████████████████████| 61kB 4.9MB/s 
Building wheels for collected packages: kaggle
  Building wheel for kaggle (setup.py) ... [?25l[?25hdone
  Created wheel for kaggle: filename=kaggle-1.5.12-cp37-none-any.whl size=73053 sha256=1b58363feade8e590b9c9f80751dfaa7ad213dab26e3efce80ec81f9b974040d
  Stored in directory: /root/.cache/pip/wheels/a1/6a/26/d30b7499ff85a4a4593377a87ecf55f7d08af42f0de9b60303
Successfully built kaggle
Installing collected packages: k

In [2]:
# Shallow-clone the repository that provides the `io_f` and `compute_f` modules
# imported below, into a directory whose name is a valid Python package name.
!git clone --depth 1 https://github.com/location-competition/indoor-location-competition-20 indoor_location_competition_20
# Remove the repository's bundled `data` directory; only the code is needed here.
!rm -rf indoor_location_competition_20/data

Cloning into 'indoor_location_competition_20'...
remote: Enumerating objects: 1169, done.[K
remote: Counting objects: 100% (1169/1169), done.[K
remote: Compressing objects: 100% (1131/1131), done.[K
remote: Total 1169 (delta 38), reused 1167 (delta 38), pack-reused 0[K
Receiving objects: 100% (1169/1169), 411.37 MiB | 16.91 MiB/s, done.
Resolving deltas: 100% (38/38), done.
Checking out files: 100% (1145/1145), done.


In [3]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from pathlib import Path
import glob
from tqdm import tqdm


from sklearn.model_selection import KFold
import lightgbm as lgb

import psutil
import random
import os
import time
import sys
import math
from contextlib import contextmanager


import multiprocessing
import scipy.interpolate
import scipy.sparse

from indoor_location_competition_20.io_f import read_data_file
import indoor_location_competition_20.compute_f as compute_f

In [4]:
@contextmanager
def timer(name: str):
    """Report wall-clock time and process-memory movement for the wrapped block.

    When the ``with`` body finishes (normally or by raising), one line of the
    form ``[<mem>GB(<sign><delta>GB): <seconds>sec] <name>`` is printed to
    stderr. Memory is the first field of ``psutil``'s ``memory_info()`` for the
    current process, divided by 2**30.

    Args:
        name: Label appended to the report line.
    """
    started = time.time()
    proc = psutil.Process(os.getpid())
    unit = 2. ** 30
    mem_before = proc.memory_info()[0] / unit
    try:
        yield
    finally:
        mem_after = proc.memory_info()[0] / unit
        change = mem_after - mem_before
        # The sign is printed separately so the magnitude can be formatted unsigned.
        if change >= 0:
            sign = '+'
        else:
            sign = '-'
        magnitude = math.fabs(change)
        print(f"[{mem_after:.1f}GB({sign}{magnitude:.1f}GB): {time.time() - started:.3f}sec] {name}", file=sys.stderr)


def set_seed(seed=527):
    """Seed Python's ``random`` and NumPy's legacy global RNG with ``seed``.

    The value is also written to ``os.environ["PYTHONHASHSEED"]`` as a string.
    NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this
    presumably only affects child processes - confirm if hash determinism of
    the current kernel matters.

    Args:
        seed: Integer seed applied to every generator (default 527).
    """
    for seed_fn in (random.seed,):
        seed_fn(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)

    
def comp_metric(xhat, yhat, fhat, x, y, f):
    """Mean per-sample error: Euclidean (x, y) distance plus 15 x |floor error|.

    Args:
        xhat, yhat, fhat: Predicted x, y and floor arrays.
        x, y, f: Ground-truth x, y and floor arrays (same length as predictions).

    Returns:
        The summed per-sample error divided by ``xhat.shape[0]``.
    """
    dx = xhat - x
    dy = yhat - y
    planar_error = np.sqrt(np.power(dx, 2) + np.power(dy, 2))
    floor_penalty = 15 * np.abs(fhat - f)
    per_sample = planar_error + floor_penalty
    return per_sample.sum() / xhat.shape[0]


def score_log(df: pd.DataFrame, num_files: int, nam_file: str, data_shape: tuple, n_fold: int, seed: int, mpe: float):
    """Append one score record to ``df`` and rewrite the CSV log.

    Args:
        df: Log accumulated so far (may be an empty DataFrame).
        num_files: Index of the file being scored.
        nam_file: Name of the file being scored.
        data_shape: Shape of the training frame for that file.
        n_fold: Fold number for the record.
        seed: Seed used for the run.
        mpe: Score value to record.

    Returns:
        A new DataFrame consisting of ``df`` followed by the new row. The same
        frame is written (without its index) to ``LOG_PATH / "log_score.csv"``,
        overwriting any previous file.
    """
    record = dict(n_files=num_files, file_name=nam_file, shape=data_shape,
                  fold=n_fold, seed=seed, score=mpe)
    new_row = pd.DataFrame([record])
    df = pd.concat([df, new_row])
    log_file = LOG_PATH / "log_score.csv"
    df.to_csv(log_file, index=False)
    return df

In [5]:
# Number of cross-validation folds; also the divisor used when fold predictions
# are averaged in the training loop.
N_SPLITS = 5
# Single seed reused by set_seed, KFold and the LightGBM parameter dicts.
SEED = 618
set_seed(SEED)
/content/indoor-navigation-and-location-wifi-features

In [6]:
# Directory that score_log writes `log_score.csv` into. It is created here
# (including parents) so the first write cannot fail on a missing directory.
LOG_PATH = Path("./log/")
LOG_PATH.mkdir(parents=True, exist_ok=True)

In [17]:
# Directory holding the per-site feature CSVs.
feature_dir = "/content/indoor-navigation-and-location-wifi-features"
# Both lists are sorted because the training loop pairs them by position
# (train_files[i] with test_files[i]).
# NOTE(review): assumes both globs return the same sites in the same order - confirm.
train_files = sorted(glob.glob(os.path.join(feature_dir, '*_train.csv')))
test_files = sorted(glob.glob(os.path.join(feature_dir, '*_test.csv')))
# Sample submission, read with its first column as the index. Its columns name
# the prediction frames and its index fixes the row order of the final submission.
# NOTE(review): no visible cell creates this file - confirm it is present.
subm = pd.read_csv('/content/sample_submission.csv', index_col=0)

In [18]:
# Parameters for the two LGBMRegressor models (x and y coordinates).
# `n_estimators` is only an upper bound: the fit calls in the training loop
# pass `early_stopping_rounds`.
lgb_params = {'objective': 'root_mean_squared_error',
              'boosting_type': 'gbdt',
              'n_estimators': 50000,
              'learning_rate': 0.1,
              'num_leaves': 90,
              'colsample_bytree': 0.4,
              'subsample': 0.6,
              'subsample_freq': 2,
              'bagging_seed': SEED,
              'reg_alpha': 8,
              'reg_lambda': 2,
              'random_state': SEED,
              'n_jobs': -1
              }

# Parameters for the LGBMClassifier that predicts the floor. Identical to
# `lgb_params` except for the multiclass objective and `reg_alpha` (10 vs 8).
lgb_f_params = {'objective': 'multiclass',
                'boosting_type': 'gbdt',
                'n_estimators': 50000,
                'learning_rate': 0.1,
                'num_leaves': 90,
                'colsample_bytree': 0.4,
                'subsample': 0.6,
                'subsample_freq': 2,
                'bagging_seed': SEED,
                'reg_alpha': 10,
                'reg_lambda': 2,
                'random_state': SEED,
                'n_jobs': -1
                }

In [19]:
# Per-file training loop: for every training CSV, fit K-fold LightGBM models for
# x, y and floor, record out-of-fold scores, and collect test-set predictions.
score_df = pd.DataFrame()
# One whole-file out-of-fold score per processed file.
oof = list()
# One test-prediction DataFrame per processed file.
predictions = list()
for n_files, file in enumerate(train_files):
    data = pd.read_csv(file, index_col=0)
    # The matching test file is taken by position in the sorted list.
    test_data = pd.read_csv(test_files[n_files], index_col=0)

    oof_x, oof_y, oof_f = np.zeros(data.shape[0]), np.zeros(data.shape[0]), np.zeros(data.shape[0])
    # x/y test predictions are accumulated as a running mean over folds;
    # floor predictions are kept per fold so a majority vote can be taken later.
    preds_x, preds_y = 0, 0
    preds_f_arr = np.zeros((test_data.shape[0], N_SPLITS))

    kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
    # Features are every column except the last four; columns -4, -3 and -2 are
    # used as the x, y and floor targets. The last column is not used here.
    for fold, (trn_idx, val_idx) in enumerate(kf.split(data.iloc[:, :-4])):
        X_train = data.iloc[trn_idx, :-4]
        y_trainx = data.iloc[trn_idx, -4]
        y_trainy = data.iloc[trn_idx, -3]
        y_trainf = data.iloc[trn_idx, -2]

        X_valid = data.iloc[val_idx, :-4]
        y_validx = data.iloc[val_idx, -4]
        y_validy = data.iloc[val_idx, -3]
        y_validf = data.iloc[val_idx, -2]
        
        # NOTE(review): the validation fold drives early stopping and is also the
        # fold that is scored below, so the reported scores may be optimistic.
        modelx = lgb.LGBMRegressor(**lgb_params)
        with timer("fit X"):
            modelx.fit(X_train, y_trainx,
                       eval_set=[(X_valid, y_validx)],
                       eval_metric='rmse',
                       verbose=False,
                       early_stopping_rounds=20
                       )

        modely = lgb.LGBMRegressor(**lgb_params)
        with timer("fit Y"):
            modely.fit(X_train, y_trainy,
                       eval_set=[(X_valid, y_validy)],
                       eval_metric='rmse',
                       verbose=False,
                       early_stopping_rounds=20
                       )
            
        modelf = lgb.LGBMClassifier(**lgb_f_params)
        with timer("fit F"):
            modelf.fit(X_train, y_trainf,
                       eval_set=[(X_valid, y_validf)],
                       eval_metric='multi_logloss',
                       verbose=False,
                       early_stopping_rounds=20
                       )
            
        # Out-of-fold predictions for the rows held out in this fold.
        oof_x[val_idx] = modelx.predict(X_valid)
        oof_y[val_idx] = modely.predict(X_valid)
        oof_f[val_idx] = modelf.predict(X_valid).astype(int)

        # Test features are every column except the last one.
        # NOTE(review): presumably the last column is `site_path_timestamp`,
        # which is used as the index further down - confirm.
        preds_x += modelx.predict(test_data.iloc[:, :-1]) / N_SPLITS
        preds_y += modely.predict(test_data.iloc[:, :-1]) / N_SPLITS
        preds_f_arr[:, fold] = modelf.predict(test_data.iloc[:, :-1]).astype(int)

        # Score this fold only and append it to the CSV log.
        score = comp_metric(oof_x[val_idx], oof_y[val_idx], oof_f[val_idx],
                            y_validx.to_numpy(), y_validy.to_numpy(), y_validf.to_numpy())
        print(f"fold {fold}: mean position error {score}")
        score_df = score_log(score_df, n_files, os.path.basename(file), data.shape, fold, SEED, score)

    # Whole-file out-of-fold score across all folds.
    print("*+"*40)
    print(f"file #{n_files}, shape={data.shape}, name={os.path.basename(file)}")
    score = comp_metric(oof_x, oof_y, oof_f,
                        data.iloc[:, -4].to_numpy(), data.iloc[:, -3].to_numpy(), data.iloc[:, -2].to_numpy())
    oof.append(score)
    print(f"mean position error {score}")
    print("*+"*40)
    # Fold number 999 marks the whole-file record in the log.
    score_df = score_log(score_df, n_files, os.path.basename(file), data.shape, 999, SEED, score)

    # Floor: most frequent prediction across folds for each test row.
    preds_f_mode = stats.mode(preds_f_arr, axis=1)
    preds_f = preds_f_mode[0].astype(int).reshape(-1)
    # Stack as (floor, x, y) rows, then transpose to one row per test sample;
    # the column names are taken from the sample submission.
    test_preds = pd.DataFrame(np.stack((preds_f, preds_x, preds_y))).T
    test_preds.columns = subm.columns
    test_preds.index = test_data["site_path_timestamp"]
    # Stacking with the float x/y arrays made the floor column float; restore int.
    test_preds["floor"] = test_preds["floor"].astype(int)
    predictions.append(test_preds)

[1.6GB(+0.2GB): 40.397sec] fit X
[1.6GB(+0.0GB): 27.507sec] fit Y
[1.6GB(+0.0GB): 22.681sec] fit F


fold 0: mean position error 3.796929387094887


[1.8GB(+0.2GB): 30.828sec] fit X
[1.8GB(+0.0GB): 36.683sec] fit Y
[1.8GB(+0.0GB): 14.511sec] fit F


fold 1: mean position error 3.7084058104886153


[2.0GB(+0.2GB): 35.690sec] fit X
[2.0GB(+0.0GB): 24.582sec] fit Y
[2.0GB(+0.0GB): 20.160sec] fit F


fold 2: mean position error 3.67866649657935


[2.0GB(+0.0GB): 28.684sec] fit X
[2.0GB(+0.0GB): 27.611sec] fit Y
[2.0GB(+0.0GB): 18.039sec] fit F


fold 3: mean position error 3.8027109909074124


[2.0GB(+0.0GB): 45.569sec] fit X
[2.0GB(+0.0GB): 36.269sec] fit Y
[2.0GB(+0.0GB): 19.266sec] fit F


fold 4: mean position error 3.654773421352437
*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+
file #0, shape=(9296, 3401), name=5a0546857ecc773753327266_train.csv
mean position error 3.72830460426279
*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+


[2.0GB(+0.0GB): 23.834sec] fit X
[2.0GB(+0.0GB): 25.264sec] fit Y
[2.0GB(+0.0GB): 26.089sec] fit F


fold 0: mean position error 4.695191108097891


[2.0GB(+0.0GB): 38.897sec] fit X
[2.0GB(+0.0GB): 22.741sec] fit Y
[2.0GB(+0.0GB): 25.961sec] fit F


fold 1: mean position error 4.8243963535899335


[2.0GB(+0.0GB): 34.775sec] fit X
[2.0GB(+0.0GB): 30.224sec] fit Y
[2.0GB(+0.0GB): 27.963sec] fit F


fold 2: mean position error 4.727554870661329


[2.0GB(+0.0GB): 30.155sec] fit X
[2.0GB(+0.0GB): 24.901sec] fit Y
[2.0GB(+0.0GB): 25.301sec] fit F


fold 3: mean position error 4.677396679419445


[2.0GB(-0.0GB): 32.538sec] fit X
[2.0GB(+0.0GB): 34.568sec] fit Y
[2.0GB(+0.0GB): 23.582sec] fit F


fold 4: mean position error 4.585614485994146
*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+
file #1, shape=(9737, 3067), name=5c3c44b80379370013e0fd2b_train.csv
mean position error 4.702042564199009
*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+


[8.8GB(+1.9GB): 187.515sec] fit X
[8.8GB(-0.0GB): 239.144sec] fit Y
[7.6GB(-1.2GB): 244.711sec] fit F


fold 0: mean position error 3.834063898827922


[8.5GB(+0.7GB): 246.755sec] fit X
[7.0GB(-1.5GB): 226.754sec] fit Y
[8.4GB(+1.4GB): 211.212sec] fit F


fold 1: mean position error 3.853982181633548


[8.7GB(+0.3GB): 204.286sec] fit X
[7.0GB(-1.6GB): 226.243sec] fit Y
[8.5GB(+1.5GB): 252.170sec] fit F


fold 2: mean position error 3.7852958502202982


[9.5GB(+1.0GB): 261.165sec] fit X
[8.4GB(-1.2GB): 274.442sec] fit Y
[9.4GB(+1.0GB): 185.773sec] fit F


fold 3: mean position error 3.8029187845368577


[7.7GB(-1.7GB): 228.543sec] fit X
[9.0GB(+1.3GB): 217.785sec] fit Y
[9.0GB(+0.0GB): 186.698sec] fit F


fold 4: mean position error 3.847196123669304
*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+
file #2, shape=(23666, 7033), name=5d27075f03f801723c2e360f_train.csv
mean position error 3.824691763811181
*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+


[9.6GB(+0.1GB): 17.474sec] fit X
[9.6GB(+0.0GB): 16.295sec] fit Y
[9.6GB(+0.0GB): 11.029sec] fit F


fold 0: mean position error 2.531817743814894


[9.6GB(+0.0GB): 12.285sec] fit X
[9.6GB(+0.0GB): 11.857sec] fit Y
[9.6GB(+0.0GB): 11.293sec] fit F


fold 1: mean position error 2.540947869173537


[9.6GB(-0.0GB): 13.736sec] fit X
[9.6GB(+0.0GB): 12.912sec] fit Y
[9.6GB(-0.0GB): 9.782sec] fit F


fold 2: mean position error 2.604383139893346


[7.1GB(-2.5GB): 21.140sec] fit X
[7.5GB(+0.4GB): 16.010sec] fit Y
[7.5GB(-0.0GB): 11.100sec] fit F


fold 3: mean position error 2.639681274396755


[7.5GB(+0.0GB): 16.336sec] fit X
[7.5GB(+0.0GB): 18.330sec] fit Y
[7.5GB(+0.0GB): 12.209sec] fit F


fold 4: mean position error 2.509531665288294
*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+
file #3, shape=(9100, 4968), name=5d27096c03f801723c31e5e0_train.csv
mean position error 2.5652723385133647
*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+


[7.6GB(+0.0GB): 23.420sec] fit X
[7.6GB(+0.0GB): 34.468sec] fit Y
[7.6GB(+0.0GB): 21.087sec] fit F


fold 0: mean position error 4.969307623910416


[7.6GB(+0.0GB): 25.631sec] fit X
[7.6GB(+0.0GB): 31.145sec] fit Y
[7.6GB(+0.0GB): 24.947sec] fit F


fold 1: mean position error 4.640284593423071


[7.6GB(+0.0GB): 36.264sec] fit X
[7.6GB(+0.0GB): 40.129sec] fit Y
[7.6GB(+0.0GB): 25.044sec] fit F


fold 2: mean position error 4.668633751466755


[7.6GB(+0.0GB): 29.491sec] fit X
[7.6GB(+0.0GB): 31.434sec] fit Y
[7.6GB(+0.0GB): 24.337sec] fit F


fold 3: mean position error 4.613417005755175


[7.6GB(+0.0GB): 26.037sec] fit X
[7.6GB(-0.0GB): 29.917sec] fit Y
[7.6GB(+0.0GB): 24.942sec] fit F


fold 4: mean position error 4.602609205171416
*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+
file #4, shape=(10507, 2494), name=5d27097f03f801723c320d97_train.csv
mean position error 4.698870602629048
*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+


[7.6GB(+0.0GB): 2.755sec] fit X
[7.6GB(+0.0GB): 2.843sec] fit Y
[7.6GB(+0.0GB): 2.457sec] fit F


fold 0: mean position error 2.7581024707111115


[7.6GB(+0.0GB): 4.275sec] fit X
[7.6GB(+0.0GB): 3.997sec] fit Y
[7.6GB(+0.0GB): 2.024sec] fit F


fold 1: mean position error 2.759960706415104


[7.6GB(+0.0GB): 3.164sec] fit X
[7.6GB(+0.0GB): 2.241sec] fit Y
[7.6GB(+0.0GB): 2.678sec] fit F


fold 2: mean position error 2.7533813404972043


[7.6GB(+0.0GB): 3.214sec] fit X
[7.6GB(+0.0GB): 3.749sec] fit Y
[7.6GB(+0.0GB): 2.530sec] fit F


fold 3: mean position error 2.860129353408569


[7.6GB(+0.0GB): 3.709sec] fit X
[7.6GB(+0.0GB): 3.304sec] fit Y
[7.6GB(+0.0GB): 3.076sec] fit F


fold 4: mean position error 2.781123412634938
*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+
file #5, shape=(4251, 929), name=5d27099f03f801723c32511d_train.csv
mean position error 2.7825337082069157
*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+


[7.6GB(+0.0GB): 4.421sec] fit X
[7.6GB(+0.0GB): 3.776sec] fit Y
[7.6GB(+0.0GB): 2.739sec] fit F


fold 0: mean position error 2.8318592357111534


[7.6GB(+0.0GB): 3.585sec] fit X
[7.6GB(+0.0GB): 3.261sec] fit Y
[7.6GB(+0.0GB): 2.001sec] fit F


fold 1: mean position error 3.0282334101119575


[7.6GB(+0.0GB): 4.156sec] fit X
[7.6GB(+0.0GB): 4.456sec] fit Y
[7.6GB(+0.0GB): 3.279sec] fit F


fold 2: mean position error 2.8724970035868216


[5.6GB(-2.1GB): 10.959sec] fit X
[5.6GB(+0.1GB): 4.820sec] fit Y
[5.6GB(+0.0GB): 2.332sec] fit F


fold 3: mean position error 2.8488464056368312


[5.7GB(+0.0GB): 4.267sec] fit X
[5.7GB(+0.0GB): 4.298sec] fit Y
[5.7GB(+0.0GB): 2.803sec] fit F


fold 4: mean position error 2.8776560413031493
*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+
file #6, shape=(3940, 1256), name=5d2709a003f801723c3251bf_train.csv
mean position error 2.8918184192699825
*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+


[6.2GB(+0.1GB): 28.826sec] fit X
[6.2GB(+0.0GB): 39.478sec] fit Y
[6.2GB(+0.0GB): 31.855sec] fit F


fold 0: mean position error 3.0707521663033632


[6.3GB(+0.0GB): 52.658sec] fit X
[6.3GB(+0.0GB): 33.197sec] fit Y
[6.3GB(+0.0GB): 37.988sec] fit F


fold 1: mean position error 3.090834983087904


[6.3GB(+0.0GB): 45.313sec] fit X
[6.3GB(+0.0GB): 34.623sec] fit Y
[6.3GB(+0.0GB): 30.479sec] fit F


fold 2: mean position error 3.134694559642123


[6.3GB(+0.0GB): 39.717sec] fit X
[6.3GB(+0.0GB): 35.746sec] fit Y
[6.3GB(+0.0GB): 37.219sec] fit F


fold 3: mean position error 3.0976086478885505


[6.3GB(+0.0GB): 31.198sec] fit X
[6.3GB(+0.0GB): 27.530sec] fit Y
[6.3GB(+0.0GB): 6.982sec] fit F


KeyboardInterrupt: ignored

In [20]:
# Root of the raw data; correct_path reads `{INPUT_PATH}/test/<path>.txt` from it.
# NOTE(review): no visible cell downloads data to this location - confirm it
# exists before running the post-processing cells.
INPUT_PATH = '/content/indoor-location-navigation'


In [21]:
def compute_rel_positions(acce_datas, ahrs_datas):
    """Chain the ``compute_f`` helpers into a table of relative positions.

    Steps are detected from ``acce_datas``, headings are derived from
    ``ahrs_datas``, and both are combined by ``compute_f.compute_rel_positions``.

    Args:
        acce_datas: Accelerometer records passed to ``compute_f.compute_steps``.
        ahrs_datas: Orientation records passed to ``compute_f.compute_headings``.

    Returns:
        Whatever ``compute_f.compute_rel_positions`` returns. ``correct_path``
        treats it as a 2-D array with time in column 0 and per-step x/y
        displacements in columns 1 and 2.
    """
    # The middle value (step indices) is not needed here.
    step_times, _step_indices, acce_extrema = compute_f.compute_steps(acce_datas)
    heading_series = compute_f.compute_headings(ahrs_datas)
    strides = compute_f.compute_stride_length(acce_extrema)
    per_step_heading = compute_f.compute_step_heading(step_times, heading_series)
    return compute_f.compute_rel_positions(strides, per_step_heading)

In [22]:
def correct_path(args):
    """Refine one path's predicted waypoints using its relative-motion track.

    The corrected positions solve the linear system ``Q x = c`` with
    ``Q = A + D^T B D`` and ``c = A xy_hat + D^T B delta_xy_hat``. These are the
    normal equations of a weighted least-squares problem that keeps ``x`` close
    to the model predictions (weights ``A``) and keeps consecutive differences
    of ``x`` close to the displacements from ``compute_rel_positions``
    (weights ``B``).

    Args:
        args: A ``(path, path_df)`` pair as yielded by ``groupby('path')``.
            ``path_df`` must contain the columns ``timestamp``, ``x``, ``y``,
            ``floor`` and ``site_path_timestamp``.

    Returns:
        A DataFrame with ``site_path_timestamp``, ``floor`` (both copied from
        ``path_df``) and the corrected ``x`` / ``y``.

    Raises:
        Whatever ``read_data_file`` raises when
        ``{INPUT_PATH}/test/{path}.txt`` cannot be read.
    """
    # Imported explicitly: the notebook only imports `scipy.sparse`, and relying
    # on `scipy.sparse.linalg` being loaded as a side effect is fragile.
    from scipy.sparse.linalg import spsolve

    path, path_df = args

    T_ref = path_df['timestamp'].values
    xy_hat = path_df[['x', 'y']].values

    example = read_data_file(f'{INPUT_PATH}/test/{path}.txt')
    rel_positions = compute_rel_positions(example.acce, example.ahrs)
    # Pad the track with a zero-displacement row at time 0, and with another at
    # the last waypoint time when the track ends earlier, so that every
    # waypoint time falls inside the interpolation range.
    if T_ref[-1] > rel_positions[-1, 0]:
        rel_positions = [np.array([[0, 0, 0]]), rel_positions, np.array([[T_ref[-1], 0, 0]])]
    else:
        rel_positions = [np.array([[0, 0, 0]]), rel_positions]
    rel_positions = np.concatenate(rel_positions)

    # Column 0 is time; columns 1:3 are per-step displacements. Their cumulative
    # sum is interpolated at the waypoint times and differenced to obtain the
    # displacement between consecutive waypoints.
    T_rel = rel_positions[:, 0]
    cumulative_xy = np.cumsum(rel_positions[:, 1:3], axis=0)
    delta_xy_hat = np.diff(scipy.interpolate.interp1d(T_rel, cumulative_xy, axis=0)(T_ref), axis=0)

    N = xy_hat.shape[0]
    delta_t = np.diff(T_ref)
    # Weights of the two terms, written as inverse squares.
    # NOTE(review): 8.1 and 0.3 look like assumed error scales, and the 1e-3
    # factor suggests timestamps in milliseconds - confirm.
    alpha = (8.1)**(-2) * np.ones(N)
    beta = (0.3 + 0.3 * 1e-3 * delta_t)**(-2)
    A = scipy.sparse.spdiags(alpha, [0], N, N)
    B = scipy.sparse.spdiags(beta, [0], N-1, N-1)
    # (N-1) x N first-difference operator: row i is x[i+1] - x[i].
    D = scipy.sparse.spdiags(np.stack([-np.ones(N), np.ones(N)]), [0, 1], N-1, N)

    Q = A + (D.T @ B @ D)
    c = (A @ xy_hat) + (D.T @ (B @ delta_xy_hat))
    xy_star = spsolve(Q, c)

    return pd.DataFrame({
        'site_path_timestamp' : path_df['site_path_timestamp'],
        'floor' : path_df['floor'],
        'x' : xy_star[:, 0],
        'y' : xy_star[:, 1],
    })

In [23]:
# Combine the per-file test predictions, put the rows in the order of the sample
# submission's index, and write the uncorrected submission.
# NOTE(review): reindex yields all-NaN rows for ids missing from `predictions`
# (e.g. if the training loop was interrupted) - confirm coverage before submitting.
all_preds = pd.concat(predictions)
all_preds = all_preds.reindex(subm.index)
all_preds.to_csv('submission.csv')

In [24]:
# Post-process the raw submission: split the row ids into site / path /
# timestamp, run correct_path on every path in parallel, and write the result.
sub = pd.read_csv('submission.csv')
# Vectorised split of "<site>_<path>_<timestamp>" into three columns; this
# avoids building one pandas Series per row.
tmp = sub['site_path_timestamp'].str.split('_', expand=True)
sub['site'] = tmp[0]
sub['path'] = tmp[1]
sub['timestamp'] = tmp[2].astype(float)

processes = multiprocessing.cpu_count()
path_groups = sub.groupby('path')
with multiprocessing.Pool(processes=processes) as pool:
    # Each work item is a (path, rows-of-that-path) pair. Results arrive in
    # completion order, so they are re-sorted after concatenation.
    dfs = pool.imap_unordered(correct_path, path_groups)
    # `total` lets tqdm show real progress instead of a bare counter.
    dfs = tqdm(dfs, total=path_groups.ngroups)
    dfs = list(dfs)
sub = pd.concat(dfs).sort_values('site_path_timestamp')
sub.to_csv('submission_lgbm_02.csv', index=False)

0it [00:00, ?it/s]

FileNotFoundError: ignored