<a href="https://colab.research.google.com/github/mavillan/indoor-location-navigation/blob/main/train/iln_dnn_ds1_50f_5lt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
from google.colab import files
# Mount Google Drive under /content/drive so that the credentials and project
# files stored there are reachable; force_remount=True asks Colab for a fresh
# mount even when the drive is already mounted.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
!pip install --upgrade kaggle > /dev/null 2>&1
!pip install pytorch_axe > /dev/null 2>&1
!pip install adabelief-pytorch > /dev/null 2>&1

In [3]:
# copy kaggle.json from Google Drive into ~/.kaggle/, the folder where the
# kaggle API expects to find it, and make it readable/writable by the owner only
!mkdir -p ~/.kaggle/ && cp /content/drive/MyDrive/kaggle/kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json

***

In [4]:
import gc
import os
import numpy as np
import pandas as pd
from glob import glob
from sklearn import preprocessing
import yaml

import torch
from torch import nn
from torch.utils.data import DataLoader,TensorDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
from adabelief_pytorch import AdaBelief
import pytorch_axe as pax
import time

# device in which the model will be trained: the GPU when CUDA is available,
# otherwise the CPU. The bare `device` expression shows the choice as cell output.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [5]:
def set_seed(seed):
    """Seed every random number generator this notebook may draw from.

    Seeds Python's `random`, NumPy's global generator and PyTorch (CPU and all
    CUDA devices), and configures cuDNN for deterministic behaviour.

    Parameters
    ----------
    seed : int
        Value used to seed all generators.
    """
    # local import keeps the function self-contained
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # deterministic kernels only; benchmark mode is switched off as well because
    # its auto-tuner may pick different algorithms from one run to the next
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def mean_position_error(x_label, y_label, x_pred, y_pred):
    """Return the mean Euclidean distance between labelled and predicted (x, y) points."""
    dx = x_label - x_pred
    dy = y_label - y_pred
    distances = np.sqrt(dx**2 + dy**2)
    return np.mean(distances)

@torch.jit.script
def mpe_loss(preds, targets):
    """Mean Euclidean distance between columns 0 and 1 of `preds` and `targets`."""
    dx = preds[:,0] - targets[:,0]
    dy = preds[:,1] - targets[:,1]
    return torch.mean(torch.sqrt(dx**2 + dy**2))

In [6]:
def train_encoder(train_frame, test_frame, bssid_cols):
    """Fit a LabelEncoder on every bssid value found in the train or test frame.

    The encoder is fitted on the union of the unique values of `bssid_cols`
    in both frames, so values that only occur in test can be encoded too.
    """
    seen_bssids = set()
    for frame in (train_frame, test_frame):
        seen_bssids |= set(np.unique(frame[bssid_cols].values))

    encoder = preprocessing.LabelEncoder()
    encoder.fit(np.array(list(seen_bssids)))
    return encoder

def encode_bssids(dataframe, bssid_cols, encoder):
    """Replace each column of `bssid_cols` with its encoded values.

    The columns are overwritten on `dataframe` itself, which is also returned.
    """
    for column_name in bssid_cols:
        raw_values = dataframe[column_name].values
        dataframe[column_name] = encoder.transform(raw_values)
    return dataframe

In [7]:
# stratified group kfold split - stratified by floor and grouped by path

def cv_split(dataframe, n_splits, random_state=2):
    """Yield (train_idx, valid_idx) pairs of a stratified group k-fold split.

    For every floor, the unique paths recorded on it are shuffled and dealt
    into `n_splits` chunks, so all rows of a path land in the same fold and
    each fold receives paths from every floor.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns "floor" and "path".
    n_splits : int
        Number of folds.
    random_state : int, default 2
        Seed of the shuffling.

    Yields
    ------
    (train_idx, valid_idx) : tuple of numpy arrays
        Index labels of `dataframe` for the training and validation rows of
        each fold.
    """
    # a private generator produces the same shuffles as seeding the global
    # NumPy generator with `random_state`, without resetting the global state
    # other code relies on
    rng = np.random.RandomState(random_state)

    fold_indexes = [list() for _ in range(n_splits)]

    for floor in np.sort(dataframe["floor"].unique()):
        floor_rows = dataframe[dataframe["floor"]==floor]

        # split of paths (np.array makes a writable copy that is shuffled in place)
        unique_paths = np.array(floor_rows["path"].unique())
        rng.shuffle(unique_paths)
        paths_per_fold = np.array_split(unique_paths, n_splits)

        for i,paths in enumerate(paths_per_fold):
            in_fold = floor_rows["path"].isin(paths).to_numpy()
            fold_indexes[i].append(floor_rows.index[in_fold].values)

    fold_indexes = [np.concatenate(indexes) for indexes in fold_indexes]

    for i in range(n_splits):
        train_idx = np.concatenate([idx for j,idx in enumerate(fold_indexes) if j!=i])
        valid_idx = fold_indexes[i]
        yield train_idx,valid_idx

In [8]:
# root folder of the project inside the mounted Google Drive
project_path = "/content/drive/MyDrive/kaggle/indoor-location-navigation"

***

In [9]:
# params to configure the dataset
# MAX_WIFI_FEATS: number of bssid/rssi/deltaseen columns per row
# MAX_LASTSEEN: part of the name of the dataset to download
#   NOTE(review): presumably a last-seen threshold applied when the dataset was built - confirm
# MAX_TIME_DIFF: upper bound on `wifi_time_diff` for a row to be kept
#   NOTE(review): units are not visible here (looks like milliseconds) - confirm
MAX_WIFI_FEATS = 50
MAX_LASTSEEN = 5
MAX_TIME_DIFF = 10000

# params for reproducibility
# N_SPLITS: number of cross-validation folds; SEED: passed to set_seed;
# CV_SEED: seed of the cross-validation split
N_SPLITS = 5
SEED = 2
CV_SEED = 2
set_seed(SEED)

# params of model
# BATCH_SIZE: batch size of the training dataloader
# EMBEDDING_DIM: size of the embedding vector learned for each bssid
BATCH_SIZE = 2048
EMBEDDING_DIM = 12

In [10]:
# download (--force) and unzip the dataset whose name is built from
# MAX_WIFI_FEATS and MAX_LASTSEEN, e.g. mavillan/iln-ds1-50f-5lt
!kaggle datasets download -d $(echo "mavillan/iln-ds1-{MAX_WIFI_FEATS}f-{MAX_LASTSEEN}lt") --force --unzip

Downloading iln-ds1-50f-5lt.zip to /content
 97% 309M/317M [00:03<00:00, 105MB/s]
100% 317M/317M [00:03<00:00, 86.6MB/s]


***

In [11]:
# parquet files of the downloaded dataset, sorted by name
# NOTE(review): assumes ./train and ./test hold identically named files so that
# the two sorted lists line up position by position - confirm
train_files = sorted(glob("./train/*.parquet"))
test_files = sorted(glob("./test/*.parquet"))
# site id = file name up to its first dot
sites = [path.rsplit("/", 1)[-1].partition(".")[0] for path in train_files]

In [12]:
# floor predictions taken from an existing submission file; its x/y columns
# are discarded
floor_predictions = (
    pd.read_csv(f"{project_path}/input/simple-99-accurate-floor-model/submission.csv")
    .drop(columns=["x","y"])
)

In [13]:
# dictionary used to map the floor codes to the values used in the submission file.
# basements are negative; both the "F<k>" and the "<k>F" spelling of floor k map to k-1.
floor_map = {"B2": -2, "B1": -1}
floor_map.update({f"F{level}": level - 1 for level in range(1, 10)})
floor_map.update({f"{level}F": level - 1 for level in range(1, 10)})

In [14]:
# features & target definition
# one bssid / rssi / deltaseen column per wifi slot
bssid_cols = ["bssid_%d" % slot for slot in range(MAX_WIFI_FEATS)]
rssi_cols = ["rssi_%d" % slot for slot in range(MAX_WIFI_FEATS)]
delta_cols = ["deltaseen_%d" % slot for slot in range(MAX_WIFI_FEATS)]
wifi_cols = [*bssid_cols, *rssi_cols, *delta_cols]

# non-wifi inputs: floor, the two time deltas and the x/y/z readings of each sensor
# (the acce/gyro/magn "*_uncali_*" columns are currently not used)
nowifi_cols = ["floor", "imu_time_delta", "wifi_time_delta"]
nowifi_cols += [
    f"{sensor}_{axis}"
    for sensor in ("acce", "gyro", "magn", "ahrs")
    for axis in ("x", "y", "z")
]

target_cols = ["x", "y"]

***
## model definition

In [15]:
class DNN(nn.Module):
    """Feed-forward regressor over embedded wifi features plus extra dense features.

    Every wifi slot i is processed by its own small branch: the embedding of
    the bssid in slot i is concatenated with the rssi and delta values of that
    slot and passed through batch-norm -> dropout -> weight-normalised linear
    -> ReLU. The branch outputs are concatenated with the remaining features
    and fed to a stack of batch-norm -> dropout -> linear (-> ReLU) layers.

    Parameters
    ----------
    input_dims : sequence of int
        Only positions 0 (number of wifi slots) and 3 (number of remaining
        features) are read.
    output_dim : int
        Number of outputs of the last linear layer.
    nn_depth : int
        The trunk has `nn_depth - 1` hidden layers after the input layer.
    nn_width : int
        Width of the trunk layers.
    dropout : float
        Dropout probability of the hidden layers; the branches and the
        input/output layers use `dropout / 2`.
    momentum : float
        Momentum of every batch-norm layer.
    n_embeddings : int
        Number of rows of the bssid embedding table.
    embedding_dim : int
        Size of each bssid embedding vector.
    """

    def __init__(self, input_dims, output_dim, nn_depth, nn_width, dropout, momentum,
                 n_embeddings, embedding_dim):
        super().__init__()

        self.input_dims = input_dims
        self.n_wifi_feats = input_dims[0]
        # max_norm expects a float; spelled out as 1.0, which is the value the
        # boolean True previously passed here evaluates to
        self.embedder = nn.Embedding(n_embeddings, embedding_dim, max_norm=1.0)

        # one independent branch per wifi slot; the "+2" accounts for the rssi
        # and delta scalars appended to the embedding
        self.bnorms_parallel = nn.ModuleList(
            [nn.BatchNorm1d(embedding_dim+2, momentum=momentum)
            for i in range(self.n_wifi_feats)]
            )
        self.dropouts_parallel = nn.ModuleList(
            [nn.Dropout(dropout/2)
            for i in range(self.n_wifi_feats)]
            )
        self.linears_parallel = nn.ModuleList(
            [nn.utils.weight_norm(nn.Linear(embedding_dim+2, embedding_dim, bias=False))
            for i in range(self.n_wifi_feats)]
            )

        # trunk input: all branch outputs plus the remaining features
        input_dim = self.n_wifi_feats*embedding_dim + input_dims[3]
        self.bn_in = nn.BatchNorm1d(input_dim, momentum=momentum)
        self.dp_in = nn.Dropout(dropout/2)
        self.ln_in = nn.utils.weight_norm(nn.Linear(input_dim, nn_width, bias=False))

        self.bnorms = nn.ModuleList([
            nn.BatchNorm1d(nn_width, momentum=momentum)
            for i in range(nn_depth-1)])
        self.dropouts = nn.ModuleList([
            nn.Dropout(dropout)
            for i in range(nn_depth-1)])
        self.linears = nn.ModuleList([
            nn.utils.weight_norm(nn.Linear(nn_width, nn_width, bias=False))
            for i in range(nn_depth-1)])

        self.bn_out = nn.BatchNorm1d(nn_width, momentum=momentum)
        self.dp_out = nn.Dropout(dropout/2)
        self.ln_out = nn.utils.weight_norm(nn.Linear(nn_width, output_dim, bias=False))

    def forward(self, x_bssid, x_rssi, x_delta, x_rest):
        """Compute the model outputs.

        `x_bssid` holds integer bssid codes, `x_rssi` and `x_delta` hold one
        value per wifi slot (all three indexed as [batch, slot]); `x_rest`
        holds the remaining features, one row per sample.
        """
        # processing of bssid & rssi features
        x_encoded = self.embedder(x_bssid)
        all_outs = list()
        for i in range(self.n_wifi_feats):
            x_tmp = torch.cat([x_encoded[:,i,:],  x_rssi[:,i].reshape(-1,1), x_delta[:,i].reshape(-1,1)], dim=1)
            x_tmp = self.bnorms_parallel[i](x_tmp)
            x_tmp = self.dropouts_parallel[i](x_tmp)
            x_tmp = nn.functional.relu(self.linears_parallel[i](x_tmp))
            all_outs.append(x_tmp)

        all_outs.append(x_rest)
        x = torch.cat(all_outs, dim=1)

        x = self.bn_in(x)
        x = self.dp_in(x)
        x = nn.functional.relu(self.ln_in(x))

        for bn_layer,dp_layer,ln_layer in zip(self.bnorms,self.dropouts,self.linears):
            x = bn_layer(x)
            x = dp_layer(x)
            x = ln_layer(x)
            x = nn.functional.relu(x)

        # no activation after the output layer
        x = self.bn_out(x)
        x = self.dp_out(x)
        x = self.ln_out(x)
        return x

    def _batch_loss(self, batch):
        """Return the mpe_loss of the predictions for a (bssid, rssi, delta, rest, target) batch."""
        x_bssid, x_rssi, x_delta, x_rest, y = batch
        y_hat = self.forward(x_bssid, x_rssi, x_delta, x_rest)
        return mpe_loss(y_hat, y)

    def training_step(self, batch):
        """Loss of one training batch."""
        return self._batch_loss(batch)

    def validation_step(self, batch):
        """Loss of one validation batch (same computation as in training)."""
        return self._batch_loss(batch)

    def prediction_step(self, batch):
        """Predictions for one batch; the last element of the batch is ignored."""
        x_bssid, x_rssi, x_delta, x_rest, _ = batch
        pred = self.forward(x_bssid, x_rssi, x_delta, x_rest)
        return pred

    def configure_optimizers(self):
        """Return an Adam optimizer and a ReduceLROnPlateau scheduler that halves the learning rate."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-2)
        scheduler = ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=5, min_lr=1e-5)
        return optimizer,scheduler

***
## model training by site

In [16]:
%%time

# For every site: train one model per cross-validation fold, keep the
# out-of-fold (oof) predictions on train and average the predictions of the
# fold models on test.
oofs_by_site = list()
preds_by_site = list()
preds_by_site_raw = list()
cv_errors = dict()


def _frame_to_dataset(frame):
    """Pack the bssid, rssi, delta, non-wifi and target columns of `frame` into a TensorDataset."""
    return TensorDataset(
        torch.tensor(frame[bssid_cols].values, dtype=torch.long),
        torch.tensor(frame[rssi_cols].values, dtype=torch.float),
        torch.tensor(frame[delta_cols].values, dtype=torch.float),
        torch.tensor(frame[nowifi_cols].values, dtype=torch.float),
        torch.tensor(frame[target_cols].values, dtype=torch.float)
    )


n_sites = len(sites)

for site_idx,(train_file,test_file,site) in enumerate(zip(train_files,test_files,sites)):
    print(f" training models for site {site_idx+1}/{n_sites}: {site} ".center(70, "#"))
    
    train = pd.read_parquet(train_file)
    test = pd.read_parquet(test_file)
    
    train["site"] = site
    test["site"] = site

    # maps the floor codes on train & adds floor predictions on test
    train["floor"] = train.floor.map(floor_map)
    test = pd.merge(test, floor_predictions, how="left", on="site_path_timestamp")

    # filter wifi records that are near (in time) to waypoints
    train = train.query("wifi_time_diff <= @MAX_TIME_DIFF | wifi_idx==0").reset_index(drop=True)
    test = test.query("wifi_time_diff <= @MAX_TIME_DIFF | wifi_idx==0").reset_index(drop=True)
    
    # bssids that appear on test but never on train
    train_bssids = set(np.unique(train[bssid_cols].values))
    test_bssids = set(np.unique(test[bssid_cols].values))
    print("bssids diff:", test_bssids - train_bssids)
        
    # integer encoder for string features
    encoder = train_encoder(train, test, bssid_cols)
    n_bssids = len(encoder.classes_)
    print("number of bssids:", n_bssids)
    
    # cv split here
    mskf = cv_split(train, n_splits=N_SPLITS, random_state=CV_SEED)
    all_models = list()
    
    oof_cols = ["site","path","timestamp","wifi_time_delta","wifi_time_diff","seq_nbr","x","y"]
    oof_preds = train.loc[:,oof_cols].copy()
    oof_preds["oof_idx"] = -1
    oof_preds["oof_x"] = np.nan
    oof_preds["oof_y"] = np.nan
    
    # the same for every fold, so it is computed once per site
    input_dims = (len(bssid_cols),len(rssi_cols),len(delta_cols),len(nowifi_cols))
    
    # a dedicated name for the fold counter keeps the site counter intact
    for fold_idx,(train_idx,valid_idx) in enumerate(mskf):
        print(f"training for fold {fold_idx+1}/{N_SPLITS}".center(70, "-"))
        
        _train = train.loc[train_idx, wifi_cols+nowifi_cols+target_cols]
        _valid = train.loc[valid_idx, wifi_cols+nowifi_cols+target_cols]
        
        _train = encode_bssids(_train, bssid_cols, encoder)
        _valid = encode_bssids(_valid, bssid_cols, encoder)
        
        train_dset = _frame_to_dataset(_train)
        valid_dset = _frame_to_dataset(_valid)
        
        model = DNN(
            input_dims=input_dims,
            output_dim=2, 
            nn_depth=3, 
            nn_width=256, 
            dropout=0.2, 
            momentum=0.1,
            n_embeddings=n_bssids,
            embedding_dim=EMBEDDING_DIM,
        )
        
        # the whole validation fold is served as a single batch
        train_dataloader = DataLoader(train_dset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
        valid_dataloader = DataLoader(valid_dset, batch_size=len(valid_dset), shuffle=False, num_workers=2, pin_memory=True)
        
        # train, then restore the best state recorded by the monitor
        model,monitor = pax.iterative_train(model, train_dataloader, valid_dataloader, max_epochs=200, patience=20)
        model.load_state_dict(monitor.best_model_state)
        
        # saving oof predictions
        preds = pax.iterative_predict(model, valid_dataloader).detach().cpu().numpy()
        oof_preds.loc[valid_idx, "oof_idx"] = fold_idx
        oof_preds.loc[valid_idx, "oof_x"] = preds[:,0]
        oof_preds.loc[valid_idx, "oof_y"] = preds[:,1]
        
        all_models.append(model)
                
        del train_dataloader,valid_dataloader
        del train_dset,valid_dset
        gc.collect()
    
    # calculating the oof errors
    # mpe1: error of the raw row-level predictions
    # mpe2: error after averaging the predictions that share (path, seq_nbr)
    oof_preds["oof_x_agg"] = oof_preds.groupby(["path","seq_nbr"])["oof_x"].transform("mean")
    oof_preds["oof_y_agg"] = oof_preds.groupby(["path","seq_nbr"])["oof_y"].transform("mean")  
    oof_preds_agg = oof_preds.loc[:,["x","y","oof_x_agg","oof_y_agg"]].drop_duplicates()
    mpe1 = mean_position_error(oof_preds.x, oof_preds.y, oof_preds.oof_x, oof_preds.oof_y)
    mpe2 = mean_position_error(oof_preds_agg.x, oof_preds_agg.y, oof_preds_agg.oof_x_agg, oof_preds_agg.oof_y_agg)
    cv_errors[site] = {"mpe1":mpe1, "mpe2":mpe2,}
    print(f"site: {site} - mpe1: {mpe1} - mpe2: {mpe2} \n")
         
    # generating the predictions for test dataset
    test = encode_bssids(test, bssid_cols, encoder)
    test_bssid = torch.tensor(test[bssid_cols].values, dtype=torch.long, device=device)
    test_rssi = torch.tensor(test[rssi_cols].values, dtype=torch.float, device=device)
    test_delta = torch.tensor(test[delta_cols].values, dtype=torch.float, device=device)
    test_nowifi = torch.tensor(test[nowifi_cols].values, dtype=torch.float, device=device)

    # NOTE(review): assumes pax.iterative_train leaves each model on `device` - confirm
    all_preds = list()
    for model in all_models:
        model.eval()
        with torch.set_grad_enabled(False):
            preds = model(test_bssid, test_rssi, test_delta, test_nowifi).detach().cpu().numpy()
            all_preds.append(preds)
        
    # average over the fold models, then over the rows that share (path, seq_nbr)
    preds_avg = np.mean(all_preds, axis=0)
    test["x_pred"] = preds_avg[:,0]
    test["y_pred"] = preds_avg[:,1]
    test["x"] = test.groupby(["path","seq_nbr"])["x_pred"].transform("mean")
    test["y"] = test.groupby(["path","seq_nbr"])["y_pred"].transform("mean")
    
    oofs_by_site.append(oof_preds)
    preds_by_site.append(test.loc[:, ["site_path_timestamp","floor","x","y"]].drop_duplicates())
    preds_by_site_raw.append(test.loc[:,["site_path_timestamp","site","path","timestamp","wifi_time_delta","wifi_time_diff","seq_nbr","x","y","x_pred","y_pred"]])


###### training models for site 1/24: 5a0546857ecc773753327266 #######
bssids diff: set()
number of bssids: 2799
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5a0546857ecc773753327266 - mpe1: 7.463095698816919 - mpe2: 5.653713499441658 

###### training models for site 2/24: 5c3c44b80379370013e0fd2b #######
bssids diff: {'57b44b5a40d857f0077e237fc0b91524b939bb52'}
number of bssids: 2597
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5c3c44b80379370013e0fd2b - mpe1: 8.158920535296824 - mpe2: 6.2306186135302 

###### training models for site 3/24: 5d27075f03f801723c2e360f #######
bssids diff: set()
number of bssids: 6544
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5d27075f03f801723c2e360f - mpe1: 8.198658404353864 - mpe2: 6.338918443580201 

###### training models for site 4/24: 5d27096c03f801723c31e5e0 #######
bssids diff: {'6bd56e0aec03aeb4af07f47d7a4f34755540e1a5'}
number of bssids: 4050
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5d27096c03f801723c31e5e0 - mpe1: 5.093158540970727 - mpe2: 3.765893519242048 

###### training models for site 5/24: 5d27097f03f801723c320d97 #######
bssids diff: set()
number of bssids: 2357
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5d27097f03f801723c320d97 - mpe1: 9.167030747705217 - mpe2: 7.134414959774054 

###### training models for site 6/24: 5d27099f03f801723c32511d #######
bssids diff: set()
number of bssids: 772
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5d27099f03f801723c32511d - mpe1: 6.838378357044607 - mpe2: 5.555329830498004 

###### training models for site 7/24: 5d2709a003f801723c3251bf #######
bssids diff: set()
number of bssids: 1183
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5d2709a003f801723c3251bf - mpe1: 6.090190999367244 - mpe2: 4.886328655714451 

###### training models for site 8/24: 5d2709b303f801723c327472 #######
bssids diff: set()
number of bssids: 1743
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5d2709b303f801723c327472 - mpe1: 6.546885521349466 - mpe2: 5.359056765944062 

###### training models for site 9/24: 5d2709bb03f801723c32852c #######
bssids diff: set()
number of bssids: 2301
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5d2709bb03f801723c32852c - mpe1: 10.309810357564908 - mpe2: 8.876163653724543 

###### training models for site 10/24: 5d2709c303f801723c3299ee ######
bssids diff: set()
number of bssids: 5332
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5d2709c303f801723c3299ee - mpe1: 6.556469507842658 - mpe2: 5.38602279899044 

###### training models for site 11/24: 5d2709d403f801723c32bd39 ######
bssids diff: {'db6f31305fb88f9f61c3b0a3b7bb6b7bcd7ceee6', '849dd63492d15509d61022e4a53417b4975afa20'}
number of bssids: 1977
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5d2709d403f801723c32bd39 - mpe1: 9.052676580185684 - mpe2: 7.885046294199367 

###### training models for site 12/24: 5d2709e003f801723c32d896 ######
bssids diff: set()
number of bssids: 1246
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5d2709e003f801723c32d896 - mpe1: 9.722328748027266 - mpe2: 8.377510672117273 

###### training models for site 13/24: 5da138274db8ce0c98bbd3d2 ######
bssids diff: set()
number of bssids: 460
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5da138274db8ce0c98bbd3d2 - mpe1: 6.815076889652529 - mpe2: 5.398998295377751 

###### training models for site 14/24: 5da1382d4db8ce0c98bbe92e ######
bssids diff: set()
number of bssids: 2510
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5da1382d4db8ce0c98bbe92e - mpe1: 9.060606533088373 - mpe2: 6.919328990687983 

###### training models for site 15/24: 5da138314db8ce0c98bbf3a0 ######
bssids diff: {'f5ae34b06864d1438c190bcfa3c4b70d33f26ab8'}
number of bssids: 1020
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5da138314db8ce0c98bbf3a0 - mpe1: 4.628027680837616 - mpe2: 3.448209929381983 

###### training models for site 16/24: 5da138364db8ce0c98bc00f1 ######
bssids diff: set()
number of bssids: 806
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5da138364db8ce0c98bc00f1 - mpe1: 5.245813429180369 - mpe2: 4.256959137941528 

###### training models for site 17/24: 5da1383b4db8ce0c98bc11ab ######
bssids diff: set()
number of bssids: 1481
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5da1383b4db8ce0c98bc11ab - mpe1: 10.82567138954808 - mpe2: 9.3313521268716 

###### training models for site 18/24: 5da138754db8ce0c98bca82f ######
bssids diff: set()
number of bssids: 1544
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5da138754db8ce0c98bca82f - mpe1: 7.890340591059056 - mpe2: 6.487445965623841 

###### training models for site 19/24: 5da138764db8ce0c98bcaa46 ######
bssids diff: set()
number of bssids: 1801
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5da138764db8ce0c98bcaa46 - mpe1: 7.22703212963642 - mpe2: 5.667574123357005 

###### training models for site 20/24: 5da1389e4db8ce0c98bd0547 ######
bssids diff: set()
number of bssids: 940
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5da1389e4db8ce0c98bd0547 - mpe1: 6.614876827903141 - mpe2: 5.426821637244474 

###### training models for site 21/24: 5da138b74db8ce0c98bd4774 ######
bssids diff: set()
number of bssids: 3111
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5da138b74db8ce0c98bd4774 - mpe1: 9.100678170051884 - mpe2: 7.3875904803297505 

###### training models for site 22/24: 5da958dd46f8266d0737457b ######
bssids diff: set()
number of bssids: 3285
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5da958dd46f8266d0737457b - mpe1: 8.089703698932786 - mpe2: 6.787173527005163 

###### training models for site 23/24: 5dbc1d84c1eb61796cf7c010 ######
bssids diff: set()
number of bssids: 4154
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5dbc1d84c1eb61796cf7c010 - mpe1: 8.475890215422787 - mpe2: 6.689940960802877 

###### training models for site 24/24: 5dc8cea7659e181adb076a3f ######
bssids diff: set()
number of bssids: 4613
------------------------training for fold 1/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 2/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 3/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 4/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


------------------------training for fold 5/5-------------------------


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


site: 5dc8cea7659e181adb076a3f - mpe1: 8.587488712802092 - mpe2: 7.02610037323501 

CPU times: user 3h 13min 34s, sys: 50min 15s, total: 4h 3min 49s
Wall time: 10h 16min 59s


In [17]:
# Persist every artifact of this run (OOF predictions, raw test predictions,
# CV metrics and the final submission) under a run-specific output folder.
output_path = f"{project_path}/output/dnn-ds1-{MAX_WIFI_FEATS}f-{MAX_LASTSEEN}lt"
# exist_ok=True avoids the check-then-create race and keeps the cell re-runnable
os.makedirs(output_path, exist_ok=True)

# save the oof preds
oof_preds = pd.concat(oofs_by_site, ignore_index=True)
oof_preds.to_csv(f"{output_path}/oof_preds.csv", index=False)

# save raw test preds
submission_raw = pd.concat(preds_by_site_raw, ignore_index=True)
submission_raw.to_csv(f"{output_path}/submission_raw.csv", index=False)

# save the cv metrics
# mpe1 is computed on every row using the `oof_x`/`oof_y` columns;
# mpe2 is computed on the `oof_x_agg`/`oof_y_agg` columns after dropping
# duplicated (x, y, oof_x_agg, oof_y_agg) rows.
oof_preds_agg = oof_preds.loc[:, ["x", "y", "oof_x_agg", "oof_y_agg"]].drop_duplicates()
mpe1 = mean_position_error(
    oof_preds.x, oof_preds.y,
    oof_preds.oof_x, oof_preds.oof_y)
mpe2 = mean_position_error(
    oof_preds_agg.x, oof_preds_agg.y,
    oof_preds_agg.oof_x_agg, oof_preds_agg.oof_y_agg)
# cast to built-in floats so yaml.dump writes plain scalars instead of
# serialized numpy objects (mean_position_error returns the output of np.mean)
overall_errors = {"mpe1": float(mpe1), "mpe2": float(mpe2)}
cv_errors["overall"] = overall_errors
print(overall_errors)

# the context manager closes the file; no explicit close() is needed
with open(f'{output_path}/cv_errors.yml', 'w') as file:
    yaml.dump(cv_errors, file, default_flow_style=False)

# save the final submission
sample_sub = pd.read_csv(f"{project_path}/input/sample_submission.csv")
predictions = pd.concat(preds_by_site, ignore_index=True)
# left-join on the submission key so the output keeps the row order of the sample file
submission = pd.merge(
    sample_sub.loc[:, ["site_path_timestamp"]], predictions,
    how="left", on="site_path_timestamp")
# sanity check: rows must line up one-to-one with the sample submission
assert (sample_sub.site_path_timestamp == submission.site_path_timestamp).all()
submission.to_csv(f"{output_path}/submission.csv", index=False)

{'mpe1': 8.232389541239618, 'mpe2': 6.618641604049759}


- Current best: {'mpe1': 8.132519384855483, 'mpe2': 6.742418387295461}
- Description: "[colab] dnn-ds1-40f with weight_norm, batch_size=2048 & learning_rate=1e-2"
- Wall time: 6h 6min 53s

***