<a href="https://colab.research.google.com/github/mixidota2/kaggle-indoor/blob/main/notebook/Indoor_001_baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Overview
- Baselineを構築するためのnotebook
- とりあえずデータ読んで最低限のsubをするだけを目的とする

In [1]:
!nvidia-smi

Wed Mar 24 11:14:17 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.56       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   46C    P8    10W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
import os
filename = "/root/.kaggle/kaggle.json"
os.makedirs(os.path.dirname(filename), exist_ok=True)
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
os.chmod(filename, 600)

In [3]:
!kaggle datasets download -d kokitanisaka/indoorunifiedwifids
!unzip indoorunifiedwifids.zip > /dev/null

Downloading indoorunifiedwifids.zip to /content
 99% 459M/463M [00:06<00:00, 55.1MB/s]
100% 463M/463M [00:06<00:00, 73.3MB/s]


In [4]:
!pip install memory_profiler > /dev/null
%load_ext memory_profiler

Collecting memory_profiler
  Downloading https://files.pythonhosted.org/packages/8f/fd/d92b3295657f8837e0177e7b48b32d6651436f0293af42b76d134c3bb489/memory_profiler-0.58.0.tar.gz
Building wheels for collected packages: memory-profiler
  Building wheel for memory-profiler (setup.py) ... [?25l[?25hdone
  Created wheel for memory-profiler: filename=memory_profiler-0.58.0-cp37-none-any.whl size=30180 sha256=9ad50c972ed56dc5ecad9256b2c15709df7d8ccbedec7cc8114377be4c1642e9
  Stored in directory: /root/.cache/pip/wheels/02/e4/0b/aaab481fc5dd2a4ea59e78bc7231bb6aae7635ca7ee79f8ae5
Successfully built memory-profiler
Installing collected packages: memory-profiler
Successfully installed memory-profiler-0.58.0


In [24]:
import os
import gc
import glob 
import copy
import pickle
import random

import pandas as pd
import numpy as np

import yaml
from tqdm import tqdm
from joblib import Parallel, delayed

import seaborn as sns
import matplotlib.pyplot as plt

import cv2

import scipy.stats as stats

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error
from sklearn.decomposition import TruncatedSVD

import torch
import torch.nn as nn 
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

tqdm.pandas(position=0, leave=True)

  from pandas import Panel


In [6]:
# consts
N_SPLITS = 10

SEED = 42

NUM_FEATS = 20

In [7]:
def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
def get_timestamp():
    import time
    timestamp = ''
    for i, d in enumerate(time.localtime()):
        if i == 3:
            d += 8
        timestamp += str(d) + '-'
        if i == 4:
            break
    return timestamp[:-1]
def comp_metric(xhat, yhat, fhat, x, y, f):
    intermediate = np.sqrt((xhat-x)**2 + (yhat-y)**2) + 15 * np.abs(fhat-f)
#     intermediate = np.sqrt((xhat-x)**2 + (yhat-y)**2)
    return intermediate.sum()/xhat.shape[0]

## Preprocess

In [8]:
with open(f'train_all.pkl', 'rb') as f:
  data = pickle.load( f)
with open(f'test_all.pkl', 'rb') as f:
  test_data = pickle.load(f)

In [9]:
# count n features
BSSID_FEATS = [f'bssid_{i}' for i in range(NUM_FEATS)]
RSSI_FEATS  = [f'rssi_{i}' for i in range(NUM_FEATS)]

In [10]:
# get unique wifi bssids
wifi_bssids = []
for i in range(100):
    wifi_bssids.extend(data.iloc[:,i].values.tolist())
wifi_bssids = list(set(wifi_bssids))

wifi_bssids_size = len(wifi_bssids)
print(f'BSSID TYPES: {wifi_bssids_size}')

wifi_bssids_test = []
for i in range(100):
    wifi_bssids_test.extend(test_data.iloc[:,i].values.tolist())
wifi_bssids_test = list(set(wifi_bssids_test))

wifi_bssids_size = len(wifi_bssids_test)
print(f'BSSID TYPES: {wifi_bssids_size}')

wifi_bssids.extend(wifi_bssids_test)
wifi_bssids_size = len(wifi_bssids)

BSSID TYPES: 61206
BSSID TYPES: 33042


In [11]:
# preprocess

le = LabelEncoder()
le.fit(wifi_bssids)
le_site = LabelEncoder()
le_site.fit(data['site_id'])

ss = StandardScaler()
ss.fit(data.loc[:,RSSI_FEATS])

StandardScaler(copy=True, with_mean=True, with_std=True)

In [12]:
# apply transforms

data.loc[:,RSSI_FEATS] = ss.transform(data.loc[:,RSSI_FEATS])
for i in BSSID_FEATS:
    data.loc[:,i] = le.transform(data.loc[:,i])
    data.loc[:,i] = data.loc[:,i] + 1
    
data.loc[:, 'site_id'] = le_site.transform(data.loc[:, 'site_id'])

data.loc[:,RSSI_FEATS] = ss.transform(data.loc[:,RSSI_FEATS])

In [13]:
test_data.loc[:,RSSI_FEATS] = ss.transform(test_data.loc[:,RSSI_FEATS])
for i in BSSID_FEATS:
    test_data.loc[:,i] = le.transform(test_data.loc[:,i])
    test_data.loc[:,i] = test_data.loc[:,i] + 1
    
test_data.loc[:, 'site_id'] = le_site.transform(test_data.loc[:, 'site_id'])

test_data.loc[:,RSSI_FEATS] = ss.transform(test_data.loc[:,RSSI_FEATS])

In [14]:
site_count = len(data['site_id'].unique())
data.reset_index(drop=True, inplace=True)

In [15]:
seed_everything(SEED)

## Some EDA

In [None]:
plt.figure(figsize=(10,3))
max_iter = 10
for i, (name, group) in enumerate(data.groupby("path")):
    sns.lineplot(data=group, y=RSSI_FEATS[0], x=range(group.shape[0]))
    if i > max_iter:
        break
plt.figure(figsize=(10,3))
for i, (name, group) in enumerate(data.groupby("path")):
    sns.lineplot(data=group, y="x", x=range(group.shape[0]))
    if i > max_iter:
        break
plt.figure(figsize=(10,3))
for i, (name, group) in enumerate(data.groupby("path")):
    sns.lineplot(data=group, y="y", x=range(group.shape[0]))
    if i > max_iter:
        break

In [None]:
tmp = data.loc[:,RSSI_FEATS]
tmp.head(10)

## Modeling

In [16]:
class IndoorDataset(Dataset):
    def __init__(self, data, flag='TRAIN'):
        self.data = data
        self.flag = flag
    def __len__(self):
        return self.data.shape[0]
    def __getitem__(self, index):
        tmp_data = self.data.iloc[index]
        if self.flag == 'TRAIN':
            return {
                'BSSID_FEATS':tmp_data[BSSID_FEATS].values.astype(float),
                'RSSI_FEATS':tmp_data[RSSI_FEATS].values.astype(float),
                'site_id':tmp_data['site_id'].astype(int),
                'x':tmp_data['x'],
                'y':tmp_data['y'],
                'floor':tmp_data['floor'],
            }
        else:
            return {
                'BSSID_FEATS':tmp_data[BSSID_FEATS].values.astype(float),
                'RSSI_FEATS':tmp_data[RSSI_FEATS].values.astype(float),
                'site_id':tmp_data['site_id'].astype(int)
            }

In [17]:
class SimpleLSTM(nn.Module):
    def __init__(self, embedding_dim = 64, seq_len=20):
        super(SimpleLSTM, self).__init__()
        self.emb_BSSID_FEATS = nn.Embedding(wifi_bssids_size, embedding_dim)
        self.emb_site_id = nn.Embedding(site_count, 2)
        self.lstm1 = nn.LSTM(input_size=256,hidden_size=128, dropout=0.3, bidirectional=False)
        self.lstm2 = nn.LSTM(input_size=128,hidden_size=16, dropout=0.1, bidirectional=False)
        self.lr = nn.Linear(NUM_FEATS, NUM_FEATS * embedding_dim)
        self.lr1 = nn.Linear(2562, 256)
        self.lr_xy = nn.Linear(16, 2)
        self.lr_floor = nn.Linear(16, 1)
        self.batch_norm1 = nn.BatchNorm1d(NUM_FEATS)
        self.batch_norm2 = nn.BatchNorm1d(2562)
        self.batch_norm3 = nn.BatchNorm1d(1)
        self.dropout = nn.Dropout(0.3)
    def forward(self, x):
        
        x_bssid = self.emb_BSSID_FEATS(x['BSSID_FEATS'])
        x_bssid = torch.flatten(x_bssid, start_dim=-2)
        
        x_site_id = self.emb_site_id(x['site_id'])
        x_site_id = torch.flatten(x_site_id, start_dim=-1)
        x_rssi = self.batch_norm1(x['RSSI_FEATS'])
        x_rssi = self.lr(x_rssi)
        x_rssi = torch.relu(x_rssi)
        
        x = torch.cat([x_bssid, x_site_id, x_rssi], dim=-1)
        x = self.batch_norm2(x)
        x = self.dropout(x)
        x = torch.relu(self.lr1(x))

        x = x.unsqueeze(-2)
        x = self.batch_norm3(x)
        x = x.transpose(0, 1)
        x, _ = self.lstm1(x)
        x = x.transpose(0, 1)
        x = torch.relu(x)
        x = x.transpose(0, 1)
        x, _ = self.lstm2(x)
        x = x.transpose(0, 1)
        x = torch.relu(x)
        xy = self.lr_xy(x)
        floor = self.lr_floor(x)
        floor = torch.relu(floor)
        return xy.squeeze(-2), floor.squeeze(-2)

In [25]:
def evaluate(net, data_loader,  device='cuda'):
    net.to(device)
    net.eval()
    x_list = []
    y_list = []
    floor_list = []
    prexs_list = []
    preys_list = []
    prefloors_list = []
    for d in tqdm(data_loader, position=0):
        data_dict = {}
        data_dict['BSSID_FEATS'] = d['BSSID_FEATS'].to(device).long()
        data_dict['RSSI_FEATS'] = d['RSSI_FEATS'].to(device).float()
        data_dict['site_id'] = d['site_id'].to(device).long()
        x = d['x'].to(device).float()
        y = d['y'].to(device).float()
        floor = d['floor'].to(device).long()
        x_list.append(x.cpu().detach().numpy())
        y_list.append(y.cpu().detach().numpy())
        floor_list.append(floor.cpu().detach().numpy())
        xy, floor = net(data_dict)
        prexs_list.append(xy[:, 0].cpu().detach().numpy())
        preys_list.append(xy[:, 1].cpu().detach().numpy())
        prefloors_list.append(floor.squeeze().cpu().detach().numpy())
    x = np.concatenate(x_list)
    y = np.concatenate(y_list)
    floor = np.concatenate(floor_list)
    prexs = np.concatenate(prexs_list)
    preys =np.concatenate(preys_list)
    prefloors = np.concatenate(prefloors_list)
    eval_score = comp_metric(x, y, floor, prexs, preys, prefloors)
    return eval_score
def get_result(net, data_loader, device='cuda'):
    net.eval()
    net.to(device)
    prexs_list = []
    preys_list = []
    prefloors_list = []
    data_dict = {}
    for d in tqdm(data_loader, position=0):
        data_dict['BSSID_FEATS'] = d['BSSID_FEATS'].to(device).long()
        data_dict['RSSI_FEATS'] = d['RSSI_FEATS'].to(device).float()
        data_dict['site_id'] = d['site_id'].to(device).long()
        xy, floor = net(data_dict)
        prexs_list.append(xy[:, 0].cpu().detach().numpy())
        preys_list.append(xy[:, 1].cpu().detach().numpy())
        prefloors_list.append(floor.squeeze(-1).cpu().detach().numpy())
    prexs = np.concatenate(prexs_list)
    preys =np.concatenate(preys_list)
    prefloors = np.concatenate(prefloors_list)
    return prexs, preys, prefloors

## Training

In [26]:
%memit
score_df = pd.DataFrame()
oof = list()
predictions = list()

oof_x, oof_y, oof_f = np.zeros(data.shape[0]), np.zeros(data.shape[0]), np.zeros(data.shape[0])
preds_x, preds_y = 0, 0
preds_f_arr = np.zeros((test_data.shape[0], N_SPLITS))

for fold, (trn_idx, val_idx) in enumerate(StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED).split(data.loc[:, 'path'], data.loc[:, 'path'])):

    train_data = data.loc[trn_idx]
    valid_data = data.loc[val_idx]
    train_dataset = IndoorDataset(train_data)
    train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=1)
    valid_dataset = IndoorDataset(valid_data)
    valid_dataloader = DataLoader(valid_dataset, batch_size=128, shuffle=True, num_workers=1)
    test_dataset = IndoorDataset(test_data, 'TEST')
    test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=1)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    net = SimpleLSTM()
    net = net.to(device)

    mse = nn.MSELoss()
    mse = mse.to(device)
    optim = torch.optim.Adam(net.parameters(), lr=5e-3)

    best_loss = 1000
    num_epochs = 1
    best_epoch = 0
    for epoch in range(num_epochs):
        net.train()
        losses = []
        pbar = tqdm(train_dataloader, position=0)
        for d in pbar:
            data_dict = {}
            data_dict['BSSID_FEATS'] = d['BSSID_FEATS'].to(device).long()
            data_dict['RSSI_FEATS'] = d['RSSI_FEATS'].to(device).float()
            data_dict['site_id'] = d['site_id'].to(device).long()
            x = d['x'].to(device).float().unsqueeze(-1)
            y = d['y'].to(device).float().unsqueeze(-1)
            floor = d['floor'].to(device).long()
            xy, floor = net(data_dict)
            label = torch.cat([x, y], dim=-1)
            loss = mse(xy, label)
            loss.backward()
            optim.step()
            optim.zero_grad()
            losses.append(loss.cpu().detach().numpy())
            pbar.set_description(f'loss:{np.mean(losses)}')
            data_dict['BSSID_FEATS'] = data_dict['BSSID_FEATS'].detach()
            data_dict['RSSI_FEATS'] = data_dict['RSSI_FEATS'].detach()
            data_dict['site_id'] = data_dict['site_id'].detach()
            del x, y, xy, floor, label, data_dict, loss, d
            gc.collect()
            torch.cuda.empty_cache()
        score = evaluate(net, valid_dataloader, device)
        if score < best_loss:
            best_loss = score
            best_epoch = epoch
            best_model = copy.deepcopy(net)
        if best_epoch + 2 < epoch:
            break
        print("*="*50)
        print(f"fold {fold} EPOCH {epoch}: mean position error {score}")
        print("*="*50)
    test_x, test_y, test_floor = get_result(best_model, test_dataloader, device)
    preds_f_arr[:,fold] = test_floor
    preds_x += test_x
    preds_y += test_y

peak memory: 8398.34 MiB, increment: 0.00 MiB


  "num_layers={}".format(dropout, num_layers))
  "num_layers={}".format(dropout, num_layers))
loss:10982.5810546875: 100%|██████████| 1815/1815 [05:44<00:00,  5.26it/s]
100%|██████████| 202/202 [00:35<00:00,  5.71it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
fold 0 EPOCH 0: mean position error 126.07799031000532
*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=


100%|██████████| 80/80 [00:11<00:00,  6.86it/s]
loss:10170.9765625: 100%|██████████| 1815/1815 [05:45<00:00,  5.25it/s]
100%|██████████| 202/202 [00:35<00:00,  5.74it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
fold 1 EPOCH 0: mean position error 118.67003843967201
*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=


100%|██████████| 80/80 [00:11<00:00,  6.85it/s]
loss:10603.224609375: 100%|██████████| 1815/1815 [05:45<00:00,  5.26it/s]
100%|██████████| 202/202 [00:35<00:00,  5.73it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
fold 2 EPOCH 0: mean position error 122.46265747987101
*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=


100%|██████████| 80/80 [00:11<00:00,  6.85it/s]
loss:10190.4921875: 100%|██████████| 1815/1815 [05:49<00:00,  5.19it/s]
100%|██████████| 202/202 [00:35<00:00,  5.66it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
fold 3 EPOCH 0: mean position error 118.55116443313786
*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=


100%|██████████| 80/80 [00:11<00:00,  6.85it/s]
loss:10223.34765625: 100%|██████████| 1815/1815 [05:50<00:00,  5.17it/s]
100%|██████████| 202/202 [00:35<00:00,  5.61it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
fold 4 EPOCH 0: mean position error 117.34193707633862
*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=


100%|██████████| 80/80 [00:11<00:00,  6.81it/s]
loss:10581.9951171875: 100%|██████████| 1815/1815 [05:52<00:00,  5.15it/s]
100%|██████████| 202/202 [00:35<00:00,  5.65it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
fold 5 EPOCH 0: mean position error 122.19395278053156
*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=


100%|██████████| 80/80 [00:11<00:00,  6.81it/s]
loss:9500.7900390625: 100%|██████████| 1815/1815 [05:51<00:00,  5.16it/s]
100%|██████████| 202/202 [00:36<00:00,  5.58it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
fold 6 EPOCH 0: mean position error 112.08666775494643
*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=


100%|██████████| 80/80 [00:11<00:00,  6.88it/s]
loss:9556.54296875: 100%|██████████| 1815/1815 [05:53<00:00,  5.13it/s]
100%|██████████| 202/202 [00:35<00:00,  5.62it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
fold 7 EPOCH 0: mean position error 113.90920383689036
*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=


100%|██████████| 80/80 [00:11<00:00,  6.83it/s]
loss:9896.306640625: 100%|██████████| 1815/1815 [05:52<00:00,  5.15it/s]
100%|██████████| 202/202 [00:36<00:00,  5.60it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
fold 8 EPOCH 0: mean position error 115.5342398808053
*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=


100%|██████████| 80/80 [00:11<00:00,  6.84it/s]
loss:10260.4462890625: 100%|██████████| 1815/1815 [05:52<00:00,  5.14it/s]
100%|██████████| 202/202 [00:36<00:00,  5.60it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
fold 9 EPOCH 0: mean position error 117.25276521237834
*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=


100%|██████████| 80/80 [00:11<00:00,  6.79it/s]


## Check Tensor Shapes

In [None]:
tmp = data.loc[:9,BSSID_FEATS]
tmp_ids = []
for i in range(20):
    tmp_ids.extend(tmp.iloc[:,i].values.tolist())
tmp_ids = len(list(set(tmp_ids)))
_emb = nn.Embedding(wifi_bssids_size, 64)
_res = _emb(torch.tensor(tmp.values.astype(float)).long())
_res = torch.flatten(_res, start_dim=-2)
#torch.tensor(tmp.values).size()

tmp2 = data.loc[:9,RSSI_FEATS]
tmp2_ids = []
for i in range(20):
    tmp2_ids.extend(tmp2.iloc[:,i].values.tolist())
tmp2_ids = len(list(set(tmp2_ids)))
lr = nn.Linear(20, 1280)
_res2 = lr(torch.tensor(tmp2.values.astype(float)).float())

tmp3 = data.loc[:9,"site_id"]
_emb2 = nn.Embedding(site_count, 2)
_res3 = _emb2(torch.tensor(tmp3.values.astype(float)).long())
_res3 = torch.flatten(_res3, start_dim=-1)

In [None]:
_res.size(), _res2.size(), _res3.size()

In [None]:
_all = torch.cat([_res, _res2, _res3], dim=-1)
_all = nn.Linear(2562, 256)(_all)

In [None]:
_all.size()

In [None]:
_un = _all.unsqueeze(-2)
_un.size()

In [None]:
_tr = _un.transpose(0, 1)
_tr.size()

In [None]:
_lstm1 = nn.LSTM(input_size=256,hidden_size=128, dropout=0.3, bidirectional=False)
_ls1, _ = _lstm1(_tr)
_ls1.size()

In [None]:
_xy = _ls1.transpose(0, 1)
_xy = nn.Linear(128,2)(_xy)
_xy.size()

In [None]:
_xy.squeeze(-2).size()