## Library imports

In [1]:
# basic imports
import os
import gc
import math
import glob
import random
import itertools
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

# DL library imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
from  torch.cuda.amp import autocast, GradScaler

# metrics calculation
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupKFold

# basic plotting library
import matplotlib.pyplot as plt

# interactive plots
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import warnings  
warnings.filterwarnings('ignore')

## Config parameters

In [2]:
class CFG:
    """Central place for the constants that drive this notebook.

    Attributes are plain class-level values read as ``CFG.<NAME>``; the class
    is never instantiated in the cells shown.
    """

    # ---- pipeline ---------------------------------------------------------
    SEED = 42                # passed to set_seed()
    TRAIN = True
    LR_FIND = False
    TEST = False
    N_FOLDS = 5              # n_splits for GroupKFold
    N_EPOCHS = 27
    TEST_BATCH_SIZE = 32     # batch size of the validation DataLoader
    TRAIN_BATCH_SIZE = 16    # batch size of the training DataLoader
    NUM_WORKERS = 4          # worker processes per DataLoader
    DATA_FRAC = 1.0          # values < 1 subsample the rows after loading
    FOLD_TO_TRAIN = [0]      # , 1, 2, 3, 4

    # ---- model ------------------------------------------------------------
    # NOTE(review): these four are not referenced by any cell visible here and
    # name an EfficientNet backbone, while the model below is an MLP - confirm
    # whether they are leftovers from another notebook.
    MODEL_ARCH = 'tf_efficientnet_b4_ns'
    MODEL_NAME = 'eff_b4_v5'
    WGT_PATH = ''
    WGT_MODEL = ''

    # ---- learning-rate scheduler -----------------------------------------
    MAX_LR = 1e-3
    MIN_LR = 1e-6
    # other options: 'ReduceLROnPlateau', 'OneCycleLR', 'CosineAnnealingWarmRestarts'
    SCHEDULER = 'CosineAnnealingLR'
    T_0 = 10                 # CosineAnnealingWarmRestarts
    # NOTE(review): PyTorch documents T_max as an integer number of
    # iterations; confirm that a fractional value is intended.
    T_MAX = 2.5              # CosineAnnealingLR

    # ---- optimizer --------------------------------------------------------
    OPTIMIZER = 'Adam'
    WEIGHT_DECAY = 1e-6
    GRD_ACC_STEPS = 1
    MAX_GRD_NORM = 1000

In [3]:
# Floor label -> integer index. Both spellings of a floor ("F1" / "1F") map to
# the same value; the first above-ground floor is 0 and basements are negative.
floor_map = {"B2": -2, "B1": -1, "F1": 0, "F2": 1, "F3": 2, "F4": 3, "F5": 4, "F6": 5, "F7": 6, "F8": 7, "F9": 8,
             "1F": 0, "2F": 1, "3F": 2, "4F": 3, "5F": 4, "6F": 5, "7F": 6, "8F": 7, "9F": 8}

# NOTE(review): the four values below are not referenced by any cell visible
# here; presumably they mirror the placeholders used when the feature CSVs
# were generated - confirm against the feature-extraction notebook.
minCount = 1
rssiFillerValue = -999.0
dtFillerValue   = 1000.0
freqFillerValue = 0

# Location of the per-building feature CSVs and of the sample submission file.
outputDir = 'referencePublicNotebooks/wiFiFeatures'
sampleCsvPath = 'sample_submission.csv'

# glob returns matches in arbitrary, filesystem-dependent order; sort them so
# that positional access such as buildingsList[0] picks the same file on every
# run and on every machine.
buildingsList = sorted(glob.glob(f"{outputDir}/*.csv"))
# basename() instead of split('/') so the listing is also correct on platforms
# whose path separator is not '/'.
print([os.path.basename(x) for x in buildingsList])

['5a0546857ecc773753327266_train.csv']


## Helper functions

In [4]:
def find_no_of_trainable_params(model):
    """Count the scalar parameters of ``model`` that will receive gradients.

    Args:
        model: object exposing ``parameters()``, e.g. an ``nn.Module``.

    Returns:
        int: sum of ``numel()`` over all parameters with ``requires_grad`` set.
    """
    n_trainable = 0
    for param in model.parameters():
        # frozen parameters are excluded from the count
        if param.requires_grad:
            n_trainable += param.numel()
    return n_trainable

In [5]:
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Covers Python's ``random``, NumPy's global generator, PyTorch (CPU and
    CUDA), exports ``PYTHONHASHSEED`` and asks cuDNN for deterministic kernels.

    Args:
        seed: integer seed applied to all generators.
    """
    # Exported for completeness: hash randomisation of the already running
    # interpreter is fixed at start-up, so this only reaches child processes.
    os.environ["PYTHONHASHSEED"] = str(seed)

    # Same seed for each library-level generator.
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)

    # Trade some speed for run-to-run reproducible cuDNN results.
    torch.backends.cudnn.deterministic = True

# Seed all generators once, up front, with the configured seed so that later
# cells (sampling, shuffling, weight initialisation) start from a known state.
set_seed(CFG.SEED)

## Dataset class

In [14]:
class wiFiFeaturesDataset(Dataset):
    """Map-style dataset that serves (features, target) rows as float32 tensors.

    Args:
        X_data: indexable collection of NumPy rows holding the input features.
        y_data: indexable collection of NumPy rows holding the targets,
            aligned with ``X_data``.
        transform: accepted for signature compatibility; it is neither stored
            nor applied.
    """

    def __init__(self, X_data, y_data, transform=None):
        self.X_data, self.y_data = X_data, y_data

    def __len__(self):
        """Number of samples, taken from the feature array."""
        return len(self.X_data)

    def __getitem__(self, index):
        """Return the ``index``-th sample as a ``(features, target)`` tuple."""
        # Both arrays get the same treatment: pick the row, cast it to float32
        # and wrap the resulting NumPy array in a tensor.
        features, target = (
            torch.from_numpy(source[index].astype(np.float32))
            for source in (self.X_data, self.y_data)
        )
        return features, target

## MLP Model class

In [16]:
class wiFiFeaturesMLPModel(nn.Module):
    """Fully connected regressor: n_input -> 512 -> 32 -> n_output.

    Each hidden layer is followed by ReLU, dropout (p=0.3) and batch
    normalisation, in that order; the output layer is linear.

    Args:
        n_input: number of input features per sample.
        n_output: number of values predicted per sample.
    """

    def __init__(self, n_input, n_output):
        super().__init__()
        self.lin1 = nn.Linear(in_features=n_input, out_features=512)
        self.lin2 = nn.Linear(in_features=512,     out_features=32)
        # Width of the head follows n_output instead of a hard-coded 3, so the
        # constructor argument is actually honoured.
        self.lin3 = nn.Linear(in_features=32,      out_features=n_output)
        self.bn1 = nn.BatchNorm1d(512)
        self.bn2 = nn.BatchNorm1d(32)
        self.drops = nn.Dropout(0.3)

    def forward(self, x_input):
        """Map a ``(batch, n_input)`` tensor to ``(batch, n_output)`` predictions."""
        # Start from the actual argument (the body previously read an
        # undefined name `x`).
        x = F.relu(self.lin1(x_input))
        x = self.drops(x)
        x = self.bn1(x)
        x = F.relu(self.lin2(x))
        x = self.drops(x)
        # Second hidden block uses bn2; there is no bn3 layer on this module.
        x = self.bn2(x)
        x = self.lin3(x)
        return x

## Read input data

In [7]:
# Work on the first CSV discovered under outputDir.
# NOTE(review): raises IndexError when no CSV was found there.
building = buildingsList[0]

In [8]:
# Load the feature table of the selected building.
data = pd.read_csv(building)
# Optionally keep only a random fraction of the rows (CFG.DATA_FRAC < 1).
if CFG.DATA_FRAC < 1:
    data = data.sample(frac=CFG.DATA_FRAC).reset_index(drop=True)
print(data.shape)

# Column layout implied by the slices below: everything except the last four
# columns is an input feature, the next three columns are the regression
# targets and the very last column is the grouping key handed to GroupKFold.
# NOTE(review): the targets are presumably (x, y, floor) - confirm against the
# CSV header.
X = data.iloc[:,0:-4].values
y = data.iloc[:,-4:-1].values
groups = data.iloc[:,-1].values
print(X.shape, y.shape, groups.shape)
# Drop the DataFrame and collect: the cells shown below only use the arrays.
del data
gc.collect()

(9296, 10174)
(9296, 10170) (9296, 3) (9296,)


0

In [9]:
# Group-aware K-fold: rows that share a `groups` value always end up on the
# same side of a split.
folds = GroupKFold(n_splits=CFG.N_FOLDS)
# Only the first split is used here, so pull it straight from the generator;
# i_fold is bound to 0, exactly as after the first loop iteration.
i_fold, (train_idx, valid_idx) = next(
    enumerate(folds.split(X=X, y=y[:,0], groups=groups))
)
print(train_idx.shape, valid_idx.shape)

(7437,) (1859,)


In [10]:
# Split features and targets into training and validation sets using the
# row indices of the selected fold.
X_train, X_valid = X[train_idx], X[valid_idx]
y_train, y_valid = y[train_idx], y[valid_idx]

In [11]:
# Standardise the input features.
# Each print shows the tuple (train mean, train std, valid mean, valid std),
# even though the label only says "train_mean".
print(f"Before stdscaler : train_mean{X_train.mean(), X_train.std(), X_valid.mean(), X_valid.std()}")
# The scaler is fitted on the training split only and then reused unchanged
# for validation, so no validation statistics leak into the transformation.
stdScaler = StandardScaler()
X_train = stdScaler.fit_transform(X_train)
X_valid = stdScaler.transform(X_valid)
# NOTE(review): in the recorded output the validation std is ~19.7 after
# scaling, far from unit variance - worth checking which columns cause it.
print(f"After stdscaler : train_mean{X_train.mean(), X_train.std(), X_valid.mean(), X_valid.std()}")

Before stdscaler : train_mean(137.06707880060725, 1136.4634492114315, 127.1956956176943, 1116.6455077288429)
After stdscaler : train_mean(-1.4644823927370838e-17, 0.9930436512134494, 0.08787075280251733, 19.70137847102953)


In [12]:
# Sanity check: shapes of the train/validation feature and target arrays.
X_train.shape, y_train.shape, X_valid.shape, y_valid.shape

((7437, 10170), (7437, 3), (1859, 10170), (1859, 3))

In [13]:
# Wrap the scaled arrays so that DataLoader can serve them as float32 tensors.
dataset_train = wiFiFeaturesDataset(X_train, y_train)
dataset_valid = wiFiFeaturesDataset(X_valid, y_valid)

# Training batches are reshuffled on every pass over the data.
dataloader_train = DataLoader(dataset_train, batch_size=CFG.TRAIN_BATCH_SIZE, shuffle=True,
                              num_workers=CFG.NUM_WORKERS, pin_memory=False, drop_last=False)
# Validation is read in dataset order: shuffling brings no benefit for
# evaluation and would break the row alignment between collected predictions
# and y_valid / valid_idx.
dataloader_valid = DataLoader(dataset_valid, batch_size=CFG.TEST_BATCH_SIZE, shuffle=False,
                              num_workers=CFG.NUM_WORKERS, pin_memory=False, drop_last=False)

In [None]:
# the metric used in this competition
def comp_metric(xhat, yhat, fhat, x, y, f, floor_penalty=15):
    """Mean position error: planar distance plus a penalty per floor of error.

    For every sample the error is
    ``sqrt((xhat - x)^2 + (yhat - y)^2) + floor_penalty * |fhat - f|``
    and the function returns the average over the samples.

    Args:
        xhat, yhat, fhat: predicted x, y and floor as tensors of equal length.
        x, y, f: ground-truth x, y and floor, aligned with the predictions.
        floor_penalty: weight of the absolute floor error (default 15).

    Returns:
        0-dim tensor holding the mean error over ``xhat.shape[0]`` samples.
    """
    # Euclidean distance in the horizontal plane.
    planar_error = torch.sqrt(torch.square(torch.subtract(xhat, x)) +
                              torch.square(torch.subtract(yhat, y)))
    # The floor term is added to the distance, not placed under the root;
    # torch.abs keeps the computation on tensors (np.abs is for NumPy arrays).
    floor_error = floor_penalty * torch.abs(torch.subtract(fhat, f))
    intermediate = planar_error + floor_error
    return intermediate.sum()/xhat.shape[0]
