In [None]:
import sys

# Extra directories to search for importable modules (presumably where the
# competition's helper module `io_f` lives - it is imported in the next cell).
packages = [
    '../input/indoor-locationnavigation-2021/indoor-location-competition-20-master/indoor-location-competition-20-master'
]
# Append every listed directory to the interpreter's module search path.
sys.path.extend(packages)

In [None]:
from io_f import read_data_file 

In [None]:
def all_files(f_list,directory):
    """Append the path of every file found under ``directory`` to ``f_list``.

    The tree is traversed top-down with ``os.walk``, which already descends into
    sub-directories, so no manual recursion is needed. Symbolic links to
    directories are not followed (``os.walk`` default).

    Args:
        f_list: list that receives the paths; it is mutated in place.
        directory: root directory to scan.

    Returns:
        The same ``f_list`` object. It is returned unchanged when ``directory``
        is empty or does not exist (``os.walk`` yields nothing in that case).
    """
    # Local import: the notebook never brings `walk` into the global namespace.
    from os import walk

    for dirpath, _dirnames, filenames in walk(directory):
        for fname in filenames:
            # Paths are joined with '/' to keep the format callers already receive.
            f_list.append(dirpath + '/' + fname)
    return f_list
        

In [None]:
import pandas as pd
import numpy as np
import os

In [None]:
# Load the train and test tables (in that order) from their parquet files.
train, test = [
    pd.read_parquet(parquet_path)
    for parquet_path in (
        '../input/indoor-prediction-data-manager/train_data.parquet',
        '../input/indoor-prediction-data-manager/test_data.parquet',
    )
]

In [None]:
# Table whose column '0' is enumerated in a later cell to build the `bssids`
# value -> index mapping (presumably one distinct Wi-Fi BSSID per row).
unique_bssid = pd.read_csv('../input/indoor-prediction-data-manager/unique_bssid.csv') 

In [None]:
# Cast the target columns to numeric dtypes: x / y as floats, f as an integer.
for column, dtype in (('x', 'float'), ('y', 'float'), ('f', 'int')):
    train[column] = train[column].astype(dtype)

In [None]:
# Make `td` an integer column, then keep only the rows whose `td` is below 2000.
train['td'] = train['td'].astype('int')
train = train.loc[train['td'] < 2000]

In [None]:
# Map each value of column '0' to its row position in `unique_bssid`
# (if a value repeats, the last position wins).
bssids = {
    value: position
    for position, value in enumerate(unique_bssid['0'].to_numpy())
}

In [None]:
# Submission template; its 'floor', 'x' and 'y' columns are overwritten near the
# end of the notebook before the frame is written back to disk.
submission = pd.read_csv('../input/indoor-location-navigation/sample_submission.csv')

In [None]:
!pip install tf-models-official

In [None]:
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import GroupKFold,KFold
import numpy as np
from time import time
from contextlib import contextmanager
from psutil import virtual_memory, Process

In [None]:
from official.nlp.transformer import model_utils
from official.nlp.transformer import transformer

In [None]:
import gc
from tqdm.notebook import tqdm

In [None]:
def mpe_error(pred,tar):
    """Mean position error between two 2-D arrays (NumPy version).

    For every row, the Euclidean distance over columns 0 and 1 is added to
    15 times the absolute difference of column 2; the mean over rows is returned.

    Args:
        pred: predicted values, indexed as ``[:, 0]``, ``[:, 1]``, ``[:, 2]``.
        tar: reference values with the same layout as ``pred``.

    Returns:
        The mean of the per-row error.
    """
    dx = pred[:, 0] - tar[:, 0]
    dy = pred[:, 1] - tar[:, 1]
    planar_dist = np.sqrt(np.square(dx) + np.square(dy))
    # The third column is weighted by a fixed factor of 15.
    third_col_penalty = 15 * np.abs(pred[:, 2] - tar[:, 2])
    per_row = planar_dist + third_col_penalty
    return per_row.mean()

def mpe_loss(tar,pred):
    """Mean position error as a TensorFlow loss.

    ``tar`` gets an extra axis at position 1 so that it broadcasts against the
    second axis of the 3-D ``pred``. The error is the Euclidean distance over
    channels 0 and 1 plus 15 times the absolute difference of channel 2,
    averaged over every element.

    Args:
        tar: target tensor (2-D before the axis is inserted).
        pred: prediction tensor indexed as ``[:, :, channel]``.

    Returns:
        Scalar tensor holding the mean error.
    """
    target = tf.expand_dims(tar, 1)
    dx = pred[:, :, 0] - target[:, :, 0]
    dy = pred[:, :, 1] - target[:, :, 1]
    planar_dist = tf.sqrt(tf.square(dx) + tf.square(dy))
    third_channel_penalty = 15 * tf.abs(pred[:, :, 2] - target[:, :, 2])
    return tf.reduce_mean(planar_dist + third_channel_penalty)

In [None]:
@contextmanager
def timer():
    """Context manager that prints the wall-clock time spent inside the ``with`` body.

    On exit it prints the elapsed minutes and seconds together with the current
    system memory usage (``virtual_memory().percent``). The report is emitted
    from a ``finally`` clause, so it also appears when the body raises; the
    exception then propagates unchanged.
    """
    start = time()
    try:
        yield
    finally:
        # divmod on floats gives the same (quotient, remainder) as `//` and `%`.
        minutes, seconds = divmod(time() - start, 60)
        print(f"Training ended in {minutes} min {seconds} sec. Current Memory Usage: {virtual_memory().percent} \n")
    

In [None]:
class ChangeModes(keras.callbacks.Callback):
    """Toggle the model's train/inference flag during ``fit`` and print a per-epoch score.

    At the end of every epoch the callback predicts on ``features``, maps the
    predictions back with ``pred * (max_f - min_f) + min_f`` and compares them
    with ``targ``.

    Args:
        model: model exposing ``training()`` / ``testing()`` switches and ``predict``.
        features: inputs passed to ``model.predict`` after each epoch.
        targ: targets for ``features``; columns 0 and 1 enter the distance term,
            column 2 the term weighted by 15. They must be in the same units as
            the rescaled predictions (i.e. NOT min-max normalised), otherwise
            the printed scores compare values on different scales.
        max_f: per-column maxima used to undo the normalisation of predictions.
        min_f: per-column minima used to undo the normalisation of predictions.
    """
    def __init__(self,model,features,targ,max_f,min_f):
        # Let the Keras base class initialise its own attributes first.
        super().__init__()
        self.model = model
        self.fea = features
        # Extra axis at position 1 so targets broadcast against the 3-D predictions.
        self.tar = np.expand_dims(targ,1)
        self.min_f = min_f
        self.max_f = max_f
    def on_train_begin(self, logs=None):
        """Put the model in training mode when ``fit`` starts."""
        self.model.training()
    def on_epoch_begin(self, epoch, logs=None):
        """Restore training mode, which the previous epoch's validation pass switched off."""
        self.model.training()
    def on_test_begin(self, logs=None):
        """Put the model in inference mode before validation / evaluation."""
        self.model.testing()
    def on_epoch_end(self,epoch,logs=None):
        """Predict on the held-out features and print the epoch's error breakdown."""
        # `logs` may be None, and 'val_mpe_loss' is absent without validation data.
        logs = logs or {}
        # Score in inference mode regardless of whether a validation pass ran.
        self.model.testing()
        # Use the model bound to this callback, not whatever global `model` happens to exist.
        pred = self.model.predict(self.fea)*(self.max_f-self.min_f)+self.min_f
        xy = np.sqrt(np.square(pred[:,:,0]-self.tar[:,:,0])+np.square(pred[:,:,1]-self.tar[:,:,1]))
        f = (15*np.abs(pred[:,:,2]-self.tar[:,:,2]))
        print(f"Epoch:{epoch},Est_lbscore:{(xy+f).mean()} validation loss: {logs.get('val_mpe_loss')}, building_loss={f.mean()}, location_loss = {xy.mean()}")

In [None]:
class IndoorPrediction(keras.Model):
    """Transformer-encoder model mapping a 200-column input row to 3 sigmoid outputs per position.

    The first 100 input columns are embedded with ``embedding_bssid``, the
    remaining columns (multiplied by -1) with ``embedding_rssi``. Both
    embeddings are concatenated on the last axis, passed through an
    ``EncoderStack`` and a 3-unit sigmoid ``Dense`` layer.

    Args:
        inp_shape: shape used to build the encoder and, via ``buildModel``, the model.
        params: parameter dict forwarded to ``transformer.EncoderStack``.
        emb_length: number of rows of the first embedding table.
        emb_dim_bssid: width of the first embedding.
        emb_dim_rssi: width of the second embedding.
        bssid_dict: stored on the instance; not read by any method of this class.
    """

    def __init__(self,inp_shape,params,emb_length,emb_dim_bssid, emb_dim_rssi,bssid_dict):
        super().__init__()
        # Python-level mode flag handed to the encoder on every call.
        self.train = True
        self.bssid_dict = bssid_dict
        self.inp_shape = inp_shape
        self.embedding_bssid = keras.layers.Embedding(emb_length, emb_dim_bssid)
        # Indices 0-999; most of these are not used.
        self.embedding_rssi = keras.layers.Embedding(1000, emb_dim_rssi)
        self.encoder = transformer.EncoderStack(params)
        # Built explicitly because `call` below invokes `encoder.call` directly.
        self.encoder.build(inp_shape)

        self.fc = keras.layers.Dense(3, activation='sigmoid')

    def training(self):
        """Set the mode flag to True (training)."""
        self.train = True

    def testing(self):
        """Set the mode flag to False (inference)."""
        self.train = False

    def buildModel(self):
        """Build the model with the input shape given at construction."""
        self.build(self.inp_shape)

    def process_data(self,i):
        """Split ``i`` into its first 100 columns and the sign-flipped remainder."""
        first_block = i[:, :100]
        # Multiplying by -1 presumably turns negative readings into valid
        # (non-negative) embedding indices - confirm against the input data.
        second_block = i[:, 100:] * -1
        return first_block, second_block

    def call(self,i):
        """Run the forward pass and return the sigmoid outputs for positions ``-50:-1``."""
        bs, rs = self.process_data(i)
        attention_bias = model_utils.get_padding_bias(bs)
        embedded = tf.concat([self.embedding_bssid(bs), self.embedding_rssi(rs)], 2)
        # Third argument is None and the fourth is the mode flag (presumably
        # `inputs_padding` and `training` of EncoderStack.call - confirm).
        encoded = self.encoder.call(embedded, attention_bias, None, self.train)
        # `-50:-1` keeps 49 positions and excludes the very last one.
        return self.fc(encoded[:, -50:-1, :])

In [None]:
# Mini-batch size for `fit`, and the sequence length used in the model's input shape.
batch_size, seq_len = 64, 100
# Embedding widths; their sum becomes the encoder's hidden size.
emb_dim_bssid, emb_dim_rssi = 36, 18

In [None]:
# Configuration dict handed to `transformer.EncoderStack`.
params = dict(
    # Width of the concatenated embeddings fed to the encoder.
    hidden_size=emb_dim_bssid + emb_dim_rssi,
    # Attention-layer parameters
    num_hidden_layers=8,
    attention_dropout=0.0,
    layer_postprocess_dropout=0.0,
    num_heads=6,
    # Feed-forward parameters
    filter_size=64,
    relu_dropout=0.0,
)

In [None]:
# First 200 columns are the model inputs; columns 200-202 are the three targets.
fea = train.iloc[:, :200].values
tar = train.iloc[:, 200:203].values
# The test table provides the same 200 input columns.
test_fea = test.iloc[:, :200].values

In [None]:
# Per-column extrema of the targets, kept to undo the scaling on predictions later.
max_f = tar.max(axis=0)
min_f = tar.min(axis=0)
# Min-max scale each target column. NOTE: `tar` is rebound, so re-running this
# cell would scale already-scaled values.
tar = (tar - min_f) / (max_f - min_f)

In [None]:
# Train `splits` independent models, each on its own random ~80/20 train/validation
# split (these are repeated hold-outs, not disjoint K-fold partitions), and average
# their de-normalised test predictions.
splits=5
predict = np.zeros((test_fea.shape[0],3))
for i in range(splits):
    # Release Keras/TensorFlow state left over from the previous model so memory
    # does not accumulate across iterations (gc.collect() alone does not do this).
    keras.backend.clear_session()
    mask = np.random.rand(len(tar))<0.8
    X_train, Y_train = fea[mask], tar[mask]
    X_valid, Y_valid = fea[~mask], tar[~mask]
    print(f"{i}-fold model training starting...")
    model = IndoorPrediction((batch_size,seq_len), params,len(bssids),emb_dim_bssid,emb_dim_rssi,bssids)
    model.compile(optimizer = tf.keras.optimizers.Adam(
                    learning_rate=5e-4, beta_1=0.9,
                    beta_2=0.999, epsilon=1e-07,
                    amsgrad=False,name='Adam'), loss=mpe_loss,metrics=[mpe_loss])
    earlystop = keras.callbacks.EarlyStopping(monitor='val_loss',patience=8)
    # ChangeModes rescales predictions to original units before scoring, so it must
    # receive the validation targets in original units too. `Y_valid` itself is
    # min-max normalised and is still what `fit` uses for the loss.
    Y_valid_raw = Y_valid*(max_f-min_f)+min_f
    changemode = ChangeModes(model,X_valid,Y_valid_raw,max_f,min_f)
    save_point = 'checkpoint.ckpt'
    model_checkpoint = keras.callbacks.ModelCheckpoint(save_point,monitor='val_loss',mode='min',
                                                       verbose=1,save_best_only=True,save_weights_only=True)
    reducelr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=0, mode='min')
    with timer():
        history = model.fit(X_train,Y_train,batch_size=batch_size,
                            validation_data = (X_valid,Y_valid),epochs=30,
                            callbacks=[earlystop,reducelr,changemode,model_checkpoint],verbose=0)
    # Restore the best weights saved during this run before predicting.
    model.load_weights(save_point)
    # Keep the last output position and map it back to original target units.
    Y_predict = model.predict(test_fea)[:,-1,:]*(max_f-min_f)+min_f
    predict += Y_predict
    gc.collect()
predict /=splits

validation with 19 entries 1 epoch:  
Epoch:0, validation loss: 0.08501642197370529, building_loss=21.83549779882712, location_loss = 169.73047401256605

validation with 9 entries 1 epoch:  
loss: 0.4561 - mpe_loss: 0.4561 Epoch:0,Est_lbscore:188.62509873283773 validation loss: 0.10268882662057877, building_loss=21.53930707612264, location_loss = 167.0857916567151

validation with 29 entries 1 epoch:  
loss: 0.4266 - mpe_loss: 0.4266 - val_loss: 0.1043 - val_mpe_loss: 0.1049
Epoch:0,Est_lbscore:193.9539121881643 validation loss: 0.10486969351768494, building_loss=22.04513628422682, location_loss = 171.90877590393754

In [None]:
# Submission file produced elsewhere ("simple-99-accurate-floor-model", per the
# path); only its 'floor' column is used in the next cell.
sub_other = pd.read_csv('../input/simple-99-accurate-floor-model/submission.csv')

In [None]:
# Take 'floor' from the external submission and x / y from the averaged model
# predictions. NOTE(review): this assumes `sub_other`, `predict` and `submission`
# share the same row order - confirm.
submission = submission.assign(
    floor=sub_other['floor'],
    x=predict[:, 0],
    y=predict[:, 1],
)

In [None]:
# Store the floor column as 32-bit integers.
submission = submission.astype({'floor': 'int32'})

In [None]:
# Write the final submission to the current working directory, without the index column.
submission.to_csv('submission.csv',index=False)