<a href="https://colab.research.google.com/github/kyochanpy/Kaggle_Indoor_Location_Navigation/blob/main/note_books/accurate_floor_blstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab VM; the data paths used later point under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install tensorflow_addons



In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from pathlib import Path
import glob
import pickle
import random
import os

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow.keras.layers as L
import tensorflow.keras.models as M
import tensorflow.keras.backend as K
import tensorflow_addons as tfa
from tensorflow_addons.layers import WeightNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping

Kouki's awesome preprocessing code is hidden below:

In [None]:
# options

# Presumably the number of cross-validation folds. NOTE(review): not referenced by any cell visible in this notebook.
N_SPLITS = 5

# Seed constant; in the visible cells it is only interpolated into the checkpoint file name.
SEED = 2021

NUM_FEATS = 20 # number of bssid_*/rssi_* column pairs used as model inputs; the tables hold 100 of each, but we don't need to use all of them

# Root folder (the mounted Google Drive) that holds the feature tables and receives the model checkpoint.
base_path = '/content/drive/MyDrive'

def set_seed(seed=42):
    """Seed the random number generators used by the notebook.

    Exports ``PYTHONHASHSEED``, seeds Python's ``random``, NumPy and
    TensorFlow, and installs a TF1-compat Keras session limited to a single
    intra-op and a single inter-op thread.

    Args:
        seed: Integer seed applied to every generator (default 42).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)

    single_thread_conf = tf.compat.v1.ConfigProto(
        intra_op_parallelism_threads=1,
        inter_op_parallelism_threads=1,
    )
    session = tf.compat.v1.Session(
        graph=tf.compat.v1.get_default_graph(),
        config=single_thread_conf,
    )
    tf.compat.v1.keras.backend.set_session(session)
    
def comp_metric(xhat, yhat, fhat, x, y, f):
    """Competition metric: mean of (planar distance + 15 * |floor error|).

    Args:
        xhat, yhat: Predicted x / y coordinates.
        fhat: Predicted floor numbers.
        x, y: Ground-truth x / y coordinates.
        f: Ground-truth floor numbers.

    Returns:
        The summed per-sample error divided by ``xhat.shape[0]``.
    """
    squared_dx = np.power(xhat - x, 2)
    squared_dy = np.power(yhat - y, 2)
    planar_error = np.sqrt(squared_dx + squared_dy)
    floor_penalty = 15 * np.abs(fhat - f)
    per_sample = planar_error + floor_penalty
    n_samples = xhat.shape[0]
    return per_sample.sum() / n_samples

# ---- load the pre-built WiFi feature tables -------------------------------
feature_dir = f"{base_path}/unified_wifi_0"
train_files = sorted(glob.glob(os.path.join(feature_dir, '*_train.csv')))
test_files = sorted(glob.glob(os.path.join(feature_dir, '*_test.csv')))
# Same location as before, but derived from base_path instead of a second hard-coded absolute path.
subm = pd.read_csv(f'{base_path}/sample_submission.csv', index_col=0)

# NOTE(review): pickle.load can execute arbitrary code; only load files you produced yourself.
with open(f'{feature_dir}/train_all_pkl', 'rb') as f:
    data = pickle.load(f)

with open(f'{feature_dir}/test_all_pkl', 'rb') as f:
    test_data = pickle.load(f)


# training target features

BSSID_FEATS = [f'bssid_{i}' for i in range(NUM_FEATS)]
RSSI_FEATS  = [f'rssi_{i}' for i in range(NUM_FEATS)]


# get numbers of bssids to embed them in a layer

def _bssid_vocab(frame, n_cols=100):
    """Collect the distinct values found in the first ``n_cols`` columns of ``frame``.

    Presumably these are the bssid_0 .. bssid_99 columns - TODO confirm the column layout.
    """
    vocab = set()
    for col in range(n_cols):
        vocab.update(frame.iloc[:, col].values.tolist())
    return vocab


train_bssids = _bssid_vocab(data)
print(f'BSSID TYPES: {len(train_bssids)}')

test_bssids = _bssid_vocab(test_data)
wifi_bssids_test = list(test_bssids)
print(f'BSSID TYPES: {len(test_bssids)}')

# Union of both vocabularies WITHOUT duplicates. The previous extend() counted
# every BSSID seen in both tables twice, so the size was only right by accident.
wifi_bssids = list(train_bssids | test_bssids)
# Encoded ids are shifted by +1 below (id 0 stays unused), so the largest id equals
# len(wifi_bssids); an embedding table therefore needs len(wifi_bssids) + 1 rows.
wifi_bssids_size = len(wifi_bssids) + 1

# preprocess

le = LabelEncoder()
le.fit(wifi_bssids)
le_site = LabelEncoder()
le_site.fit(data['site_id'])

ss = StandardScaler()
ss.fit(data.loc[:,RSSI_FEATS])

# Apply the identical encoding to the train and test tables.
# * The scaler is applied exactly once (it used to be applied twice per table).
# * Columns are replaced with frame[col] = ... rather than frame.loc[:, col] = ...,
#   because on recent pandas the .loc form writes in place and keeps the old dtype.
for frame in (data, test_data):
    frame[RSSI_FEATS] = ss.transform(frame[RSSI_FEATS])
    for col in BSSID_FEATS:
        frame[col] = le.transform(frame[col]) + 1
    frame['site_id'] = le_site.transform(frame['site_id'])


site_count = len(data['site_id'].unique())
data = data.reset_index(drop=True)


BSSID TYPES: 61143
BSSID TYPES: 33003


In [None]:
#FLOOR
def create_fmodel(input_data, num_classes=11):
    """Build and compile the floor-classification network.

    Three inputs are combined: an embedding of the BSSID ids, a dense
    projection of the RSSI values and a 2-d embedding of the site id. The
    concatenated vector is reshaped into a length-1 sequence and passed
    through two bidirectional LSTM layers before the softmax head.

    Reads the notebook-level names ``wifi_bssids_size``, ``site_count`` and
    ``NUM_FEATS``.

    Args:
        input_data: Sequence whose items 0 and 1 are the BSSID and RSSI
            tables; only their ``shape[1]`` is read, to size the input layers.
        num_classes: Width of the softmax output. Defaults to 11, the value
            that used to be hard-coded.

    Returns:
        A compiled Keras model mapping ``[bssid, rssi, site]`` to class
        probabilities (categorical cross-entropy loss, Adam optimiser).
    """
    # bssid feats
    n_bssid_cols = input_data[0].shape[1]

    input_embd_layer = L.Input(shape=(n_bssid_cols,))
    # NOTE(review): the ids fed here are label-encoded values shifted by +1, so
    # wifi_bssids_size must be larger than the biggest id - confirm where it is computed.
    x1 = L.Embedding(wifi_bssids_size, 64)(input_embd_layer)
    x1 = L.Flatten()(x1)

    # rssi feats
    n_rssi_cols = input_data[1].shape[1]

    # Explicit shape tuple, consistent with the other two Input layers.
    input_layer = L.Input(shape=(n_rssi_cols,))
    x2 = L.BatchNormalization()(input_layer)
    x2 = L.Dense(NUM_FEATS * 64, activation='relu')(x2)

    # site
    input_site_layer = L.Input(shape=(1,))
    x3 = L.Embedding(site_count, 2)(input_site_layer)
    x3 = L.Flatten()(x3)

    # main stream
    x = L.Concatenate(axis=1)([x1, x3, x2])

    # The recurrent layers need a (timesteps, features) input; use a single timestep.
    x = L.Reshape((1, -1))(x)
    x = L.BatchNormalization()(x)
    mod1 = L.LSTM(256, dropout=0.4, recurrent_dropout=0.3, return_sequences=True, activation='tanh')
    x = L.Bidirectional(mod1)(x)
    x = L.Bidirectional(L.LSTM(32, dropout=0.4, return_sequences=False, activation='relu'))(x)
    x = L.BatchNormalization()(x)
    x = L.Dense(16, activation='tanh')(x)

    output_layer_1 = L.Dense(num_classes, activation='softmax', name='floor')(x)

    model = M.Model([input_embd_layer, input_layer, input_site_layer],
                    [output_layer_1])

    # `lr` is a deprecated alias (ignored or rejected by newer Keras releases); `learning_rate` is the supported name.
    model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.001),
                  loss=tf.keras.losses.CategoricalCrossentropy(), metrics=['mse','accuracy'])

    return model




In [None]:
# Index the rows by path id while keeping 'path' as a regular column, so whole paths can be selected with .loc.
data = data.set_index('path', drop=False)

In [None]:
# One-hot encode the floor label: one column per distinct floor value, in sorted order.
# An explicit float dtype keeps the training targets independent of the pandas version
# (get_dummies returned uint8 before pandas 2.0 and returns bool since).
one_hot = pd.get_dummies(data['floor'], dtype='float32')

In [None]:
# Hold out 500 randomly chosen paths that the model never sees during training.
# With 500 paths each one is 0.2 % of the hold-out set, so the accuracy estimate
# has a resolution of roughly 0.2 % (about +/- 0.1 %).
all_paths = data.path.unique()
val_p_ind = pd.DataFrame(all_paths).sample(n=500, random_state=1).values.reshape((-1))

# Set membership instead of rebuilding and scanning a 500-element list for every path.
held_out_paths = set(val_p_ind.tolist())
t_idx = [p for p in all_paths.tolist() if p not in held_out_paths]

# data is indexed by path, so .loc with a list of path ids returns every row of those paths.
train_data = data.loc[t_idx]
X_ass_val = data.loc[val_p_ind]
len(t_idx), len(val_p_ind)

(10352, 500)

In [None]:
# Check there is no cross-contamination: no training row may belong to ANY held-out
# path (previously only one of the held-out paths was checked). Expect an empty frame.
train_data[train_data['path'].isin(val_p_ind)]

Unnamed: 0_level_0,bssid_0,bssid_1,bssid_2,bssid_3,bssid_4,bssid_5,bssid_6,bssid_7,bssid_8,bssid_9,bssid_10,bssid_11,bssid_12,bssid_13,bssid_14,bssid_15,bssid_16,bssid_17,bssid_18,bssid_19,bssid_20,bssid_21,bssid_22,bssid_23,bssid_24,bssid_25,bssid_26,bssid_27,bssid_28,bssid_29,bssid_30,bssid_31,bssid_32,bssid_33,bssid_34,bssid_35,bssid_36,bssid_37,bssid_38,bssid_39,...,rssi_65,rssi_66,rssi_67,rssi_68,rssi_69,rssi_70,rssi_71,rssi_72,rssi_73,rssi_74,rssi_75,rssi_76,rssi_77,rssi_78,rssi_79,rssi_80,rssi_81,rssi_82,rssi_83,rssi_84,rssi_85,rssi_86,rssi_87,rssi_88,rssi_89,rssi_90,rssi_91,rssi_92,rssi_93,rssi_94,rssi_95,rssi_96,rssi_97,rssi_98,rssi_99,x,y,floor,path,site_id
path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1


In [10]:
FEATURE_COLS = BSSID_FEATS + RSSI_FEATS + ['site_id']
checkpoint_path = f'{base_path}/RNN_{SEED}_.hdf5'


def to_model_inputs(frame):
    """Split a feature table into the three model inputs: [bssid ids, rssi values, site id]."""
    return [frame.loc[:, BSSID_FEATS], frame.loc[:, RSSI_FEATS], frame.loc[:, 'site_id']]


# Targets are selected with the same path lists as the features, so rows stay aligned.
y_trainf = one_hot.loc[t_idx, :]
y_validf = one_hot.loc[val_p_ind, :]
X_train = train_data.loc[:, FEATURE_COLS]
X_valid = X_ass_val.loc[:, FEATURE_COLS]

fmodel = create_fmodel(to_model_inputs(X_train))
fmodel.fit(
    to_model_inputs(X_train), y_trainf,
    validation_data=(to_model_inputs(X_valid), y_validf),
    batch_size=128, epochs=100,
    shuffle=True,
    callbacks=[
        ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, min_delta=1e-4, mode='min'),
        ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=True, mode='min'),
        EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=5, mode='min', baseline=None, restore_best_weights=True),
    ],
)

# Reload the best checkpoint (lowest val_loss) before predicting.
fmodel.load_weights(checkpoint_path)

# Map the arg-max column position back to the real floor number through the one-hot
# column labels. This replaces the former "argmax - 2", which was only correct when
# the floors are exactly the contiguous range starting at -2.
floor_labels = one_hot.columns.to_numpy()

fvalid = floor_labels[np.argmax(fmodel.predict(to_model_inputs(X_ass_val)), axis=1)]
pred = floor_labels[np.argmax(fmodel.predict(to_model_inputs(test_data)), axis=1)]

ass_val_floors = fvalid
floors = pred

# Second validation: row-level accuracy on the held-out paths, which also checks the decoding above.
accuracy_score(X_ass_val['floor'], ass_val_floors)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 7/100
Epoch 8/100


0.9965006729475101

## Error Analysis 

In [12]:
# Error analysis: flag every misclassified row, then report how many rows are wrong
# and how many distinct paths those rows belong to.
floor_error = X_ass_val['floor'] - ass_val_floors
X_ass_val['wrong'] = floor_error != 0
wrongs = X_ass_val[X_ass_val['wrong']]
rights = X_ass_val[~X_ass_val['wrong']]
wrongs.shape, wrongs['path'].unique().shape

((39, 206), (22,))

**Is there only one floor per path?**
I think this is a given, since the original data is organised as path files inside the floor folders;
I double-checked that this assumption holds.

So I check whether the paths I got wrong were ever predicted correctly:
for each of them, I count how many times that path was predicted right.

In [13]:
# For every path that has at least one wrong AND at least one right row, build the tuple
# (number of rows predicted correctly, number of rows predicted incorrectly).
# Rows per path are counted once up front instead of re-scanning both frames for every path.
right_counts = rights['path'].value_counts()
wrong_counts = wrongs['path'].value_counts()
[(int(right_counts[p]), int(wrong_counts[p]))
 for p in wrongs['path'].unique() if p in right_counts.index]

[(195, 2),
 (74, 1),
 (22, 2),
 (30, 1),
 (10, 2),
 (11, 1),
 (19, 1),
 (2, 1),
 (27, 3),
 (9, 2),
 (48, 1),
 (19, 1),
 (10, 2),
 (11, 1),
 (16, 3),
 (69, 1),
 (6, 1),
 (89, 1),
 (42, 1),
 (3, 8)]

As you can see if you unhide the result above, taking the most frequent predicted floor within each path would avoid many errors.

In [14]:
# Re-elaboration, step 1: attach the row-level prediction and drop the path index,
# so that 'path' is only a column when grouping and merging afterwards.
X_ass_val = X_ass_val.assign(p_floor=ass_val_floors).reset_index(drop=True)

def mode(a):
    '''Return the most frequent ``p_floor`` value of the group ``a``.

    ``value_counts`` sorts by descending frequency, so the first index entry
    is the mode. Reading it from the index works on every pandas version,
    unlike the earlier ``reset_index()['index']`` lookup, which raises
    ``KeyError`` on pandas >= 2.0 because the reset column is no longer
    called ``'index'``.
    '''
    counts = a['p_floor'].value_counts()
    return counts.index[0]

# One row per path, holding the most frequent predicted floor of that path.
per_path_mode = X_ass_val.groupby('path').apply(mode)
df = per_path_mode.to_frame(name='blended_floor_pred')

Checking the improvement from the post-processing

In [15]:
# Broadcast the per-path majority floor back onto every row and re-score.
# Dropping a stale 'blended_floor_pred' first keeps the cell re-runnable: a second
# merge would otherwise create *_x / *_y columns and break the lookup below.
X_ass_val = (
    X_ass_val
    .drop(columns='blended_floor_pred', errors='ignore')
    .merge(df, how='left', on='path')
)
accuracy_score(X_ass_val['floor'], X_ass_val['blended_floor_pred'])

0.9987438313144908

I can round it down (conservatively) to 99.80 %. The rounding reflects the size of the validation set: with 500 held-out paths, the resolution of the estimate is 0.2 %.

Satisfactory, so do it on the test data too and submit.

In [16]:
# 'site_path_timestamp' appears to be "<site>_<path>_<timestamp>"; the second token is the path id.
# (A discarded expression that split the same column three more times was removed.)
test_data['path'] = test_data['site_path_timestamp'].str.split(pat='_', n=-1, expand=True)[1]

# Row-level floor prediction from the model.
test_data['p_floor'] = pred

# Re-elaboration: take the most frequent (mode, not median) predicted floor per path,
# using the helpers defined below.
def modee1(a):
    """Return the distinct values of the group's ``path`` column, in order of first appearance."""
    path_column = a['path']
    return path_column.unique()
def modee2(a):
    """Return the most frequent ``p_floor`` value of the group ``a``.

    ``value_counts`` sorts by descending frequency, so the first index entry
    is the mode. The earlier ``reset_index()['index']`` lookup raises
    ``KeyError`` on pandas >= 2.0, where the reset column is no longer
    called ``'index'``.
    """
    counts = a['p_floor'].value_counts()
    return counts.index[0]

# One row per test path with its majority predicted floor. Only the 'p_floor' column is
# handed to the aggregation, which is all modee2 reads.
dft = test_data.groupby('path')[['p_floor']].apply(modee2).to_frame(name='my_b_floor_pred')

# Broadcast the per-path floor back to every row. Dropping a stale 'my_b_floor_pred'
# first keeps the cell re-runnable (a second merge would create *_x / *_y columns).
test_data = (
    test_data
    .drop(columns='my_b_floor_pred', errors='ignore')
    .merge(dft, how='left', on='path')
)


In [20]:
# Inspect the test table; 'p_floor' (row-level) and 'my_b_floor_pred' (per-path majority) are the last columns.
test_data

Unnamed: 0,bssid_0,bssid_1,bssid_2,bssid_3,bssid_4,bssid_5,bssid_6,bssid_7,bssid_8,bssid_9,bssid_10,bssid_11,bssid_12,bssid_13,bssid_14,bssid_15,bssid_16,bssid_17,bssid_18,bssid_19,bssid_20,bssid_21,bssid_22,bssid_23,bssid_24,bssid_25,bssid_26,bssid_27,bssid_28,bssid_29,bssid_30,bssid_31,bssid_32,bssid_33,bssid_34,bssid_35,bssid_36,bssid_37,bssid_38,bssid_39,...,rssi_65,rssi_66,rssi_67,rssi_68,rssi_69,rssi_70,rssi_71,rssi_72,rssi_73,rssi_74,rssi_75,rssi_76,rssi_77,rssi_78,rssi_79,rssi_80,rssi_81,rssi_82,rssi_83,rssi_84,rssi_85,rssi_86,rssi_87,rssi_88,rssi_89,rssi_90,rssi_91,rssi_92,rssi_93,rssi_94,rssi_95,rssi_96,rssi_97,rssi_98,rssi_99,site_path_timestamp,site_id,path,p_floor,my_b_floor_pred
0,57088,28808,12026,42651,605,42637,25970,31765,51717,21929,4783,51525,43242,47033,52538,45449,47665,47991,55579,56031,f2eb513d4c87747d0e4bc8c3bbf12951b3819678,f64c13fd10a07bca1bf2b7bd7a80630632ce62c9,000840e5c600de293cea57f13326f273c86c3988,6915ad24a2edf8047f749233e19e9853f5dc17fd,15d53b7189ffbd7c6010c388a9ccea417d4f28ee,de53ffe7e3c71c9ed5c845fa50e0521efa5f3685,cbc6658be86ed0a5f49e670dcdb61924eee2ba0a,0c75aad30012f215ae9377700feb44ee1ce42b88,39e2a4fb550a6b4a0feb5f0ff7b07754b1e27eaa,9ea330be4787b838d931c30f76aa9eb4ab69367e,01da12eb27c6e37d03d691c31b40591a724db4a2,326e159e0dbace167726287290620e11367bb15a,15705627b6d4a0995188c4a97eb3bb604abcc401,85e6e3acc4f0005b00f70931e3f268bdf650956c,44632b2065e713e44db3479c9cea158b0d7d7b02,69b30a314f33a8bafe26c9543bac3026b6171e18,2de2c0191117dda80de6280c787d8482899b2b2d,6a620eb3b709557db95591f6d22ba6a06754f606,1f6f9a43ff74644160d47d0196c72ca0b3809991,defb1317debff80bbec8fcdb53132edf3988ba93,...,-58,-58,-58,-58,-58,-59,-59,-59,-59,-59,-59,-59,-59,-59,-59,-59,-59,-59,-60,-60,-60,-60,-60,-60,-60,-60,-60,-60,-60,-61,-61,-61,-61,-61,-61,5a0546857ecc773753327266_046cfa46be49fc1083481...,0,046cfa46be49fc10834815c6,0,0
1,7013,45449,51525,4783,51717,47033,43242,42637,25970,28808,21929,42651,48743,55579,14613,5404,47991,12026,38179,20969,7411227db2cf06ed13a7f7c1edd038e1b544ede2,68127b819a86c95b0847a170ce53a91702f67969,57e252d0e2004b53312413c9c9283fa1927cffef,3bb3d7d8cec5d6122d7f39b1121d1239fac5cd54,8464ea586ee5479e1250f938d7c01e9bc68cefe8,f64c13fd10a07bca1bf2b7bd7a80630632ce62c9,6915ad24a2edf8047f749233e19e9853f5dc17fd,de53ffe7e3c71c9ed5c845fa50e0521efa5f3685,15d53b7189ffbd7c6010c388a9ccea417d4f28ee,000840e5c600de293cea57f13326f273c86c3988,6a620eb3b709557db95591f6d22ba6a06754f606,207781229d24ce81fcf1acf4edb187e9098ffeb4,c729e2e4f5a2888583cfebcd98b3178023f58b8e,dbd15d19abe3f9062b14bc403a499a66aa46cf26,1d52ac70bfa8eae028e7b0d05ef5ca2bfcf513e9,fe3461438b7a21c85a42ffd76030ece52e11dd7d,d2402d8ef8ed4ba4933a76cce532c8774f589d32,4ec476a535783b1129828738a517f58f8c275ae0,ea950fdfe174caa330ad082bc5099fda7bc8a236,61527abbf490712477ae1d79dfe928e2be45402a,...,-59,-59,-59,-59,-59,-60,-60,-60,-60,-60,-60,-60,-60,-60,-60,-60,-60,-60,-60,-61,-61,-61,-61,-61,-61,-61,-61,-61,-61,-61,-61,-61,-61,-61,-62,5a0546857ecc773753327266_046cfa46be49fc1083481...,0,046cfa46be49fc10834815c6,0,0
2,42651,25970,28808,51717,7013,21929,42637,43242,45449,51525,4783,25050,47033,27923,26533,14613,14369,5404,34248,53810,9f6570acae53f6cdb4e7713fb24e3085c228dffa,c55c9a0ed49b5fd4be47a865f70945690139cd8e,71177bb213b665eab29c7f0c13810fb43e981127,6915ad24a2edf8047f749233e19e9853f5dc17fd,de53ffe7e3c71c9ed5c845fa50e0521efa5f3685,f64c13fd10a07bca1bf2b7bd7a80630632ce62c9,15d53b7189ffbd7c6010c388a9ccea417d4f28ee,000840e5c600de293cea57f13326f273c86c3988,e85bfadc24ead00bb0d80a56a2b24f5baee7da4a,c729e2e4f5a2888583cfebcd98b3178023f58b8e,7129f110688db020946105b359cae2e59338135b,dbd15d19abe3f9062b14bc403a499a66aa46cf26,4ec476a535783b1129828738a517f58f8c275ae0,12911a64fecf13f2e9fb0aaed554621e3b0bacde,57e252d0e2004b53312413c9c9283fa1927cffef,d5dad1fcdae9e773ede884b3b4d781d5ee1ec90e,662791f44cd61d0426634cf093bf0ff1bfd88c2c,8464ea586ee5479e1250f938d7c01e9bc68cefe8,60210e8c383d286113182241bdf78acb3697b294,8bb2bb43f7ca5db5e821edd61514d1a42a103039,...,-57,-57,-57,-57,-57,-57,-57,-58,-58,-58,-58,-58,-58,-58,-58,-58,-58,-59,-59,-59,-59,-59,-60,-60,-60,-60,-60,-61,-61,-61,-61,-61,-62,-62,-62,5a0546857ecc773753327266_046cfa46be49fc1083481...,0,046cfa46be49fc10834815c6,0,0
3,53149,7013,27200,5286,7799,58876,25273,28261,4484,40455,45234,47665,9,28808,42651,21929,51717,42637,4339,25970,e85bfadc24ead00bb0d80a56a2b24f5baee7da4a,1d52ac70bfa8eae028e7b0d05ef5ca2bfcf513e9,8bb2bb43f7ca5db5e821edd61514d1a42a103039,d2402d8ef8ed4ba4933a76cce532c8774f589d32,4067027290768f1eb2e7cf3a538dec9ca379e01f,8464ea586ee5479e1250f938d7c01e9bc68cefe8,3ac3aff784fd9cefbc9bfd63e1574fe8ced37b68,4ec476a535783b1129828738a517f58f8c275ae0,2bb807107c76d4339fb55c9da49ecfab1f9120ca,f2eb513d4c87747d0e4bc8c3bbf12951b3819678,9814a3bcf1be199c11849db9640caa663a534a2b,662791f44cd61d0426634cf093bf0ff1bfd88c2c,57e252d0e2004b53312413c9c9283fa1927cffef,d5dad1fcdae9e773ede884b3b4d781d5ee1ec90e,e31c1fac72abf787780bd78ea261bc1fc02a09c9,29fbb6e4c43ceca86657bd242fd67eb5b97b97ba,815627a3e3ef0cf88ae51cd0376df58080f09983,1a11be1c59c0057a3468722ca654f596fc846f7f,fc77794b1f306779e6d47c93b26bca7b229a6da9,1f2eb6a4fe21ce42ff5d8184f78bc0e97f6a121f,...,-58,-58,-58,-58,-58,-58,-58,-58,-58,-58,-58,-58,-58,-59,-59,-59,-59,-59,-59,-59,-59,-59,-59,-59,-59,-59,-59,-60,-60,-60,-60,-61,-61,-61,-61,5a0546857ecc773753327266_046cfa46be49fc1083481...,0,046cfa46be49fc10834815c6,0,0
4,7013,40455,25970,24626,5286,47665,58876,9,25273,28261,51174,36482,21929,53149,58054,45234,7799,28808,4339,21140,d84cce12fbfba61bf930123050f61a11e2a29310,4067027290768f1eb2e7cf3a538dec9ca379e01f,b26914599f6d9ba16b43975394e1eeb9d82f4bab,1d52ac70bfa8eae028e7b0d05ef5ca2bfcf513e9,7129f110688db020946105b359cae2e59338135b,12911a64fecf13f2e9fb0aaed554621e3b0bacde,8bb2bb43f7ca5db5e821edd61514d1a42a103039,2bb807107c76d4339fb55c9da49ecfab1f9120ca,3ac3aff784fd9cefbc9bfd63e1574fe8ced37b68,4ec476a535783b1129828738a517f58f8c275ae0,8464ea586ee5479e1250f938d7c01e9bc68cefe8,d2402d8ef8ed4ba4933a76cce532c8774f589d32,ea950fdfe174caa330ad082bc5099fda7bc8a236,61527abbf490712477ae1d79dfe928e2be45402a,57290977ad70b322595e54de3623f48db7e2dc2b,fe3461438b7a21c85a42ffd76030ece52e11dd7d,60210e8c383d286113182241bdf78acb3697b294,c89453b4f0bde9488ef373a40d8821f4bf81beae,2ad2da7d639746b33d7a6118afad5a2964b58ce5,323607d8444900d64151ee06d164738ac727bbce,...,-60,-60,-60,-60,-61,-61,-61,-61,-61,-61,-61,-61,-62,-62,-62,-62,-62,-62,-62,-62,-63,-63,-63,-63,-63,-63,-63,-63,-63,-63,-63,-63,-63,-63,-64,5a0546857ecc773753327266_046cfa46be49fc1083481...,0,046cfa46be49fc10834815c6,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10128,6389,23703,39163,36367,9056,14910,1188,45915,58421,25140,26093,13477,4986,11594,46915,28574,7543,60249,6981,60319,d215f0885767402c79148e12d4e94d019b8a4643,70ba49e547cfd661d753771c51b313136ee26c6c,5f8e96db3b02980da615fb031ff1e2170f71d8cf,041f1a0393b0ec06df8980702066140da4fabff0,6e1befd47ba1b28b0e80d52fd926bcc56910116e,fb7c3a7ce7cde8691231d9cdb7e4c864fcdff264,7f38acb3af554f583afcf3718810e9d08199ed6f,c495c09e6557d68ab48e40a173be15c31cb1dde4,8657d2b289c00b907bd5c78f8c76b48c420cbd9c,3cae91a59f3f88a52c8bf31276dbb2ff39c2143a,eda7078bf38a44e39fbec968d7cbe00ec17c18ec,66153ad6defa02e7a1e6442a1ad6849b6cc79b45,1633d5d4624672046db8521a732dfc8c4496f24d,4fd6ed0553c15304b1b33ac2758fe764d1772ddf,eb39f11a8fc846a8f40f7f8bc35d5d69d12bc0a8,913b8d51f071b4958d4240299e516af3273a8c20,a16d1f58ee965d53d64c052e742bbf951b7fc993,2df3775fd57920d44e033b487747018b473650b3,9535b2edcc7e240e68bf87e205df3208a483c30b,1c55b072c4a0e0297f74d60e4ea4ed335bd48d88,...,-77,-77,-77,-77,-77,-77,-78,-78,-78,-79,-79,-79,-80,-80,-80,-80,-81,-81,-82,-82,-82,-83,-83,-83,-83,-83,-83,-83,-83,-83,-83,-84,-84,-84,-84,5dc8cea7659e181adb076a3f_fd64de8c4a2fc5ebb0e9f...,23,fd64de8c4a2fc5ebb0e9f412,5,5
10129,14910,23703,6389,9056,39163,4986,36367,26093,60249,5360,30527,25140,13477,1188,23081,50277,45915,19175,6981,2414,f46c9fb38056b81582c026df6cc2237ae8080cb6,041f1a0393b0ec06df8980702066140da4fabff0,8657d2b289c00b907bd5c78f8c76b48c420cbd9c,70ba49e547cfd661d753771c51b313136ee26c6c,913b8d51f071b4958d4240299e516af3273a8c20,c495c09e6557d68ab48e40a173be15c31cb1dde4,fb7c3a7ce7cde8691231d9cdb7e4c864fcdff264,4462c4db35f3ff1e6fe2efa9e0cf5382444d538a,6b8079e24628f88b8822609e9d6fdf0dd1b16fed,1964b16b7dc8e96360a42cb5d4b0bda18e4bf209,087f7e0e27ebe09ea5cb01963ba59aa1d5f11da0,2df3775fd57920d44e033b487747018b473650b3,30737b26bc475ba105a948f18d7bf725b8fb9828,b6d081cf13776bfa566f563f7c25bc51e5f5e0e2,c52216c1e7af304a398d52bb6e2d1a241f2a0cbb,b73ae5f1ab418a109976cc023ea9598310ee522c,64bffef0e423c5f0ae6da6259c113aadb978760e,3cae91a59f3f88a52c8bf31276dbb2ff39c2143a,c40dbeb42d77bc8e2da9997037353beb7f11257d,eb39f11a8fc846a8f40f7f8bc35d5d69d12bc0a8,...,-79,-80,-80,-80,-80,-80,-80,-81,-82,-82,-82,-82,-83,-83,-83,-83,-83,-83,-83,-83,-83,-83,-83,-83,-84,-84,-84,-84,-85,-85,-85,-85,-85,-85,-85,5dc8cea7659e181adb076a3f_fd64de8c4a2fc5ebb0e9f...,23,fd64de8c4a2fc5ebb0e9f412,5,5
10130,14910,6135,60249,45915,38308,58421,35720,23081,4986,36367,9056,25140,13477,39163,5360,38077,6981,19175,23703,2414,8657d2b289c00b907bd5c78f8c76b48c420cbd9c,cfc1faa22c47f307cda9d4031034546b9210e5cb,041f1a0393b0ec06df8980702066140da4fabff0,6a7f39e96642291619911c36815381c5892a521e,fb7c3a7ce7cde8691231d9cdb7e4c864fcdff264,6c63e704ac41957efd2b0216959ad5f61604433a,7f38acb3af554f583afcf3718810e9d08199ed6f,c495c09e6557d68ab48e40a173be15c31cb1dde4,913b8d51f071b4958d4240299e516af3273a8c20,1c55b072c4a0e0297f74d60e4ea4ed335bd48d88,6e1befd47ba1b28b0e80d52fd926bcc56910116e,d215f0885767402c79148e12d4e94d019b8a4643,61e508956e090d504c8f1f17585e06c60ea60512,3cae91a59f3f88a52c8bf31276dbb2ff39c2143a,4462c4db35f3ff1e6fe2efa9e0cf5382444d538a,6c130d861527e6148a4e827e5e640c47ca3920ad,1e10813ddf956ed010e7b0e95dbff91cb184f042,b73ae5f1ab418a109976cc023ea9598310ee522c,1964b16b7dc8e96360a42cb5d4b0bda18e4bf209,30737b26bc475ba105a948f18d7bf725b8fb9828,...,-76,-76,-76,-76,-76,-77,-77,-78,-78,-78,-78,-79,-79,-79,-79,-80,-81,-81,-82,-82,-82,-82,-82,-82,-83,-83,-83,-83,-83,-83,-84,-84,-84,-84,-84,5dc8cea7659e181adb076a3f_fd64de8c4a2fc5ebb0e9f...,23,fd64de8c4a2fc5ebb0e9f412,5,5
10131,6135,14910,34883,44734,23081,60249,45915,58421,39163,25140,31026,19175,23703,14608,9056,38077,37589,4986,32216,38308,6c63e704ac41957efd2b0216959ad5f61604433a,94f82b58643e3dfcf699031634e78ba32fedb1da,1c55b072c4a0e0297f74d60e4ea4ed335bd48d88,c495c09e6557d68ab48e40a173be15c31cb1dde4,10d3ccefaac2be5e207c050204407d2d21adc127,4462c4db35f3ff1e6fe2efa9e0cf5382444d538a,979bfb4594c7f3943bfb5e8a41fe5912feb882f4,8b9f2bc2e71796c8557f6869e94a464474ccb493,1964b16b7dc8e96360a42cb5d4b0bda18e4bf209,5c1e20deecdfecce6a2d51df985fe75791d0938f,7e7a43fb698bede95983e96da0f1db4fbdd3a883,6b8079e24628f88b8822609e9d6fdf0dd1b16fed,bdfd804aa49da8c533daf5b4b316c1e3e704cb4b,770921ad44e63e943c84350d70cb2ce99e7b4339,6c130d861527e6148a4e827e5e640c47ca3920ad,b6e8c7f60d8c353285c801ba43fdabc7a8e14504,7f38acb3af554f583afcf3718810e9d08199ed6f,c52216c1e7af304a398d52bb6e2d1a241f2a0cbb,f45d23ca95db9b4ef0fef9923d7024b57128ab5b,b6d081cf13776bfa566f563f7c25bc51e5f5e0e2,...,-76,-76,-76,-76,-77,-77,-77,-77,-78,-78,-78,-78,-78,-78,-78,-78,-79,-79,-79,-79,-79,-79,-80,-80,-80,-80,-80,-80,-80,-81,-81,-81,-81,-82,-82,5dc8cea7659e181adb076a3f_fd64de8c4a2fc5ebb0e9f...,23,fd64de8c4a2fc5ebb0e9f412,5,5


In [21]:
# Start from K's earlier submission and replace only its floor column, to see whether
# the leaderboard score improves.
sub = pd.read_csv('/content/submission_lstm_08_before_post (1).csv')
# NOTE(review): this assignment aligns on the row index, so it assumes both frames list
# the rows in the same order - confirm.
sub['floor'] = test_data['my_b_floor_pred']
sub = sub.set_index('site_path_timestamp')
sub.to_csv('submission_floor_accurate.csv')

See whether the score improves when these floor predictions are substituted in.
Unfortunately, the public leaderboard score does not change in the visible decimal places, but what about the private one?

Check whether the predictions actually differ:

In [22]:
# Reload the reference submission and report the share of rows whose floor differs
# from the per-path majority prediction.
sub = pd.read_csv('/content/submission_lstm_08_before_post (1).csv')
floor_delta = test_data['my_b_floor_pred'].sub(sub['floor'])
changed_pct = floor_delta.ne(0).mean() * 100
print('the predictions differ on {} %'.format(changed_pct))

the predictions differ on 2.2402052699101946 %


That's it Folks 

Thank you for reading all of it, let me know your thoughts, insights or suggestions. 

