# Libraries
1. [filterpy](https://filterpy.readthedocs.io/en/latest/kalman/UnscentedKalmanFilter.html)

In [1]:
import numpy as np
import pandas as pd
import random
from glob import glob
import os
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from pathlib import Path
import plotly.express as px
import seaborn as sns

import geopy
import pymap3d as pm
from filterpy.kalman import UnscentedKalmanFilter, MerweScaledSigmaPoints

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import torchsummary
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import warnings
warnings.filterwarnings(action='ignore')

# Hyper Parameters

In [2]:
# Seed every random number generator the notebook relies on so that a
# "Restart & Run All" reproduces the same results.
SEED = 1990
random.seed(SEED)
np.random.seed(SEED)
# PyTorch keeps its own generator; without seeding it the network's initial
# weights and dropout masks change from run to run.
torch.manual_seed(SEED)

# Useful functions

In [3]:
def calc_haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in metres between two points given in degrees.

    Works element-wise: every argument may be a scalar, a NumPy array or a
    pandas Series, as long as the shapes broadcast against each other.

    Parameters
    ----------
    lat1, lon1 : latitude / longitude of the first point(s), in degrees.
    lat2, lon2 : latitude / longitude of the second point(s), in degrees.

    Returns
    -------
    Distance(s) in metres on a sphere of radius 6,367,000 m.
    """
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2.0)**2

    # Mathematically 0 <= a <= 1, but rounding can push it marginally outside
    # that range (e.g. for near-antipodal points), which would make arcsin
    # return NaN. Clamp before taking the root.
    a = np.clip(a, 0.0, 1.0)

    c = 2 * np.arcsin(np.sqrt(a))
    dist = 6_367_000 * c
    return dist

In [4]:
def check_score(input_df: pd.DataFrame) -> "tuple[pd.DataFrame, float, float]":
    """Score predicted positions against the ground-truth columns.

    Parameters
    ----------
    input_df : frame with the columns ``latDeg``, ``lngDeg`` (prediction),
        ``t_latDeg``, ``t_lngDeg`` (ground truth) and ``phone``.

    Returns
    -------
    output_df : copy of ``input_df`` with an added ``meter`` column holding
        the haversine error of every row.
    meter_score : mean of the ``meter`` column.
    score : mean over all phones of the 50th and 95th percentile of that
        phone's ``meter`` values; NaN when the frame contains no phone.
    """
    output_df = input_df.copy()

    # calc_haversine is built from NumPy ufuncs, so whole columns can be
    # passed at once instead of paying for a Python call per row.
    output_df['meter'] = calc_haversine(
        input_df['latDeg'].to_numpy(dtype=float),
        input_df['lngDeg'].to_numpy(dtype=float),
        input_df['t_latDeg'].to_numpy(dtype=float),
        input_df['t_lngDeg'].to_numpy(dtype=float),
    )

    meter_score = output_df['meter'].mean()

    # sort=False keeps phones in order of first appearance.
    scores = []
    for _, phone_meters in output_df.groupby('phone', sort=False)['meter']:
        scores.extend(np.percentile(phone_meters, [50, 95]))

    # An empty frame has nothing to average; report NaN instead of dividing by zero.
    score = sum(scores) / len(scores) if scores else float('nan')

    return output_df, meter_score, score

In [5]:
ell_wgs84 = pm.Ellipsoid()
def calc_geo2enu(df:pd.DataFrame)->pd.DataFrame:
    """Return a copy of ``df`` with local ENU coordinates in ``x``, ``y``, ``z``.

    The geodetic position in the first row of ``df`` (``latDeg``, ``lngDeg``,
    ``heightAboveWgs84EllipsoidM``) is used as the origin of the ENU frame.
    """
    geodetic = df[['latDeg', 'lngDeg', 'heightAboveWgs84EllipsoidM']].to_numpy()
    lat, lon, height = geodetic[:, 0], geodetic[:, 1], geodetic[:, 2]

    # Element 0 of each column is the reference point of the conversion.
    enu = pm.geodetic2enu(lat, lon, height, lat[0], lon[0], height[0], ell=ell_wgs84)

    converted = df.copy()
    for column, values in zip(('x', 'y', 'z'), (enu[0], enu[1], enu[2])):
        converted[column] = values
    return converted

def calc_enu2geo(df:pd.DataFrame)->pd.DataFrame:
    """Return a copy of ``df`` whose geodetic columns are rebuilt from ``x``, ``y``, ``z``.

    The ENU origin is the geodetic position stored in the first row of ``df``
    (``latDeg``, ``lngDeg``, ``heightAboveWgs84EllipsoidM``) before they are
    overwritten in the returned copy.
    """
    local = df[['x', 'y', 'z']].to_numpy()
    origin = df[['latDeg', 'lngDeg', 'heightAboveWgs84EllipsoidM']].to_numpy()[0]

    geodetic = pm.enu2geodetic(
        local[:, 0], local[:, 1], local[:, 2],
        origin[0], origin[1], origin[2],
        ell=ell_wgs84, deg=True,
    )

    converted = df.copy()
    targets = ('latDeg', 'lngDeg', 'heightAboveWgs84EllipsoidM')
    for column, values in zip(targets, (geodetic[0], geodetic[1], geodetic[2])):
        converted[column] = values
    return converted

# Data

In [6]:
# Location of the pre-extracted competition data.
data_dir = Path("../input/google-smartphone-decimeter-challenge")

# NOTE: pickle files execute arbitrary code on load; only read files you trust.
train_pickle = data_dir / "gsdc_extract_train.pkl.gzip"
test_pickle = data_dir / "gsdc_extract_test.pkl.gzip"
df_train = pd.read_pickle(str(train_pickle))
df_test = pd.read_pickle(str(test_pickle))

In [7]:
# Pick one phone trace at random and look at its first rows.
phones = df_train['phone'].unique()
# random.randint includes BOTH endpoints, so randint(0, len(phones)) could
# return len(phones) and index one past the end; randrange excludes the stop.
phone = phones[random.randrange(len(phones))]
# reset_index(drop=True) discards the old index directly instead of first
# materialising it as an 'index' column and dropping that again.
df_sample = df_train[df_train['phone'] == phone].copy().reset_index(drop=True)
print(df_sample.shape)
df_sample.head()

(1746, 148)


Unnamed: 0,collectionName,phoneName,millisSinceGpsEpoch,latDeg,lngDeg,heightAboveWgs84EllipsoidM,phone,timeSinceFirstFixSeconds,hDop,vDop,...,GPS_L1,GPS_L5,GAL_E1,GAL_E5A,GLO_G1,BDS_B1I,BDS_B1C,BDS_B2A,QZS_J1,QZS_J5
0,2020-05-14-US-MTV-1,Pixel4XLModded,1273529466449,37.423574,-122.094137,-33.2,2020-05-14-US-MTV-1_Pixel4XLModded,554.45,1.2,0.0,...,0,0,0,0,0,0,0,0,0,0
1,2020-05-14-US-MTV-1,Pixel4XLModded,1273529467449,37.423573,-122.094153,-33.92,2020-05-14-US-MTV-1_Pixel4XLModded,555.45,1.2,0.0,...,0,0,0,0,0,0,0,0,0,0
2,2020-05-14-US-MTV-1,Pixel4XLModded,1273529468449,37.423578,-122.094148,-33.33,2020-05-14-US-MTV-1_Pixel4XLModded,556.45,1.2,0.0,...,0,0,0,0,0,0,0,0,0,0
3,2020-05-14-US-MTV-1,Pixel4XLModded,1273529469449,37.423573,-122.09415,-32.85,2020-05-14-US-MTV-1_Pixel4XLModded,557.45,1.2,0.0,...,0,0,0,0,0,0,0,0,0,0
4,2020-05-14-US-MTV-1,Pixel4XLModded,1273529470449,37.423573,-122.094147,-31.26,2020-05-14-US-MTV-1_Pixel4XLModded,558.45,1.2,0.0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
# Replace every missing value with 0 in both data sets.
df_train = df_train.fillna(0)
df_test = df_test.fillna(0)

In [9]:
# List the available columns, one name per line.
for column_name in df_train.columns:
    print(column_name)

collectionName
phoneName
millisSinceGpsEpoch
latDeg
lngDeg
heightAboveWgs84EllipsoidM
phone
timeSinceFirstFixSeconds
hDop
vDop
speedMps
courseDegree
t_latDeg
t_lngDeg
t_heightAboveWgs84EllipsoidM
constellationType
svid
signalType
receivedSvTimeInGpsNanos
xSatPosM
ySatPosM
zSatPosM
xSatVelMps
ySatVelMps
zSatVelMps
satClkBiasM
satClkDriftMps
rawPrM
rawPrUncM
isrbM
ionoDelayM
tropoDelayM
utcTimeMillis
elapsedRealtimeNanos
yawDeg
rollDeg
pitchDeg
utcTimeMillis_Status
SignalCount
SignalIndex
ConstellationType
Svid
CarrierFrequencyHz
Cn0DbHz
AzimuthDegrees
ElevationDegrees
UsedInFix
HasAlmanacData
HasEphemerisData
BasebandCn0DbHz
utcTimeMillis_UncalMag
elapsedRealtimeNanos_UncalMag
UncalMagXMicroT
UncalMagYMicroT
UncalMagZMicroT
BiasXMicroT
BiasYMicroT
BiasZMicroT
utcTimeMillis_UncalAccel
elapsedRealtimeNanos_UncalAccel
UncalAccelXMps2
UncalAccelYMps2
UncalAccelZMps2
BiasXMps2
BiasYMps2
BiasZMps2
utcTimeMillis_UncalGyro
elapsedRealtimeNanos_UncalGyro
UncalGyroXRadPerSec
UncalGyroYRadPerSec
U

In [10]:
# Input columns of the noise network, in the order they are fed to the model.
features = (
    # raw position estimate
    ['latDeg', 'lngDeg']
    # scaled satellite position
    + ['xSatPosM_Scaled', 'ySatPosM_Scaled', 'zSatPosM_Scaled']
    # scaled satellite velocity
    + ['xSatVelMps_Scaled', 'ySatVelMps_Scaled', 'zSatVelMps_Scaled']
    # per-signal-type columns
    + ['GPS_L1', 'GPS_L5']
    + ['GAL_E1', 'GAL_E5A']
    + ['GLO_G1']
    + ['BDS_B1I', 'BDS_B1C', 'BDS_B2A']
    + ['QZS_J1', 'QZS_J5']
)

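## Kalman Filter Definition
The state vector is $(x, y, v, \theta, \omega)$: position, speed, heading and turn rate. Speed and turn rate are assumed constant over one time step $dt$:
$$
\begin{matrix}
x_t =& x_{t-1} + \frac{v_{t-1}}{\omega_{t-1}}\left(\sin\left(\omega_{t-1}dt + \theta_{t-1}\right) - \sin\left(\theta_{t-1}\right)\right)\\
y_t =& y_{t-1} + \frac{v_{t-1}}{\omega_{t-1}}\left(\cos\left(\theta_{t-1}\right) - \cos\left(\omega_{t-1}dt + \theta_{t-1}\right)\right)\\
v_t =& v_{t-1}\\
\theta_t =& \theta_{t-1} + \omega_{t-1}dt\\
\omega_t =& \omega_{t-1}
\end{matrix}
$$
When $|\omega_{t-1}|$ is close to zero the division above is ill-conditioned, so the implementation falls back to straight-line motion: $x_t = x_{t-1} + v_{t-1}\,dt\,\cos\theta_{t-1}$ and $y_t = y_{t-1} + v_{t-1}\,dt\,\sin\theta_{t-1}$.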

In [11]:
def fx(x, dt):
    """State transition for the state [x, y, v, theta, omega].

    Speed ``v`` and turn rate ``omega`` are carried over unchanged and the
    heading advances by ``omega * dt``. The position follows a circular arc
    when ``|omega| > 1e-3`` and a straight line otherwise, which avoids
    dividing by a turn rate close to zero.

    Returns a new array shaped and typed like ``x``.
    """
    pos_x, pos_y, speed, heading, turn_rate = x[0], x[1], x[2], x[3], x[4]

    if abs(turn_rate) > 1e-3:
        # Arc motion with radius speed / turn_rate.
        radius = speed / turn_rate
        step_x = radius * (np.sin(turn_rate * dt + heading) - np.sin(heading))
        step_y = radius * (np.cos(heading) - np.cos(turn_rate * dt + heading))
    else:
        # Straight-line motion along the current heading.
        step_x = speed * dt * (np.cos(heading))
        step_y = speed * dt * (np.sin(heading))

    xout = np.zeros_like(x)
    xout[0] = pos_x + step_x
    xout[1] = pos_y + step_y
    xout[2] = speed
    xout[3] = heading + turn_rate * dt
    xout[4] = turn_rate
    return xout

def hx(x):
    """Measurement function: select state entries 0, 1, 3 and 4 (entry 2 is skipped)."""
    observed_entries = [0, 1, 3, 4]
    return x[observed_entries]

In [12]:
# Sigma-point generator for the 5-dimensional filter state.
points = MerweScaledSigmaPoints(
    n=5,
    alpha=.1,
    beta=2.,
    kappa=-1,
)

## ANN Definition

In [13]:
class NoiseNetwork(nn.Module):
    """Network that outputs two noise covariances and an error estimate per row.

    ``forward`` returns ``(Q, R, err)`` where, for a batch of ``B`` rows,
    ``Q`` has shape (B, 5, 5), ``R`` has shape (B, 4, 4) and ``err`` has shape
    (B, 1). ``Q`` and ``R`` are formed as ``A^T A`` from the raw head outputs,
    so they are symmetric positive semi-definite by construction. ``err`` is
    passed through a ReLU and is therefore never negative.
    """

    def __init__(self, features):
        """``features`` is the number of input columns per row."""
        super().__init__()

        # Trunk.
        self.fc1 = nn.Linear(features, 256)
        self.fc2 = nn.Linear(256, 512)
        self.fc3 = nn.Linear(512, 256)
        # Covariance heads: 25 = 5*5 entries for Q, 16 = 4*4 entries for R.
        self.fc_Q = nn.Linear(256, 25)
        self.fc_R = nn.Linear(256, 16)
        # Error-estimate branch; width 41 = 25 + 16 so it can be added to cat(Q, R).
        self.fc4 = nn.Linear(256, 41)
        self.fc_err1 = nn.Linear(41, 16)
        self.fc_err2 = nn.Linear(16, 1)

        self.bn1 = nn.BatchNorm1d(256)
        self.bn2 = nn.BatchNorm1d(512)
        self.bn3 = nn.BatchNorm1d(256)
        self.bn4 = nn.BatchNorm1d(41)

        self.drop03 = nn.Dropout(0.3)
        self.drop05 = nn.Dropout(0.5)
        self.drop06 = nn.Dropout(0.6)
        self.drop09 = nn.Dropout(0.9)

    @staticmethod
    def _stage(x, dropout, linear, norm):
        """Apply dropout -> linear -> ReLU -> batch norm, in that order."""
        return norm(F.relu(linear(dropout(x))))

    def forward(self, x):
        hidden = self._stage(x, self.drop09, self.fc1, self.bn1)
        hidden = self._stage(hidden, self.drop06, self.fc2, self.bn2)
        hidden = self._stage(hidden, self.drop05, self.fc3, self.bn3)

        # Each head draws its own dropout mask over the shared trunk output.
        q_flat = self.fc_Q(self.drop03(hidden))
        r_flat = self.fc_R(self.drop03(hidden))
        heads = torch.cat([q_flat, r_flat], dim=1)

        # Error branch with a skip connection from the raw head outputs.
        branch = self._stage(hidden, self.drop03, self.fc4, self.bn4) + heads
        branch = F.relu(self.fc_err1(self.drop03(branch)))
        err = F.relu(self.fc_err2(self.drop03(branch)))

        # A^T A turns the unconstrained head outputs into symmetric PSD matrices.
        q_factor = q_flat.reshape(-1, 5, 5)
        Q = q_factor.transpose(1, 2) @ q_factor

        r_factor = r_flat.reshape(-1, 4, 4)
        R = r_factor.transpose(1, 2) @ r_factor

        return Q, R, err

class KFLoss(nn.Module):
    """Weighted sum of a Smooth-L1 fit term and the mean absolute estimate.

    ``loss = lamb * SmoothL1(err_est, err) + (1 - lamb) * mean(|err_est|)``
    """

    def __init__(self, lamb = 0.99):
        super().__init__()
        self.lamb = lamb
        self.loss = nn.SmoothL1Loss()

    def forward(self, err_est, err):
        fit_term = self.loss(err_est, err)
        magnitude_term = torch.abs(err_est).mean()
        return self.lamb * fit_term + (1 - self.lamb) * magnitude_term
    
# One network input per feature column; plain SGD with a small step size.
model = NoiseNetwork(features=len(features))
optimizer = optim.SGD(params=model.parameters(), lr=1e-4)
loss_func = KFLoss()

In [14]:
def batch_forward(df:pd.DataFrame):
    """Run the global ``model`` on the ``features`` columns of ``df``.

    Returns the model's ``(Q, R, err_est)`` tuple, one entry per row of ``df``.
    """
    inputs = torch.Tensor(df[features].values)
    return model(inputs)

def batch_filter(df_:pd.DataFrame, Q, R):
    """Filter and smooth one trace with an unscented Kalman filter.

    Parameters
    ----------
    df_ : rows of a single trace; needs the geodetic columns used by
        ``calc_geo2enu`` plus ``yawDeg`` and ``UncalGyroZRadPerSec``.
    Q : process-noise matrices handed to the RTS smoother.
    R : measurement-noise matrices handed to the filter pass.
        (Both presumably hold one matrix per row - confirm against the
        filterpy version in use.)

    Returns
    -------
    Copy of ``df_`` in which ``x``/``y``, ``latDeg``/``lngDeg``/height,
    ``yawDeg`` and ``UncalGyroZRadPerSec`` are replaced by smoothed values.
    A helper column ``yawRad`` is also present in the result.
    """
    # calc_geo2enu already works on a copy, so the caller's frame is untouched.
    df = calc_geo2enu(df_)

    # Named differently from the global `features` list to avoid shadowing it.
    meas_columns = ['x','y','yawRad','UncalGyroZRadPerSec']
    df['yawRad'] = np.deg2rad(df['yawDeg'])
    meas = df[meas_columns].fillna(0)

    kf = UnscentedKalmanFilter(dim_x = 5, dim_z = 4, dt = 1, fx = fx, hx = hx, points = points)
    mu, cov = kf.batch_filter(meas.values, R)
    (xs, Ps, Ks) = kf.rts_smoother(mu, cov, Q)

    # xs holds the smoothed STATE [x, y, v, theta, omega], not the 4-element
    # measurement. hx maps state entries 3 and 4 onto yaw and gyro, so those
    # are the columns to read back; entry 2 is the speed and was previously
    # written into yawDeg by mistake.
    df['x'] = xs[:,0]
    df['y'] = xs[:,1]
    df['yawDeg'] = np.rad2deg(xs[:,3])
    df['UncalGyroZRadPerSec'] = xs[:,4]

    df = calc_enu2geo(df)
    return df
    
def batch_estim(df_:pd.DataFrame, train = True):
    """Predict noise matrices for ``df_`` and run the Kalman filter with them.

    ``train`` is accepted for interface compatibility; it is not read here.

    Returns ``(df, err_est, Q, R)``: the filtered frame, the network's error
    estimate (still a tensor) and the two noise arrays as NumPy arrays
    detached from the autograd graph.
    """
    q_tensor, r_tensor, err_est = batch_forward(df_)

    # The filter works on plain NumPy arrays, so drop the autograd graph first.
    Q, R = (tensor.detach().numpy() for tensor in (q_tensor, r_tensor))

    return batch_filter(df_, Q, R), err_est, Q, R

In [15]:
def train(df_:pd.DataFrame):
    """Run one training pass, taking one optimizer step per phone.

    Returns the ``phone`` / ``millisSinceGpsEpoch`` pairs of ``df_`` merged
    with the filtered and scored rows of every phone.
    """
    model.train()  # switch the network to training mode
    merge_keys = ['phone', 'millisSinceGpsEpoch']
    scored_frames = []

    for phone in tqdm(df_['phone'].unique()):
        phone_rows = df_[df_['phone'] == phone]

        optimizer.zero_grad()  # reset the gradients to zero

        filtered, err_est, _, _ = batch_estim(phone_rows)
        scored, _, _ = check_score(filtered)

        # Target of the error head: the per-row distance error in metres.
        target = torch.Tensor(scored['meter'].values).reshape(-1,1)
        loss = loss_func(err_est, target)
        scored_frames.append(scored)

        loss.backward()  # back-propagate the error
        optimizer.step()  # update the weights from the computed gradients

    output = df_[merge_keys].copy()
    return output.merge(pd.concat(scored_frames), on = merge_keys)
        

In [16]:
def valid(df_:pd.DataFrame):
    """Filter every phone in ``df_`` with the network in inference mode.

    Returns the ``phone`` / ``millisSinceGpsEpoch`` pairs of ``df_`` merged
    with the filtered rows of every phone.
    """
    model.eval()  # switch the network to inference mode
    merge_keys = ['phone', 'millisSinceGpsEpoch']
    filtered_frames = []

    with torch.no_grad():  # no gradients are needed for inference
        for phone in df_['phone'].unique():
            phone_rows = df_[df_['phone'] == phone]
            filtered, _, _, _ = batch_estim(phone_rows)
            filtered_frames.append(filtered)

    output = df_[merge_keys].copy()
    return output.merge(pd.concat(filtered_frames), on = merge_keys)
        

In [17]:
def test(df_:pd.DataFrame):
    """Filter every phone in ``df_`` with the network in inference mode.

    Same procedure as ``valid`` except that ``batch_estim`` is called with
    ``train = False``. Returns the ``phone`` / ``millisSinceGpsEpoch`` pairs
    of ``df_`` merged with the filtered rows of every phone.
    """
    model.eval()  # switch the network to inference mode
    merge_keys = ['phone', 'millisSinceGpsEpoch']
    filtered_frames = []

    with torch.no_grad():  # no gradients are needed for inference
        for phone in df_['phone'].unique():
            phone_rows = df_[df_['phone'] == phone]
            filtered, _, _, _ = batch_estim(phone_rows, train = False)
            filtered_frames.append(filtered)

    output = df_[merge_keys].copy()
    return output.merge(pd.concat(filtered_frames), on = merge_keys)
        

In [18]:
# Optional: train on a handful of phones only (kept for quick experiments).
# sample_phone = np.random.choice(phones, 3, replace = False)
# sample_index = df_train['phone'].apply(lambda x: x in sample_phone)
# df_samples = df_train[sample_index]

# One pass over all phones per epoch, then report the scores on the training data.
for epoch in tqdm(range(100)):
    train_result, train_mean, train_score = check_score(train(df_train))

    print(f"{epoch + 1}: mean={train_mean}, score={train_score}")

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/73 [00:00<?, ?it/s]

1: mean=4.212115381477734, score=6.169716408816198


  0%|          | 0/73 [00:00<?, ?it/s]

2: mean=4.240803060262903, score=6.152513779353327


  0%|          | 0/73 [00:00<?, ?it/s]

3: mean=4.246316151378591, score=6.2211211801846895


  0%|          | 0/73 [00:00<?, ?it/s]

4: mean=4.366520784013377, score=6.180360698963442


  0%|          | 0/73 [00:00<?, ?it/s]

5: mean=4.237895515396893, score=6.134853630102594


  0%|          | 0/73 [00:00<?, ?it/s]

6: mean=4.224609885264882, score=6.146879301229139


  0%|          | 0/73 [00:00<?, ?it/s]

7: mean=4.250020681027392, score=6.1846904386360615


  0%|          | 0/73 [00:00<?, ?it/s]

8: mean=4.244485117588078, score=6.231981305428548


  0%|          | 0/73 [00:00<?, ?it/s]

9: mean=4.208478322023082, score=6.243885916557366


  0%|          | 0/73 [00:00<?, ?it/s]

10: mean=4.320123645722812, score=6.0839509075265426


  0%|          | 0/73 [00:00<?, ?it/s]

11: mean=4.230662069329176, score=6.2018610545917205


  0%|          | 0/73 [00:00<?, ?it/s]

12: mean=4.245834524685644, score=6.268269870213243


  0%|          | 0/73 [00:00<?, ?it/s]

13: mean=4.227428103921597, score=6.130898357025383


  0%|          | 0/73 [00:00<?, ?it/s]

14: mean=4.181495600707823, score=6.095205242976221


  0%|          | 0/73 [00:00<?, ?it/s]

15: mean=4.23693470219187, score=6.215392896513779


  0%|          | 0/73 [00:00<?, ?it/s]

16: mean=4.235995708278079, score=6.21844122470293


  0%|          | 0/73 [00:00<?, ?it/s]

17: mean=4.224721545140894, score=6.089547794481526


  0%|          | 0/73 [00:00<?, ?it/s]

18: mean=4.19563080343761, score=6.1186010969319655


  0%|          | 0/73 [00:00<?, ?it/s]

19: mean=4.214010080855505, score=6.105700982037526


  0%|          | 0/73 [00:00<?, ?it/s]

20: mean=4.221912259860199, score=6.127883195302982


  0%|          | 0/73 [00:00<?, ?it/s]

21: mean=4.243854216885952, score=6.162086941190413


  0%|          | 0/73 [00:00<?, ?it/s]

22: mean=4.215893673211439, score=6.188368449537945


  0%|          | 0/73 [00:00<?, ?it/s]

23: mean=4.261486861750611, score=6.271223602522131


  0%|          | 0/73 [00:00<?, ?it/s]

24: mean=4.197220379289256, score=6.156870057240197


  0%|          | 0/73 [00:00<?, ?it/s]

25: mean=4.269292489976384, score=6.218217148414513


  0%|          | 0/73 [00:00<?, ?it/s]

26: mean=4.21998320205842, score=6.13755718997608


  0%|          | 0/73 [00:00<?, ?it/s]

27: mean=4.264008481391487, score=6.194392145421928


  0%|          | 0/73 [00:00<?, ?it/s]

28: mean=4.236519593596221, score=6.145284604383782


  0%|          | 0/73 [00:00<?, ?it/s]

29: mean=4.239362889637014, score=6.142698752047044


  0%|          | 0/73 [00:00<?, ?it/s]

30: mean=4.251624805801379, score=6.212823206908387


  0%|          | 0/73 [00:00<?, ?it/s]

31: mean=4.249748765608228, score=6.21196147893008


  0%|          | 0/73 [00:00<?, ?it/s]

32: mean=4.228249976631863, score=6.107267132525966


  0%|          | 0/73 [00:00<?, ?it/s]

33: mean=4.237922737980399, score=6.165031962531837


  0%|          | 0/73 [00:00<?, ?it/s]

34: mean=4.278306016173505, score=6.233865812070325


  0%|          | 0/73 [00:00<?, ?it/s]

35: mean=4.271752191950099, score=6.194405609533308


  0%|          | 0/73 [00:00<?, ?it/s]

36: mean=4.348642900852943, score=6.248171046401833


  0%|          | 0/73 [00:00<?, ?it/s]

37: mean=4.253945447333914, score=6.23053676200218


  0%|          | 0/73 [00:00<?, ?it/s]

38: mean=4.289349506074871, score=6.322149916219725


  0%|          | 0/73 [00:00<?, ?it/s]

39: mean=4.267286961102054, score=6.212004777358803


  0%|          | 0/73 [00:00<?, ?it/s]

40: mean=4.292625665375068, score=6.305400717456744


  0%|          | 0/73 [00:00<?, ?it/s]

41: mean=4.267943658294951, score=6.1899051233731806


  0%|          | 0/73 [00:00<?, ?it/s]

42: mean=4.247100093250504, score=6.178390780220549


  0%|          | 0/73 [00:00<?, ?it/s]

43: mean=4.247735858555717, score=6.24079472310029


  0%|          | 0/73 [00:00<?, ?it/s]

44: mean=4.224035416765486, score=6.15021046693661


  0%|          | 0/73 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# NOTE(review): check_score reads the ground-truth columns t_latDeg / t_lngDeg;
# confirm that df_test actually carries them, otherwise score a held-out part
# of df_train here instead - TODO confirm.
valid_result, valid_mean, valid_score = check_score(valid(df_test))
print(valid_mean, valid_score)

In [None]:
# Keep only the key columns of the sample submission; predictions are merged in later.
submission_keys = ['phone', 'millisSinceGpsEpoch']
submission = pd.read_csv(
    "../input/google-smartphone-decimeter-challenge/sample_submission.csv"
)[submission_keys]

In [None]:
result = test(df_test)
result = result[['phone', 'millisSinceGpsEpoch', 'latDeg', 'lngDeg']]

# Merge onto the key columns only, so re-running this cell does not pile up
# latDeg_x / latDeg_y columns from a previous merge.
# how='left' keeps every row of the submission template; the default inner
# join would silently drop rows that have no prediction.
submission = submission[['phone', 'millisSinceGpsEpoch']].merge(
    result, on = ['phone', 'millisSinceGpsEpoch'], how = 'left'
)

# Make missing predictions visible instead of shipping a short or NaN-filled file unnoticed.
n_missing = int(submission['latDeg'].isna().sum())
if n_missing:
    print(f"{n_missing} submission rows have no prediction")

In [None]:
# DataFrame.to_csv does not create missing parent directories, so build the
# output folder first; a fresh run would otherwise fail at the very last step.
model_name = 'ComplexKalmanFilter1'
run_id = 4
output_path = Path(f"./models/{model_name}/result-{run_id}result.csv")
output_path.parent.mkdir(parents = True, exist_ok = True)
submission.to_csv(output_path, index = False)