In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from pathlib import Path
import pyproj
from pyproj import Proj, transform

def calc_haversine(lat1, lon1, lat2, lon2):
    """Return the haversine (great-circle) distance between point pairs.

    All four arguments are angles in decimal degrees. They may be scalars
    or array-likes (lists, NumPy arrays, pandas Series); array inputs are
    processed element-wise.

    The distance is computed on a sphere of radius 6 367 000, so the
    result is expressed in the same unit as that radius (metres for an
    Earth-sized sphere).
    """
    earth_radius = 6_367_000
    phi1, lam1, phi2, lam2 = (
        np.radians(angle) for angle in (lat1, lon1, lat2, lon2)
    )
    half_dphi = (phi2 - phi1) / 2
    half_dlam = (lam2 - lam1) / 2
    # Haversine of the central angle between the two points.
    hav = np.sin(half_dphi)**2 + \
        np.cos(phi1) * np.cos(phi2) * np.sin(half_dlam)**2
    return 2 * earth_radius * np.arcsin(hav**0.5)
 
def compute_dist(fname, fname2='gt.csv'):
    """Score a predictions CSV against a ground-truth CSV.

    Both files are read with ``pd.read_csv`` and joined on ``phone`` and
    ``millisSinceGpsEpoch``; each must also provide ``latDeg`` and
    ``lngDeg`` columns. The haversine distance between the predicted and
    true position is computed for every matched row, then the 50th and
    95th percentile of that distance are taken per phone.

    Parameters
    ----------
    fname : str or path-like
        CSV with the predicted positions.
    fname2 : str or path-like, default 'gt.csv'
        CSV with the ground-truth positions.

    Returns
    -------
    score : pandas.Series
        Average of (mean per-phone q50) and (mean per-phone q95). This is
        the result of ``DataFrame.mean`` and therefore a one-element
        Series indexed by ``'dst'``, not a plain float.
    per_phone : pandas.DataFrame
        One row per phone with columns ``phone``, ``q50`` and ``q95``.
    """
    predicted = pd.read_csv(fname)
    truth = pd.read_csv(fname2)
    # Inner join: rows without a counterpart in the other file are dropped.
    # The shared latDeg/lngDeg columns get the _x (predicted) / _y (truth)
    # suffixes from the merge.
    paired = predicted.merge(truth, on=['phone', 'millisSinceGpsEpoch'])
    error = calc_haversine(paired.latDeg_x, paired.lngDeg_x,
                           paired.latDeg_y, paired.lngDeg_y)
    by_phone = pd.DataFrame({'phone': paired.phone,
                             'dst': error}).groupby('phone')

    # Compute each quantile table once and reuse it for both the overall
    # score and the per-phone breakdown.
    q50 = by_phone.quantile(.50)
    q95 = by_phone.quantile(.95)
    score = (q50.mean() + q95.mean()) / 2

    d50 = q50.reset_index()
    d50.columns = ['phone', 'q50']
    d95 = q95.reset_index()
    d95.columns = ['phone', 'q95']
    return score, d50.merge(d95, on='phone')

def WGS84_to_ECEF(lat, lon, alt):
    """Convert WGS84 geodetic coordinates to ECEF Cartesian coordinates.

    ``lat`` and ``lon`` are in decimal degrees; ``alt`` is the height above
    the ellipsoid in the same length unit as the semi-major axis used here
    (6378137.0). Scalars and NumPy arrays / pandas Series are supported
    (the inputs are multiplied by a float, so plain lists are not).

    Returns the tuple ``(x, y, z)``.
    """
    # WGS84 ellipsoid: semi-major axis and inverse flattening.
    semi_major = 6378137.0
    inv_flattening = 298.257223563
    flattening = 1 / inv_flattening
    # Squared first eccentricity.
    ecc_sq = 1 - (1 - flattening) * (1 - flattening)

    # Degrees -> radians.
    phi = lat * (np.pi / 180.0)
    lam = lon * (np.pi / 180.0)
    sin_phi = np.sin(phi)
    cos_phi = np.cos(phi)

    # Radius of curvature in the prime vertical.
    prime_vertical = semi_major / np.sqrt(1 - ecc_sq * sin_phi * sin_phi)

    # Distance from the rotation axis, shared by x and y.
    axis_dist = (prime_vertical + alt) * cos_phi
    x = axis_dist * np.cos(lam)
    y = axis_dist * np.sin(lam)
    z = (prime_vertical * (1 - ecc_sq) + alt) * sin_phi
    return x, y, z

# Source and target coordinate systems for the ECEF -> geodetic conversion,
# given as PROJ parameter dicts.
_GEOCENTRIC_CRS = {"proj": 'geocent', "ellps": 'WGS84', "datum": 'WGS84'}
_GEODETIC_CRS = {"proj": 'latlong', "ellps": 'WGS84', "datum": 'WGS84'}

# Built once at module level so every call reuses the same transformer.
transformer = pyproj.Transformer.from_crs(_GEOCENTRIC_CRS, _GEODETIC_CRS)


def ECEF_to_WGS84(x,y,z):
    """Convert ECEF Cartesian coordinates to WGS84 geodetic coordinates.

    Delegates to the module-level ``transformer`` with ``radians=False``.

    Returns a 3-tuple in the order ``(lon, lat, alt)``. Note that this is
    longitude first, unlike ``WGS84_to_ECEF`` which takes
    ``(lat, lon, alt)``.
    """
    longitude, latitude, height = transformer.transform(
        x, y, z, radians=False)
    return longitude, latitude, height

# Location of the competition data on Kaggle and its train/test subfolders.
datadir = Path('/kaggle/input/google-smartphone-decimeter-challenge/')
testdir = datadir.joinpath('test')
traindir = datadir.joinpath('train')

# The sample submission supplies the set of columns used for every CSV
# written below.
sample_sub = pd.read_csv(datadir.joinpath('sample_submission.csv'))
sub_columns = sample_sub.columns

# Save the baseline locations, restricted to the submission columns, into
# the working directory.
baseline_train = pd.read_csv(datadir.joinpath('baseline_locations_train.csv'))
baseline_train[sub_columns].to_csv('btrain.csv', index=False)
baseline_test = pd.read_csv(datadir.joinpath('baseline_locations_test.csv'))
baseline_test[sub_columns].to_csv('btest.csv', index=False)

# Shorthand for the timestamp column name.
msge = 'millisSinceGpsEpoch'

# Read every train/<dir>/<subdir>/ground_truth.csv and stack them into one
# frame. Frames are collected first and concatenated once:
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copies the accumulated frame on every iteration.
gt_frames = []
for d in os.listdir(traindir):
    for p in os.listdir(traindir/d):
        gt_frames.append(pd.read_csv(traindir/d/p/'ground_truth.csv'))
# pd.concat raises on an empty list; fall back to an empty frame in that
# case, as the append-based loop did.
gt = pd.concat(gt_frames) if gt_frames else pd.DataFrame()

# Build the same 'phone' key that the submission/baseline files use.
gt['phone'] = gt['collectionName'] + '_' + gt['phoneName']
gt[sub_columns].to_csv('gt.csv', index = False)
# Bare expression: the summary is computed but not shown, because only the
# last expression of a notebook cell is displayed.
gt['heightAboveWgs84EllipsoidM'].describe()

# Baseline predictions joined with the full ground-truth rows.
df = pd.read_csv('btrain.csv').merge(gt, on = ['phone','millisSinceGpsEpoch'])
print(df)

# Score the baseline against the ground truth.
score, scores = compute_dist('btrain.csv','gt.csv')
print(score)
scores