In [None]:
!pip install pymap3d osmnx momepy geopandas astropy

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import scipy.optimize as opt
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.neighbors import KDTree
from tqdm import tqdm
import datetime 
import pymap3d as pm
from astropy.time import Time
import scipy.interpolate
import scipy.sparse


from pathlib import Path
from shapely.geometry import Polygon, Point, LineString, shape
import osmnx as ox
import momepy
import geopandas as gpd
import plotly.express as px

import heapq
from heapq import heappush as push_
from heapq import heappop  as pop_

from sklearn.neighbors import KDTree
import pickle
from scipy.spatial import distance_matrix

import pyproj
from pyproj import Proj, transform

# Raise pandas' display limits so wide/long frames are shown with up to
# 500 rows and 500 columns.
pd.options.display.max_rows    = 500
pd.options.display.max_columns = 500

In [None]:
def show_gt(target_gt_df, lat0=None, lng0=None, index_col='index'):
    """Display the GPS points of ``target_gt_df`` on an interactive Mapbox scatter plot.

    Points are positioned by the ``latDeg``/``lngDeg`` columns, labelled with
    ``phoneName`` and coloured by ``collectionName``; ``index_col`` names the
    column used as hover title.

    When ``lat0`` is None the map is centred on the row in the middle of the
    frame (this also replaces ``lng0``); otherwise ``lat0``/``lng0`` are used
    as given. The figure is shown and nothing is returned.
    """
    if lat0 is None:
        middle_row = len(target_gt_df) // 2
        lat0, lng0 = target_gt_df[['latDeg', 'lngDeg']].values[middle_row]

    scatter_options = dict(
        # coordinates and per-point text
        lat="latDeg",
        lon="lngDeg",
        text='phoneName',
        # one colour series per collection
        color="collectionName",
        labels="collectionName",
        hover_name=index_col,
        # viewport
        zoom=15,
        center={"lat": lat0, "lon": lng0},
        height=600,
        width=800,
    )
    fig = px.scatter_mapbox(target_gt_df, **scatter_options)

    fig.update_layout(
        mapbox_style='stamen-terrain',
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
        title_text="GPS trafic",
    )
    fig.show()
    
# Physical constants shared by the notebook.
SPEED_OF_LIGHT      = 2.99792458e8  # m/s
EARTH_GM            =      3.986005e14  # m^3/s^2 (gravitational constant * mass of earth)
EARTH_RADIUS        = 6.3781e6  # m
EARTH_ROTATION_RATE = 7.2921151467e-005  # presumably rad/s -- TODO confirm unit
LeapSecond          = 18  # presumably the GPS-UTC leap-second offset in seconds -- TODO confirm

def ecef2lla(x, y, z):
    """Convert ECEF coordinates to WGS84 geodetic latitude, longitude and height.

    Uses Heikkinen's closed-form (non-iterative) solution.

    Parameters
    ----------
    x, y, z : scalar or 1-D array-like
        ECEF coordinates in meters.

    Returns
    -------
    (lat_deg, lon_deg, h) : tuple of numpy arrays, each of shape (n, 1)
        Latitude and longitude in degrees and ellipsoidal height in meters
        (n == 1 for scalar input).

    Notes
    -----
    The expression divides by the distance from the rotation axis, so points
    exactly on the polar axis (x == y == 0) are not handled.
    """
    # x, y and z are scalars or vectors in meters; reshape them to column vectors
    x = np.array([x]).reshape(np.array([x]).shape[-1], 1)
    y = np.array([y]).reshape(np.array([y]).shape[-1], 1)
    z = np.array([z]).reshape(np.array([z]).shape[-1], 1)

    # WGS84 ellipsoid parameters
    a    = 6378137                  # semi-major axis (m)
    e_sq = 6.69437999014e-3         # first eccentricity squared
    flattening = 1/298.257223563
    b = a*(1-flattening)            # semi-minor axis (m)

    # calculations:
    r = np.sqrt(x**2 + y**2)
    ep_sq  = (a**2-b**2)/b**2       # second eccentricity squared
    ee = (a**2-b**2)                # linear eccentricity squared
    big_f = (54*b**2)*(z**2)
    # Heikkinen: G = r^2 + (1 - e^2) z^2 - e^2 (a^2 - b^2).
    # (The previous code multiplied the last term by 2, which biased the result.)
    g = r**2 + (1 - e_sq)*(z**2) - e_sq*ee
    c = (e_sq**2)*big_f*r**2/(g**3)
    s = (1 + c + np.sqrt(c**2 + 2*c))**(1/3.)
    p = big_f/(3.*(g**2)*(s + (1./s) + 1)**2)
    q = np.sqrt(1 + 2*p*e_sq**2)
    r_0 = -(p*e_sq*r)/(1+q) + np.sqrt(0.5*(a**2)*(1+(1./q)) - p*(z**2)*(1-e_sq)/(q*(1+q)) - 0.5*p*(r**2))
    u = np.sqrt((r - e_sq*r_0)**2 + z**2)
    v = np.sqrt((r - e_sq*r_0)**2 + (1 - e_sq)*z**2)
    z_0 = (b**2)*z/(a*v)
    h = u*(1 - b**2/(a*v))
    phi = np.arctan((z + ep_sq*z_0)/r)
    lambd = np.arctan2(y, x)

    return phi*180/np.pi, lambd*180/np.pi, h


def calc_haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in meters between two points on a sphere.

    All four inputs are array-like (or scalars) given in decimal degrees and
    are combined element-wise; a sphere radius of 6,367,000 m is used.
    """
    sphere_radius_m = 6_367_000
    phi1, lam1, phi2, lam2 = (np.radians(angle) for angle in (lat1, lon1, lat2, lon2))

    delta_phi = phi2 - phi1
    delta_lam = lam2 - lam1

    # haversine of the central angle
    hav = np.sin(delta_phi/2)**2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lam/2)**2

    return 2 * sphere_radius_m * np.arcsin(hav**0.5)

def WGS84_to_ECEF(lat, lon, alt):
    """Convert WGS84 geodetic coordinates to ECEF.

    ``lat`` and ``lon`` are in degrees, ``alt`` is the height above the
    ellipsoid in meters. Returns the tuple ``(x, y, z)`` in meters.
    """
    # WGS84 ellipsoid: semi-major axis, flattening, first eccentricity squared
    semi_major = 6378137.0
    inverse_flattening = 298.257223563
    flattening = 1 / inverse_flattening
    ecc_sq = 1 - (1 - flattening) * (1 - flattening)

    # degrees -> radians
    phi = lat * (np.pi / 180.0)
    lam = lon * (np.pi / 180.0)

    sin_phi = np.sin(phi)
    cos_phi = np.cos(phi)

    # radius of curvature in the prime vertical
    prime_vertical = semi_major / np.sqrt(1 - ecc_sq * sin_phi * sin_phi)

    # component perpendicular to the rotation axis, shared by x and y
    equatorial = (prime_vertical + alt) * cos_phi

    x = equatorial * np.cos(lam)
    y = equatorial * np.sin(lam)
    z = (prime_vertical * (1 - ecc_sq) + alt) * sin_phi
    return x, y, z

# Shared pyproj transformer from geocentric ('geocent') coordinates to
# 'latlong' coordinates, both on the WGS84 ellipsoid/datum.
# It is built once at import time and reused by ECEF_to_WGS84.
transformer = pyproj.Transformer.from_crs(
    {"proj":'geocent', "ellps":'WGS84', "datum":'WGS84'},
    {"proj":'latlong', "ellps":'WGS84', "datum":'WGS84'},)

def ECEF_to_WGS84(x,y,z):
    """Convert geocentric x, y, z to WGS84 using the module-level ``transformer``.

    Returns the tuple ``(lon, lat, alt)`` -- note that longitude comes first;
    angles are requested in degrees (``radians=False``).
    """
    converted = transformer.transform(x, y, z, radians=False)
    lon, lat, alt = converted
    return lon, lat, alt

def get_train_score(df, latCol='latDeg', lngCol='lngDeg'):
    """Score predictions against ground truth, averaged over phones.

    For every row the haversine distance between (``latDeg_truth``,
    ``lngDeg_truth``) and (``latCol``, ``lngCol``) is computed. Per phone the
    50th and 95th percentile of that error are averaged, and the mean of
    those per-phone values is returned.

    Side effect: the columns ``err`` and ``phone`` are added to (or
    overwritten in) the ``df`` that was passed in.
    """
    # The function names become the column labels of the aggregation result.
    def percentile50(errors):
        return np.percentile(errors, 50)

    def percentile95(errors):
        return np.percentile(errors, 95)

    df['err']   = calc_haversine(df['latDeg_truth'], df['lngDeg_truth'], df[latCol], df[lngCol])
    df['phone'] = df['collectionName'] + '_' + df['phoneName']

    per_phone = df.groupby('phone')['err'].agg([percentile50, percentile95])
    per_phone_score = (per_phone['percentile50'] + per_phone['percentile95']) / 2

    return per_phone_score.mean()

def filter_outlier(df_ori, thres=2):
    """Replace isolated velocity spikes by linear interpolation.

    Works on a copy of ``df_ori`` whose index is reset to 0..n-1. For each of
    the columns ``Vx``, ``Vy`` and ``Vz`` a sample is treated as an outlier
    when the absolute rate of change (per second, derived from
    ``millisSinceGpsEpoch``) towards BOTH its predecessor and its successor
    exceeds ``thres``. Outliers are set to NaN and the column is then filled
    with ``interpolate(method='linear')``.

    Returns the cleaned copy; ``df_ori`` itself is not modified.
    """
    cleaned = df_ori.copy()
    cleaned.index = range(len(cleaned))

    # Time steps in seconds to the previous / next row. The missing edge
    # value is filled with 1000 ms; the timestamps do not change below, so
    # these are computed once for all three velocity components.
    millis = cleaned['millisSinceGpsEpoch']
    dt_to_prev = millis.diff(+1).fillna(1000) / 1000
    dt_to_next = millis.diff(-1).fillna(1000) / 1000

    for vel_col in ('Vx', 'Vy', 'Vz'):
        component = cleaned[vel_col]
        rate_to_prev = (component.diff(+1).fillna(0) / dt_to_prev).abs()
        rate_to_next = (component.diff(-1).fillna(0) / dt_to_next).abs()

        # a spike jumps away from both neighbours
        is_spike = (rate_to_prev > thres).to_numpy() & (rate_to_next > thres).to_numpy()
        outlier_idx = np.where(is_spike)[0]

        cleaned.loc[outlier_idx, vel_col] = np.nan
        cleaned[vel_col] = cleaned[vel_col].interpolate(method='linear')

    return cleaned

def compute_dist(fname, fname2 = 'gt.csv'):
    """Compare predicted positions with a ground-truth CSV.

    ``fname`` is either the path (str) of a prediction CSV or a DataFrame
    (which is copied). A ``phone`` column is built from ``collectionName``
    and ``phoneName`` when absent. Predictions are merged with the CSV at
    ``fname2`` on ``phone`` and ``millisSinceGpsEpoch`` and the haversine
    distance is computed per matched row.

    Returns a pair: the average of the mean per-phone 50% and 95% distance
    quantiles (a one-element Series labelled ``dst``), and a DataFrame with
    the columns ``phone``, ``q50`` and ``q95``.
    """
    oof = pd.read_csv(fname) if isinstance(fname, str) else fname.copy()

    if 'phone' not in oof.columns:
        oof['phone'] = oof['collectionName'] + '_' + oof['phoneName']

    gt = pd.read_csv(fname2)

    # Overlapping lat/lng columns get the suffixes _x (prediction) and _y (truth).
    merged   = oof.merge(gt, on = ['phone','millisSinceGpsEpoch'])
    distance = calc_haversine(merged.latDeg_x, merged.lngDeg_x, merged.latDeg_y, merged.lngDeg_y)

    by_phone = pd.DataFrame({'phone': merged.phone, 'dst': distance}).groupby('phone')
    q50 = by_phone.quantile(.50)
    q95 = by_phone.quantile(.95)

    d50 = q50.reset_index()
    d50.columns = ['phone','q50']
    d95 = q95.reset_index()
    d95.columns = ['phone','q95']

    return (q50.mean() + q95.mean())/2, d50.merge(d95)

def mean_with_other_phones(df):
    """Average each phone's track with the other phones of the same collection.

    For every collection, each phone's ``latDeg``/``lngDeg`` samples are
    averaged with the positions of the other phones, linearly interpolated
    (``scipy.interpolate.interp1d``) at the current phone's timestamps.
    Another phone only contributes inside the index window
    ``[start_idx, stop_idx)`` derived from its first and last timestamps;
    samples outside that window keep their own value.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``millisSinceGpsEpoch``, ``latDeg``, ``lngDeg`` and either
        ``collectionName``/``phoneName`` or a ``phone`` column of the form
        ``<collection>_<phoneName>``. Rows of each phone are assumed to be
        sorted by time (``assume_sorted=True`` is passed to ``interp1d``).

    Returns
    -------
    pandas.DataFrame
        The SAME object that was passed in: ``latDeg``/``lngDeg`` (and, when
        missing, ``collectionName``/``phoneName``) are written in place.
    """
    from scipy.interpolate import interp1d

    if 'collectionName' not in df.columns:
        df['collectionName'] = df['phone'].str.split('_', expand=True)[0]
        df['phoneName']      = df['phone'].str.split('_', expand=True)[1]

    coord_cols = ['latDeg', 'lngDeg']

    for collection in df['collectionName'].drop_duplicates().to_numpy():
        in_collection = (df['collectionName'] == collection)
        phone_names   = df.loc[in_collection, 'phoneName'].drop_duplicates().to_numpy()

        # Snapshot every phone's (time, lat, lng) before anything is modified,
        # so all averages are based on the original positions.
        phone_rows = {}
        phone_data = {}
        for phone in phone_names:
            rows = (in_collection & (df['phoneName'] == phone)).to_numpy()
            phone_rows[phone] = rows
            phone_data[phone] = df.loc[rows, ['millisSinceGpsEpoch'] + coord_cols].to_numpy()

        corrections = {}
        for current in phone_data:
            track = phone_data[current]

            # Column 0 counts the contributing tracks (starting with the phone
            # itself), columns 1-2 accumulate lat/lng.
            # `np.float` was removed from NumPy; the builtin float is the same dtype.
            correction = np.ones(track.shape, dtype=float)
            correction[:,1:] = track[:,1:]

            # The phones' timestamps don't completely match, so interpolate.
            for other in phone_data:
                if other == current:
                    continue

                other_track = phone_data[other]
                loc = interp1d(other_track[:,0],
                               other_track[:,1:],
                               axis=0,
                               kind='linear',
                               copy=False,
                               bounds_error=None,
                               fill_value='extrapolate',
                               assume_sorted=True)

                # start_idx: last sample recorded before `other` starts;
                # stop_idx:  last sample recorded before `other` ends.
                start_idx = 0
                stop_idx = 0
                for idx, val in enumerate(track[:,0]):
                    if val < other_track[0,0]:
                        start_idx = idx
                    if val < other_track[-1,0]:
                        stop_idx = idx

                if stop_idx - start_idx > 0:
                    correction[start_idx:stop_idx,0] += 1
                    correction[start_idx:stop_idx,1:] += loc(track[start_idx:stop_idx,0])

            # Turn the accumulated sums into means.
            correction[:,1] /= correction[:,0]
            correction[:,2] /= correction[:,0]

            corrections[current] = correction

        for phone in phone_names:
            df.loc[phone_rows[phone], coord_cols] = corrections[phone][:,1:]

    return df

def add_velocity(test_file, \
                 predict_type='test'):
    """Attach ECEF positions and per-phone velocity columns to a prediction frame.

    Parameters
    ----------
    test_file : str or pandas.DataFrame
        Path of a CSV, or a frame (copied), with at least the columns
        ``phone`` (``<collection>_<phoneName>``), ``millisSinceGpsEpoch``,
        ``latDeg`` and ``lngDeg``.
    predict_type : str
        ``'train'`` uses the train baseline and the train velocity files;
        any other value uses the test ones.

    Returns
    -------
    pandas.DataFrame
        The input rows sorted by ``millisSinceGpsEpoch``, with
        ``collectionName``, ``phoneName``, ``heightAboveWgs84EllipsoidM``,
        ``X``, ``Y``, ``Z`` and whatever columns the velocity CSVs provide,
        matched per phone to the nearest timestamp within 100000 ms.

    Notes
    -----
    The height is copied POSITIONALLY from the baseline file after both
    frames are sorted by collection and time, so the input must contain
    exactly the baseline's rows (same count, same order among equal keys).
    """
    datapath = '../input/google-smartphone-decimeter-challenge/'

    if predict_type == 'train':
        baseline_test = pd.read_csv(datapath + "baseline_locations_train.csv")
    else:
        baseline_test = pd.read_csv(datapath + "baseline_locations_test.csv")

    if isinstance(test_file, str):
        test_df = pd.read_csv(test_file)
    else:
        test_df = test_file.copy()

    name_parts = test_df['phone'].str.split('_', expand=True)
    test_df['collectionName'] = name_parts[0]
    test_df['phoneName']      = name_parts[1]

    baseline_test = baseline_test.sort_values(by=['collectionName', 'millisSinceGpsEpoch'])
    test_df       = test_df.sort_values(by=['collectionName', 'millisSinceGpsEpoch'])

    test_df['heightAboveWgs84EllipsoidM'] = baseline_test['heightAboveWgs84EllipsoidM'].values

    # Geodetic -> ECEF for all rows in one array call instead of a per-row loop.
    ecef_x, ecef_y, ecef_z = pm.geodetic2ecef(test_df['latDeg'].values,
                                              test_df['lngDeg'].values,
                                              test_df['heightAboveWgs84EllipsoidM'].values)
    test_df['X'] = ecef_x
    test_df['Y'] = ecef_y
    test_df['Z'] = ecef_z

    # One velocity CSV per (collection, phone) present in the frame.
    dXYZs = []
    for (collection, device_name), _ in tqdm(test_df.groupby(['collectionName', 'phoneName'])):
        if predict_type == 'train':
            df_xyz = pd.read_csv(f'../input/train-velocity/train_{collection}_{device_name}_velocity.csv')
        else:
            df_xyz = pd.read_csv(f'../input/position-optimize-014/test_{collection}_{device_name}_velocity.csv')

        dXYZs.append(df_xyz)

    dXYZs   = pd.concat(dXYZs)

    # merge_asof needs both sides sorted by the `on` key.
    test_df = pd.merge_asof(test_df.sort_values('millisSinceGpsEpoch'), \
                            dXYZs.sort_values('millisSinceGpsEpoch'), \
                            on = "millisSinceGpsEpoch", \
                            by = ['collectionName', 'phoneName'],
                            direction = 'nearest', \
                            tolerance=100000)

    return test_df

def remove_collection_device(input_df, collectionName, phoneName_arr):
    """Replace the positions of selected phones by interpolating the other phones.

    Within ``collectionName``, the ``latDeg``/``lngDeg`` of every phone listed
    in ``phoneName_arr`` are discarded and re-created by interpolating, over
    ``millisSinceGpsEpoch``, between the positions of the remaining phones of
    that collection. Samples that cannot be interpolated (before the first or
    after the last remaining sample) keep their original position.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Needs ``collectionName``, ``phoneName``, ``millisSinceGpsEpoch``,
        ``latDeg`` and ``lngDeg``.
    collectionName : str
        Collection to process.
    phoneName_arr : list-like of str
        Phones whose positions are replaced.

    Returns
    -------
    pandas.DataFrame
        A new frame sorted by ``millisSinceGpsEpoch``, indexed by those
        timestamps, carrying the previous index in the column ``index``.

    Notes
    -----
    As before, the ``index`` column is also added to the caller's
    ``input_df`` (kept for backward compatibility).
    """
    coord_cols = ['latDeg', 'lngDeg']

    input_df['index'] = input_df.index
    input_df          = input_df.sort_values('millisSinceGpsEpoch')
    input_df.index    = input_df['millisSinceGpsEpoch'].values

    cond1     = (input_df['collectionName']==collectionName)
    df_       =  input_df.loc[cond1].copy()
    origin_df =  df_.copy()

    cond2     = (df_['phoneName'].isin(phoneName_arr))
    df_.loc[cond2, coord_cols] = np.nan

    # Interpolate only the two coordinate columns: interpolating the whole
    # frame also touches string/object columns, which recent pandas rejects,
    # and no other column is used afterwards.
    df_[coord_cols] = df_[coord_cols].interpolate(method='index', limit_area='inside').to_numpy()

    # Samples outside the span of the remaining phones stay NaN; restore them.
    _index = df_['latDeg'].isnull()

    if _index.any():
        df_.loc[_index, 'latDeg'] = origin_df.loc[_index, 'latDeg'].values
        df_.loc[_index, 'lngDeg'] = origin_df.loc[_index, 'lngDeg'].values

    output_df = input_df.copy()
    output_df.loc[cond1, coord_cols]  = df_[coord_cols].values

    return output_df

# Maps a collection name to a list of phone names in that collection.
# Presumably these are the phones whose positions get replaced via
# remove_collection_device -- the use site is not visible here; TODO confirm.
remove_list = \
{'2020-05-14-US-MTV-2': ['Pixel4XLModded'],
 '2020-09-04-US-SF-1':  ['Pixel4'],
 '2021-01-04-US-RWC-1': ['Pixel4XLModded', 'Pixel4XL'],
 '2021-01-04-US-RWC-2': ['Pixel4XLModded', 'Pixel4XL'], 
 '2021-01-05-US-SVL-1': ['Pixel4XL', 'Pixel5'],
 '2021-01-05-US-SVL-2': ['Pixel4XL'],
 '2021-04-15-US-MTV-1': ['SamsungS20Ultra'],
 '2021-04-28-US-MTV-1': ['SamsungS20Ultra'],}


def stopped_process_v2(test_df):
    """Freeze the position while a phone is (almost) not moving.

    Per (collection, phone): the velocity columns are cleaned with
    ``filter_outlier``, the speed ``vel`` is the Euclidean norm of
    ``Vx``/``Vy``/``Vz``, and every sample slower than the threshold takes
    over the last position recorded while moving (forward fill). The first
    and last sample of each phone always keep their own position.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Needs ``collectionName``, ``phoneName``, ``millisSinceGpsEpoch``,
        ``latDeg``, ``lngDeg``, ``Vx``, ``Vy`` and ``Vz``.

    Returns
    -------
    pandas.DataFrame
        A copy with updated ``Vx``/``Vy``/``Vz``, ``latDeg``/``lngDeg`` and
        an added ``vel`` column.
    """
    sub_df = test_df.copy()

    vel_cols   = ['Vx', 'Vy', 'Vz']
    coord_cols = ['latDeg', 'lngDeg']

    # The original code branched on three SJC collections but used the same
    # value in both branches, so a single threshold is equivalent.
    vel_thres  = 0.9   # 0.87

    for (collection, phone), _ in tqdm(sub_df.groupby(['collectionName', 'phoneName'])):
        cond     = (sub_df['collectionName']==collection)&(sub_df['phoneName']==phone)

        sub_df.loc[cond, vel_cols] = filter_outlier(sub_df.loc[cond])[vel_cols].values

        df_         = sub_df.loc[cond].copy()
        df_.index   = range(len(df_))

        df_['vel']  = df_[vel_cols].apply(lambda tup: np.linalg.norm(tup), axis=1)

        first_idx, first_point = df_.index[0],  df_[coord_cols].values[0]
        last_idx,  last_point  = df_.index[-1], df_[coord_cols].values[-1]

        # Blank the position of slow samples ...
        stopped = df_['vel'] < vel_thres
        df_.loc[stopped, coord_cols] = np.nan

        # ... but keep both end points so the fill has an anchor.
        if pd.isnull(df_.loc[first_idx, 'latDeg']):
            df_.loc[first_idx, coord_cols]  = first_point

        if pd.isnull(df_.loc[last_idx, 'latDeg']):
            df_.loc[last_idx,  coord_cols]  = last_point

        # ffill() is the supported spelling of the deprecated
        # interpolate(method='pad').
        df_ = df_.ffill()

        sub_df.loc[cond, coord_cols]  = df_[coord_cols].values
        sub_df.loc[cond, 'vel']       = df_['vel'].values

    return sub_df

def position_shift(fname, a, save_name=None):
    """Shift every position by ``a`` meters along the direction of travel.

    Positions are converted to ECEF with a fixed ellipsoidal height of
    63.5 m. For each phone, every sample is moved along the vector from the
    previous sample to itself: the new point is
    ``previous + (current - previous) * (1 - a / dist)``, i.e. ``a`` meters
    back towards the previous sample for positive ``a`` and forward for
    negative ``a``. Samples without a usable result (first sample of a phone,
    zero step length) keep their original latitude/longitude.

    Parameters
    ----------
    fname : str or pandas.DataFrame
        Path of a CSV, or a frame (copied), with ``latDeg``, ``lngDeg``,
        ``millisSinceGpsEpoch`` and either ``phone`` or
        ``collectionName``/``phoneName``.
    a : float
        Shift in meters.
    save_name : unused
        Accepted for backward compatibility; nothing is written.

    Returns
    -------
    pandas.DataFrame
        The rows sorted by phone and time with shifted ``latDeg``/``lngDeg``
        plus the helper columns (``heightAboveWgs84EllipsoidM``, ``x``,
        ``xp``, ``xdiff``, ``xnew``, ... and ``dist``).
    """
    msge = 'millisSinceGpsEpoch'

    if isinstance(fname, str):
        d = pd.read_csv(fname)
    else:
        d = fname.copy()
    if 'phone' not in d.columns:
        d['phone'] = d['collectionName'] + '_' + d['phoneName']

    d['heightAboveWgs84EllipsoidM'] = 63.5
    # WGS84_to_ECEF is built from numpy ufuncs, so whole columns can be
    # converted at once instead of row by row.
    d['x'], d['y'], d['z'] = WGS84_to_ECEF(d['latDeg'], d['lngDeg'], d['heightAboveWgs84EllipsoidM'])

    d = d.sort_values(['phone', msge])

    # Previous sample of the same phone (NaN on each phone's first row).
    same_phone = d['phone'].eq(d['phone'].shift())
    for fi in ['x','y','z']:
        # Plain column assignment: `d[[name]] = Series` is rejected by
        # current pandas ("Columns must be same length as key").
        d[fi+'p']    = d[fi].shift().where(same_phone)
        d[fi+'diff'] = d[fi]-d[fi+'p']

    d['dist'] = np.sqrt(d['xdiff']**2 + d['ydiff']**2+ d['zdiff']**2)
    for fi in ['x','y','z']:
        d[fi+'new'] = d[fi+'p'] + d[fi+'diff']*(1-a/d['dist'])
    lng, lat, alt = ECEF_to_WGS84(d['xnew'].values,d['ynew'].values,d['znew'].values)

    # Fall back to the original coordinates wherever the shift is undefined.
    # Non-finite covers NaN as well as an infinite transform result.
    bad_lng = ~np.isfinite(lng)
    bad_lat = ~np.isfinite(lat)
    lng[bad_lng] = d.loc[bad_lng,'lngDeg'].to_numpy()
    lat[bad_lat] = d.loc[bad_lat,'latDeg'].to_numpy()
    d['latDeg'] = lat
    d['lngDeg'] = lng

    return d #[sub_columns]

-------
## Load ground truth

In [None]:
# Collect the ground truth of every train collection/phone into one frame and
# write the submission-style columns to gt.csv (read back by compute_dist).
datadir  = Path('/kaggle/input/google-smartphone-decimeter-challenge/')
traindir = datadir / 'train'

# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on
# every call; gather the pieces in a list and concatenate once instead.
gt_frames = []
for d in os.listdir(traindir):
    for p in os.listdir(traindir/d):
        gt_frames.append(pd.read_csv(traindir/d/p/'ground_truth.csv'))
gt = pd.concat(gt_frames)

sample_sub  = pd.read_csv('../input/google-smartphone-decimeter-challenge/sample_submission.csv')
sub_columns = sample_sub.columns
gt['phone'] = gt['collectionName'] + '_' + gt['phoneName']
gt[sub_columns].to_csv('gt.csv', index = False)

-----------------------

------------------
## Advanced optimization

In [None]:
def map_all_route(df, \
                  train_collection_arr, \
                  test_collection_arr, \
                  train_phone_arr, \
                  test_phone_arr, \
                  map_thres=None):
    """Snap test tracks onto ground-truth tracks of matching train drives.

    The four ``*_arr`` arguments are iterated in parallel; each tuple pairs
    one test phone (``test_collection``/``phone_test``) with the train drive
    (``train_collection``/``phone_train``) whose ground_truth.csv serves as
    the reference line. For every pair:

    1. the reference is thinned so consecutive kept points are >= 3 m apart;
    2. for every route point the reference points closer than 50 m become
       candidates;
    3. a dynamic program picks one candidate per route point, penalising the
       distance to the candidate plus the mismatch between the distance
       travelled along the reference and along the route (only predecessors
       at the same or an earlier reference index are considered);
    4. every route point is projected onto the polyline through the chosen
       reference point and up to 5 neighbours on each side.

    If ``map_thres`` is given, a point is left unchanged when its projection
    is more than ``map_thres`` meters away.

    ``df`` must contain ``phone`` (``<collection>_<phoneName>``),
    ``millisSinceGpsEpoch``, ``latDeg`` and ``lngDeg``. Returns a copy of
    ``df`` with the ``latDeg``/``lngDeg`` of the processed phones replaced.
    """
    
    test_df = df.copy()
    
    for train_collection, test_collection, phone_train, phone_test in zip(train_collection_arr, \
                                                                          test_collection_arr, \
                                                                          train_phone_arr, \
                                                                          test_phone_arr):

        # Rows of the test phone, in the frame's existing row order.
        df_soi = test_df[test_df['phone']==f'{test_collection}_{phone_test}']\
                .rename(columns={'phone': 'phoneName'})[['phoneName', 'millisSinceGpsEpoch', 'latDeg', 'lngDeg']]

        df_soi.index = range(len(df_soi))

        gt = pd.read_csv(f'../input/google-smartphone-decimeter-challenge/train/{train_collection}/{phone_train}/ground_truth.csv')
        gt = gt[['phoneName', 'millisSinceGpsEpoch', 'latDeg', 'lngDeg']]
        gt['phoneName'] = train_collection + '_' + gt['phoneName']


        base         = gt[['latDeg', 'lngDeg']].values.astype('float64')
        route        = df_soi[['latDeg', 'lngDeg']].values.astype('float64')

        # Thin the reference: keep a point only if it is at least 3 m away
        # from the previously kept one.
        base_reduce  = []
        ori_idx      = []

        for i, P in enumerate(base):
            if i == 0:
                base_reduce.append(base[0])
                ori_idx.append(0)
                continue

            dist = calc_haversine(*P, *base_reduce[-1])

            if dist >= 3:
                base_reduce.append(P)
                ori_idx.append(i)

        base_reduce   = np.array(base_reduce)
        # Positions of the kept points in the unthinned reference
        # (only referenced by the commented-out line further down).
        base_idx      = np.array(ori_idx)

        # dp[i][j]: best cost of matching route[0..i] with route[i] assigned to
        # reference point j; trace[i][j]: reference index chosen for route[i-1].
        INF   = 9 * 1e9
        dp    = np.ones((route.shape[0], base_reduce.shape[0])) * INF
        trace = np.ones((route.shape[0], base_reduce.shape[0])) * -1

        # Candidate reference points (closer than 50 m) for every route point.
        cands = []
        for P in route:
            P_rep  = np.tile(P[None, :], (base_reduce.shape[0], 1))
            dist   = calc_haversine(P_rep[:, 0], P_rep[:, 1], base_reduce[:, 0], base_reduce[:, 1])

            idx    = np.where(dist<50)[0]
            cands.append(idx)


        # Initial cost: distance between the first route point and its candidate.
        for cand_idx in cands[0]:
            dp[0][cand_idx] = calc_haversine(*route[0], *base_reduce[cand_idx])

        for route_idx in tqdm(range(1, route.shape[0])):
            route_dis = calc_haversine(*route[route_idx], *route[route_idx-1])

            for cand_idx in cands[route_idx]:
                gap_dist  = calc_haversine(*route[route_idx], *base_reduce[cand_idx])

                # Walk backwards along the reference, accumulating the path
                # length from the predecessor up to the candidate.
                map_dis   = 0
                for pre_idx in range(cand_idx, -1, -1): 
                    map_dis += calc_haversine(*base_reduce[pre_idx], *base_reduce[pre_idx+1]) if pre_idx < cand_idx else 0

                    # Stop once the reference path is 40 m longer than the route step.
                    if map_dis > route_dis + 40: break

                    new_val = gap_dist + abs(map_dis - route_dis) + dp[route_idx-1][pre_idx]

                    if new_val < dp[route_idx][cand_idx]:
                        dp[route_idx][cand_idx]    = new_val
                        trace[route_idx][cand_idx] = pre_idx    

        # Backtrack from the cheapest end state.
        # NOTE(review): a trace entry that was never set stays -1, and int(-1)
        # then indexes the last column -- confirm this cannot occur in practice.
        map_idx = [np.argmin(dp[-1])]

        for route_idx in range(route.shape[0]-1, 0, -1):
            idx = map_idx[-1]
            map_idx.append(trace[route_idx][int(idx)])

        # map_idx = [base_idx[int(idx)] for idx in map_idx]
        map_idx = np.array(map_idx[::-1]).astype('int32')


        map_point = []

        for reduce_idx, P in zip(map_idx, route):
            P_point  = Point(P) 
            # Local polyline: up to 5 thinned reference points on each side.
            l, r     =  max(0, reduce_idx-5), min(reduce_idx+6, base_reduce.shape[0])
            segment  =  LineString(list(base_reduce[l : r]))
            
            # Closest point on that polyline (computed in raw lat/lng coordinates).
            map_P    = (segment.interpolate(segment.project(P_point)).coords[0])
            choose   = map_P 
            
            # Keep the original point when the snap would move it too far.
            if map_thres is not None and calc_haversine(map_P[0], map_P[1], P[0], P[1]) > map_thres:
                choose = P
                
            map_point.append(choose)

        map_point = np.array(map_point).astype('float64')

        test_df.loc[test_df['phone']==f'{test_collection}_{phone_test}', ['latDeg', 'lngDeg']] = map_point
        
    return test_df


def map_by_chunk(test_df, \
                 test_phone, \
                 test_collection, \
                 all_point, map_point, global2base, whichBase, base2global, \
                 map_thres       = 9, \
                 candidate_thres = 100, \
                 CHUNK_SIZE      = 20, \
                 show  = True):
    """Snap a track onto a set of reference polylines, one chunk at a time.

    The selected rows are sorted by time and cut into chunks of
    ``CHUNK_SIZE`` points. Inside each chunk a dynamic program assigns one
    reference point to every route point, the route points are projected onto
    the local reference polyline, and the snapped chunk replaces the original
    one only if it passes ``map_thres``.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Needs ``collectionName``, ``phoneName``, ``millisSinceGpsEpoch``,
        ``latDeg`` and ``lngDeg``.
    test_phone : str or list of str
        Phone name, or list of phone names, to process.
    test_collection : str
        Collection to process.
    all_point : indexable
        ``all_point[basePos]`` is a 2-D array of (lat, lng) points of one
        reference line.
    map_point : numpy.ndarray
        2-D array of (lat, lng) reference points addressed by a "global" index.
    global2base, whichBase : indexable
        For a global index: the point's position inside its reference line,
        and which reference line it belongs to.
    base2global : mapping
        ``(basePos, index_in_line) -> global index``.
    map_thres : float or callable
        A chunk is snapped when its mean snap distance is below this value,
        or when ``map_thres(mean_distance) == True`` for a callable.
    candidate_thres : float
        Reference points closer than this (meters) are candidates.
    CHUNK_SIZE : int
        Number of route points per chunk.
    show : bool
        When True, ``show_map_route(route, route_concat)`` is called.

    Returns
    -------
    (pandas.DataFrame, list)
        A copy of ``test_df`` with updated positions, and the list of local
        ``LineString`` objects used for the projections (one per route point).

    Notes
    -----
    NOTE(review): the route is built from time-sorted rows but written back
    in the frame's own row order, so the selected rows are assumed to be in
    time order already -- verify for multi-phone selections.
    """
    sub = test_df.copy()

    if isinstance(test_phone, list):
        cond    = (test_df['phoneName'].isin(test_phone))&(test_df['collectionName']==test_collection)
    else:
        cond    = (test_df['phoneName']==test_phone)&(test_df['collectionName']==test_collection)

    df_soi  = sub.loc[cond]\
                    .rename(columns={'phone': 'phoneName'})[['phoneName', 'millisSinceGpsEpoch', 'latDeg', 'lngDeg']]

    df_soi       = df_soi.sort_values(by='millisSinceGpsEpoch')
    df_soi.index = range(len(df_soi))

    route        = df_soi[['latDeg', 'lngDeg']].values.astype('float64')

    # Candidate reference points for every route point.
    candidate = []

    for P in route:
        P_rep  = np.tile(P[None, :], (map_point.shape[0], 1))
        dist   = calc_haversine(P_rep[:, 0], P_rep[:, 1], map_point[:, 0], map_point[:, 1])
        idx    = np.where(dist<candidate_thres)[0]
        candidate.append(idx)

    route_concat = []
    diffs        = []
    map_segment  = []

    for chunk_idx in tqdm(range(0, route.shape[0], CHUNK_SIZE)):

        segment = route[chunk_idx: min(chunk_idx+CHUNK_SIZE, route.shape[0])]
        cands   = candidate[chunk_idx: min(chunk_idx+CHUNK_SIZE, route.shape[0])]

        # dp[i][j]: best cost with chunk point i assigned to global reference
        # point j; trace[i][j]: reference index chosen for chunk point i-1.
        INF    = 9 * 1e9
        dp     = np.ones((segment.shape[0], map_point.shape[0])) * INF
        trace  = np.ones((segment.shape[0], map_point.shape[0])) * -1

        # Initial cost: distance between the chunk's FIRST point and its
        # candidate. (The previous code used route[0], the first point of the
        # whole track, which is only correct for the first chunk.)
        for cand_idx in cands[0]:
            dp[0][cand_idx] = calc_haversine(*segment[0], *map_point[cand_idx])

        for route_idx in range(1, segment.shape[0]):
            route_dis = calc_haversine(*segment[route_idx], *segment[route_idx-1])

            for cand_idx in cands[route_idx]:
                gap_dist  = calc_haversine(*segment[route_idx], *map_point[cand_idx])

                map_dis   = 0
                baseIndex = global2base[cand_idx]
                basePos   = whichBase[cand_idx]

                # Walk backwards along the candidate's own reference line,
                # accumulating the path length from predecessor to candidate.
                for preBaseIdx in range(baseIndex, -1, -1):
                    preGlobalIndex = base2global[(basePos, preBaseIdx)]

                    map_dis += calc_haversine(*all_point[basePos][preBaseIdx], *all_point[basePos][preBaseIdx+1]) \
                               if preBaseIdx < baseIndex else 0

                    # Stop once the reference path is 40 m longer than the route step.
                    if map_dis > route_dis + 40: break

                    new_val = 2 * gap_dist + abs(map_dis - route_dis) + dp[route_idx-1][preGlobalIndex]

                    if new_val < dp[route_idx][cand_idx]:
                        dp[route_idx][cand_idx]    = new_val
                        trace[route_idx][cand_idx] = preGlobalIndex

        # Backtrack from the cheapest end state of the chunk.
        map_idx = [np.argmin(dp[-1])]

        for route_idx in range(segment.shape[0]-1, 0, -1):
            index = map_idx[-1]
            map_idx.append(trace[route_idx][int(index)])

        map_idx       = np.array(map_idx[::-1]).astype('int32')
        map_route     = []

        for global_idx, P in zip(map_idx, segment):
            P_point   = Point(P)

            baseIndex = global2base[global_idx]
            basePos   = whichBase[global_idx]

            # Local polyline: up to 5 reference points on each side.
            l, r      =  max(0, baseIndex-5), min(baseIndex+6, all_point[basePos].shape[0])
            geo       =  LineString(list(all_point[basePos][l : r]))
            map_route.append((geo.interpolate(geo.project(P_point)).coords[0]))
            map_segment.append(geo)
        map_route = np.array(map_route).astype('float64')

        mean_diff = np.mean(calc_haversine(segment[:, 0], segment[:, 1], map_route[:, 0], map_route[:, 1]))
        diffs.append(mean_diff)

        # Accept the snapped chunk only if it stays close to the measurement.
        if callable(map_thres):
            if map_thres(mean_diff) == True:
                route_concat.append(map_route)
            else:
                route_concat.append(segment)
        else:
            if mean_diff < map_thres:
                route_concat.append(map_route)
            else:
                route_concat.append(segment)

    route_concat =  np.concatenate(route_concat)
    sub.loc[cond, ['latDeg', 'lngDeg']] = route_concat

    if show==True:
        show_map_route(route, route_concat)

    return sub, map_segment


def get_candidate_road_point(df, \
                  train_collection, \
                  test_collection, \
                  train_phone, \
                  test_phone,
                  base_point_dist=2,
                  map_dist=50):
    """Find, for every point of a test track, the nearby stretch of a reference track.

    The ground truth of ``train_collection``/``train_phone`` is read from
    disk and thinned so that consecutive kept points are at least
    ``base_point_dist`` meters apart. Reference points within ``map_dist``
    meters of a route point are its candidates, and a dynamic program picks
    one candidate per route point (cost: distance to the candidate plus the
    mismatch between the distance travelled along the reference and along
    the route; only predecessors at the same or an earlier reference index
    are considered).

    ``df`` must contain ``collectionName``, ``phoneName``,
    ``millisSinceGpsEpoch``, ``latDeg`` and ``lngDeg``; ``test_phone`` may be
    a single name or a list of names. ``df`` itself is not modified.

    Returns ``(map_segment, base_reduce)``: ``map_segment`` is a list with
    one entry per time-sorted route point, each a slice of ``base_reduce``
    holding up to 15 points on either side of the matched reference point;
    ``base_reduce`` is the thinned reference as an array of (lat, lng) rows.
    """
    
    test_df = df.copy()
    
    if isinstance(test_phone, list) == True:
        cond    = (test_df['phoneName'].isin(test_phone))&(test_df['collectionName']==test_collection) 
    else:
        cond    = (test_df['phoneName']==test_phone)&(test_df['collectionName']==test_collection) 
    
    df_soi  = test_df.loc[cond]\
                    .rename(columns={'phone': 'phoneName'})[['phoneName', 'millisSinceGpsEpoch', 'latDeg', 'lngDeg']]
    
    # The route is processed in time order.
    df_soi       = df_soi.sort_values(by='millisSinceGpsEpoch') 
    df_soi.index = range(len(df_soi))

    gt = pd.read_csv(f'../input/google-smartphone-decimeter-challenge/train/{train_collection}/{train_phone}/ground_truth.csv')
    gt = gt[['phoneName', 'millisSinceGpsEpoch', 'latDeg', 'lngDeg']]
    gt['phoneName'] = train_collection + '_' + gt['phoneName']


    base         = gt[['latDeg', 'lngDeg']].values.astype('float64')
    route        = df_soi[['latDeg', 'lngDeg']].values.astype('float64')

    # Thin the reference: keep a point only if it is at least
    # base_point_dist meters away from the previously kept one.
    base_reduce  = []
    ori_idx      = []

    for i, P in enumerate(base):
        if i == 0:
            base_reduce.append(base[0])
            ori_idx.append(0)
            continue

        dist = calc_haversine(*P, *base_reduce[-1])

        if dist >= base_point_dist:
            base_reduce.append(P)
            ori_idx.append(i)

    base_reduce   = np.array(base_reduce)
    # Positions of the kept points in the unthinned reference (not used below).
    base_idx      = np.array(ori_idx)

    # dp[i][j]: best cost of matching route[0..i] with route[i] assigned to
    # reference point j; trace[i][j]: reference index chosen for route[i-1].
    INF   = 9 * 1e9
    dp    = np.ones((route.shape[0], base_reduce.shape[0])) * INF
    trace = np.ones((route.shape[0], base_reduce.shape[0])) * -1

    # Candidate reference points (closer than map_dist) for every route point.
    cands = []
    for P in route:
        P_rep  = np.tile(P[None, :], (base_reduce.shape[0], 1))
        dist   = calc_haversine(P_rep[:, 0], P_rep[:, 1], base_reduce[:, 0], base_reduce[:, 1])

        idx    = np.where(dist<map_dist)[0]
        cands.append(idx)


    # Initial cost: distance between the first route point and its candidate.
    for cand_idx in cands[0]:
        dp[0][cand_idx] = calc_haversine(*route[0], *base_reduce[cand_idx])

    for route_idx in tqdm(range(1, route.shape[0])):
        route_dis = calc_haversine(*route[route_idx], *route[route_idx-1])

        for cand_idx in cands[route_idx]:
            gap_dist  = calc_haversine(*route[route_idx], *base_reduce[cand_idx])

            # Walk backwards along the reference, accumulating the path
            # length from the predecessor up to the candidate.
            map_dis   = 0
            for pre_idx in range(cand_idx, -1, -1): 
                map_dis += calc_haversine(*base_reduce[pre_idx], *base_reduce[pre_idx+1]) if pre_idx < cand_idx else 0

                # Stop once the reference path is 40 m longer than the route step.
                if map_dis > route_dis + 40: break

                new_val = gap_dist + abs(map_dis - route_dis) + dp[route_idx-1][pre_idx]

                if new_val < dp[route_idx][cand_idx]:
                    dp[route_idx][cand_idx]    = new_val
                    trace[route_idx][cand_idx] = pre_idx    

    # Backtrack from the cheapest end state.
    # NOTE(review): a trace entry that was never set stays -1, and int(-1)
    # then indexes the last column -- confirm this cannot occur in practice.
    map_idx = [np.argmin(dp[-1])]

    for route_idx in range(route.shape[0]-1, 0, -1):
        idx = map_idx[-1]
        map_idx.append(trace[route_idx][int(idx)])

    map_idx     = np.array(map_idx[::-1]).astype('int32')
    map_segment = []

    for reduce_idx, P in zip(map_idx, route):
#         P_point  = Point(P)

        # Window of up to 15 thinned reference points on each side of the match.
        l, r     =  max(0, reduce_idx-15), min(reduce_idx+16, base_reduce.shape[0])
        map_segment.append(base_reduce[l : r])

#         segment  =  LineString(list(base_reduce[l : r]))
#         map_point.append((segment.interpolate(segment.project(P_point)).coords[0]))
    
#     test_df.loc[test_df['phone']==f'{test_collection}_{phone_test}', ['latDeg', 'lngDeg']] = map_point

    return map_segment, base_reduce


def vel_0_process(train_df):
    """Hold the position constant while a phone is (almost) not moving.

    Per (collection, phone): the velocity columns are cleaned with
    ``filter_outlier``, the speed ``vel`` is the Euclidean norm of
    ``Vx``/``Vy``/``Vz``, and the position of every sample slower than
    0.87 m/s is replaced by the last position recorded while moving
    (forward fill), or by the next one when there is none before it
    (backward fill).

    Parameters
    ----------
    train_df : pandas.DataFrame
        Needs ``collectionName``, ``phoneName``, ``millisSinceGpsEpoch``,
        ``latDeg``, ``lngDeg``, ``Vx``, ``Vy`` and ``Vz``.

    Returns
    -------
    pandas.DataFrame
        A copy with updated ``Vx``/``Vy``/``Vz``, ``latDeg``/``lngDeg`` and
        an added ``vel`` column.
    """
    sub_df = train_df.copy()

    vel_cols   = ['Vx', 'Vy', 'Vz']
    coord_cols = ['latDeg', 'lngDeg']

    # The original code branched on three SJC collections but used the same
    # value in both branches, so a single threshold is equivalent.
    vel_thres  = 0.87

    for (collection, phone), _ in tqdm(sub_df.groupby(['collectionName', 'phoneName'])):
        cond     = (sub_df['collectionName']==collection)&(sub_df['phoneName']==phone)

        sub_df.loc[cond, vel_cols] = filter_outlier(sub_df.loc[cond])[vel_cols].values

        df_         = sub_df.loc[cond].copy()
        df_['vel']  = df_[vel_cols].apply(lambda tup: np.linalg.norm(tup), axis=1)

        stopped = df_['vel'] < vel_thres
        df_.loc[stopped, coord_cols] = np.nan

        # ffill()/bfill() are the supported spellings of the deprecated
        # interpolate(method='pad') / interpolate(method='backfill').
        df_ = df_.ffill()
        df_ = df_.bfill()

        sub_df.loc[cond, coord_cols] = df_[coord_cols].values
        sub_df.loc[cond, 'vel']      = df_['vel'].values

    return sub_df

def get_param(collection_idx, param_set, group_collection):
    """Look up the parameter pair of the group that contains ``collection_idx``.

    The three groups ``group_collection[0..2]`` are checked in order. For the
    first group ``g`` containing ``collection_idx`` the pair
    ``(param_set[g], param_set[3 + g])`` is returned. When no group matches,
    the function returns None.
    """
    for group_no in range(3):
        if collection_idx in group_collection[group_no]:
            return param_set[group_no], param_set[3 + group_no]

    return None

    
def opt_process(df):
    """Smooth each collection's track by fusing positions with velocities.

    For every collection a sparse linear least-squares problem is solved in
    ECEF: the solution stays close to the measured positions ``X``/``Y``/``Z``
    (weight ``alpha``) while its step-to-step differences follow the
    displacement ``velocity * delta_t`` (weight ``beta``). Velocities are
    cleaned with ``filter_outlier(thres=2.4)`` and steps slower than 1.0 m/s
    are treated as standing still. The solution is converted back with
    ``ecef2lla`` and written to ``latDeg``/``lngDeg``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``collectionName``, ``millisSinceGpsEpoch``, ``X``, ``Y``,
        ``Z``, ``Vx``, ``Vy`` and ``Vz``. All rows of a collection are
        treated as ONE time series in the frame's row order.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with updated ``latDeg``/``lngDeg``.

    Notes
    -----
    The weights are looked up by the 1-based position of the collection in
    group order; positions not listed in ``group_collection`` (1..29) make
    ``get_param`` return None and the unpacking fail.
    """
    # Imported explicitly: `import scipy.sparse` alone does not guarantee
    # that the `scipy.sparse.linalg` submodule is loaded.
    from scipy.sparse.linalg import spsolve

    train_df = df.copy()

    group_collection = [[1, 2, 3, 4, 5, 6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21],
                        [22,23,25,26,28],
                        [24,27,29]]

    # first three entries: beta per group, last three: alpha per group
    param_set        =  [0.1, 0.1, 0.1] + [1.0, 1.0, 1.0] #

    # Group by the plain column name: a one-element list makes pandas >= 2
    # yield 1-tuples as keys, and the equality mask below would never match.
    for collection_idx, (collection, _) in enumerate(train_df.groupby('collectionName')):

        cond = train_df['collectionName']==collection

        delta_t        = train_df.loc[cond, 'millisSinceGpsEpoch'].diff().values[1:] / 1000.0

        xy_hat         = train_df.loc[cond, ['X', 'Y', 'Z']].values
        delta_xy_hat   = filter_outlier(train_df.loc[cond], thres=2.4)[['Vx', 'Vy', 'Vz']].values[1:, :]

        # Treat very slow steps as standing still, then velocity -> displacement.
        delta_xy_hat[np.where(np.linalg.norm(delta_xy_hat, axis=1)<1.0)] = 0
        delta_xy_hat   = delta_xy_hat * delta_t[:, None]

        beta_param, alpha_param = get_param(collection_idx+1, param_set, group_collection)

        N        = xy_hat.shape[0]
        alpha    = (alpha_param)**(-2) * np.ones(N)
        beta     = (beta_param + beta_param * delta_t)**(-2) # * np.ones(N)

        A  = scipy.sparse.spdiags(alpha, [0], N, N)
        B  = scipy.sparse.spdiags( beta, [0], N-1, N-1)
        # First-difference operator: (D @ x)[i] = x[i+1] - x[i]
        D  = scipy.sparse.spdiags(np.stack([-np.ones(N), np.ones(N)]), [0, 1], N-1, N)

        # Normal equations of the weighted least-squares problem.
        Q       = A + (D.T @ B @ D)
        c       = (A @ xy_hat) + (D.T @ (B @ delta_xy_hat))
        xy_star = spsolve(Q, c)

        # ecef2lla returns (n, 1) column vectors; convert once and flatten.
        lat_col, lng_col, _ = ecef2lla(*xy_star.T)
        train_df.loc[cond, 'latDeg'] = lat_col[:, 0]
        train_df.loc[cond, 'lngDeg'] = lng_col[:, 0]

    return train_df


def map_to_ground_truth(route, map_segment, CHUNK_SIZE=20):
    '''
    Snap every route point onto its own map segment.

    Input:
        route       : array of [lat, lng]
        map_segment : one coordinate sequence per route point (paired by position)
        CHUNK_SIZE  : not used here; accepted so the signature matches the
                      other map functions
    fixed heightAboveWgs84EllipsoidM = 63.5

    --------------
    Return: (x, y, z) in ECEF, one row per route point
    '''

    def _snap(latlng, segment_coords):
        # Closest point on the polyline to the observed position.
        observed  = Point(latlng)
        polyline  = LineString(segment_coords)
        lat, lng  = polyline.interpolate(polyline.project(observed)).coords[0]
        return list(pm.geodetic2ecef(lat, lng, 63.5))

    snapped = [_snap(P, segment) for P, segment in zip(route, map_segment)]

    return np.array(snapped).astype('float64')


# Root folder of the competition dataset; get_map_data() reads the baseline CSV and the
# per-phone ground_truth.csv files from underneath it.
data_dir = Path("../input/google-smartphone-decimeter-challenge")

def getBaseReduce(base, thres=5):
    '''
    Thin out a track of [lat, lng] points.

    The first point is always kept; every later point is kept only when its
    calc_haversine distance to the most recently kept point is at least
    `thres`.

    --------------
    Return: array of the kept points, in their original order
    '''
    kept = []

    for P in base:
        # Nothing kept yet -> this is the first point, keep it unconditionally.
        if not kept or calc_haversine(*P, *kept[-1]) >= thres:
            kept.append(P)

    return np.array(kept)


def getCandidate(route, base_reduce, thres=30):
    '''
    For every route point, find the base points lying closer than `thres`.

    Input:
        route       : array of [lat, lng]
        base_reduce : array of [lat, lng] reference points

    --------------
    Return: list with one index array (into base_reduce) per route point
    '''

    def _nearby(P):
        # Repeat the point so it can be compared against all base points at once.
        tiled = np.tile(P[None, :], (base_reduce.shape[0], 1))
        gaps  = calc_haversine(tiled[:, 0], tiled[:, 1], base_reduce[:, 0], base_reduce[:, 1])
        return np.where(gaps < thres)[0]

    return [_nearby(P) for P in route]

def get_map_data(train_group):
    '''
    Build the reference "map" from training ground-truth tracks.

    Every (collectionName, phoneName) pair listed in the baseline train CSV
    whose collection is in `train_group` contributes one ground-truth track,
    thinned with getBaseReduce.

    --------------
    Return:
        all_point   : list of thinned tracks (one [lat, lng] array per phone)
        map_point   : all tracks concatenated into one array
        global2base : for each row of map_point, its index inside its own track
        whichBase   : for each row of map_point, the position of its track in all_point
        base2global : dict (track position, index in track) -> row of map_point
    '''
    baseline  = pd.read_csv(data_dir / "baseline_locations_train.csv")
    all_point = []

    for (collection_name, phone_name), _ in baseline.groupby(["collectionName", "phoneName"]):
        if collection_name not in train_group:
            continue

        gt_path = data_dir / f"train/{collection_name}/{phone_name}/ground_truth.csv"
        gt_df   = pd.read_csv(gt_path)

        track   = gt_df[['latDeg', 'lngDeg']].values.astype('float64')
        all_point.append(getBaseReduce(track))

    map_point   = np.concatenate(all_point)
    total       = map_point.shape[0]

    global2base = np.zeros(total).astype('int32')
    whichBase   = np.zeros(total).astype('int32')
    base2global = {}

    # Running offset of the current track inside map_point.
    offset = 0
    for track_pos, track in enumerate(all_point):
        track_len = track.shape[0]
        for local_idx in range(track_len):
            global_idx                          = offset + local_idx
            global2base[global_idx]             = local_idx
            whichBase[global_idx]               = track_pos
            base2global[(track_pos, local_idx)] = global_idx
        offset += track_len

    return all_point, map_point, global2base, whichBase, base2global


def get_multiple_candidate_road_point(df, \
                                      test_phone, \
                                      test_collection, \
                                      all_point, map_point, global2base, whichBase, base2global, \
                                      map_thres       = 9, \
                                      candidate_thres = 100, \
                                      CHUNK_SIZE      = 20):
    '''
    Match a test route, chunk by chunk, against the ground-truth map tracks.

    Input:
        df              : frame with phoneName, collectionName,
                          millisSinceGpsEpoch, latDeg, lngDeg
        test_phone      : one phone name or a list of phone names
        test_collection : collection whose rows form the route
        all_point, map_point, global2base, whichBase, base2global :
                          the five values returned by get_map_data
        map_thres       : a track is accepted for a chunk only when the mean
                          distance between the chunk and its projection onto
                          the track is below this value
        candidate_thres : map points farther than this from a route point are
                          not considered for that point
        CHUNK_SIZE      : number of consecutive route points matched together

    For every chunk a dynamic program assigns one map point to each route
    point. A transition may only come from the same or an earlier point of
    the same track; its cost is
        2 * (route point <-> map point distance)
          + |distance walked along the track - distance walked along the route|.

    --------------
    Return:
        road_candidate : one list per chunk, either [] or [LineString] with
                         the best accepted track section
        diffs          : per chunk, the smallest mean projection distance seen
                         (inf when no path could be traced)
    '''

    sub = df.copy()

    if isinstance(test_phone, list):
        cond    = (df['phoneName'].isin(test_phone))&(df['collectionName']==test_collection)
    else:
        cond    = (df['phoneName']==test_phone)&(df['collectionName']==test_collection)

    df_soi  = sub.loc[cond]\
                    .rename(columns={'phone': 'phoneName'})[['phoneName', 'millisSinceGpsEpoch', 'latDeg', 'lngDeg']]

    df_soi       = df_soi.sort_values(by='millisSinceGpsEpoch')
    df_soi.index = range(len(df_soi))

    route        = df_soi[['latDeg', 'lngDeg']].values.astype('float64')
    candidate    = []

    # For every route point: indices of all map points within candidate_thres.
    for P in route:
        P_rep  = np.tile(P[None, :], (map_point.shape[0], 1))
        dist   = calc_haversine(P_rep[:, 0], P_rep[:, 1], map_point[:, 0], map_point[:, 1])
        idx    = np.where(dist<candidate_thres)[0]
        candidate.append(idx)

    diffs          = []

    ## ground truth road candidates for each route segment (CHUNK_SIZE=20)
    ## for segment i, the candidates is road_candidate[i] (a list of LineString object)
    road_candidate = []

    for chunk_idx in tqdm(range(0, route.shape[0], CHUNK_SIZE)):
        min_diff = float('inf')
        segment = route[chunk_idx: min(chunk_idx+CHUNK_SIZE, route.shape[0])]
        cands   = candidate[chunk_idx: min(chunk_idx+CHUNK_SIZE, route.shape[0])]

        # dp[i][g]    : best cost of matching segment[:i+1] with segment[i] -> map point g
        # trace[i][g] : map point chosen for segment[i-1] on that best path (-1 = none)
        INF    = 9 * 1e9
        dp     = np.ones((segment.shape[0], map_point.shape[0])) * INF
        trace  = np.ones((segment.shape[0], map_point.shape[0])) * -1

        # Seed with the distance from the FIRST POINT OF THIS CHUNK to each of
        # its candidates. (Using route[0] here would seed every later chunk
        # with distances to the very first point of the whole route.)
        for cand_idx in cands[0]:
            dp[0][cand_idx] = calc_haversine(*segment[0], *map_point[cand_idx])

        for route_idx in range(1, segment.shape[0]):
            route_dis = calc_haversine(*segment[route_idx], *segment[route_idx-1])

            for cand_idx in cands[route_idx]:
                gap_dist  = calc_haversine(*segment[route_idx], *map_point[cand_idx])

                map_dis   = 0
                baseIndex = global2base[cand_idx]
                basePos   = whichBase[cand_idx]

                # Walk backwards along the same track, accumulating the
                # along-track distance between the predecessor and cand_idx.
                for preBaseIdx in range(baseIndex, -1, -1):
                    preGlobalIndex = base2global[(basePos, preBaseIdx)]

                    map_dis += calc_haversine(*all_point[basePos][preBaseIdx], *all_point[basePos][preBaseIdx+1]) \
                               if preBaseIdx < baseIndex else 0

                    # Predecessors much farther back than the route moved are not considered.
                    if map_dis > route_dis + 40: break

                    new_val = 2 * gap_dist + abs(map_dis - route_dis) + dp[route_idx-1][preGlobalIndex]

                    if new_val < dp[route_idx][cand_idx]:
                        dp[route_idx][cand_idx]    = new_val
                        trace[route_idx][cand_idx] = preGlobalIndex


        gt_candidate = []
        valid_idx    = np.where(dp[-1] != INF)[0]
        base_pool    = set()

        # Try end points from cheapest to most expensive.
        for last_idx in sorted(valid_idx, key=lambda idx: dp[-1][idx]):

            ## get only one road segment from one base groundtruth
            basePos   = whichBase[last_idx]
            if basePos in base_pool:
                continue

            # Follow the back-pointers from the last route point to the first.
            map_idx      = [last_idx]

            for route_idx in range(segment.shape[0]-1, 0, -1):
                index = map_idx[-1]
                map_idx.append(trace[route_idx][int(index)])

            map_idx   = np.array(map_idx[::-1]).astype('int32')
            map_route = []

            # Widest [min_l, max_r) window of track indices touched by this path.
            min_l, max_r  = INF, 0

            for global_idx, P in zip(map_idx, segment):
                P_point   = Point(P)

                baseIndex = global2base[global_idx]
                basePos   = whichBase[global_idx]

                # Project the route point onto a short window of the track
                # around its matched map point.
                l, r      =  max(0, baseIndex-5), min(baseIndex+6, all_point[basePos].shape[0])
                geo       =  LineString(list(all_point[basePos][l : r]))
                map_route.append((geo.interpolate(geo.project(P_point)).coords[0]))

                min_l     = min(min_l, l)
                max_r     = max(max_r, r)

            map_route = np.array(map_route).astype('float64')
            mean_diff = np.mean(calc_haversine(segment[:, 0], segment[:, 1], map_route[:, 0], map_route[:, 1]))
            min_diff  = min(min_diff, mean_diff)

            if mean_diff < map_thres:
                geo  =  LineString(list(all_point[basePos][min_l : max_r]))
                gt_candidate.append(geo)
                base_pool.add(basePos)

        diffs.append(min_diff)

        # Only the first accepted (= cheapest) track section is reported per chunk.
        if len(gt_candidate) > 0:
            road_candidate.append([gt_candidate[0]])
        else:
            road_candidate.append([])

    return road_candidate, diffs

def map_to_multiple_ground_truth(route, map_segment, CHUNK_SIZE=20):
    '''
    Snap every route point onto the closest of its chunk's candidate roads.

    Input:
        route       : array of [lat, lng]
        map_segment : per chunk of CHUNK_SIZE consecutive route points, a list
                      of candidate line geometries (may be empty)
    fixed heightAboveWgs84EllipsoidM = 63.5

    A point whose chunk has no candidate keeps its own lat/lng.

    --------------
    Return: (x, y, z) in ECEF, one row per route point
    '''

    snapped = []

    for point_idx, P in enumerate(route):
        P_conv   = Point(P)
        options  = map_segment[point_idx // CHUNK_SIZE]

        # No road for this chunk: convert the observed position unchanged.
        if len(options) == 0:
            snapped.append(list(pm.geodetic2ecef(P[0], P[1], 63.5)))
            continue

        best_diff   = float('inf')
        best_latlng = None

        for line in options:
            lat, lng = line.interpolate(line.project(P_conv)).coords[0]
            diff     = calc_haversine(P[0], P[1], lat, lng)

            if diff < best_diff:
                best_diff   = diff
                best_latlng = (lat, lng)

        # Convert only the winning projection to ECEF.
        if best_latlng is None:
            snapped.append(None)
        else:
            snapped.append(list(pm.geodetic2ecef(best_latlng[0], best_latlng[1], 63.5)))

    return np.array(snapped).astype('float64')

-------------
## Test

In [None]:
def advanced_opt(test_df, map_segment, cond, map_function=map_to_ground_truth, CHUNK_SIZE=20):
    '''
    Smooth the rows selected by `cond` and pull them towards the map.

    Step 1 solves the position/velocity smoothing system
        (A + D^T B D) x = A x_hat + D^T B dx_hat.
    Step 2 repeats the solve for r = 0.01, 0.02, ... (np.arange(0.01, 0.99, 0.01)).
    Each round snaps the current estimate onto the map with `map_function`
    and blends the snapped positions in with weight r:
        ((1-r) A + r C + D^T B D) x = (1-r) A x_hat + r C x_map + D^T B dx_hat.

    Input:
        test_df      : frame with millisSinceGpsEpoch, X, Y, Z, Vx, Vy, Vz,
                       latDeg, lngDeg
        map_segment  : passed through unchanged to `map_function`
        cond         : boolean row selector for `test_df`
        map_function : callable(lat_lng_array, map_segment, CHUNK_SIZE) -> ECEF array
        CHUNK_SIZE   : passed through unchanged to `map_function`

    --------------
    Return: a copy of `test_df` with latDeg / lngDeg of the selected rows replaced
    '''

    test_opt_df    = test_df.copy()
    delta_t        = test_df.loc[cond, 'millisSinceGpsEpoch'].diff().values[1:] / 1000.0
    xy_hat         = test_df.loc[cond, ['X', 'Y', 'Z']].values
    delta_xy_hat   = filter_outlier(test_df.loc[cond], thres=2.4)[['Vx', 'Vy', 'Vz']].values[1:, :]

    # Velocities with norm below 0.87 are treated as "standing still".
    delta_xy_hat[np.where(np.linalg.norm(delta_xy_hat, axis=1)<0.87)] = 0
    # Velocity * time step = expected displacement between consecutive rows.
    delta_xy_hat   = delta_xy_hat * delta_t[:, None]

    alpha_param      = 1.0
    beta_param       = 0.1
    gamma_param      = 0.2

    N        = xy_hat.shape[0]
    alpha    = (alpha_param)**(-2) * np.ones(N)
    beta     = (beta_param + beta_param * delta_t)**(-2)
    gamma    = (gamma_param**(-2) * np.ones(N))

    A  = scipy.sparse.spdiags(alpha, [0], N, N)
    B  = scipy.sparse.spdiags( beta, [0], N-1, N-1)
    # First-difference operator: (D x)[i] = x[i+1] - x[i].
    D  = scipy.sparse.spdiags(np.stack([-np.ones(N), np.ones(N)]), [0, 1], N-1, N)
    C  = scipy.sparse.spdiags(gamma, [0], N, N)

    # These three products do not depend on r; compute them once.
    smooth_lhs   = D.T @ B @ D
    position_rhs = A @ xy_hat
    motion_rhs   = D.T @ (B @ delta_xy_hat)

    Q        = A + smooth_lhs
    c        = position_rhs + motion_rhs
    xy_star  = scipy.sparse.linalg.spsolve(Q, c)

    res        = ecef2lla(*xy_star.T)
    to_lat_lng = np.concatenate((res[0], res[1]), axis=1)

    for r in tqdm(np.arange(0.01, 0.99, 0.01)):
        xy_map     = map_function(to_lat_lng, map_segment, CHUNK_SIZE)

        Q = ((1 - r) * A) + (r * C) + smooth_lhs
        c = ((1 - r) * position_rhs) + (r * (C @ xy_map)) + motion_rhs
        xy_star = scipy.sparse.linalg.spsolve(Q, c)

        res        = ecef2lla(*xy_star.T)
        to_lat_lng = np.concatenate((res[0], res[1]), axis=1)

    # to_lat_lng already holds the conversion of the final xy_star.
    test_opt_df.loc[cond, ['latDeg', 'lngDeg']] = to_lat_lng

    return test_opt_df

In [None]:
# Start from an earlier submission file.
# NOTE(review): add_velocity presumably attaches the X/Y/Z and Vx/Vy/Vz columns
# that advanced_opt reads - confirm against its definition.
test_opt_chunk  = add_velocity('../input/submission-test/4.342_(0429MTV)_(0428_0421_0316R)_(0426)_thres9_(phone_mean)_3.875.csv')

## (04-29SJC)-(04-29MTV)
# One entry per test collection to map-match:
# (train_collection_arr, test_collection_arr, train_phone_arr, test_phone_arr)
for train_collection, test_collection, train_phone, test_phones in [
    ('2021-04-22-US-SJC-1', '2021-04-29-US-SJC-3', 'Pixel4', ['SamsungS20Ultra', 'Pixel4']),
    ('2021-04-29-US-MTV-1', '2021-04-29-US-MTV-2', 'Pixel4', ['Pixel4', 'Pixel5', 'SamsungS20Ultra']),
]:
    map_segment, base_reduce = get_candidate_road_point(test_opt_chunk,
                                                        train_collection,
                                                        test_collection,
                                                        train_phone,
                                                        test_phones)

    # Optimise every phone of the test collection against the matched road.
    cond               = (test_opt_chunk['collectionName'] == test_collection)
    test_opt_chunk     = advanced_opt(test_opt_chunk, map_segment, cond)

In [None]:
# Running row number inside each (collectionName, phoneName) track; show_gt uses it as
# the hover label. cumcount() returns a Series aligned with the frame's own index, so the
# numbering is right even when the rows of a track are not stored contiguously, and it
# does not change shape when every track happens to have the same length.
test_opt_chunk['index'] = test_opt_chunk.groupby(['collectionName', 'phoneName']).cumcount()
show_gt(test_opt_chunk)

In [None]:
# Post-process the map-matched track, combine phones, and order rows by phone and time.
test_opt_df  = vel_0_process(test_opt_chunk)
test_mean    = mean_with_other_phones(test_opt_df).sort_values(by=['phone', "millisSinceGpsEpoch"])

# Write only the four columns the submission file carries.
submission   = test_mean[['phone', 'millisSinceGpsEpoch', 'latDeg', 'lngDeg']]
submission.to_csv('3.875_(0429SJC_0429MTV)_(vel_0)_(phone_mean_ver2).csv', index=False)

# test_mean[['phone', 'millisSinceGpsEpoch', 'latDeg', 'lngDeg']].to_csv('4.342_(0429SJC_0429MTV)_(0428_0421_0316R)_(0426)_(vel_0)_(phone_mean_ver2).csv', index=False)
# test_mean[['phone', 'millisSinceGpsEpoch', 'latDeg', 'lngDeg']].to_csv('3.875_(0429SJC_0429MTV)_(0428_0421_0316R)_(vel_0)_(phone_mean_ver2).csv', index=False)
# test_mean[['phone', 'millisSinceGpsEpoch', 'latDeg', 'lngDeg']].to_csv('3.875_(0429SJC_0429MTV)_(vel_0)_(phone_mean_ver2).csv', index=False)
# test_mean[['phone', 'millisSinceGpsEpoch', 'latDeg', 'lngDeg']].to_csv('3.707_(0429SJC_0429MTV)_(vel_0)_(phone_mean_ver2).csv', index=False)
# test_mean[['phone', 'millisSinceGpsEpoch', 'latDeg', 'lngDeg']].to_csv('3.463_(0429SJC_0429MTV)_(vel_0)_(phone_mean_ver2).csv', index=False)
# test_mean[['phone', 'millisSinceGpsEpoch', 'latDeg', 'lngDeg']].to_csv('3.875_(0429SJC_adv_(0422_map))_(vel_0)_(phone_mean_ver2).csv', index=False)