In [1]:
# import library
import os
import pandas as pd
pd.options.display.max_columns = 100
import numpy as np
import matplotlib.pyplot as plt
from matplotlib_venn import venn2, venn2_circles
import seaborn as sns
from tqdm.notebook import tqdm
import pathlib
import plotly
import plotly.express as px

In [2]:
def calc_haversine(lat1, lon1, lat2, lon2, radius=6_367_000):
    """Great-circle distance between two points, via the haversine formula.

    Parameters
    ----------
    lat1, lon1, lat2, lon2 : scalar or array-like
        Coordinates in decimal degrees. Array-likes are combined element-wise.
    radius : float, optional
        Radius of the sphere. The result is expressed in the same unit as
        this value; the default (6,367,000) is the approximate Earth radius
        in metres that this notebook has always used.

    Returns
    -------
    Distance(s) along the sphere, with the same shape as the inputs.
    NaN inputs propagate to NaN outputs.
    """
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2) ** 2 + \
        np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make arcsin return NaN.
    a = np.clip(a, 0.0, 1.0)
    dist = 2 * radius * np.arcsin(a ** 0.5)
    return dist

In [3]:
def visualize_trafic(df, center, zoom=9, mapbox_style='stamen-terrain'):
    """Plot GPS fixes on an interactive map, one colour per phone.

    Parameters
    ----------
    df : DataFrame
        Must contain the columns ``latDeg``, ``lngDeg`` and ``phoneName``.
    center : dict
        Map centre, passed straight to plotly as ``center``
        (callers in this notebook build it as ``{"lat": ..., "lon": ...}``).
    zoom : int, optional
        Initial zoom level handed to plotly.
    mapbox_style : str, optional
        Base-map style applied through ``update_layout``.
        NOTE(review): recent plotly releases may require a token for the
        Stamen tile styles - confirm against the installed version.
    """
    # `labels` must be a dict mapping column names to display names; the
    # previous string value was not a valid argument, so it is dropped and
    # plotly falls back to the column names themselves.
    fig = px.scatter_mapbox(
        df,
        # (x, y) coordinates of every point
        lat="latDeg",
        lon="lngDeg",
        # one colour / legend entry per phone
        color="phoneName",
        zoom=zoom,
        center=center,
        height=600,
        width=800,
    )
    fig.update_layout(
        mapbox_style=mapbox_style,
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
        title_text="GPS trafic",
    )
    fig.show()
    
def visualize_collection(df, collection):
    """Map every fix of one collection, centred on the mean of its coordinates.

    Rows are selected where ``collectionName`` equals ``collection``; the
    selection is copied before being handed to ``visualize_trafic``.
    """
    in_collection = df['collectionName'] == collection
    collection_df = df.loc[in_collection].copy()

    map_center = {
        "lat": collection_df['latDeg'].mean(),
        "lon": collection_df['lngDeg'].mean(),
    }
    visualize_trafic(collection_df, map_center)

In [4]:
def add_distance_diff(df):
    """Add previous/next-row neighbour features for every position fix.

    For ``latDeg`` / ``lngDeg`` (and for ``latDeg_gt`` / ``lngDeg_gt`` when
    both columns are present) the following columns are added:

    * ``<col>_prev`` / ``<col>_next``            - value of the neighbouring row
    * ``<col>_prev_diff`` / ``<col>_next_diff``  - signed difference in degrees
    * ``dist[_gt]_prev`` / ``dist[_gt]_next``    - haversine distance to the neighbour

    ``phone_prev`` / ``phone_next`` are added as well and are used to blank
    (set to NaN) every feature whose neighbouring row belongs to another phone.

    Neighbours are taken by row position, so rows of one phone are expected to
    be contiguous and ordered in time - TODO confirm for new inputs.

    The frame is modified in place and also returned.
    """
    # Ground-truth features are optional so frames without *_gt columns
    # (no ground truth available) no longer raise KeyError.
    has_gt = 'latDeg_gt' in df.columns and 'lngDeg_gt' in df.columns
    suffixes = ['', '_gt'] if has_gt else ['']

    # shift(1):  every value moves one row down -> value of the previous row
    # shift(-1): every value moves one row up   -> value of the next row
    shifted_cols = ['latDeg', 'lngDeg', 'phone']
    if has_gt:
        shifted_cols += ['latDeg_gt', 'lngDeg_gt']
    for col in shifted_cols:
        df[f'{col}_prev'] = df[col].shift(1)
        df[f'{col}_next'] = df[col].shift(-1)

    # Signed coordinate differences to the neighbouring rows.
    for axis in ['latDeg', 'lngDeg']:
        for suffix in suffixes:
            col = f'{axis}{suffix}'
            df[f'{col}_prev_diff'] = df[col] - df[f'{col}_prev']
            df[f'{col}_next_diff'] = df[f'{col}_next'] - df[col]

    # Haversine distances to the neighbouring rows.
    for suffix in suffixes:
        lat, lng = f'latDeg{suffix}', f'lngDeg{suffix}'
        df[f'dist{suffix}_prev'] = calc_haversine(df[lat], df[lng], df[f'{lat}_prev'], df[f'{lng}_prev'])
        df[f'dist{suffix}_next'] = calc_haversine(df[lat], df[lng], df[f'{lat}_next'], df[f'{lng}_next'])

    # A neighbour that belongs to a different phone (or does not exist) is
    # not a real neighbour: blank every feature derived from it.
    for direction in ['prev', 'next']:
        other_phone = df['phone'] != df[f'phone_{direction}']
        derived_cols = []
        for suffix in suffixes:
            derived_cols += [
                f'latDeg{suffix}_{direction}',
                f'lngDeg{suffix}_{direction}',
                f'dist{suffix}_{direction}',
                f'latDeg{suffix}_{direction}_diff',
                f'lngDeg{suffix}_{direction}_diff',
            ]
        df.loc[other_phone, derived_cols] = np.nan

    return df

In [5]:
INPUT = '../input/google-smartphone-decimeter-challenge'
p = pathlib.Path(INPUT)

# The training baseline is read from an earlier notebook's filtered output
# rather than from the competition file:
# filtered_train = pd.read_csv(INPUT + '/' + 'baseline_locations_filtered_train.csv')
filtered_train = pd.read_csv('../output/filtered_nb017.csv')
test = pd.read_csv('/'.join([INPUT, 'baseline_locations_test.csv']))
sample_sub = pd.read_csv('/'.join([INPUT, 'sample_submission.csv']))

# Ground truth: one ground_truth.csv two directory levels below train/,
# all stacked into a single frame.
gt_files = list(p.glob('train/*/*/ground_truth.csv'))
gts = [pd.read_csv(gt_file) for gt_file in gt_files]
ground_truth = pd.concat(gts)

## EDA

In [6]:
# Bring the submission-format, filtered baseline into a shape that can be
# merged with the ground truth: the `phone` key looks like
# "<collectionName>_<phoneName>", so split it on '_' into its two parts.
# The vectorised .str accessor replaces the row-wise apply(lambda ...) and
# yields NaN (instead of raising) for a missing or malformed key.
filtered_train['collectionName'] = filtered_train['phone'].str.split('_').str[0]
filtered_train['phoneName'] = filtered_train['phone'].str.split('_').str[1]

## visualize traffic

## graph

In [7]:
# Suffix the ground-truth columns with _gt so they do not collide with the
# baseline columns of the same name during the merge.
ground_truth = ground_truth.rename(
    columns={
        'latDeg': 'latDeg_gt',
        'lngDeg': 'lngDeg_gt',
        'heightAboveWgs84EllipsoidM': 'heightAboveWgs84EllipsoidM_gt',
    }
)

# Keep only the fixes that have a ground-truth row for the same
# collection, phone and timestamp.
filtered_train = filtered_train.merge(
    ground_truth,
    how='inner',
    on=['collectionName', 'phoneName', 'millisSinceGpsEpoch'],
)

# Haversine distance between the ground-truth and the baseline position.
filtered_train['dist_err'] = calc_haversine(
    filtered_train['latDeg_gt'],
    filtered_train['lngDeg_gt'],
    filtered_train['latDeg'],
    filtered_train['lngDeg'],
)

# Previous/next-row differences and distances (baseline and ground truth).
filtered_train = add_distance_diff(filtered_train)

### visualize_err_move_dist
- baselineとgtとの誤差
- baselineの移動距離、gtの移動距離とスピードのグラフ

In [22]:
def visualize_err_move_dist(df, phone, reject_outlier=True):
    """Summarise the rows of one phone, split by a 3-sigma cut on ``dist_err``.

    When ``reject_outlier`` is true, ``describe()`` tables are displayed for
    the rows strictly above the threshold ("outlier") and for the rows
    strictly below it ("rejected"). When it is false nothing is shown.
    Nothing is returned.
    """
    if not reject_outlier:
        return

    phone_df = df[df['phone'] == phone]
    dist_err = phone_df['dist_err']

    # mean + 3 standard deviations (three-sigma rule, ~99.7% interval)
    th = (dist_err.std() * 3) + dist_err.mean()

    print('outlier')
    # rows whose distance to the ground truth exceeds the threshold
    display(phone_df[dist_err > th].describe())
    print()
    print('rejected')
    # rows whose distance to the ground truth is below the threshold
    display(phone_df[dist_err < th].describe())

In [25]:
# Summarise outlier vs. remaining rows (3-sigma cut on dist_err) for one phone.
visualize_err_move_dist(filtered_train, '2020-05-14-US-MTV-1_Pixel4XLModded', reject_outlier=True)

outlier


Unnamed: 0,millisSinceGpsEpoch,latDeg,lngDeg,latDeg_gt,lngDeg_gt,heightAboveWgs84EllipsoidM_gt,timeSinceFirstFixSeconds,hDop,vDop,speedMps,courseDegree,dist_err,latDeg_prev,latDeg_next,lngDeg_prev,lngDeg_next,latDeg_gt_prev,latDeg_gt_next,lngDeg_gt_prev,lngDeg_gt_next,latDeg_prev_diff,latDeg_next_diff,latDeg_gt_prev_diff,latDeg_gt_next_diff,lngDeg_prev_diff,lngDeg_next_diff,lngDeg_gt_prev_diff,lngDeg_gt_next_diff,dist_prev,dist_next,dist_gt_prev,dist_gt_next
count,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0
mean,1273530000000.0,37.546134,-122.264418,37.546143,-122.264417,34.8685,1573.35,15.7595,0.0,6.3695,191.7875,6.233414,37.552553,37.540921,-122.273328,-122.257045,37.552559,37.540931,-122.27332,-122.257045,3.3e-05,3.2e-05,3.444678e-05,3.428412e-05,-5.4e-05,-5.4e-05,-5.956948e-05,-5.966519e-05,8.519316,8.175576,6.703841,6.737627
std,754036.2,0.103561,0.146733,0.103578,0.146729,2.134085,754.03622,65.553531,0.0,11.100579,78.115663,2.218424,0.102215,0.103608,0.145073,0.14679,0.102235,0.103626,0.145068,0.146789,6.6e-05,6.2e-05,6.143159e-05,6.165781e-05,0.00011,0.000108,0.0001034467,0.0001036853,10.457543,10.224045,11.26995,11.278669
min,1273529000000.0,37.423563,-122.405673,37.423575,-122.40561,33.25,554.45,0.0,0.0,0.0,12.8,3.686671,37.423872,37.423569,-122.405673,-122.405673,37.423907,37.423575,-122.40561,-122.40561,-3.3e-05,-3.3e-05,-8.9697e-06,-1.28148e-05,-0.000253,-0.000253,-0.0002290331,-0.0002302842,0.169354,0.169354,0.0,0.0
25%,1273530000000.0,37.45762,-122.405628,37.457589,-122.40561,33.59,953.95,0.0,0.0,0.0,161.7,4.57003,37.468789,37.446449,-122.405635,-122.405604,37.468748,37.446429,-122.40561,-122.40561,-1e-05,-7e-06,-8.000001e-10,-3.700002e-09,-0.00012,-0.000114,-0.0001136961,-0.0001137003,1.549212,1.458752,3.5e-05,3.5e-05
50%,1273531000000.0,37.557563,-122.280685,37.557588,-122.280721,33.59,1668.95,1.9,0.0,0.0,161.7,5.54989,37.645742,37.469505,-122.405522,-122.156069,37.645823,37.469488,-122.40561,-122.156063,2e-06,5e-06,0.0,0.0,2e-06,1e-06,0.0,0.0,3.559674,3.559674,0.000113,0.001088
75%,1273531000000.0,37.645797,-122.139722,37.645823,-122.139723,35.1075,2294.7,1.9,0.0,6.8625,199.8625,8.267928,37.645807,37.64578,-122.154814,-122.124644,37.645823,37.645823,-122.154806,-122.124639,7.5e-05,7.5e-05,6.703835e-05,6.703835e-05,1.3e-05,1.1e-05,8.000001e-10,8.000001e-10,13.225291,13.225291,13.045597,13.255859
max,1273531000000.0,37.645836,-122.090273,37.645823,-122.090305,38.5,2299.45,294.24,0.0,25.23,311.4,9.985367,37.645836,37.645836,-122.090279,-122.090273,37.645823,37.645823,-122.090307,-122.0903,0.000159,0.000159,0.0001347548,0.0001352247,3.3e-05,3.3e-05,4.4711e-06,5.0646e-06,27.95598,27.95598,25.133007,25.26506



rejected


Unnamed: 0,millisSinceGpsEpoch,latDeg,lngDeg,latDeg_gt,lngDeg_gt,heightAboveWgs84EllipsoidM_gt,timeSinceFirstFixSeconds,hDop,vDop,speedMps,courseDegree,dist_err,latDeg_prev,latDeg_next,lngDeg_prev,lngDeg_next,latDeg_gt_prev,latDeg_gt_next,lngDeg_gt_prev,lngDeg_gt_next,latDeg_prev_diff,latDeg_next_diff,latDeg_gt_prev_diff,latDeg_gt_next_diff,lngDeg_prev_diff,lngDeg_next_diff,lngDeg_gt_prev_diff,lngDeg_gt_next_diff,dist_prev,dist_next,dist_gt_prev,dist_gt_next
count,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0,1726.0
mean,1273530000000.0,37.523542,-122.248636,37.523535,-122.24864,34.897503,1425.253592,5.944988,0.0,23.187323,283.707561,1.126192,37.523413,37.52367,-122.248456,-122.248816,37.523407,37.523664,-122.24846,-122.24882,0.000128,0.000128,0.000128,0.000128,-0.00018,-0.00018,-0.00018,-0.00018,23.189637,23.193421,23.164075,23.163703
std,500621.3,0.07065,0.107059,0.07065,0.107057,1.485504,500.621332,40.085385,0.0,8.010981,86.248645,0.55314,0.070629,0.070671,0.107056,0.107061,0.070629,0.070671,0.107054,0.107059,7.7e-05,7.7e-05,7.7e-05,7.7e-05,9.6e-05,9.7e-05,9.7e-05,9.7e-05,7.916794,7.912876,7.998949,7.999586
min,1273529000000.0,37.422305,-122.406101,37.422293,-122.406101,31.36,555.45,0.0,0.0,0.0,0.5,0.039272,37.422305,37.422305,-122.406101,-122.406101,37.422293,37.422293,-122.406101,-122.406101,-9.4e-05,-9.4e-05,-0.000102,-0.000102,-0.000327,-0.000327,-0.000327,-0.000327,0.113654,0.113654,0.0,0.0
25%,1273530000000.0,37.46266,-122.342377,37.462653,-122.342375,34.04,990.7,1.0,0.0,22.3075,293.8125,0.751448,37.462546,37.462773,-122.342074,-122.34268,37.462539,37.462768,-122.342071,-122.34268,8.1e-05,8.1e-05,8e-05,8e-05,-0.000245,-0.000245,-0.000245,-0.000245,22.278204,22.278204,22.296471,22.296471
50%,1273530000000.0,37.513179,-122.255112,37.513164,-122.255113,34.53,1426.95,1.3,0.0,26.49,314.2,1.037354,37.512997,37.513362,-122.254905,-122.255319,37.512982,37.513348,-122.254905,-122.25532,0.000155,0.000155,0.000155,0.000155,-0.000199,-0.000199,-0.000199,-0.000199,26.45243,26.455418,26.452583,26.452583
75%,1273531000000.0,37.587048,-122.144359,37.587041,-122.144369,35.04,1858.2,1.8,0.0,27.7175,324.25,1.472565,37.58704,37.587056,-122.144165,-122.144552,37.587033,37.587049,-122.144174,-122.144563,0.000185,0.000185,0.000186,0.000186,-0.000163,-0.000163,-0.000164,-0.000164,27.655902,27.655902,27.670267,27.670267
max,1273531000000.0,37.646597,-122.090281,37.646596,-122.090296,41.56,2289.45,655.3,0.0,29.94,359.95,3.535595,37.646597,37.646597,-122.090273,-122.090287,37.646596,37.646596,-122.090296,-122.090296,0.000258,0.000258,0.000256,0.000256,0.00017,0.00017,0.000169,0.000169,30.156018,30.156018,29.911546,29.911546


In [26]:
# Inspect the merged frame including the neighbour features
# (the rendered output is truncated to the first and last rows).
filtered_train

Unnamed: 0,phone,millisSinceGpsEpoch,latDeg,lngDeg,collectionName,phoneName,latDeg_gt,lngDeg_gt,heightAboveWgs84EllipsoidM_gt,timeSinceFirstFixSeconds,hDop,vDop,speedMps,courseDegree,dist_err,latDeg_prev,latDeg_next,lngDeg_prev,lngDeg_next,phone_prev,phone_next,latDeg_gt_prev,latDeg_gt_next,lngDeg_gt_prev,lngDeg_gt_next,latDeg_prev_diff,latDeg_next_diff,latDeg_gt_prev_diff,latDeg_gt_next_diff,lngDeg_prev_diff,lngDeg_next_diff,lngDeg_gt_prev_diff,lngDeg_gt_next_diff,dist_prev,dist_next,dist_gt_prev,dist_gt_next
0,2020-05-14-US-MTV-1_Pixel4,1273529463442,37.423549,-122.094006,2020-05-14-US-MTV-1,Pixel4,37.423576,-122.094132,33.21,551.44,1.1,0.0,0.0,12.7,11.478435,,37.423563,,-122.094056,,2020-05-14-US-MTV-1_Pixel4,,37.423576,,-122.094132,,1.313517e-05,,2.999982e-10,,-0.000049,,-1.700002e-09,,4.603970,,0.000154
1,2020-05-14-US-MTV-1_Pixel4,1273529464442,37.423563,-122.094056,2020-05-14-US-MTV-1,Pixel4,37.423576,-122.094132,33.21,552.44,1.1,0.0,0.0,12.7,6.889903,37.423549,37.423571,-122.094006,-122.094090,2020-05-14-US-MTV-1_Pixel4,2020-05-14-US-MTV-1_Pixel4,37.423576,37.423576,-122.094132,-122.094132,1.313517e-05,8.610714e-06,2.999982e-10,1.040000e-08,-0.000049,-0.000035,-1.700002e-09,3.400004e-09,4.603970,3.198768,0.000154,0.001194
2,2020-05-14-US-MTV-1_Pixel4,1273529465442,37.423571,-122.094090,2020-05-14-US-MTV-1,Pixel4,37.423576,-122.094132,33.21,553.44,1.1,0.0,0.0,12.7,3.713227,37.423563,37.423576,-122.094056,-122.094109,2020-05-14-US-MTV-1_Pixel4,2020-05-14-US-MTV-1_Pixel4,37.423576,37.423576,-122.094132,-122.094132,8.610714e-06,4.774443e-06,1.040000e-08,1.140000e-08,-0.000035,-0.000019,3.400004e-09,1.600000e-09,3.198768,1.722592,0.001194,0.001275
3,2020-05-14-US-MTV-1_Pixel4,1273529466442,37.423576,-122.094109,2020-05-14-US-MTV-1,Pixel4,37.423576,-122.094132,33.20,554.44,1.1,0.0,0.0,12.7,2.036209,37.423571,37.423577,-122.094090,-122.094116,2020-05-14-US-MTV-1_Pixel4,2020-05-14-US-MTV-1_Pixel4,37.423576,37.423576,-122.094132,-122.094132,4.774443e-06,6.321866e-07,1.140000e-08,9.900006e-09,-0.000019,-0.000007,1.600000e-09,-1.600000e-09,1.722592,0.612324,0.001275,0.001109
4,2020-05-14-US-MTV-1_Pixel4,1273529467442,37.423577,-122.094116,2020-05-14-US-MTV-1,Pixel4,37.423576,-122.094132,33.20,555.44,1.1,0.0,0.0,12.7,1.429706,37.423576,37.423573,-122.094109,-122.094123,2020-05-14-US-MTV-1_Pixel4,2020-05-14-US-MTV-1_Pixel4,37.423576,37.423576,-122.094132,-122.094132,6.321866e-07,-3.707087e-06,9.900006e-09,1.350000e-08,-0.000007,-0.000008,-1.600000e-09,-3.400004e-09,0.612324,0.782343,0.001109,0.001530
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131337,2021-04-29-US-SJC-2_SamsungS20Ultra,1303760315000,37.334462,-121.899602,2021-04-29-US-SJC-2,SamsungS20Ultra,37.334475,-121.899613,56.62,2632.00,1.1,0.0,0.0,60.0,1.724668,37.334468,37.334468,-121.899606,-121.899596,2021-04-29-US-SJC-2_SamsungS20Ultra,2021-04-29-US-SJC-2_SamsungS20Ultra,37.334475,37.334475,-121.899613,-121.899613,-5.467225e-06,5.484582e-06,5.000004e-09,4.999997e-09,0.000004,0.000007,0.000000e+00,0.000000e+00,0.682730,0.844598,0.000556,0.000556
131338,2021-04-29-US-SJC-2_SamsungS20Ultra,1303760316000,37.334468,-121.899596,2021-04-29-US-SJC-2,SamsungS20Ultra,37.334475,-121.899613,56.62,2633.00,1.1,0.0,0.0,60.0,1.764401,37.334462,37.334475,-121.899602,-121.899591,2021-04-29-US-SJC-2_SamsungS20Ultra,2021-04-29-US-SJC-2_SamsungS20Ultra,37.334475,37.334475,-121.899613,-121.899613,5.484582e-06,7.761514e-06,4.999997e-09,1.700002e-09,0.000007,0.000005,0.000000e+00,-1.600000e-09,0.844598,0.964051,0.000556,0.000236
131339,2021-04-29-US-SJC-2_SamsungS20Ultra,1303760317000,37.334475,-121.899591,2021-04-29-US-SJC-2,SamsungS20Ultra,37.334475,-121.899613,56.62,2634.00,0.9,0.0,0.0,60.0,2.000396,37.334468,37.334483,-121.899596,-121.899587,2021-04-29-US-SJC-2_SamsungS20Ultra,2021-04-29-US-SJC-2_SamsungS20Ultra,37.334475,37.334475,-121.899613,-121.899613,7.761514e-06,7.499000e-06,1.700002e-09,-4.330000e-08,0.000005,0.000004,-1.600000e-09,3.300002e-09,0.964051,0.915546,0.000236,0.004821
131340,2021-04-29-US-SJC-2_SamsungS20Ultra,1303760318000,37.334483,-121.899587,2021-04-29-US-SJC-2,SamsungS20Ultra,37.334475,-121.899613,56.63,2635.00,1.0,0.0,0.0,60.0,2.540668,37.334475,37.334489,-121.899591,-121.899580,2021-04-29-US-SJC-2_SamsungS20Ultra,2021-04-29-US-SJC-2_SamsungS20Ultra,37.334475,37.334475,-121.899613,-121.899613,7.499000e-06,6.076747e-06,-4.330000e-08,-3.170000e-08,0.000004,0.000007,3.300002e-09,0.000000e+00,0.915546,0.909987,0.004821,0.003523
