# eda014
OrientationDegの可視化

In [1]:
import os
import pandas as pd
import numpy as np
import ipynb_path
import matplotlib.pyplot as plt
from matplotlib_venn import venn2, venn2_circles
import seaborn as sns
import plotly
import plotly.express as px
%matplotlib inline
pd.set_option('display.max_rows', 200)

In [2]:
def get_nb_name():
    """Return the running notebook's file name without directory or extension.

    The notebook path is obtained from ``ipynb_path.get()``.

    Returns:
        str: e.g. ``'eda014'`` for a notebook stored at ``/work/nb/eda014.ipynb``.
    """
    nb_path = str(ipynb_path.get())
    # basename/splitext instead of manual '/' splitting: this also works when the
    # path has no directory component, and only a trailing '.ipynb' is removed
    # (never an '.ipynb' substring in the middle of the name).
    base = os.path.basename(nb_path)
    stem, ext = os.path.splitext(base)
    return stem if ext == '.ipynb' else base

In [3]:
def visualize_trafic(df, center, zoom=9):
    """Display an interactive scatter map of GPS points, coloured by phone.

    Args:
        df: DataFrame providing the ``latDeg``, ``lngDeg`` and ``phoneName``
            columns referenced below.
        center: map centre passed straight to plotly; callers in this notebook
            pass a dict with ``"lat"`` and ``"lon"`` keys.
        zoom: initial zoom level passed to plotly (default 9).

    Returns:
        None. The figure is shown with ``fig.show()``.
    """
    # The previous ``labels="phoneName"`` argument was dropped: plotly express
    # documents ``labels`` as a dict mapping column names to display names, so a
    # bare string is not a valid value.
    fig = px.scatter_mapbox(df,
                            # point coordinates
                            lat="latDeg",
                            lon="lngDeg",
                            # one colour per phone
                            color="phoneName",
                            zoom=zoom,
                            center=center,
                            height=1000,
                            width=2000)
    # NOTE(review): with a top margin of 0 the title is probably not visible, and
    # the 'stamen-terrain' style may be unavailable in newer plotly releases - confirm.
    fig.update_layout(mapbox_style='stamen-terrain',
                      margin={"r": 0, "t": 0, "l": 0, "b": 0},
                      title_text="GPS trafic")
    fig.show()

In [4]:
def visualize_collection(df, collection):
    """Map all rows of one collection, centred on their mean position.

    Args:
        df: DataFrame with ``collectionName``, ``latDeg`` and ``lngDeg`` columns
            (plus whatever ``visualize_trafic`` needs).
        collection: value of ``collectionName`` to select.

    Returns:
        None. Plotting is delegated to ``visualize_trafic``.
    """
    is_target = df['collectionName'] == collection
    subset = df[is_target].copy()

    # Centre the map on the average latitude / longitude of the selected rows.
    midpoint = {
        "lat": subset['latDeg'].mean(),
        "lon": subset['lngDeg'].mean(),
    }

    visualize_trafic(subset, midpoint)

In [5]:
def calc_haversine(lat1, lon1, lat2, lon2):
    """Great-circle (haversine) distance between two points on the earth.

    All four inputs are decimal degrees and may be scalars or array-likes; the
    computation is element-wise. The result is expressed in the unit of the
    sphere radius used below (6,367,000 - presumably metres).
    """
    RADIUS = 6_367_000
    phi1, lam1, phi2, lam2 = (np.radians(v) for v in (lat1, lon1, lat2, lon2))
    delta_phi = phi2 - phi1
    delta_lam = lam2 - lam1
    # Haversine term: sin^2(dlat/2) + cos(lat1) * cos(lat2) * sin^2(dlon/2)
    hav = np.sin(delta_phi/2)**2 + \
        np.cos(phi1) * np.cos(phi2) * np.sin(delta_lam/2)**2
    return 2 * RADIUS * np.arcsin(hav**0.5)

In [6]:
# directory setting
# Input comes from the competition folder; output goes to a folder named after
# this notebook (the name returned by get_nb_name()).
nb_name = get_nb_name()
INPUT = '../input/google-smartphone-decimeter-challenge'
OUTPUT = '../output/' + nb_name
# exist_ok=True so that re-running the cell does not fail when the folder exists.
os.makedirs(OUTPUT, exist_ok=True)

In [7]:
# reading data
# Baseline locations, the sample submission and the ground truth are all read
# from below INPUT; the ground truth lives in the 'prep' sub-folder.
# NOTE(review): the 'prep' files are presumably produced by an earlier
# preprocessing step - confirm they exist before running.
base_train = pd.read_csv(INPUT + '/' + 'baseline_locations_train.csv')
base_test = pd.read_csv(INPUT + '/' + 'baseline_locations_test.csv')
sample_sub = pd.read_csv(INPUT + '/' + 'sample_submission.csv')
ground_truth = pd.read_csv(INPUT + '/prep/ground_truth_train.csv')

In [8]:
# Orientation (yaw / roll / pitch) readings for the train and test sets, read
# from the 'prep/gnss' sub-folders of INPUT.
ori_train = pd.read_csv(INPUT + '/prep/gnss/train/OrientationDeg.csv')
ori_test = pd.read_csv(INPUT + '/prep/gnss/test/OrientationDeg.csv')

In [11]:
def viz(df, gnss, df_cols, gnss_cols, output_dir):
    """Save one stacked time-series figure per phone.

    For every phone found in ``df`` a figure with one row per column in
    ``df_cols`` followed by one row per column in ``gnss_cols`` is written to
    ``<output_dir>/<phone>.png``. Phones without any rows in ``gnss`` are skipped.

    Side effects (kept for the cells that rely on them): a ``phone`` column is
    added to / overwritten on both ``df`` and ``gnss``, and a
    ``millisSinceGpsEpoch`` column is added to / overwritten on ``gnss``.

    Args:
        df: DataFrame with ``collectionName``, ``phoneName``,
            ``millisSinceGpsEpoch`` and the columns listed in ``df_cols``.
        gnss: DataFrame with ``collectionName``, ``phoneName``, ``utcTimeMillis``
            and the columns listed in ``gnss_cols``.
        df_cols: columns of ``df`` to plot (may be empty).
        gnss_cols: columns of ``gnss`` to plot (may be empty).
        output_dir: directory for the PNG files; created if missing.

    Returns:
        None.
    """
    os.makedirs(output_dir, exist_ok=True)
    # UTC milliseconds -> GPS milliseconds. 315964800000 is presumably the Unix
    # time of the GPS epoch in ms and 18000 the GPS-UTC leap-second offset in ms.
    gnss['millisSinceGpsEpoch'] = gnss['utcTimeMillis'] - 315964800000 + 18000
    gnss['phone'] = gnss['collectionName'] + '_' + gnss['phoneName']
    df['phone'] = df['collectionName'] + '_' + df['phoneName']

    df_cols = list(df_cols)
    gnss_cols = list(gnss_cols)
    df_cols_n = len(df_cols)
    cols_n = df_cols_n + len(gnss_cols)

    for phone in df['phone'].unique():
        df_tmp = df[df['phone'] == phone]
        gnss_tmp = gnss[gnss['phone'] == phone]

        if len(gnss_tmp) == 0:
            continue

        # squeeze=False always yields a 2-D array of axes, so indexing works even
        # when only a single row is requested.
        fig, axes = plt.subplots(figsize=(5*cols_n, 20), nrows=cols_n,
                                 sharex=True, squeeze=False)
        axes = axes[:, 0]
        for i, c in enumerate(df_cols):
            axes[i].plot(df_tmp['millisSinceGpsEpoch'], df_tmp[c], label=c)
        for j, c in enumerate(gnss_cols):
            # Offset by the number of df columns rather than by the leaked loop
            # variable of the previous loop (undefined when df_cols is empty).
            axes[df_cols_n + j].plot(gnss_tmp['millisSinceGpsEpoch'], gnss_tmp[c], label=c)
        for ax in axes:
            ax.legend(loc='upper right')
            ax.grid(color='g', linestyle=':', linewidth=0.3)
        fig.suptitle(phone, fontsize=16)
        fig.savefig(os.path.join(output_dir, phone + '.png'))
        # Close this specific figure so that figures do not pile up in memory.
        plt.close(fig)

In [13]:
# Inspect which columns the orientation training frame currently has.
ori_train.columns

Index(['collectionName', 'phoneName', 'utcTimeMillis', 'elapsedRealtimeNanos',
       'yawDeg', 'rollDeg', 'pitchDeg'],
      dtype='object')

In [14]:
# Per-phone plots for the training set: ground-truth speed and course on top,
# the three orientation angles below. Note that viz() also adds a 'phone' column
# to ground_truth and 'phone' / 'millisSinceGpsEpoch' columns to ori_train.
viz(ground_truth,
    ori_train,
    df_cols=['speedMps', 'courseDegree'],
    gnss_cols=['yawDeg', 'rollDeg', 'pitchDeg'],
    output_dir= OUTPUT + '/gnss_ts/gt/ori')

In [15]:
def add_distance_diff(df):
    """Add previous/next position columns and the distances to them.

    The frame is modified in place (and also returned). It needs ``latDeg``,
    ``lngDeg`` and ``phone`` columns; neighbours are taken by row order, so rows
    are presumably expected to be sorted by phone and time. Where the
    neighbouring row belongs to a different phone (or does not exist), the
    shifted coordinates and the distance are set to NaN.

    Added columns: ``latDeg_prev/next``, ``lngDeg_prev/next``,
    ``phone_prev/next`` and ``dist_prev/next``.
    """
    directions = (('prev', 1), ('next', -1))

    # Shifted copies of the coordinates and of the phone id.
    for base_col in ('latDeg', 'lngDeg', 'phone'):
        for suffix, step in directions:
            df[base_col + '_' + suffix] = df[base_col].shift(step)

    # Distance from each row to its previous / next row.
    for suffix, _ in directions:
        df['dist_' + suffix] = calc_haversine(df['latDeg'], df['lngDeg'],
                                              df['latDeg_' + suffix], df['lngDeg_' + suffix])

    # Blank out values that were computed across a phone boundary.
    for suffix, _ in directions:
        crosses_phone = df['phone'] != df['phone_' + suffix]
        df.loc[crosses_phone, ['latDeg_' + suffix, 'lngDeg_' + suffix, 'dist_' + suffix]] = np.nan

    return df

# Add prev/next position and distance columns to the test baseline; the function
# modifies the frame it is given and returns that same frame.
base_test = add_distance_diff(base_test)

In [18]:
# Per-phone plots for the test set: distance to the previous baseline point on
# top, the three orientation angles below. viz() also adds 'phone' and
# 'millisSinceGpsEpoch' columns to ori_test.
viz(base_test,
    ori_test,
    df_cols=['dist_prev'],
    gnss_cols=['yawDeg', 'rollDeg', 'pitchDeg'],
    output_dir= OUTPUT + '/gnss_ts/test/ori')

In [21]:
# Derive GPS-epoch milliseconds from the UTC timestamp (same formula as used in
# viz() and add_sensor_features()).
# NOTE(review): 315964800000 is presumably the Unix time of the GPS epoch in ms and
# 18000 the GPS-UTC leap-second offset in ms - confirm.
ori_train['millisSinceGpsEpoch'] = ori_train['utcTimeMillis'] - 315964800000 + 18000

In [25]:
def add_sensor_features(df, ori):
    """Attach per-second orientation statistics to a location frame.

    Orientation readings are grouped by phone and whole GPS second; for each of
    ``yawDeg``, ``rollDeg`` and ``pitchDeg`` the mean, median, max, min and std
    are computed and left-joined onto ``df``. The new columns are named
    ``<angle>_<stat>``, e.g. ``yawDeg_mean``.

    Side effects (kept from the original implementation): ``phone`` and
    ``secondSinceGpsEpoch`` columns are added to the passed ``df``; ``phone``,
    ``millisSinceGpsEpoch`` and ``secondSinceGpsEpoch`` are added to ``ori``.

    Args:
        df: DataFrame with ``collectionName``, ``phoneName`` and
            ``millisSinceGpsEpoch`` columns.
        ori: DataFrame with ``collectionName``, ``phoneName``, ``utcTimeMillis``,
            ``yawDeg``, ``rollDeg`` and ``pitchDeg`` columns.

    Returns:
        A new DataFrame: ``df`` plus the 15 aggregated orientation columns and
        without ``secondSinceGpsEpoch``. Rows without matching orientation data
        get NaN in the new columns.
    """
    # Add the per-device key.
    df['phone'] = df['collectionName'] + '_' + df['phoneName']
    ori['phone'] = ori['collectionName'] + '_' + ori['phoneName']

    # UTC milliseconds -> GPS milliseconds (presumably GPS-epoch offset plus
    # 18 s of leap seconds).
    ori['millisSinceGpsEpoch'] = ori['utcTimeMillis'] - 315964800000 + 18000

    # Resample both frames to whole seconds so that they can be joined.
    df['secondSinceGpsEpoch'] = df['millisSinceGpsEpoch'] // 1000
    ori['secondSinceGpsEpoch'] = ori['millisSinceGpsEpoch'] // 1000

    keys = ['phone', 'secondSinceGpsEpoch']
    angles = ['yawDeg', 'rollDeg', 'pitchDeg']
    stats = ['mean', 'median', 'max', 'min', 'std']

    # Select the columns with a list: tuple-style selection on a groupby
    # (``gb['a', 'b']``) is deprecated and raises in recent pandas versions.
    ori_agg = ori.groupby(keys)[angles].agg(stats).reset_index()
    # Flatten the (angle, stat) MultiIndex into '<angle>_<stat>' names.
    ori_agg.columns = keys + [f'{angle}_{stat}' for angle in angles for stat in stats]

    merged = df.merge(ori_agg, on=keys, how='left')
    return merged.drop(columns=['secondSinceGpsEpoch'])

In [26]:
# Join the per-second orientation statistics onto the training baseline.
base_train = add_sensor_features(base_train, ori_train)

  del sys.path[0]


In [29]:
# Copy the ground-truth speed and course onto the training baseline.
# NOTE(review): this assumes base_train and ground_truth have the same rows in the
# same order (matching index); there is no key-based join here - confirm.
base_train[['speedMps', 'courseDegree']] = ground_truth[['speedMps', 'courseDegree']]

In [40]:
def viz2(df, cols, output_dir):
    """Save one stacked time-series figure per phone from a single frame.

    For every value of ``df['phone']`` a figure with one row per entry in
    ``cols`` (plotted against ``millisSinceGpsEpoch``) is written to
    ``<output_dir>/<phone>.png``.

    Args:
        df: DataFrame with ``phone``, ``millisSinceGpsEpoch`` and the columns
            listed in ``cols``.
        cols: columns to plot, one subplot row each.
        output_dir: directory for the PNG files; created if missing.

    Returns:
        None.
    """
    os.makedirs(output_dir, exist_ok=True)
    cols = list(cols)
    cols_n = len(cols)

    for phone in df['phone'].unique():
        df_tmp = df[df['phone'] == phone]

        # squeeze=False always yields a 2-D array of axes, so a single column
        # can be plotted as well.
        fig, axes = plt.subplots(figsize=(5*cols_n, 20), nrows=cols_n,
                                 sharex=True, squeeze=False)
        for ax, c in zip(axes[:, 0], cols):
            ax.plot(df_tmp['millisSinceGpsEpoch'], df_tmp[c], label=c)
            ax.legend(loc='upper right')
            ax.grid(color='g', linestyle=':', linewidth=0.3)
        fig.suptitle(phone, fontsize=16)
        fig.savefig(os.path.join(output_dir, phone + '.png'))
        # Close this specific figure so that figures do not pile up in memory.
        plt.close(fig)

In [38]:
# Ground-truth speed / course together with the per-second yaw statistics.
viz2(base_train,
    cols=['speedMps', 'courseDegree', 'yawDeg_mean', 'yawDeg_median', 'yawDeg_max', 'yawDeg_min', 'yawDeg_std'],
    output_dir= OUTPUT + '/gnss_ts/gt/ori_f_yaw')

In [41]:
# Ground-truth speed / course together with the per-second roll statistics.
viz2(base_train,
    cols=['speedMps', 'courseDegree', 'rollDeg_mean', 'rollDeg_median', 'rollDeg_max', 'rollDeg_min', 'rollDeg_std'],
    output_dir= OUTPUT + '/gnss_ts/gt/ori_f_roll')

In [42]:
# Ground-truth speed / course together with the per-second pitch statistics.
# Written to its own 'ori_f_pitch' folder: the previous target 'ori_f_yaw' made
# these figures overwrite the yaw figures, which use the same <phone>.png names.
viz2(base_train,
    cols=['speedMps', 'courseDegree', 'pitchDeg_mean', 'pitchDeg_median', 'pitchDeg_max', 'pitchDeg_min', 'pitchDeg_std'],
    output_dir= OUTPUT + '/gnss_ts/gt/ori_f_pitch')

In [45]:
# Number of distinct yawDeg values per phone in the training orientation data.
# Relies on the 'phone' column that viz() / add_sensor_features() added to ori_train.
# NOTE(review): the counts cap at 360, which suggests yawDeg is quantised to whole
# degrees, and some phones report a single value only - confirm.
ori_train[['phone', 'yawDeg']].drop_duplicates()['phone'].value_counts()

2021-04-22-US-SJC-1_SamsungS20Ultra    360
2021-04-15-US-MTV-1_Pixel5             360
2021-04-28-US-SJC-1_Pixel4             360
2021-03-10-US-SVL-1_Pixel4XL           360
2021-04-29-US-MTV-1_Pixel5             360
2021-04-22-US-SJC-1_Pixel4             360
2021-04-28-US-MTV-1_Pixel5             360
2021-03-10-US-SVL-1_SamsungS20Ultra    360
2021-04-26-US-SVL-1_Pixel5             354
2021-04-15-US-MTV-1_Pixel4Modded       351
2021-04-28-US-MTV-1_Pixel4             345
2021-04-15-US-MTV-1_SamsungS20Ultra    333
2021-04-15-US-MTV-1_Pixel4             332
2021-04-26-US-SVL-1_Mi8                328
2021-04-29-US-SJC-2_Pixel4             257
2021-04-29-US-MTV-1_Pixel4             242
2021-04-29-US-MTV-1_SamsungS20Ultra      1
2021-04-28-US-MTV-1_SamsungS20Ultra      1
2021-04-28-US-SJC-1_SamsungS20Ultra      1
2021-04-29-US-SJC-2_SamsungS20Ultra      1
Name: phone, dtype: int64

In [46]:
# Number of distinct yawDeg values per phone in the test orientation data.
# Relies on the 'phone' column that viz() added to ori_test.
ori_test[['phone', 'yawDeg']].drop_duplicates()['phone'].value_counts()

2021-03-25-US-PAO-1_Pixel5             360
2021-03-25-US-PAO-1_SamsungS20Ultra    360
2021-03-16-US-RWC-2_Pixel4XL           360
2021-04-26-US-SVL-2_SamsungS20Ultra    360
2021-03-25-US-PAO-1_Pixel4             360
2021-04-29-US-MTV-2_Pixel5             360
2021-04-21-US-MTV-1_Pixel4             360
2021-04-02-US-SJC-1_Pixel5             360
2021-04-08-US-MTV-1_Pixel5             360
2021-03-16-US-MTV-2_SamsungS20Ultra    360
2021-04-02-US-SJC-1_Pixel4             360
2021-04-21-US-MTV-1_Pixel4Modded       360
2021-04-22-US-SJC-2_SamsungS20Ultra    360
2021-04-08-US-MTV-1_Pixel4             354
2021-03-16-US-RWC-2_Pixel5             352
2021-03-16-US-RWC-2_SamsungS20Ultra    348
2021-03-25-US-PAO-1_Mi8                340
2021-03-16-US-MTV-2_Pixel4Modded       334
2021-04-28-US-MTV-2_Pixel4             301
2021-04-29-US-SJC-3_Pixel4             291
2021-04-08-US-MTV-1_SamsungS20Ultra    285
2021-04-29-US-MTV-2_Pixel4             246
2021-03-25-US-PAO-1_Pixel4Modded       224
2021-04-08-