# Reference
1. https://www.kaggle.com/rohanrao/tutorial-on-reading-large-datasets
1. https://www.kaggle.com/asobod11138/gsdc-neuralnet-keras (multi-threading)

# Import Libraries

In [1]:
import numpy as np
import pandas as pd
from glob import glob
import os
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from pathlib import Path
import plotly.express as px
from multiprocessing import Pool
import multiprocessing as multi
import gc

from utils.gsdc_parser import ground_truth_file_open, derived_file_open, gnsslog_file_open, gnss_log_to_dataframes

split_charater = "\\"

# Help Function

In [2]:
def UTC2GpsEpoch(df):
    '''UTC to GpsEpoch
    
    utcTimeMillis         : UTC epoch (1970/1/1)
    millisSinceGpsEpoch   : GPS epoch(1980/1/6 midnight 12:00 UTC)
    
    Ref: https://www.kaggle.com/c/google-smartphone-decimeter-challenge/discussion/239187
    '''
    dt_offset = pd.to_datetime('1980-01-06 00:00:00') 
    dt_offset_in_ms = int(dt_offset.value / 1e6)
    df['millisSinceGpsEpoch'] = df['utcTimeMillis'] - dt_offset_in_ms + 18000
    df['millisSinceGpsEpoch'] = df['millisSinceGpsEpoch'].astype('int64')
    return df

# Set Path and Load Dataset

In [3]:
PATH = Path("../input/google-smartphone-decimeter-challenge")
train_df = pd.read_csv(PATH / "baseline_locations_train.csv")
test_df = pd.read_csv(PATH / "baseline_locations_test.csv")

In [4]:
print(train_df.shape)
train_df.head()

(131342, 7)


Unnamed: 0,collectionName,phoneName,millisSinceGpsEpoch,latDeg,lngDeg,heightAboveWgs84EllipsoidM,phone
0,2020-05-14-US-MTV-1,Pixel4,1273529463442,37.423575,-122.094091,-34.06,2020-05-14-US-MTV-1_Pixel4
1,2020-05-14-US-MTV-1,Pixel4,1273529464442,37.423578,-122.094101,-33.29,2020-05-14-US-MTV-1_Pixel4
2,2020-05-14-US-MTV-1,Pixel4,1273529465442,37.423573,-122.094111,-30.99,2020-05-14-US-MTV-1_Pixel4
3,2020-05-14-US-MTV-1,Pixel4,1273529466442,37.423583,-122.094121,-32.83,2020-05-14-US-MTV-1_Pixel4
4,2020-05-14-US-MTV-1,Pixel4,1273529467442,37.423579,-122.094114,-34.49,2020-05-14-US-MTV-1_Pixel4


In [5]:
print(test_df.shape)
test_df.head()

(91486, 7)


Unnamed: 0,collectionName,phoneName,millisSinceGpsEpoch,latDeg,lngDeg,heightAboveWgs84EllipsoidM,phone
0,2020-05-15-US-MTV-1,Pixel4,1273608785432,37.416628,-122.082053,-30.69,2020-05-15-US-MTV-1_Pixel4
1,2020-05-15-US-MTV-1,Pixel4,1273608786432,37.416646,-122.08204,-31.76,2020-05-15-US-MTV-1_Pixel4
2,2020-05-15-US-MTV-1,Pixel4,1273608787432,37.416653,-122.082039,-31.65,2020-05-15-US-MTV-1_Pixel4
3,2020-05-15-US-MTV-1,Pixel4,1273608788432,37.416607,-122.082063,-31.52,2020-05-15-US-MTV-1_Pixel4
4,2020-05-15-US-MTV-1,Pixel4,1273608789432,37.416609,-122.082073,-28.95,2020-05-15-US-MTV-1_Pixel4


# Load All Data Function

In [6]:
gnss_section_names = {'Raw','UncalAccel', 'UncalGyro', 'UncalMag', 'Fix', 'Status', 'OrientationDeg'}
section_names = {'GroundTruth', 'Derived', 'Raw','UncalAccel', 'UncalGyro', 'UncalMag', 'Fix', 'Status', 'OrientationDeg'}
_columns = ['latDeg', 'lngDeg', 'heightAboveWgs84EllipsoidM']

In [7]:
thread_num = 4

In [8]:
def merge_data(df_, truth, derived, gnss):
    df = df_.copy()
    
    print('Truth Data Start')
    if truth is not None:
        df = pd.merge_asof(df.sort_values('millisSinceGpsEpoch'), 
              truth.sort_values('millisSinceGpsEpoch'), 
              on="millisSinceGpsEpoch", by=["collectionName", "phoneName"], 
              direction='nearest',tolerance=100000)
    print('Truth Data Fin')

    print('Derived Data Start')
    df = pd.merge_asof(df.sort_values('millisSinceGpsEpoch'), 
          derived.sort_values('millisSinceGpsEpoch'), 
          on="millisSinceGpsEpoch", by=["collectionName", "phoneName"], 
          direction='nearest',tolerance=100000)
    print('Derived Data Fin')


    print('Gnss Log Data Start')
    for key, value in tqdm(gnss.items()):
        if value.shape[0] == 0:
            continue
        df = pd.merge_asof(df.sort_values('millisSinceGpsEpoch'), 
              value.sort_values('millisSinceGpsEpoch'), 
              on="millisSinceGpsEpoch", by=["collectionName", "phoneName"], 
              direction='nearest',tolerance=100000, suffixes = ['', "_" + key])
    print('Gnss Log Data Fin')
        
    return df

## Train
### For Multi Processing Code

In [9]:
path_list = list(glob(str(PATH / "train" / "*/*/*")))

get_ground_truth = lambda x:x.split(split_charater)[-1].find("ground_truth")>= 0
get_derived = lambda x:x.split(split_charater)[-1].find("derived.csv")>= 0
get_gnsslog = lambda x:x.split(split_charater)[-1].find("GnssLog.txt")>= 0

ground_truth_path_list = list(filter(get_ground_truth, path_list))
derived_path_list = list(filter(get_derived, path_list))
gnsslog_path_list = list(filter(get_gnsslog, path_list))

In [10]:
output = dict()
for section in section_names:
    output[section] = pd.DataFrame()

In [11]:
with Pool(thread_num) as pool:
    imap = pool.imap(gnsslog_file_open, gnsslog_path_list)
    gnss_logs = list(tqdm(imap, total=len(gnsslog_path_list), desc="load gnss log"))
    
gnss_dict = {}
for key in gnss_section_names:
    list_temp = []
    for gnss_log in gnss_logs:
        list_temp.append(gnss_log[key])
    df_temp = pd.concat(list_temp)
    gnss_dict[key] = df_temp
    if (key == "Status") or (key == "Fix"):  
        gnss_dict[key].rename(columns = {'UnixTimeMillis':'utcTimeMillis'}, inplace = True)
    
    gnss_dict[key] = UTC2GpsEpoch(gnss_dict[key])
    print(gnss_dict[key]['utcTimeMillis'].dtype)
    print(gnss_dict[key].shape)
    print(gnss_dict[key].head())

del gnss_logs

load gnss log:   0%|          | 0/73 [00:00<?, ?it/s]

float64
(2825564, 8)
        collectionName phoneName  utcTimeMillis  elapsedRealtimeNanos  yawDeg  \
0  2021-03-10-US-SVL-1  Pixel4XL   1.615418e+12          1.035191e+13    61.0   
1  2021-03-10-US-SVL-1  Pixel4XL   1.615418e+12          1.035193e+13    61.0   
2  2021-03-10-US-SVL-1  Pixel4XL   1.615418e+12          1.035195e+13    61.0   
3  2021-03-10-US-SVL-1  Pixel4XL   1.615418e+12          1.035196e+13    61.0   
4  2021-03-10-US-SVL-1  Pixel4XL   1.615418e+12          1.035198e+13    61.0   

   rollDeg  pitchDeg  millisSinceGpsEpoch  
0   -165.0     -83.0        1299453166737  
1   -165.0     -83.0        1299453166753  
2   -165.0     -83.0        1299453166769  
3   -165.0     -83.0        1299453166785  
4   -165.0     -83.0        1299453166801  
int64
(4285508, 16)
   utcTimeMillis  SignalCount  SignalIndex  ConstellationType  Svid  \
0  1589494244403           37            0                  1     2   
1  1589494244403           37            1                  1     

int64
(0, 14)
Empty DataFrame
Columns: [Provider, LatitudeDegrees, LongitudeDegrees, AltitudeMeters, SpeedMps, AccuracyMeters, BearingDegrees, utcTimeMillis, SpeedAccuracyMps, BearingAccuracyDegrees, collectionName, phoneName, elapsedRealtimeNanos, millisSinceGpsEpoch]
Index: []


In [12]:
with Pool(thread_num) as pool:
    imap = pool.imap(ground_truth_file_open, ground_truth_path_list)
    ground_truth = list(tqdm(imap, total=len(ground_truth_path_list), desc="load ground truth"))
    
df_truth = pd.concat(ground_truth)
print(df_truth.shape)
df_truth.head()

del ground_truth

load ground truth:   0%|          | 0/73 [00:00<?, ?it/s]

(131342, 11)


In [13]:
with Pool(thread_num) as pool:
    imap = pool.imap(derived_file_open, derived_path_list)
    derived = list(tqdm(imap, total=len(derived_path_list), desc="load derived data"))
    
df_derived = pd.concat(derived)
print(df_derived.shape)
df_derived.head()

del derived

load derived data:   0%|          | 0/73 [00:00<?, ?it/s]

(3834542, 20)


### Tiny Data Issue Solve

In [14]:
raw_timestamps = gnss_dict['Raw']['millisSinceGpsEpoch'].unique()
derived_timestamps = df_derived['millisSinceGpsEpoch'].unique()

# The timestamps in derived are one epoch ahead. We need to map each epoch
# in derived to the prior one (in Raw).
indexes = np.searchsorted(raw_timestamps, derived_timestamps)
from_t_to_fix_derived = dict(zip(derived_timestamps, raw_timestamps[indexes-1]))
df_derived['millisSinceGpsEpoch'] = np.array(list(map(lambda v: from_t_to_fix_derived[v], df_derived['millisSinceGpsEpoch'])))

In [15]:
delta_millis = df_derived['millisSinceGpsEpoch'] - df_derived['receivedSvTimeInGpsNanos']/1e6
good_signals = (delta_millis > 0) & (delta_millis < 300)
df_derived = df_derived[good_signals].copy()

### Save To Pickle File

In [16]:
output = merge_data(train_df, df_truth, df_derived, gnss_dict)
output.to_pickle(str(PATH / "gsdc_train.pkl.gzip"))
print(output.shape)
output.head()

Truth Data Start
Truth Data Fin
Derived Data Start
Derived Data Fin
Gnss Log Data Start


  0%|          | 0/7 [00:00<?, ?it/s]

Gnss Log Data Fin
(131342, 110)


Unnamed: 0,collectionName,phoneName,millisSinceGpsEpoch,latDeg,lngDeg,heightAboveWgs84EllipsoidM,phone,timeSinceFirstFixSeconds,hDop,vDop,...,SnrInDb,ConstellationType_Raw,AgcDb,BasebandCn0DbHz_Raw,FullInterSignalBiasNanos,FullInterSignalBiasUncertaintyNanos,SatelliteInterSignalBiasNanos,SatelliteInterSignalBiasUncertaintyNanos,CodeType,ChipsetElapsedRealtimeNanos
0,2020-05-14-US-MTV-1,Pixel4,1273529463442,37.423575,-122.094091,-34.06,2020-05-14-US-MTV-1_Pixel4,551.44,1.1,0.0,...,,1,-0.61,,,,,,,
1,2020-05-14-US-MTV-1,Pixel4,1273529464442,37.423578,-122.094101,-33.29,2020-05-14-US-MTV-1_Pixel4,552.44,1.1,0.0,...,,1,-0.12,,,,,,,
2,2020-05-14-US-MTV-1,Pixel4,1273529465442,37.423573,-122.094111,-30.99,2020-05-14-US-MTV-1_Pixel4,553.44,1.1,0.0,...,,1,0.02,,,,,,,
3,2020-05-14-US-MTV-1,Pixel4,1273529466442,37.423583,-122.094121,-32.83,2020-05-14-US-MTV-1_Pixel4,554.44,1.1,0.0,...,,1,0.07,,,,,,,
4,2020-05-14-US-MTV-1,Pixel4XLModded,1273529466449,37.423574,-122.094137,-33.2,2020-05-14-US-MTV-1_Pixel4XLModded,554.45,1.2,0.0,...,,1,0.99,,,,,,,


In [17]:
output.info(verbose = True, memory_usage= True, null_counts=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 131342 entries, 0 to 131341
Data columns (total 110 columns):
 #   Column                                     Non-Null Count   Dtype  
---  ------                                     --------------   -----  
 0   collectionName                             131342 non-null  object 
 1   phoneName                                  131342 non-null  object 
 2   millisSinceGpsEpoch                        131342 non-null  int64  
 3   latDeg                                     131342 non-null  float64
 4   lngDeg                                     131342 non-null  float64
 5   heightAboveWgs84EllipsoidM                 131342 non-null  float64
 6   phone                                      131342 non-null  object 
 7   timeSinceFirstFixSeconds                   131342 non-null  float64
 8   hDop                                       131342 non-null  float64
 9   vDop                                       131342 non-null  float64
 10  speedMp

In [19]:
del df_truth, df_derived, gnss_dict, output
gc.collect()

NameError: name 'df_truth' is not defined

## Test
### For Multi Processing Code

In [21]:
path_list = list(glob(str(PATH / "test" / "*/*/*")))

get_ground_truth = lambda x:x.split(split_charater)[-1].find("ground_truth")>= 0
get_derived = lambda x:x.split(split_charater)[-1].find("derived.csv")>= 0
get_gnsslog = lambda x:x.split(split_charater)[-1].find("GnssLog.txt")>= 0

ground_truth_path_list = list(filter(get_ground_truth, path_list))
derived_path_list = list(filter(get_derived, path_list))
gnsslog_path_list = list(filter(get_gnsslog, path_list))

In [22]:
output = dict()
for section in section_names:
    output[section] = pd.DataFrame()

In [23]:
with Pool(thread_num) as pool:
    imap = pool.imap(gnsslog_file_open, gnsslog_path_list)
    gnss_logs = list(tqdm(imap, total=len(gnsslog_path_list), desc="load gnss log"))
    
gnss_dict = {}
for key in gnss_section_names:
    list_temp = []
    for gnss_log in gnss_logs:
        list_temp.append(gnss_log[key])
    df_temp = pd.concat(list_temp)
    gnss_dict[key] = df_temp
    if (key == "Status") or (key == "Fix"):  
        gnss_dict[key].rename(columns = {'UnixTimeMillis':'utcTimeMillis'}, inplace = True)
    
    gnss_dict[key] = UTC2GpsEpoch(gnss_dict[key])
    print(gnss_dict[key]['utcTimeMillis'].dtype)
    print(gnss_dict[key].shape)
    print(gnss_dict[key].head())

del gnss_logs

load gnss log:   0%|          | 0/48 [00:00<?, ?it/s]

float64
(3583787, 8)
        collectionName     phoneName  utcTimeMillis  elapsedRealtimeNanos  \
0  2021-03-16-US-MTV-2  Pixel4Modded   1.615921e+12          7.160950e+12   
1  2021-03-16-US-MTV-2  Pixel4Modded   1.615921e+12          7.160966e+12   
2  2021-03-16-US-MTV-2  Pixel4Modded   1.615921e+12          7.160982e+12   
3  2021-03-16-US-MTV-2  Pixel4Modded   1.615921e+12          7.160998e+12   
4  2021-03-16-US-MTV-2  Pixel4Modded   1.615921e+12          7.161014e+12   

   yawDeg  rollDeg  pitchDeg  millisSinceGpsEpoch  
0    55.0    137.0     -82.0        1299956516428  
1    57.0    139.0     -82.0        1299956516444  
2    58.0    139.0     -82.0        1299956516460  
3    57.0    139.0     -82.0        1299956516476  
4    56.0    138.0     -82.0        1299956516492  
int64
(2936055, 16)
   utcTimeMillis  SignalCount  SignalIndex  ConstellationType  Svid  \
0              0           33            0                  1     2   
1              0           33            1

In [26]:
with Pool(thread_num) as pool:
    imap = pool.imap(derived_file_open, derived_path_list)
    derived = list(tqdm(imap, total=len(derived_path_list), desc="load derived data"))
    
df_derived = pd.concat(derived)
print(df_derived.shape)
df_derived.head()

del derived

load derived data:   0%|          | 0/48 [00:00<?, ?it/s]

(2523199, 20)


### Tiny Data Issue Solve

In [27]:
raw_timestamps = gnss_dict['Raw']['millisSinceGpsEpoch'].unique()
derived_timestamps = df_derived['millisSinceGpsEpoch'].unique()

# The timestamps in derived are one epoch ahead. We need to map each epoch
# in derived to the prior one (in Raw).
indexes = np.searchsorted(raw_timestamps, derived_timestamps)
from_t_to_fix_derived = dict(zip(derived_timestamps, raw_timestamps[indexes-1]))
df_derived['millisSinceGpsEpoch'] = np.array(list(map(lambda v: from_t_to_fix_derived[v], df_derived['millisSinceGpsEpoch'])))

In [28]:
delta_millis = df_derived['millisSinceGpsEpoch'] - df_derived['receivedSvTimeInGpsNanos']/1e6
good_signals = (delta_millis > 0) & (delta_millis < 300)
df_derived = df_derived[good_signals].copy()

### Save To Pickle File

In [29]:
output = merge_data(test_df, None, df_derived, gnss_dict)
output.to_pickle(str(PATH / "gsdc_test.pkl.gzip"))
print(output.shape)
output.head()

Truth Data Start
Truth Data Fin
Derived Data Start
Derived Data Fin
Gnss Log Data Start


  0%|          | 0/7 [00:00<?, ?it/s]

Gnss Log Data Fin
(91486, 102)


Unnamed: 0,collectionName,phoneName,millisSinceGpsEpoch,latDeg,lngDeg,heightAboveWgs84EllipsoidM,phone,constellationType,svid,signalType,...,SnrInDb,ConstellationType_Raw,AgcDb,BasebandCn0DbHz_Raw,FullInterSignalBiasNanos,FullInterSignalBiasUncertaintyNanos,SatelliteInterSignalBiasNanos,SatelliteInterSignalBiasUncertaintyNanos,CodeType,ChipsetElapsedRealtimeNanos
0,2020-05-15-US-MTV-1,Pixel4XL,1273608752446,37.416623,-122.082055,-24.21,2020-05-15-US-MTV-1_Pixel4XL,3.0,21.0,GLO_G1,...,,1,1.88,,,,,,,
1,2020-05-15-US-MTV-1,Pixel4XL,1273608753446,37.41659,-122.082073,-29.97,2020-05-15-US-MTV-1_Pixel4XL,3.0,21.0,GLO_G1,...,,1,2.33,,,,,,,
2,2020-05-15-US-MTV-1,Pixel4XL,1273608754446,37.416593,-122.082084,-29.85,2020-05-15-US-MTV-1_Pixel4XL,3.0,21.0,GLO_G1,...,,1,1.99,,,,,,,
3,2020-05-15-US-MTV-1,Pixel4XL,1273608755446,37.41658,-122.082067,-28.53,2020-05-15-US-MTV-1_Pixel4XL,3.0,21.0,GLO_G1,...,,6,1.74,,,,,,,
4,2020-05-15-US-MTV-1,Pixel4XL,1273608756446,37.416594,-122.082065,-27.7,2020-05-15-US-MTV-1_Pixel4XL,3.0,21.0,GLO_G1,...,,1,2.08,,,,,,,


In [30]:
output.info(verbose = True, memory_usage= True, null_counts=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 91486 entries, 0 to 91485
Data columns (total 102 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   collectionName                             91486 non-null  object 
 1   phoneName                                  91486 non-null  object 
 2   millisSinceGpsEpoch                        91486 non-null  int64  
 3   latDeg                                     91486 non-null  float64
 4   lngDeg                                     91486 non-null  float64
 5   heightAboveWgs84EllipsoidM                 91486 non-null  float64
 6   phone                                      91486 non-null  object 
 7   constellationType                          55504 non-null  float64
 8   svid                                       55504 non-null  float64
 9   signalType                                 55504 non-null  object 
 10  receivedSvTimeInGpsNa

In [32]:
del df_derived, gnss_dict, output

In [33]:
%clear




# Load Pickle File

In [34]:
import numpy as np
import pandas as pd
from glob import glob
import os
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from pathlib import Path
import plotly.express as px

In [35]:
PATH = Path("../input/google-smartphone-decimeter-challenge")

In [36]:
df_train = pd.read_pickle(str(PATH / "gsdc_train.pkl.gzip"))

In [37]:
print(df_train.shape)
df_train.head()

(131342, 110)


Unnamed: 0,collectionName,phoneName,millisSinceGpsEpoch,latDeg,lngDeg,heightAboveWgs84EllipsoidM,phone,timeSinceFirstFixSeconds,hDop,vDop,...,SnrInDb,ConstellationType_Raw,AgcDb,BasebandCn0DbHz_Raw,FullInterSignalBiasNanos,FullInterSignalBiasUncertaintyNanos,SatelliteInterSignalBiasNanos,SatelliteInterSignalBiasUncertaintyNanos,CodeType,ChipsetElapsedRealtimeNanos
0,2020-05-14-US-MTV-1,Pixel4,1273529463442,37.423575,-122.094091,-34.06,2020-05-14-US-MTV-1_Pixel4,551.44,1.1,0.0,...,,1,-0.61,,,,,,,
1,2020-05-14-US-MTV-1,Pixel4,1273529464442,37.423578,-122.094101,-33.29,2020-05-14-US-MTV-1_Pixel4,552.44,1.1,0.0,...,,1,-0.12,,,,,,,
2,2020-05-14-US-MTV-1,Pixel4,1273529465442,37.423573,-122.094111,-30.99,2020-05-14-US-MTV-1_Pixel4,553.44,1.1,0.0,...,,1,0.02,,,,,,,
3,2020-05-14-US-MTV-1,Pixel4,1273529466442,37.423583,-122.094121,-32.83,2020-05-14-US-MTV-1_Pixel4,554.44,1.1,0.0,...,,1,0.07,,,,,,,
4,2020-05-14-US-MTV-1,Pixel4XLModded,1273529466449,37.423574,-122.094137,-33.2,2020-05-14-US-MTV-1_Pixel4XLModded,554.45,1.2,0.0,...,,1,0.99,,,,,,,


In [38]:
df_train.info(verbose = True, memory_usage= True, null_counts=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 131342 entries, 0 to 131341
Data columns (total 110 columns):
 #   Column                                     Non-Null Count   Dtype  
---  ------                                     --------------   -----  
 0   collectionName                             131342 non-null  object 
 1   phoneName                                  131342 non-null  object 
 2   millisSinceGpsEpoch                        131342 non-null  int64  
 3   latDeg                                     131342 non-null  float64
 4   lngDeg                                     131342 non-null  float64
 5   heightAboveWgs84EllipsoidM                 131342 non-null  float64
 6   phone                                      131342 non-null  object 
 7   timeSinceFirstFixSeconds                   131342 non-null  float64
 8   hDop                                       131342 non-null  float64
 9   vDop                                       131342 non-null  float64
 10  speedMp

In [39]:
df_test = pd.read_pickle(str(PATH / "gsdc_test.pkl.gzip"))

In [40]:
print(df_test.shape)
df_test.head()

(91486, 102)


Unnamed: 0,collectionName,phoneName,millisSinceGpsEpoch,latDeg,lngDeg,heightAboveWgs84EllipsoidM,phone,constellationType,svid,signalType,...,SnrInDb,ConstellationType_Raw,AgcDb,BasebandCn0DbHz_Raw,FullInterSignalBiasNanos,FullInterSignalBiasUncertaintyNanos,SatelliteInterSignalBiasNanos,SatelliteInterSignalBiasUncertaintyNanos,CodeType,ChipsetElapsedRealtimeNanos
0,2020-05-15-US-MTV-1,Pixel4XL,1273608752446,37.416623,-122.082055,-24.21,2020-05-15-US-MTV-1_Pixel4XL,3.0,21.0,GLO_G1,...,,1,1.88,,,,,,,
1,2020-05-15-US-MTV-1,Pixel4XL,1273608753446,37.41659,-122.082073,-29.97,2020-05-15-US-MTV-1_Pixel4XL,3.0,21.0,GLO_G1,...,,1,2.33,,,,,,,
2,2020-05-15-US-MTV-1,Pixel4XL,1273608754446,37.416593,-122.082084,-29.85,2020-05-15-US-MTV-1_Pixel4XL,3.0,21.0,GLO_G1,...,,1,1.99,,,,,,,
3,2020-05-15-US-MTV-1,Pixel4XL,1273608755446,37.41658,-122.082067,-28.53,2020-05-15-US-MTV-1_Pixel4XL,3.0,21.0,GLO_G1,...,,6,1.74,,,,,,,
4,2020-05-15-US-MTV-1,Pixel4XL,1273608756446,37.416594,-122.082065,-27.7,2020-05-15-US-MTV-1_Pixel4XL,3.0,21.0,GLO_G1,...,,1,2.08,,,,,,,


In [41]:
df_test.info(verbose = True, memory_usage= True, null_counts=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 91486 entries, 0 to 91485
Data columns (total 102 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   collectionName                             91486 non-null  object 
 1   phoneName                                  91486 non-null  object 
 2   millisSinceGpsEpoch                        91486 non-null  int64  
 3   latDeg                                     91486 non-null  float64
 4   lngDeg                                     91486 non-null  float64
 5   heightAboveWgs84EllipsoidM                 91486 non-null  float64
 6   phone                                      91486 non-null  object 
 7   constellationType                          55504 non-null  float64
 8   svid                                       55504 non-null  float64
 9   signalType                                 55504 non-null  object 
 10  receivedSvTimeInGpsNa

In [42]:
submission = pd.read_csv("../input/google-smartphone-decimeter-challenge/sample_submission.csv")
submission.shape

(91486, 4)