## Input data

In [None]:
!ls '/kaggle/input/google-smartphone-decimeter-challenge'

### Contents of a single phone directory

In [None]:
!ls /kaggle/input/google-smartphone-decimeter-challenge/train/2020-05-14-US-MTV-1/Pixel4/

## Reader class for easy data reading

In [None]:
import os
from os.path import join
import pandas as pd

class DataReader:
    """Convenience reader for the Google Smartphone Decimeter Challenge files.

    Constructing an instance eagerly loads ``baseline_locations_train.csv``
    into ``train_df``; the remaining methods read per-phone files on demand.
    All relative paths are resolved against ``input_path``.
    """

    def __init__(self):
        # Root directory of the competition data set.
        self.input_path = '/kaggle/input/google-smartphone-decimeter-challenge'
        self.train_df = self.read_train_csv()

    def read_train_csv(self):
        """Return ``baseline_locations_train.csv`` as a DataFrame."""
        return pd.read_csv(join(self.input_path, 'baseline_locations_train.csv'))

    def read_supplemental_gnss_logs(self, phone_path):
        """Print and return the supplemental GNSS logs (.20o/.21o/.nmea) of a phone.

        Every file found anywhere below ``<input_path>/<phone_path>/supplemental``
        is read as text; no filtering on the file extension is performed.

        Args:
            phone_path: Phone directory, relative to ``input_path``
                (an absolute path is used as is).

        Returns:
            A dict mapping the full path of each file to its text content.
            The dict is empty when the directory does not exist or holds
            no files.
        """
        # TODO convert it to CSV using this script
        # https://stackoverflow.com/questions/65394166/how-to-read-an-nmea-file-with-python
        curr_path = join(self.input_path, phone_path, 'supplemental')
        logs = {}
        for dirname, _, filenames in os.walk(curr_path):
            for filename in filenames:
                # Join with the directory currently being walked, not the
                # root, so files inside nested sub-directories are found.
                file_path = join(dirname, filename)
                with open(file_path) as f:
                    file_content = f.read()
                print(file_content)
                logs[file_path] = file_content
        return logs

    def read_gnss_logs(self, path, phone_name):
        """Return the raw text of ``<path>/<phone_name>_GnssLog.txt``."""
        with open(join(path, phone_name + '_GnssLog.txt')) as f:
            return f.read()

    def read_one_phone_data(self, phone_path):
        """Read the ground truth, derived data and raw GNSS log of one phone.

        Args:
            phone_path: Phone directory relative to ``input_path``, e.g.
                ``'train/2020-05-14-US-MTV-1/Pixel4'``. A trailing slash
                is tolerated.

        Returns:
            A tuple ``(ground_truth_df, derived_df, gnss_logs)`` holding two
            DataFrames and the GNSS log as a single string.
        """
        curr_path = join(self.input_path, phone_path)
        # The last path component names the phone and prefixes its files.
        # normpath drops a trailing separator that would otherwise make
        # the name empty.
        phone_name = os.path.basename(os.path.normpath(curr_path))
        ground_truth_df = pd.read_csv(join(curr_path, 'ground_truth.csv'))
        derived_df = pd.read_csv(join(curr_path, phone_name + '_derived.csv'))
        gnss_logs = self.read_gnss_logs(curr_path, phone_name)
        # supp_logs = self.read_supplemental_gnss_logs(curr_path)

        return ground_truth_df, derived_df, gnss_logs

    def read_all_phone_data(self, phone_path):
        """Placeholder: not implemented yet, currently returns ``None``."""
        # TODO implement; an earlier draft read only ground_truth.csv from
        # join(self.input_path, phone_path) and returned it.
        pass

    def create_submission_file(self):
        """Placeholder: not implemented yet, currently returns ``None``."""
        pass
    

In [None]:
# Create the reader; its constructor loads baseline_locations_train.csv
# into data.train_df.
data = DataReader()

In [None]:
# Show the dimensions and the first three rows of the baseline training table.
print("Shape of train_df :", data.train_df.shape)
data.train_df.head(3)

In [None]:
# Summary statistics of the baseline training table.
data.train_df.describe()

In [None]:
# Build an automated profiling report for the baseline training table.
# NOTE(review): pandas_profiling appears to have been renamed to
# ydata-profiling upstream — confirm which package the kernel image ships.
import pandas_profiling as pp
pp.ProfileReport(data.train_df)

In [None]:
# Load the ground truth, derived measurements and raw GNSS log text for a
# single training phone, then preview the first two ground-truth rows.
ground_truth_df, derived_df, gnss_logs = data.read_one_phone_data('train/2020-05-14-US-MTV-1/Pixel4')
ground_truth_df.head(2)

In [None]:
# Summary statistics of the ground truth for the selected phone.
ground_truth_df.describe()

In [None]:
# Summary statistics of the derived measurements for the selected phone.
derived_df.describe()

In [None]:
!ls /kaggle/input/google-smartphone-decimeter-challenge/test/2020-05-15-US-MTV-1/Pixel4

In [None]:
# Load the derived measurements of one test-set phone directly by its full path.
derived_test = pd.read_csv('/kaggle/input/google-smartphone-decimeter-challenge/test/2020-05-15-US-MTV-1/Pixel4/Pixel4_derived.csv')

In [None]:
# List the columns available in the test-set derived file.
derived_test.columns

In [None]:
# correctedPrM = rawPrM + satClkBiasM - isrbM - ionoDelayM - tropoDelayM.

## Plot the ground truth and the baseline (approximated) locations together

In [None]:
# Join the baseline rows with the selected phone's ground truth on collection,
# phone and timestamp. pd.merge defaults to an inner join, so only rows
# present in both frames remain. Columns that exist in both frames get the
# default suffixes: "_x" for data.train_df and "_y" for ground_truth_df
# (e.g. latDeg_x / latDeg_y, which the plotting cell below relies on).
output_df = pd.merge(data.train_df, ground_truth_df, on=['collectionName', 'phoneName', 'millisSinceGpsEpoch'])

In [None]:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt

# Draw both tracks of output_df on a Mercator map whose extent is the
# bounding box of the "_x" (baseline) coordinates.
plt.figure(figsize=(10, 10))
delta = 0  # extra margin, in degrees, added around the bounding box
m = Basemap(
    projection='merc',
    llcrnrlat=output_df.latDeg_x.min() - delta,
    urcrnrlat=output_df.latDeg_x.max() + delta,
    llcrnrlon=output_df.lngDeg_x.min() - delta,
    urcrnrlon=output_df.lngDeg_x.max() + delta,
    lat_ts=40,
    resolution='l',
)

# First track: the "_x" columns, i.e. the left-hand frame of the merge.
lat = output_df.latDeg_x.tolist()
lon = output_df.lngDeg_x.tolist()
x, y = m(lon, lat)
m.plot(x, y, 'o-', markersize=1, linewidth=1)

# Second track: the "_y" columns, shifted by 0.005 degrees on both axes
# (presumably so the two lines do not sit on top of each other — confirm).
lat2 = [value + 0.005 for value in output_df.latDeg_y]
lon2 = [value + 0.005 for value in output_df.lngDeg_y]
x2, y2 = m(lon2, lat2)
m.plot(x2, y2, 'o-', markersize=1, linewidth=1)

# Map decorations.
m.drawcoastlines()
m.fillcontinents(color='yellow')
m.drawmapboundary(fill_color='white')
m.drawstates(color='black')
m.drawcountries(color='black')

plt.title("Route of a single phone")
plt.show()

# Model

In [None]:
# TODO build a model

# Submission

In [None]:
# No model exists yet, so the sample submission is copied unchanged into
# the file that gets submitted.
sample_submission_path = join(data.input_path, 'sample_submission.csv')
submission_file = pd.read_csv(sample_submission_path)
submission_file.to_csv('submission.csv', index=False)