# GNSS logs

In [8]:
import os
import pandas as pd
import pathlib
from tqdm import tqdm
import numpy as np

In [9]:
# Directory layout: raw competition data lives under INPUT; the CSVs produced
# by this notebook are written to a 'prep' folder inside that same tree.
INPUT = '../input/google-smartphone-decimeter-challenge'
OUTPUT = f'{INPUT}/prep'

In [14]:
def gnss_log_to_dataframes(path):
    """Parse one GnssLog text file into one DataFrame per record section.

    The file is read line by line. Lines starting with '#' are headers: when
    the first comma-separated field of a header names a known section, the
    remaining fields become that section's column names; any other header
    (notes, version numbers, ...) is ignored. Every other line is a data row
    whose first field selects the section it belongs to.

    Data rows whose first field is not a known section name (for example a
    tag cut short by a truncated write, such as 'Unca') are reported with a
    'skip : ' message and dropped. Blank lines are ignored silently.

    Parameters
    ----------
    path : str or os.PathLike
        Location of the GnssLog text file.

    Returns
    -------
    dict[str, pandas.DataFrame]
        One frame per section ('Raw', 'UncalAccel', 'UncalGyro', 'UncalMag',
        'Fix', 'Status', 'OrientationDeg'). Every column except 'CodeType'
        is converted with ``pd.to_numeric(errors='coerce')``, so non-numeric
        text in those columns becomes NaN.
    """
    # A tuple (not a set) so the returned dict has a deterministic key order.
    gnss_section_names = ('Raw', 'UncalAccel', 'UncalGyro', 'UncalMag',
                          'Fix', 'Status', 'OrientationDeg')

    datas = {k: [] for k in gnss_section_names}
    gnss_map = {k: [] for k in gnss_section_names}
    with open(path) as f_open:
        # Stream the file instead of materialising every line up front.
        for dataline in f_open:
            is_header = dataline.startswith('#')
            fields = dataline.strip('#').strip().split(',')
            section = fields[0]
            if is_header:
                # skip over notes, version numbers, etc
                if section in gnss_section_names:
                    gnss_map[section] = fields[1:]
            elif section in gnss_section_names:
                datas[section].append(fields[1:])
            elif section:
                # Unknown or truncated tag: report it against the file being
                # parsed (the argument, not a notebook-level variable).
                print('skip : ', path, section)

    results = dict()
    for k, rows in datas.items():
        df = pd.DataFrame(rows, columns=gnss_map[k])
        # pandas doesn't properly infer types from these lists by default
        for col in df.columns:
            if col == 'CodeType':
                continue
            df[col] = pd.to_numeric(df[col], errors='coerce')
        results[k] = df

    return results

In [11]:
# Export the training GnssLogs: parse every
# <INPUT>/train/<collection>/<phone>/*GnssLog.txt file and write one CSV per
# record section to <OUTPUT>/gnss/train/.
p = pathlib.Path(INPUT)
# Sorted so the row order of the exported CSVs does not depend on the
# filesystem's directory listing order.
gnss_files = sorted(p.glob('train/*/*/*GnssLog.txt'))
if not gnss_files:
    raise FileNotFoundError(f'no *GnssLog.txt files found under {p / "train"}')

cols = ['Raw', 'UncalAccel', 'UncalGyro', 'UncalMag', 'Fix', 'Status', 'OrientationDeg']
results = {c: [] for c in cols}

# The loop variable keeps the name `file`: a later cell inspects it.
for file in gnss_files:
    dfs = gnss_log_to_dataframes(file)
    for c in cols:
        tmp = dfs[c]
        # Take the names relative to the log file itself rather than by
        # absolute position in the path, so the depth of INPUT is irrelevant.
        tmp['collectionName'] = file.parent.parent.name
        tmp['phoneName'] = file.parent.name
        results[c].append(tmp)

# to_csv does not create missing directories, so make sure the target exists.
out_dir = os.path.join(OUTPUT, 'gnss', 'train')
os.makedirs(out_dir, exist_ok=True)
for c in cols:
    pd.concat(results[c]).to_csv(os.path.join(out_dir, f'{c}.csv'), index=False)

skip :  ../input/google-smartphone-decimeter-challenge/train/2021-04-29-US-MTV-1/SamsungS20Ultra/SamsungS20Ultra_GnssLog.txt OrientationDe
skip :  ../input/google-smartphone-decimeter-challenge/train/2021-01-04-US-RWC-2/Pixel5/Pixel5_GnssLog.txt Unca


In [16]:
# Export the test GnssLogs: parse every
# <INPUT>/test/<collection>/<phone>/*GnssLog.txt file and write one CSV per
# record section to <OUTPUT>/gnss/test/.
p = pathlib.Path(INPUT)
# Sorted so the row order of the exported CSVs does not depend on the
# filesystem's directory listing order.
gnss_files = sorted(p.glob('test/*/*/*GnssLog.txt'))
if not gnss_files:
    raise FileNotFoundError(f'no *GnssLog.txt files found under {p / "test"}')

cols = ['Raw', 'UncalAccel', 'UncalGyro', 'UncalMag', 'Fix', 'Status', 'OrientationDeg']
results = {c: [] for c in cols}

# The loop variable keeps the name `file`: a later cell inspects it.
for file in gnss_files:
    dfs = gnss_log_to_dataframes(file)
    for c in cols:
        tmp = dfs[c]
        # Take the names relative to the log file itself rather than by
        # absolute position in the path, so the depth of INPUT is irrelevant.
        tmp['collectionName'] = file.parent.parent.name
        tmp['phoneName'] = file.parent.name
        results[c].append(tmp)

# to_csv does not create missing directories, so make sure the target exists.
out_dir = os.path.join(OUTPUT, 'gnss', 'test')
os.makedirs(out_dir, exist_ok=True)
for c in cols:
    pd.concat(results[c]).to_csv(os.path.join(out_dir, f'{c}.csv'), index=False)

skip :  ../input/google-smartphone-decimeter-challenge/test/2021-04-26-US-SVL-2/SamsungS20Ultra/SamsungS20Ultra_GnssLog.txt Orientatio


In [13]:
# Debug inspection: shows `file`, the loop variable left behind by the
# per-file loops in the cells above (the last GnssLog path they visited).
# NOTE(review): this relies on kernel state and raises NameError on a fresh
# kernel unless one of those loops has run first — consider removing.
file

PosixPath('../input/google-smartphone-decimeter-challenge/test/2021-04-26-US-SVL-2/SamsungS20Ultra/SamsungS20Ultra_GnssLog.txt')