## Libraries

In [None]:
%%writefile GLONASS_FCN_OSN_MAP.json
{
  "test_2020-08-03-US-MTV-2_Mi8": {
    "96": -1,
    "98": 13,
    "103": 19,
    "104": -1,
    "105": 3
  },
  "test_2020-08-13-US-MTV-1_Mi8": {
    "105": 7,
    "106": 4,
    "103": 19
  },
  "test_2021-03-25-US-PAO-1_Mi8": {
    "98": 9,
    "102": 24
  },
  "train_2020-05-21-US-MTV-2_Pixel4": {
    "96": -1
  },
  "train_2020-05-21-US-MTV-2_Pixel4XL": {
    "96": -1
  },
  "train_2020-05-29-US-MTV-1_Pixel4": {
    "103": 23
  },
  "train_2020-07-17-US-MTV-1_Mi8": {
    "106": -1,
    "99": 16,
    "100": 15,
    "103": 23
  },
  "train_2020-07-17-US-MTV-2_Mi8": {
    "106": -1,
    "99": -1,
    "101": 1,
    "103": -1
  },
  "train_2020-08-03-US-MTV-1_Mi8": {
    "98": 9,
    "99": -1,
    "102": -1,
    "103": 19
  },
  "train_2020-08-06-US-MTV-2_Mi8": {
    "105": 7,
    "102": 20,
    "103": -1
  },
  "train_2020-09-04-US-SF-1_Mi8": {
    "105": 3,
    "106": 8,
    "93": 14,
    "103": 23
  },
  "train_2020-09-04-US-SF-2_Mi8": {
    "106": -1,
    "100": 15,
    "101": 1
  },
  "train_2021-01-05-US-SVL-1_Mi8": {
    "104": 21,
    "97": 22
  },
  "train_2021-04-26-US-SVL-1_Mi8": {
    "106": 4
  },
  "train_2021-04-28-US-SJC-1_Pixel4": {
    "93": -1
  }
}

In [None]:
%%writefile constants.py
import json
import datetime
from collections import defaultdict
import numpy as np

# Load the per-drive GLONASS lookup table from the working directory.
# Outer keys are '<prefix>_<drive>_<phone>' strings; inner keys are raw Svid
# values as strings; values are the slot number to use instead (-1 = unknown).
with open(f'GLONASS_FCN_OSN_MAP.json') as f:
    GLONASS_FCN_OSN_MAP = json.load(f)

# Origin of the GPS time scale (1980-01-06), as a date and as a naive datetime.
GPS_ORIGIN_DAY       = datetime.date(1980, 1, 6)
GPS_ORIGIN_DATETIME  = datetime.datetime(1980, 1, 6)
# NOTE(review): presumably whole-second offsets between GPS time and the
# GLONASS / BeiDou time scales — confirm against the code that uses them.
GLONASS_LEAP_SECONDS = 18
BEIDOU_LEAP_SECONDS  = 14
# Fixed UTC+3 timezone labelled 'MSK'.
TZ_MSK = datetime.timezone(datetime.timedelta(hours=+3), 'MSK')

# WGS84 ellipsoid parameters (axes presumably in metres — TODO confirm).
WGS84_SEMI_MAJOR_AXIS = 6378137.0
WGS84_SEMI_MINOR_AXIS = 6356752.314245
WGS84_SQUARED_FIRST_ECCENTRICITY  = 6.69437999013e-3
WGS84_SQUARED_SECOND_ECCENTRICITY = 6.73949674226e-3
WGS84_FIRST_ECCENTRICITY  = np.sqrt(WGS84_SQUARED_FIRST_ECCENTRICITY)
WGS84_SECOND_ECCENTRICITY = np.sqrt(WGS84_SQUARED_SECOND_ECCENTRICITY)

# Speed of light in vacuum, m/s.
LIGHT_SPEED = 299792458.0

# NOTE(review): presumably Earth rotation rate (rad/s) and gravitational
# parameter (m^3/s^2) — confirm units against callers.
OMEGA_EARTH = 7.2921151467e-5
MU_EARTH    = 3.986005e+14

# Nominal carrier frequencies. Galileo E1/E5a and QZSS J1/J5 reuse the GPS L1/L5 values.
FREQ_GPS_L1  = 1.575420e+09
FREQ_GPS_L5  = 1.176450e+09
FREQ_GAL_E1  = FREQ_GPS_L1
FREQ_GAL_E5A = FREQ_GPS_L5
FREQ_QZS_J1  = FREQ_GPS_L1
FREQ_QZS_J5  = FREQ_GPS_L5
FREQ_BDS_B1I = 1.561098e+09
# GLONASS G1 uses one carrier per frequency channel k: NOMINAL + k * DELTA.
FREQ_GLO_G1_NOMINAL = 1602.00 * 1e+6
FREQ_GLO_G1_DELTA   = 562.5 * 1e+3

# Constellation name -> numeric id, compared against the 'ConstellationType'
# column of the raw GNSS log.
CONSTELLATION_TYPE_MAP = {
    'GPS'     : 1,
    'GLONASS' : 3,
    'QZSS'    : 4,
    'BEIDOU'  : 5,
    'GALILEO' : 6,
}

# Bit position -> flag name for the raw measurement 'State' bit field.
RAW_STATE_BIT_MAP = {
     0: "Code Lock",
     1: "Bit Sync",
     2: "Subframe Sync",
     3: "Time Of Week Decoded State",
     4: "Millisecond Ambiguity",
     5: "Symbol Sync",
     6: "GLONASS String Sync",
     7: "GLONASS Time Of Day Decoded",
     8: "BEIDOU D2 Bit Sync",
     9: "BEIDOU D2 Subframe Sync",
    10: "Galileo E1BC Code Lock",
    11: "Galileo E1C 2^nd^ Code Lock",
    12: "Galileo E1B Page Sync",
    13: "SBAS Sync",
    14: "Time Of Week Known",
    15: "GLONASS Time Of Day Known",
}
# Inverse lookup: flag name -> bit position.
RAW_STATE_BIT_INV_MAP = { value : key for key, value in RAW_STATE_BIT_MAP.items() }

# Constellation name -> one-letter prefix used when building satellite names
# such as 'G01' or 'R07'.
SYSTEM_NAME_MAP = {
    'GPS'     : 'G',
    'GLONASS' : 'R',
    'GALILEO' : 'E',
    'BEIDOU'  : 'C',
    'QZSS'    : 'J',
}

# GLONASS Svid (1..24) -> G1 frequency channel number (range -7..6).
# The channel is used as k in FREQ_GLO_G1_NOMINAL + k * FREQ_GLO_G1_DELTA.
GLONASS_FREQ_CHANNEL_MAP = {
    1 : 1,
    2 : -4,
    3 : 5,
    4 : 6,
    5 : 1,
    6 : -4,
    7 : 5,
    8 : 6,
    9 : -2,
    10 : -7,
    11 : 0,
    12 : -1,
    13 : -2,
    14 : -7,
    15 : 0,
    16 : -1,
    17 : 4,
    18 : -3,
    19 : 3,
    20 : 2,
    21 : 4,
    22 : -3,
    23 : 3,
    24 : 2,
}

# QZSS Svid as reported in the raw log -> satellite number used in 'Jnn' names.
QZSS_PRN_SVID_MAP = {
    193 : 1,
    194 : 2,
    199 : 3,
    195 : 4,
}

# NOTE(review): presumably an initial receiver position guess
# (latitude / longitude in radians, height) — confirm against callers.
INIT_B = np.deg2rad(  37.5)
INIT_L = np.deg2rad(-122.2)
INIT_H = 0.0

# Tolerance when matching a measured carrier frequency to a nominal one.
FREQ_TOL = 100.0
# Measurement-quality limits: minimum C/N0 and maximum received-SV-time uncertainty.
Cn0DbHz_THRESHOLD = 20.0
ReceivedSvTimeUncertaintyNanos_THRESHOLD = 500
# NOTE(review): presumably an upper bound on raw pseudorange (50,000 km in metres) — confirm.
RAW_PSEUDO_RANGE_THRESHOLD = 50_000 * 1e+3

# Extra time coverage kept around the measurement span when selecting
# clock / orbit / ionosphere products.
CLOCK_TIME_MARGIN = datetime.timedelta(seconds=90)
ORBIT_TIME_MARGIN = datetime.timedelta(hours=3)
IONO_TIME_MARGIN  = datetime.timedelta(hours=2)

# NOTE(review): names suggest a convergence tolerance and a default
# tropospheric delay, both in metres — confirm against callers.
EPSILON_M = 0.01
DEFAULT_TROPO_DELAY_M = 2.48

# Sphere radius used by the haversine distance.
HAVERSINE_RADIUS = 6_371_000

In [None]:
%%writefile io_f.py
import io
import datetime
from dataclasses import dataclass, asdict
import numpy as np
import pandas as pd
from scipy.interpolate import InterpolatedUnivariateSpline, RectBivariateSpline

@dataclass
class IONEX:
    """Ionosphere map grid together with the header metadata that describes it.

    Instances are built by ``read_IONEX_file`` and merged by ``concat_ionex``.
    """
    # Height of the single ionosphere layer (header value multiplied by 1000).
    iono_height : float
    # Base radius from the header, multiplied by 1000.
    base_radius : float
    # Latitude grid: first value, last value and step, in radians, ascending.
    lat_1       : float
    lat_2       : float
    lat_delta   : float
    # Longitude grid: first value, last value and step, in radians, ascending.
    lng_1       : float
    lng_2       : float
    lng_delta   : float
    # Epoch of the first and last map, and the interval between maps.
    time_1      : np.datetime64
    time_2      : np.datetime64
    time_delta  : np.timedelta64
    # Map values, indexed as [time, latitude, longitude].
    iono_map    : np.array
    # Latitude / longitude values of the grid nodes (radians).
    lat_range   : np.array
    lng_range   : np.array

def concat_clk(clk_df_list):
    """Stack clock data frames and keep only satellites present in all of them.

    Args:
        clk_df_list: non-empty list of data frames, each with a 'SatName' column.

    Returns:
        The row-wise concatenation restricted to satellites that occur in every
        input frame, with a fresh 0..n-1 index.
    """
    stacked = pd.concat(clk_df_list, axis=0)
    sats_per_frame = [frozenset(frame['SatName']) for frame in clk_df_list]
    # Intersection of the satellite sets of all frames.
    common_sats = sats_per_frame[0].intersection(*sats_per_frame[1:])
    in_every_frame = stacked['SatName'].isin(common_sats)
    return stacked[in_every_frame].reset_index(drop=True)

def concat_sp3(sp3_df_list):
    """Stack orbit data frames and keep only satellites present in all of them.

    Args:
        sp3_df_list: non-empty list of data frames, each with a 'SatName' column.

    Returns:
        The row-wise concatenation restricted to satellites that occur in every
        input frame, with a fresh 0..n-1 index.
    """
    stacked = pd.concat(sp3_df_list, axis=0)
    sats_per_frame = [frozenset(frame['SatName']) for frame in sp3_df_list]
    # Intersection of the satellite sets of all frames.
    common_sats = sats_per_frame[0].intersection(*sats_per_frame[1:])
    in_every_frame = stacked['SatName'].isin(common_sats)
    return stacked[in_every_frame].reset_index(drop=True)

def concat_ionex(ionex_list):
    """Join consecutive IONEX objects along the time axis.

    All inputs must share the same layer height, base radius, lat/lng grid and
    time step, and each one must end exactly where the next one starts; these
    conditions are enforced with assertions.

    Args:
        ionex_list: list of IONEX objects in chronological order.

    Returns:
        A new IONEX spanning from the first ``time_1`` to the last ``time_2``.
    """
    must_match = ('iono_height', 'base_radius',
                  'lat_1', 'lat_2', 'lat_delta',
                  'lng_1', 'lng_2', 'lng_delta',
                  'time_delta')
    for field in must_match:
        assert(len(np.unique([getattr(ionex, field) for ionex in ionex_list])) == 1)

    map_pieces = []
    for current, following in zip(ionex_list[:-1], ionex_list[1:]):
        assert(current.time_2 == following.time_1)
        # The last map of `current` is the first map of `following`; drop the duplicate.
        map_pieces.append(current.iono_map[0:-1, :, :])
    map_pieces.append(ionex_list[-1].iono_map)

    kw = asdict(ionex_list[0])
    kw['time_2']   = ionex_list[-1].time_2
    kw['iono_map'] = np.concatenate(map_pieces, axis=0)
    return IONEX(**kw)

def read_GnssLog_Raw(filename):
    """Load the 'Raw' records of a GnssLog text file into a data frame.

    Every line containing the substring 'Raw' is kept (this includes the
    '#'-prefixed header line that names the columns); trailing whitespace and
    leading '#' characters are stripped before the lines are parsed as CSV.
    """
    with open(filename, 'r') as log_file:
        raw_rows = [row.rstrip().lstrip('#') for row in log_file if 'Raw' in row]
    csv_text = '\n'.join(raw_rows)
    return pd.read_csv(io.StringIO(csv_text))

def read_clock_file(filename):
    """Parse the satellite ('AS') records of a clock file.

    The header must declare 'GPS' on its 'TIME SYSTEM ID' line (asserted) and be
    terminated by an 'END OF HEADER' line. Only rows whose epoch lies within one
    day of the first parsed epoch are returned.

    Returns:
        Data frame with columns 'Epoch', 'SatName' and 'DeltaTSV' (the tenth
        whitespace-separated token of each record, 'D' exponents accepted).
    """
    with open(filename, 'r') as clk_file:
        all_lines = clk_file.readlines()

    for line_no, text in enumerate(all_lines):
        if 'TIME SYSTEM ID' in text:
            assert(text.strip().split()[0] == 'GPS')
        elif 'END OF HEADER' in text:
            start_index = line_no + 1
            break

    records = []
    for text in all_lines[start_index:]:
        if not text.startswith('AS '):
            continue
        fields = text.rstrip().split()
        epoch = datetime.datetime(
            int(fields[2]), int(fields[3]), int(fields[4]),
            int(fields[5]), int(fields[6]), int(float(fields[7])),
        )
        # Fortran-style exponents ('1.0D-04') are not understood by float().
        bias = float(fields[9].replace('D', 'E'))
        records.append((epoch, fields[1], bias))

    df = pd.DataFrame(records, columns=['Epoch', 'SatName', 'DeltaTSV'])
    one_day_after_start = df['Epoch'].values[0] + pd.Timedelta(1, unit='day')
    df = df[df['Epoch'] < one_day_after_start]
    return df.reset_index(drop=True)

def read_sp3_file(filename):
    """Parse the position records of an SP3 orbit file.

    Header lines starting with '%c ' must carry 'GPS' (or the 'ccc' placeholder)
    as their fourth token (asserted). Data starts at the first '* ' epoch line.
    Positions are multiplied by 1e+3 and the clock value by 1e-6. Rows more than
    one day after the first epoch, and rows whose X, Y and Z are all zero, are
    dropped.

    Returns:
        Data frame with columns 'Epoch', 'SatName', 'X', 'Y', 'Z', 'DeltaTSV_SP3'.
    """
    with open(filename, 'r') as sp3_file:
        all_lines = sp3_file.readlines()

    for line_no, text in enumerate(all_lines):
        if text.startswith('%c '):
            time_system = text.split()[3]
            assert((time_system == 'GPS') or (time_system == 'ccc'))
        elif text.startswith('* '):
            start_index = line_no
            break

    rows = []
    for text in all_lines[start_index:]:
        fields = text.rstrip().split()
        if text.startswith('* '):
            # Epoch header: applies to every 'P' record until the next header.
            epoch = datetime.datetime(
                int(fields[1]), int(fields[2]), int(fields[3]),
                int(fields[4]), int(fields[5]), int(float(fields[6])),
            )
        elif text.startswith('P'):
            sat = fields[0][1:]
            x_raw, y_raw, z_raw, clk_raw = [float(value) for value in fields[1:5]]
            rows.append([epoch, sat,
                         x_raw * 1e+3, y_raw * 1e+3, z_raw * 1e+3,
                         clk_raw * 1e-6])

    df = pd.DataFrame(rows, columns=['Epoch', 'SatName', 'X', 'Y', 'Z', 'DeltaTSV_SP3'])
    one_day_after_start = df['Epoch'].values[0] + pd.Timedelta(1, unit='day')
    df = df[df['Epoch'] < one_day_after_start]
    all_zero_position = (df['X'] == 0) & (df['Y'] == 0) & (df['Z'] == 0)
    df = df[~all_zero_position]
    return df.reset_index(drop=True)

def read_SINEX_TRO_file(filename):
    """Parse the '+TROP/SOLUTION' block of a SINEX troposphere file.

    The line directly after the block marker is skipped. Each record's second
    token is read as 'yy:ddd:sssss' (two-digit year offset from 2000, day of
    year, second of day); the remaining tokens are multiplied by 1e-3. Rows more
    than one day after the first epoch are dropped.

    Returns:
        Data frame with columns 'Epoch', 'TROTOT', 'TROTOT_STD', 'TGNTOT',
        'TGNTOT_STD', 'TGETOT', 'TGETOT_STD'.
    """
    with open(filename, 'r') as tro_file:
        all_lines = tro_file.readlines()

    for line_no, text in enumerate(all_lines):
        if '+TROP/SOLUTION' in text:
            start_index = line_no + 2
            break

    rows = []
    for text in all_lines[start_index:]:
        if '-TROP/SOLUTION' in text:
            break
        fields = text.strip().split()
        year2, day_of_year, sec_of_day = [int(part) for part in fields[1].split(':')]
        offset = datetime.timedelta(days=day_of_year-1, seconds=sec_of_day)
        epoch = datetime.datetime(year2+2000, 1, 1) + offset
        rows.append([epoch] + [1e-3 * float(value) for value in fields[2:]])

    columns = ['Epoch',
               'TROTOT', 'TROTOT_STD',
               'TGNTOT', 'TGNTOT_STD',
               'TGETOT', 'TGETOT_STD']
    df = pd.DataFrame(rows, columns=columns)
    one_day_after_start = df['Epoch'].values[0] + pd.Timedelta(1, unit='day')
    df = df[df['Epoch'] < one_day_after_start]
    return df.reset_index(drop=True)

def _parse_ionex_epoch(tokens):
    """Build a ``np.datetime64`` from the six leading tokens (Y M D h m s) of an epoch line."""
    return np.datetime64(datetime.datetime(
        year   = int(tokens[0]),
        month  = int(tokens[1]),
        day    = int(tokens[2]),
        hour   = int(tokens[3]),
        minute = int(tokens[4]),
        second = int(tokens[5]),
    ))

def read_IONEX_file(filename):
    """Read a 2-D IONEX ionosphere map file.

    The header supplies the time span and interval, the single layer height,
    the latitude / longitude grids, the base radius and the value exponent.
    The file must use the 'COSZ' mapping function, a map dimension of 2 and a
    single height (all asserted). Grids given in descending order are flipped
    so that latitude and longitude are ascending in the result.

    Args:
        filename: path of the IONEX text file.

    Returns:
        IONEX object. Angles are converted to radians, heights / radii are
        multiplied by 1000, and ``iono_map`` has shape (N_time, N_lat, N_lng)
        with the raw integers scaled by ``10**EXPONENT``.

    Raises:
        AssertionError: if the header or the map layout is not as expected.
    """
    with open(filename, 'r') as f:
        lines = f.readlines()
    kw = dict()
    # EXPONENT is an optional header record; the IONEX format defines -1 as its
    # default, so a file without the record is still scaled correctly.
    TEC_coeff = 10.0 ** -1
    #==============================
    # read header
    #==============================
    for index, line in enumerate(lines):
        tokens = line.strip().split()
        if 'EPOCH OF FIRST MAP' in line:
            kw['time_1'] = _parse_ionex_epoch(tokens)
            continue
        if 'EPOCH OF LAST MAP' in line:
            kw['time_2'] = _parse_ionex_epoch(tokens)
            continue
        if 'INTERVAL' in line:
            kw['time_delta'] = np.timedelta64(datetime.timedelta(
                seconds=int(tokens[0]),
            ))
            continue
        if 'HGT1 / HGT2 / DHGT' in line:
            h1, h2, dh = [float(x) for x in tokens[0:3]]
            # Only single-layer maps are supported.
            assert(h1 == h2)
            assert(dh == 0.0)
            kw['iono_height'] = h1 * 1000
            continue
        if 'LAT1 / LAT2 / DLAT' in line:
            lat_1, lat_2, lat_delta = [float(x) for x in tokens[0:3]]
            assert((lat_2 - lat_1) * lat_delta > 0)
            # Normalize to an ascending grid; remember to flip the data later.
            if (lat_1 > lat_2):
                flip_lat = True
                lat_1, lat_2 = lat_2, lat_1
                lat_delta = - lat_delta
            else:
                flip_lat = False
            kw['lat_1']     = np.deg2rad(lat_1)
            kw['lat_2']     = np.deg2rad(lat_2)
            kw['lat_delta'] = np.deg2rad(lat_delta)
            continue
        if 'LON1 / LON2 / DLON' in line:
            lng_1, lng_2, lng_delta = [float(x) for x in tokens[0:3]]
            assert((lng_2 - lng_1) * lng_delta > 0)
            if (lng_1 > lng_2):
                flip_lng = True
                lng_1, lng_2 = lng_2, lng_1
                lng_delta = - lng_delta
            else:
                flip_lng = False
            kw['lng_1']     = np.deg2rad(lng_1)
            kw['lng_2']     = np.deg2rad(lng_2)
            kw['lng_delta'] = np.deg2rad(lng_delta)
            continue
        if 'MAPPING FUNCTION' in line:
            assert(tokens[0] == 'COSZ')
            continue
        if 'BASE RADIUS' in line:
            kw['base_radius'] = 1000 * float(tokens[0])
            continue
        if 'EXPONENT' in line:
            TEC_coeff = 10**float(tokens[0])
            continue
        if 'MAP DIMENSION' in line:
            assert(int(tokens[0]) == 2)
            continue
        if 'END OF HEADER' in line:
            line_count = index + 1
            break
    #==============================
    # read data
    #==============================
    roundint = lambda x : int(round(x))
    N_lat  = 1 + roundint((kw['lat_2'] - kw['lat_1']) / kw['lat_delta'])
    N_lng  = 1 + roundint((kw['lng_2'] - kw['lng_1']) / kw['lng_delta'])
    N_time = 1 + roundint((kw['time_2'] - kw['time_1']) / kw['time_delta'])
    iono_map = np.zeros((N_time, N_lat, N_lng), dtype=np.float64)

    # Each latitude band is written as rows of at most 16 integers.
    data_per_line = 16
    lines_per_data = (N_lng + data_per_line - 1) // data_per_line

    for time_count in range(N_time):
        assert('START OF TEC MAP' in lines[line_count])
        assert(int(lines[line_count].strip().split()[0]) == time_count + 1)
        line_count += 1
        assert('EPOCH OF CURRENT MAP' in lines[line_count])
        line_count += 1
        for lat_count in range(N_lat):
            assert('LAT/LON1/LON2/DLON/H' in lines[line_count])
            line_count += 1
            values = []
            for i in range(lines_per_data):
                values.extend([int(x) for x in lines[line_count+i].strip().split()])
            # 9999 is only reported here; the value is stored unchanged.
            if 9999 in values:
                print('Warning: There is non-available TEC values.')
            iono_map[time_count, lat_count, :] = np.array(values).astype(float)
            line_count += lines_per_data
        assert('END OF TEC MAP' in lines[line_count])
        assert(int(lines[line_count].strip().split()[0]) == time_count + 1)
        line_count += 1

    if flip_lat:
        iono_map = np.flip(iono_map, axis=1)
    if flip_lng:
        iono_map = np.flip(iono_map, axis=2)
    iono_map = iono_map * TEC_coeff
    kw['iono_map']  = iono_map
    kw['lat_range'] = np.linspace(kw['lat_1'], kw['lat_2'], N_lat)
    kw['lng_range'] = np.linspace(kw['lng_1'], kw['lng_2'], N_lng)
    return IONEX(**kw)

In [None]:
%%writefile transform.py
import numpy as np
from dataclasses import dataclass

import constants as C

@dataclass
class ECEF:
    """Cartesian x / y / z coordinates, stored as one array per component."""
    x: np.array
    y: np.array
    z: np.array

    def to_numpy(self):
        """Stack the components along a new leading axis (x, y, z order)."""
        components = (self.x, self.y, self.z)
        return np.stack(components, axis=0)

    @staticmethod
    def from_numpy(pos):
        """Build an ECEF from an array whose LAST axis has length 3.

        The last axis is split into three parts and each part is squeezed.
        """
        parts = np.split(pos, 3, axis=-1)
        x, y, z = (np.squeeze(part) for part in parts)
        return ECEF(x=x, y=y, z=z)

@dataclass
class BLH:
    """Geodetic coordinates: latitude (B), longitude (L) and height (H)."""
    # Latitude and longitude in radians (the conversion functions in this
    # module apply sin/cos to them directly).
    lat : np.array
    lng : np.array
    # Height above the ellipsoid, in the same unit as the WGS84 axis constants.
    hgt : np.array

@dataclass
class ENU:
    """Local east / north / up offsets relative to a base point."""
    east  : np.array
    north : np.array
    up    : np.array

@dataclass
class AZEL:
    """Direction angles in radians, as produced by ``ENU_to_AZEL``."""
    # Angle above the local horizontal plane.
    elevation : np.array
    # arctan2(east, north).
    azimuth   : np.array
    # pi/2 - elevation.
    zenith    : np.array

def BLH_to_ECEF(blh):
    """Convert geodetic coordinates (radians, height) to ECEF on the WGS84 ellipsoid.

    Args:
        blh: object with ``lat``, ``lng`` (radians) and ``hgt`` attributes.

    Returns:
        ECEF with the corresponding x, y, z.
    """
    semi_major = C.WGS84_SEMI_MAJOR_AXIS
    ecc_sq     = C.WGS84_SQUARED_FIRST_ECCENTRICITY
    sin_lat, cos_lat = np.sin(blh.lat), np.cos(blh.lat)
    sin_lng, cos_lng = np.sin(blh.lng), np.cos(blh.lng)
    # Prime-vertical radius of curvature at this latitude.
    prime_vertical = semi_major / np.sqrt(1 - ecc_sq*sin_lat**2)
    # Distance from the rotation axis, shared by x and y.
    axis_distance = (prime_vertical + blh.hgt) * cos_lat
    return ECEF(
        x=axis_distance * cos_lng,
        y=axis_distance * sin_lng,
        z=((1 - ecc_sq) * prime_vertical + blh.hgt) * sin_lat,
    )

def ECEF_to_BLH_approximate(ecef):
    """Convert ECEF coordinates to geodetic latitude / longitude / height.

    Closed-form (non-iterative) conversion on the WGS84 ellipsoid. The height is
    computed as r / cos(lat) - N, so it is undefined where cos(lat) is zero.

    Args:
        ecef: object with ``x``, ``y`` and ``z`` attributes.

    Returns:
        BLH with latitude and longitude in radians.
    """
    semi_major = C.WGS84_SEMI_MAJOR_AXIS
    semi_minor = C.WGS84_SEMI_MINOR_AXIS
    ecc1_sq = C.WGS84_SQUARED_FIRST_ECCENTRICITY
    ecc2_sq = C.WGS84_SQUARED_SECOND_ECCENTRICITY
    # Distance from the rotation axis.
    r = np.sqrt(ecef.x**2 + ecef.y**2)
    # Auxiliary angle used by the closed-form latitude expression.
    t = np.arctan2(ecef.z * (semi_major/semi_minor), r)
    lat = np.arctan2(ecef.z + (ecc2_sq*semi_minor)*np.sin(t)**3,
                     r - (ecc1_sq*semi_major)*np.cos(t)**3)
    lng = np.arctan2(ecef.y, ecef.x)
    prime_vertical = semi_major / np.sqrt(1 - ecc1_sq*np.sin(lat)**2)
    hgt = (r / np.cos(lat)) - prime_vertical
    return BLH(lat=lat, lng=lng, hgt=hgt)

# Default ECEF -> BLH conversion used by the rest of the module.
ECEF_to_BLH = ECEF_to_BLH_approximate

def ECEF_to_ENU(pos, base):
    """Express ``pos`` in the local east / north / up frame centred at ``base``.

    Args:
        pos:  ECEF position(s) to convert.
        base: ECEF position of the local frame origin; its geodetic latitude and
              longitude define the frame orientation.

    Returns:
        ENU offsets of ``pos`` relative to ``base``.
    """
    delta_x = pos.x - base.x
    delta_y = pos.y - base.y
    delta_z = pos.z - base.z
    origin = ECEF_to_BLH(base)
    sin_lat, cos_lat = np.sin(origin.lat), np.cos(origin.lat)
    sin_lng, cos_lng = np.sin(origin.lng), np.cos(origin.lng)
    east  = -sin_lng*delta_x + cos_lng*delta_y
    north = -sin_lat*cos_lng*delta_x - sin_lat*sin_lng*delta_y + cos_lat*delta_z
    up    =  cos_lat*cos_lng*delta_x + cos_lat*sin_lng*delta_y + sin_lat*delta_z
    return ENU(east=east, north=north, up=up)

def ENU_to_AZEL(enu):
    """Convert east / north / up offsets to elevation, azimuth and zenith angles.

    All angles are in radians: elevation is measured up from the horizontal
    plane, azimuth is arctan2(east, north), zenith is pi/2 - elevation.
    """
    horizontal = np.sqrt(enu.east**2 + enu.north**2)
    elevation = np.arctan2(enu.up, horizontal)
    return AZEL(
        elevation=elevation,
        azimuth=np.arctan2(enu.east, enu.north),
        zenith=(0.5 * np.pi) - elevation,
    )

def ECEF_to_AZEL(pos, base):
    """Azimuth / elevation / zenith of ``pos`` as seen from ``base`` (both ECEF)."""
    local_offset = ECEF_to_ENU(pos, base)
    return ENU_to_AZEL(local_offset)

def haversine_distance(blh_1, blh_2):
    """Great-circle distance between two points on a sphere of radius C.HAVERSINE_RADIUS.

    Args:
        blh_1, blh_2: objects with ``lat`` and ``lng`` in radians; heights are ignored.

    Returns:
        Distance in the unit of ``C.HAVERSINE_RADIUS``.
    """
    half_dlat = (blh_2.lat - blh_1.lat)/2
    half_dlng = (blh_2.lng - blh_1.lng)/2
    hav = np.sin(half_dlat)**2 + np.cos(blh_1.lat) * np.cos(blh_2.lat) * np.sin(half_dlng)**2
    return 2 * C.HAVERSINE_RADIUS * np.arcsin(np.sqrt(hav))

def hubenys_distance(blh_1, blh_2):
    """Approximate distance between two nearby points on the WGS84 ellipsoid.

    Uses the meridian (M) and prime-vertical (N) radii of curvature evaluated at
    the mean latitude of the two points.

    Args:
        blh_1, blh_2: objects with ``lat`` and ``lng`` in radians; heights are ignored.

    Returns:
        Distance in the unit of ``C.WGS84_SEMI_MAJOR_AXIS``.
    """
    Rx = C.WGS84_SEMI_MAJOR_AXIS
    E2 = C.WGS84_SQUARED_FIRST_ECCENTRICITY
    num_M = Rx * (1 - E2)
    Dy = blh_1.lat - blh_2.lat
    Dx = blh_1.lng - blh_2.lng
    # Mean latitude, at which both radii of curvature are evaluated.
    P  = 0.5 * (blh_1.lat + blh_2.lat)
    W  = np.sqrt(1 - E2 * np.sin(P)**2)
    M  = num_M / W**3
    N  = Rx / W
    d2 = (Dy * M)**2 + (Dx * N * np.cos(P))**2
    return np.sqrt(d2)

def jacobian_BLH_to_ECEF(blh):
    """Jacobian of the geodetic -> ECEF conversion, d(X, Y, Z) / d(B, L, H).

    Differentiates
        X = (N + H) cosB cosL
        Y = (N + H) cosB sinL
        Z = ((1 - e2) N + H) sinB
    with N = a / sqrt(1 - e2 sin^2 B).

    Args:
        blh: object with ``lat``, ``lng`` (radians) and ``hgt`` attributes of a
             common shape S.

    Returns:
        Array of shape S + (3, 3); rows are X, Y, Z and columns are B, L, H.
    """
    a  = C.WGS84_SEMI_MAJOR_AXIS
    e2 = C.WGS84_SQUARED_FIRST_ECCENTRICITY
    B = blh.lat
    L = blh.lng
    H = blh.hgt
    cos_B = np.cos(B)
    sin_B = np.sin(B)
    cos_L = np.cos(L)
    sin_L = np.sin(L)
    N = a / np.sqrt(1 - e2*sin_B**2)
    dNdB = a * e2 * sin_B * cos_B * (1 - e2*sin_B**2)**(-3/2)
    N_plus_H = N + H
    cos_B_cos_L = cos_B * cos_L
    cos_B_sin_L = cos_B * sin_L
    sin_B_cos_L = sin_B * cos_L
    sin_B_sin_L = sin_B * sin_L

    dXdB = dNdB*cos_B_cos_L - N_plus_H*sin_B_cos_L
    dYdB = dNdB*cos_B_sin_L - N_plus_H*sin_B_sin_L
    # Only N is scaled by (1 - e2) in Z; the height term enters unscaled.
    dZdB = (1-e2)*dNdB*sin_B + ((1-e2)*N + H)*cos_B

    dXdL = - N_plus_H * cos_B_sin_L
    dYdL =   N_plus_H * cos_B_cos_L
    dZdL = np.zeros_like(dXdL)

    dXdH = cos_B_cos_L
    dYdH = cos_B_sin_L
    dZdH = sin_B

    J = np.stack([[dXdB, dXdL, dXdH],
                  [dYdB, dYdL, dYdH],
                  [dZdB, dZdL, dZdH]], axis=0)
    # Move the two matrix axes to the end: (3, 3) + S -> S + (3, 3).
    axes = list(range(2, J.ndim)) + [0, 1]
    J = np.transpose(J, axes)
    return J

def jacobian_ECEF_to_ENU(blh):
    """Rotation matrix d(E, N, U) / d(X, Y, Z) for a local frame at ``blh``.

    Args:
        blh: object with ``lat`` and ``lng`` (radians) of a common shape S.

    Returns:
        Array of shape S + (3, 3); rows are east, north, up and columns are X, Y, Z.
    """
    sin_lat, cos_lat = np.sin(blh.lat), np.cos(blh.lat)
    sin_lng, cos_lng = np.sin(blh.lng), np.cos(blh.lng)

    minus_sin_lng = -sin_lng
    east_row  = [minus_sin_lng, cos_lng, np.zeros_like(minus_sin_lng)]
    north_row = [-sin_lat*cos_lng, -sin_lat*sin_lng, cos_lat]
    up_row    = [cos_lat*cos_lng, cos_lat*sin_lng, sin_lat]

    stacked = np.stack([east_row, north_row, up_row], axis=0)
    # Move the two matrix axes to the end: (3, 3) + S -> S + (3, 3).
    trailing_matrix_order = list(range(2, stacked.ndim)) + [0, 1]
    return np.transpose(stacked, trailing_matrix_order)

def jacobian_BL_to_EN(BLH):
    """Jacobian d(E, N) / d(B, L): how latitude / longitude changes move east / north.

    Chains the BLH -> ECEF and ECEF -> ENU Jacobians, keeping only the east and
    north rows and the latitude and longitude columns.

    Args:
        BLH: geodetic coordinates whose components are 1-D arrays of length n
             (the einsum expects a single leading batch axis).

    Returns:
        Array of shape (n, 2, 2).
    """
    ecef_wrt_blh = jacobian_BLH_to_ECEF(BLH)
    enu_wrt_ecef = jacobian_ECEF_to_ENU(BLH)
    en_rows = enu_wrt_ecef[:, 0:2, :]
    bl_cols = ecef_wrt_blh[:, :, 0:2]
    return np.einsum('nij,njk->nik', en_rows, bl_cols)

def pd_haversine_distance(df1, df2):
    """Row-wise haversine distance between two frames with 'latDeg' / 'lngDeg' columns.

    The degree columns are converted to radians and the height is set to 0.
    """
    def frame_to_blh(frame):
        # Degrees in the frame -> radians expected by haversine_distance.
        return BLH(
            lat=np.deg2rad(frame['latDeg'].values),
            lng=np.deg2rad(frame['lngDeg'].values),
            hgt=0,
        )

    return haversine_distance(frame_to_blh(df1), frame_to_blh(df2))

In [None]:
%%writefile preprocess.py
import sys
import os
import glob
import itertools
import traceback
import multiprocessing
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from scipy.interpolate import InterpolatedUnivariateSpline

import io_f
import constants as C

# Dataset locations (relative to the working directory) and the output folder
# for the per-phone feature CSV files.
INPUT_PATH  = '../input/google-smartphone-decimeter-challenge'
MERGED_PATH = '../input/gsdc-merged-clk-and-sp3'
DEST_PATH   = '_features'

# Numeric constellation ids, cached from constants.CONSTELLATION_TYPE_MAP.
CONSTE_ID_GPS = C.CONSTELLATION_TYPE_MAP['GPS']
CONSTE_ID_GLO = C.CONSTELLATION_TYPE_MAP['GLONASS']
CONSTE_ID_GAL = C.CONSTELLATION_TYPE_MAP['GALILEO']
CONSTE_ID_QZS = C.CONSTELLATION_TYPE_MAP['QZSS']
CONSTE_ID_BDS = C.CONSTELLATION_TYPE_MAP['BEIDOU']

def add_extra_features_1st(key, gnss_df, log):
    """Add time stamps and satellite / signal identifiers to the raw measurements.

    The frame is modified in place and also returned. Added columns:
    'millisSinceGpsEpoch', 'Epoch', 'SatName' (system letter + two-digit number),
    'SignalType' and 'FixedSvid'.

    Args:
        key:     '<prefix>_<drive>_<phone>' string; selects the per-drive entry of
                 C.GLONASS_FCN_OSN_MAP for GLONASS rows whose Svid is above 24.
        gnss_df: raw measurement frame with 'TimeNanos', 'FullBiasNanos',
                 'ConstellationType', 'Svid' and 'CarrierFrequencyHz' columns.
        log:     message list (not used by this step).

    Returns:
        ``gnss_df`` with the extra columns.

    Raises:
        RuntimeError:   a row matches none of the known constellation / frequency pairs.
        AssertionError: an Svid or a GLONASS frequency is outside its expected range.
        KeyError:       a QZSS or GLONASS Svid is missing from its lookup table.
    """
    nanosSinceGpsEpoch = gnss_df['TimeNanos'] - gnss_df['FullBiasNanos']
    gnss_df['millisSinceGpsEpoch'] = nanosSinceGpsEpoch // 10**6
    gnss_df['Epoch'] = pd.to_timedelta(nanosSinceGpsEpoch, unit='ns') + C.GPS_ORIGIN_DATETIME

    N = gnss_df.shape[0]
    # The `np.object` alias no longer exists in NumPy; builtin `object` is the same dtype.
    SatName    = np.empty(shape=(N, ), dtype=object)
    SignalType = np.empty(shape=(N, ), dtype=object)
    FixedSvid  = np.empty(shape=(N, ), dtype=np.int32)
    columns = ['ConstellationType', 'Svid', 'CarrierFrequencyHz']
    for index, (conste, svid, freq) in enumerate(gnss_df[columns].itertuples(index=False, name=None)):
        if (conste == CONSTE_ID_GPS) and abs(freq - C.FREQ_GPS_L1) < C.FREQ_TOL:
            sat_name_prefix = C.SYSTEM_NAME_MAP['GPS']
            signal_type = 'GPS_L1'
            assert((1 <= svid) and (svid <= 32))
            fixed_svid = svid
        elif (conste == CONSTE_ID_GPS) and abs(freq - C.FREQ_GPS_L5) < C.FREQ_TOL:
            sat_name_prefix = C.SYSTEM_NAME_MAP['GPS']
            signal_type = 'GPS_L5'
            assert((1 <= svid) and (svid <= 32))
            fixed_svid = svid
        elif (conste == CONSTE_ID_GAL) and abs(freq - C.FREQ_GAL_E1) < C.FREQ_TOL:
            sat_name_prefix = C.SYSTEM_NAME_MAP['GALILEO']
            signal_type = 'GAL_E1'
            assert((1 <= svid) and (svid <= 36))
            fixed_svid = svid
        elif (conste == CONSTE_ID_GAL) and abs(freq - C.FREQ_GAL_E5A) < C.FREQ_TOL:
            sat_name_prefix = C.SYSTEM_NAME_MAP['GALILEO']
            signal_type = 'GAL_E5A'
            assert((1 <= svid) and (svid <= 36))
            fixed_svid = svid
        elif (conste == CONSTE_ID_BDS) and abs(freq - C.FREQ_BDS_B1I) < C.FREQ_TOL:
            sat_name_prefix = C.SYSTEM_NAME_MAP['BEIDOU']
            signal_type = 'BDS_B1I'
            assert((1 <= svid) and (svid <= 61))
            fixed_svid = svid
        elif (conste == CONSTE_ID_QZS) and abs(freq - C.FREQ_QZS_J1) < C.FREQ_TOL:
            sat_name_prefix = C.SYSTEM_NAME_MAP['QZSS']
            signal_type = 'QZS_J1'
            fixed_svid = C.QZSS_PRN_SVID_MAP[svid]
        elif (conste == CONSTE_ID_QZS) and abs(freq - C.FREQ_QZS_J5) < C.FREQ_TOL:
            sat_name_prefix = C.SYSTEM_NAME_MAP['QZSS']
            signal_type = 'QZS_J5'
            fixed_svid = C.QZSS_PRN_SVID_MAP[svid]
        elif (conste == CONSTE_ID_GLO):
            sat_name_prefix = C.SYSTEM_NAME_MAP['GLONASS']
            signal_type = 'GLO_G1'
            if svid > 24:
                # Svid encodes the frequency channel as (channel + 100); the slot
                # number comes from the per-drive lookup table.
                freq_channel = svid - 100
                fixed_svid   = C.GLONASS_FCN_OSN_MAP[key][str(svid)]
            else:
                freq_channel = C.GLONASS_FREQ_CHANNEL_MAP[svid]
                fixed_svid   = svid
            assert( (-7 <= freq_channel) and (freq_channel <= 6) )
            freq_nominal = C.FREQ_GLO_G1_NOMINAL + freq_channel * C.FREQ_GLO_G1_DELTA
            assert( abs(freq - freq_nominal) < C.FREQ_TOL )
        else:
            print((conste, svid, freq))
            raise RuntimeError('unknown signal type')
        SatName[index]    = f'{sat_name_prefix}{fixed_svid:02d}'
        SignalType[index] = signal_type
        FixedSvid[index]  = fixed_svid
    gnss_df['SatName']    = SatName
    gnss_df['SignalType'] = SignalType
    gnss_df['FixedSvid']  = FixedSvid
    return gnss_df

def bit_check(X, name):
    """Return whether the state flag called ``name`` is set in ``X`` (scalar or array)."""
    bit_position = C.RAW_STATE_BIT_INV_MAP[name]
    flag = 1 << bit_position
    return (X & flag) != 0

def remove_invalid_measurements(df, log):
    """Drop measurements that fail the state-flag or quality checks.

    A row is kept only if all of the following hold:
      * code lock is set ('Galileo E1BC Code Lock' for GAL_E1, 'Code Lock' otherwise),
      * time is decoded and known (time-of-day flags for GLONASS, time-of-week
        flags otherwise),
      * the 'Millisecond Ambiguity' flag is clear,
      * ReceivedSvTimeUncertaintyNanos and Cn0DbHz are within their thresholds,
      * FixedSvid is at least 1.

    Returns:
        The filtered frame with a fresh index. ``log`` is not used.
    """
    state = df['State'].values

    # Code lock: Galileo E1 has its own flag.
    is_gal_E1 = (df['SignalType'].values == 'GAL_E1')
    code_lock_ok = np.where(is_gal_E1,
                            bit_check(state, 'Galileo E1BC Code Lock'),
                            bit_check(state, 'Code Lock'))

    # Time decoded and known: GLONASS reports time of day, the rest time of week.
    is_glo = (df['ConstellationType'].values == C.CONSTELLATION_TYPE_MAP['GLONASS'])
    tow_ok = bit_check(state, 'Time Of Week Decoded State') & bit_check(state, 'Time Of Week Known')
    tod_ok = bit_check(state, 'GLONASS Time Of Day Decoded') & bit_check(state, 'GLONASS Time Of Day Known')
    time_of_ok = np.where(is_glo, tod_ok, tow_ok)

    checks = [
        code_lock_ok,
        time_of_ok,
        ~bit_check(state, 'Millisecond Ambiguity'),
        (df['ReceivedSvTimeUncertaintyNanos'] <= C.ReceivedSvTimeUncertaintyNanos_THRESHOLD).values,
        (df['Cn0DbHz'] >= C.Cn0DbHz_THRESHOLD).values,
        (df['FixedSvid'] >= 1).values,
    ]
    valid = np.logical_and.reduce(checks)
    return df[valid].reset_index(drop=True)

def add_clock_drift(raw_df, derived_df, log):
    """Attach a 'satClkDriftMps' column interpolated from the derived data.

    For every (SatName, SignalType) group of ``raw_df`` the matching rows of
    ``derived_df`` (same signalType and svid) are interpolated linearly in time;
    outside the derived time range the boundary value is used. Groups with fewer
    than two matching derived rows are dropped and reported in ``log``.

    Args:
        raw_df:     frame with 'millisSinceGpsEpoch', 'SatName', 'SignalType'.
        derived_df: frame with 'millisSinceGpsEpoch', 'signalType', 'svid',
                    'satClkDriftMps'; timestamps of one satellite / signal must be
                    strictly increasing (spline requirement).
        log:        list that receives one message per dropped satellite.

    Returns:
        New frame sorted by (millisSinceGpsEpoch, SatName, SignalType) with a
        fresh index. ``raw_df`` is left untouched.

    Raises:
        ValueError: if no group could be matched (nothing to concatenate).
    """
    output_df_list = []
    for (sat_name, sig_type), raw_sat_df in raw_df.groupby(['SatName', 'SignalType']):
        svid = int(sat_name[1:])
        same_signal = (derived_df['signalType'] == sig_type) & (derived_df['svid'] == svid)
        derived_sat_df = derived_df[same_signal]
        if derived_sat_df.shape[0] <= 1:
            log.append(f'{sat_name} not in derived.csv.')
            continue
        # Work in seconds relative to the first derived epoch to keep values small.
        t_ref = derived_sat_df['millisSinceGpsEpoch'].min()
        X_in  = 1e-3 * (derived_sat_df['millisSinceGpsEpoch'] - t_ref).values
        Y_in  = derived_sat_df['satClkDriftMps'].values
        X_out = 1e-3 * (raw_sat_df['millisSinceGpsEpoch'] - t_ref).values
        # k=1: piecewise linear; ext=3: hold the boundary value when extrapolating.
        Y_out = InterpolatedUnivariateSpline(X_in, Y_in, k=1, ext=3)(X_out)
        # Copy the group before adding a column so the assignment never targets a
        # slice of `raw_df` (avoids pandas' chained-assignment warning).
        raw_sat_df = raw_sat_df.copy()
        raw_sat_df['satClkDriftMps'] = Y_out
        output_df_list.append(raw_sat_df)
    output_df = pd.concat(output_df_list, axis=0)
    output_df = output_df.sort_values(['millisSinceGpsEpoch', 'SatName', 'SignalType'],
                                      ignore_index=True)
    return output_df

def add_satellite_orbit(gnss_df, log):
    """Attach interpolated satellite positions and velocities to each measurement.

    The SP3 CSV files covering the measurement span (extended by
    C.ORBIT_TIME_MARGIN on both sides) are loaded from MERGED_PATH and merged.
    For every satellite a degree-5 spline is fitted per coordinate; positions are
    the spline values at the measurement epochs and velocities its derivative.
    Satellites without orbit data are dropped and reported in ``log``.

    Args:
        gnss_df: frame with 'Epoch', 'SatName', 'millisSinceGpsEpoch', 'SignalType'.
        log:     list that receives one message per dropped satellite.

    Returns:
        New frame with 'xSatPosM', 'ySatPosM', 'zSatPosM', 'xSatVelMps',
        'ySatVelMps', 'zSatVelMps' added, sorted by
        (millisSinceGpsEpoch, SatName, SignalType) with a fresh index.
    """
    sp3_days = np.unique([(gnss_df['Epoch'].min() - C.ORBIT_TIME_MARGIN).date(),
                          (gnss_df['Epoch'].max() + C.ORBIT_TIME_MARGIN).date()])
    sp3_filelist = [os.path.join(MERGED_PATH, d.strftime('SP3_%Y%m%d.csv')) for d in sp3_days]
    sp3_df_list = [pd.read_csv(f) for f in sp3_filelist]
    for df in sp3_df_list:
        # `astype(np.datetime64)` without a unit is rejected by current pandas;
        # pd.to_datetime parses the CSV strings the same way.
        df['Epoch'] = pd.to_datetime(df['Epoch'])
    sp3_df = io_f.concat_sp3(sp3_df_list)

    # Common time origin so the spline abscissae are small numbers of seconds.
    t0 = gnss_df['Epoch'].min()

    gnss_sat_dfs = []
    for sat, gnss_sat_df in gnss_df.groupby('SatName'):
        sp3_sat_df = sp3_df[sp3_df['SatName'] == sat]
        if sp3_sat_df.shape[0] == 0:
            log.append(f'{sat} not available.')
            continue
        TIME_ref = (sp3_sat_df['Epoch'] - t0).astype(np.int64).values * 1e-9
        xPos_fn  = InterpolatedUnivariateSpline(TIME_ref, sp3_sat_df['X'].values, k=5)
        yPos_fn  = InterpolatedUnivariateSpline(TIME_ref, sp3_sat_df['Y'].values, k=5)
        zPos_fn  = InterpolatedUnivariateSpline(TIME_ref, sp3_sat_df['Z'].values, k=5)
        xVel_fn  = xPos_fn.derivative()
        yVel_fn  = yPos_fn.derivative()
        zVel_fn  = zPos_fn.derivative()

        TIME = (gnss_sat_df['Epoch'] - t0).astype(np.int64).values * 1e-9
        # Copy the group before adding columns so the assignments never target a
        # slice of `gnss_df` (avoids pandas' chained-assignment warning).
        gnss_sat_df = gnss_sat_df.copy()
        gnss_sat_df['xSatPosM']   = xPos_fn(TIME)
        gnss_sat_df['ySatPosM']   = yPos_fn(TIME)
        gnss_sat_df['zSatPosM']   = zPos_fn(TIME)
        gnss_sat_df['xSatVelMps'] = xVel_fn(TIME)
        gnss_sat_df['ySatVelMps'] = yVel_fn(TIME)
        gnss_sat_df['zSatVelMps'] = zVel_fn(TIME)
        gnss_sat_dfs.append(gnss_sat_df)
    gnss_df = pd.concat(gnss_sat_dfs, axis=0)
    gnss_df = gnss_df.sort_values(['millisSinceGpsEpoch', 'SatName', 'SignalType'])
    gnss_df = gnss_df.reset_index(drop=True)
    return gnss_df

def remove_QZSS(df, log):
    """Drop a QZSS signal type entirely when it has fewer than 50 measurements.

    'QZS_J1' and 'QZS_J5' are checked independently. The result is sorted by
    (millisSinceGpsEpoch, SatName, SignalType) and re-indexed. ``log`` is not used.
    """
    for qzss_signal in ('QZS_J1', 'QZS_J5'):
        is_signal = (df['SignalType'] == qzss_signal)
        if is_signal.sum() < 50:
            df = df[~is_signal]
    ordered = df.sort_values(['millisSinceGpsEpoch', 'SatName', 'SignalType'])
    return ordered.reset_index(drop=True)

def fix_derived_timestamp(df_raw, df_derived):
    """Snap every derived timestamp to the closest EARLIER raw timestamp.

    Each 'millisSinceGpsEpoch' in ``df_derived`` is replaced by the largest raw
    timestamp that is strictly smaller. A derived timestamp at or before the
    first raw timestamp has no earlier raw epoch and is mapped to the first raw
    timestamp (previously the index wrapped around to the LAST raw epoch).

    Args:
        df_raw:     frame with a 'millisSinceGpsEpoch' column (must not be empty
                    when ``df_derived`` has rows).
        df_derived: frame with 'millisSinceGpsEpoch', 'constellationType', 'svid'
                    and 'signalType'; its timestamp column is overwritten in place.

    Returns:
        The derived frame with rows that became duplicates of
        (millisSinceGpsEpoch, constellationType, svid, signalType) removed,
        sorted by timestamp with a fresh index.
    """
    # np.unique returns sorted values, which searchsorted requires.
    raw_timestamps = np.unique(df_raw['millisSinceGpsEpoch'].values)
    derived_timestamps = df_derived['millisSinceGpsEpoch'].values
    # side='left' gives the first raw index with raw >= t, so index - 1 is the
    # last raw epoch strictly before t; clamp so it can never become -1.
    indexes = np.searchsorted(raw_timestamps, derived_timestamps)
    previous = np.maximum(indexes - 1, 0)
    df_derived['millisSinceGpsEpoch'] = raw_timestamps[previous]

    df_derived = df_derived.drop_duplicates(['millisSinceGpsEpoch', 'constellationType', 'svid', 'signalType'])
    df_derived = df_derived.sort_values('millisSinceGpsEpoch', ignore_index=True)

    return df_derived

def process_gnss_df(prefix, drive, phone, key, df):
    """Run the full feature pipeline on one phone's raw measurements.

    Steps: add identifiers and time stamps, drop invalid measurements, drop
    sparse QZSS signals, add satellite clock drift from '<phone>_derived.csv',
    then add satellite orbits.

    Returns:
        (features, messages): the processed frame and the log messages collected
        along the way, each prefixed with the padded ``key``.
    """
    messages = []
    features = add_extra_features_1st(key, df, messages)
    features = remove_invalid_measurements(features, messages)
    features = remove_QZSS(features, messages)

    derived = pd.read_csv(f'{INPUT_PATH}/{prefix}/{drive}/{phone}/{phone}_derived.csv')
    derived = fix_derived_timestamp(features, derived)
    features = add_clock_drift(features, derived, messages)

    features = add_satellite_orbit(features, messages)
    tagged_messages = [f'[{key:<50}] {msg}' for msg in messages]
    return features, tagged_messages

def do_preprocess(path):
    """Preprocess one '<prefix>/<drive>/<phone>' directory and write its feature CSV.

    Args:
        path: directory path whose last three components are prefix, drive and phone.

    Returns:
        List of log messages. On success the last entry reports how many rows
        survived; if the pipeline raises ValueError the traceback is printed to
        stderr and a single '<key>: Fail' message is returned.
    """
    prefix, drive, phone = path.split('/')[-3:]
    key = f'{prefix}_{drive}_{phone}'
    df1 = io_f.read_GnssLog_Raw(f'{INPUT_PATH}/{prefix}/{drive}/{phone}/{phone}_GnssLog.txt')
    try:
        df2, log = process_gnss_df(prefix, drive, phone, key, df1)
        df2.to_csv(f'{DEST_PATH}/{key}.csv', index=False)
        N1  = df1.shape[0]
        N2  = df2.shape[0]
        percent = 100.0 * N2 / N1
        log.append(f'[{key:<50}] {N2}/{N1} ({percent:.1f})')
        return log
    except ValueError:
        traceback.print_exc(file=sys.stderr)
        # Always return a list of messages: a bare string would be split into
        # single characters by any caller that flattens the per-path results.
        return [f'{key:<50}: Fail']

def main():
    """Preprocess every train / test phone directory in parallel and write the log.

    One worker process per CPU runs ``do_preprocess``; each result is iterated
    as a sequence of messages, and all messages are written, sorted, to
    '_preprocess.log'.
    """
    os.makedirs(DEST_PATH, exist_ok=True)

    train_paths = glob.glob(f'{INPUT_PATH}/train/*/*')
    test_paths  = glob.glob(f'{INPUT_PATH}/test/*/*')
    pathlist = sorted(train_paths + test_paths)

    with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
        per_path_results = pool.imap_unordered(do_preprocess, pathlist)
        with_progress = tqdm(per_path_results, total=len(pathlist))
        # Consume the lazy iterator while the pool is still alive.
        messages = list(itertools.chain.from_iterable(with_progress))
    with open('_preprocess.log', 'w') as log_file:
        for msg in sorted(messages):
            print(msg, file=log_file)
    return

In [None]:
%%writefile estimate_velocity.py
import glob
import datetime
import sys
import os
import multiprocessing
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from scipy.interpolate import InterpolatedUnivariateSpline

import io_f
import constants as C
import transform

# Directory layout: competition input, per-phone feature CSVs read by this
# module, and the directory the velocity estimates are written to.
INPUT_PATH   = '../input/google-smartphone-decimeter-challenge'
FEATURE_PATH = '_features'
DEST_PATH    = '_doppler_velocity'

# Baseline positions for the train and test phones, loaded once at import
# time and stacked row-wise (train first, then test).
BASELINE_DF = pd.concat(
    [
        pd.read_csv(f'../input/gsdc-baseline-smoothing-result/smoothing_1st_{split}.csv')
        for split in ('train', 'test')
    ],
    axis=0,
)
def get_baseline(drive, phone):
    """
    Return the baseline rows of one phone.

    Rows of BASELINE_DF whose 'phone' column equals `{drive}_{phone}` are
    copied into a new frame with a fresh 0..n-1 index, so the caller can
    modify the result without touching BASELINE_DF.
    """
    is_this_phone = BASELINE_DF['phone'] == f'{drive}_{phone}'
    return BASELINE_DF[is_this_phone].copy().reset_index(drop=True)

def L2_norm(x):
    """Euclidean norm of each row of `x` (reduction along axis 1)."""
    squared = x ** 2
    return np.sqrt(squared.sum(axis=1))

def calc_azel(sat_pos, rec_pos):
    """
    Convert satellite and receiver positions to the result of
    `transform.ECEF_to_AZEL`, with the receiver as the base point.

    Both inputs are numpy arrays that are wrapped with
    `transform.ECEF.from_numpy` before the conversion.
    """
    return transform.ECEF_to_AZEL(
        pos=transform.ECEF.from_numpy(sat_pos),
        base=transform.ECEF.from_numpy(rec_pos),
    )

def calc_blh(pos):
    """
    Convert a numpy ECEF position to the result of `transform.ECEF_to_BLH`.

    The array is first wrapped with `transform.ECEF.from_numpy`.
    """
    ecef = transform.ECEF.from_numpy(pos)
    return transform.ECEF_to_BLH(ecef)

def mask_to_weight(mask, epsilon=1e-2):
    """
    Turn a mask into soft weights.

    The mask is cast to float and mapped linearly so that 0 becomes
    `epsilon` and 1 becomes `(1 - epsilon) + epsilon`; masked-out entries
    are therefore down-weighted rather than dropped.
    """
    weights = mask.astype(float)  # astype returns a copy, the input is untouched
    weights = (1 - epsilon) * weights
    return weights + epsilon

def solve_least_square(A, b, epsilon=1e-12):
    """
    Minimise J = ||A x - b||^2 through the normal equations.

    The system (A^T A) x = A^T b is solved with an SVD of the symmetric
    matrix A^T A.

    Parameters
    ----------
    A : ndarray, shape (M, N)
        Design matrix.
    b : ndarray, shape (M,)
        Right-hand side.
    epsilon : float
        Conditioning threshold: the system is rejected when
        (largest / smallest singular value of A^T A) * epsilon > 1.

    Returns
    -------
    ndarray of shape (N,) or None
        The least-squares solution, or None when A^T A is singular or too
        ill-conditioned to be trusted.
    """
    sys_A = A.T @ A
    sys_b = A.T @ b
    # Singular values are returned in descending order.
    u, s, vh = np.linalg.svd(sys_A, hermitian=True)
    s_max, s_min = s[0], s[-1]
    # An exactly singular system must be rejected before forming the ratio:
    # s_max / 0 emits a divide warning, and 0 / 0 yields NaN, which would
    # pass the threshold test below and produce a NaN "solution".
    if s_min <= 0:
        return None
    if (s_max / s_min) * epsilon > 1:
        return None
    return vh.T @ ((u.T @ sys_b) / s)

def estimate_velocity_1epoch(name, epoch_df, rec_blh):
    """
    Estimate the receiver velocity at one epoch from pseudorange rates.

    A weighted least-squares problem is built with four unknowns: the three
    ECEF velocity components plus one term shared by all rows (the column of
    ones; presumably the receiver clock drift). Each row is weighted by
    1 / (0.05 + reported pseudorange-rate uncertainty), and rows whose
    elevation is not above 5 degrees are further down-weighted through
    `mask_to_weight`.

    Parameters
    ----------
    name : str
        Label used only in the diagnostic prints.
    epoch_df : DataFrame
        Rows of a single epoch. Columns read: millisSinceGpsEpoch,
        x/y/zSatPosM, x/y/zSatVelMps, satClkDriftMps,
        PseudorangeRateMetersPerSecond and
        PseudorangeRateUncertaintyMetersPerSecond.
    rec_blh
        Receiver position, passed to `transform.BLH_to_ECEF` and
        `transform.jacobian_ECEF_to_ENU`.

    Returns
    -------
    (v_rec_ecef, v_rec_enu) or None
        The velocity in ECEF and the same vector multiplied by the
        ECEF-to-ENU Jacobian. None when the epoch has fewer than 4 rows or
        the solver rejects the system.
    """
    ms_epoch = epoch_df['millisSinceGpsEpoch'].values[0]
    n_rows = epoch_df.shape[0]
    if n_rows < 4:
        print(f'[{name} {ms_epoch}] Too few satellites.')
        return None

    sat_pos_logged = epoch_df[['xSatPosM', 'ySatPosM', 'zSatPosM']].values
    sat_vel        = epoch_df[['xSatVelMps', 'ySatVelMps', 'zSatVelMps']].values
    sat_clk_drift  = epoch_df['satClkDriftMps'].values
    range_rate     = epoch_df['PseudorangeRateMetersPerSecond'].values
    sigma          = 0.05 + epoch_df['PseudorangeRateUncertaintyMetersPerSecond'].values

    rec_pos = transform.BLH_to_ECEF(rec_blh).to_numpy()

    # Shift each satellite back along its velocity by the signal travel time
    # (distance to the receiver divided by the speed of light).
    travel_time = L2_norm(rec_pos - sat_pos_logged) / C.LIGHT_SPEED
    sat_pos = sat_pos_logged - sat_vel * travel_time[:, np.newaxis]

    elevation_ok = calc_azel(sat_pos, rec_pos).elevation > np.deg2rad(5.0)
    ecef_to_enu = transform.jacobian_ECEF_to_ENU(rec_blh)

    # Unit vectors from the receiver towards each satellite.
    line_of_sight = sat_pos - rec_pos
    unit_los = line_of_sight / L2_norm(line_of_sight)[:, np.newaxis]

    design = np.hstack([-unit_los, np.ones((n_rows, 1))])
    rhs = range_rate - np.sum(unit_los * sat_vel, axis=1) + sat_clk_drift

    # Scale every row of the system by its weight before solving.
    weights = mask_to_weight(elevation_ok) * (1 / sigma)
    weighted_design = (design.T * weights).T
    weighted_rhs = rhs * weights

    solution = solve_least_square(weighted_design, weighted_rhs)
    if solution is None:
        print(f'[{name} {ms_epoch}] Rank deficient.')
        return None

    v_rec_ecef = solution[0:3]
    return v_rec_ecef, ecef_to_enu @ v_rec_ecef

def estimate_velocity(args):
    """
    Estimate the receiver velocity at every epoch of one phone and save it.

    The phone's feature CSV is read from FEATURE_PATH. For each distinct
    `millisSinceGpsEpoch`, the receiver position is taken from cubic splines
    fitted to the baseline latitude/longitude (height fixed at 0.0) and
    passed with that epoch's rows to `estimate_velocity_1epoch`. Epochs for
    which that returns None are omitted from the output.

    Parameters
    ----------
    args : sequence of (prefix, drive, phone)
        Packed into one argument so the function can be mapped over a
        process pool.

    Returns
    -------
    None
        The result is written to `{DEST_PATH}/{prefix}_{drive}_{phone}.csv`
        with columns millisSinceGpsEpoch, v_x, v_y, v_z, v_east, v_north,
        v_up, collectionName, phoneName and phone.
    """
    prefix, drive, phone = args
    name = f'{prefix}_{drive}_{phone}'

    gnss_df = pd.read_csv(f'{FEATURE_PATH}/{prefix}_{drive}_{phone}.csv')
    # pd.to_datetime rather than astype(np.datetime64): casting to a
    # unit-less datetime64 dtype is rejected by pandas 2.x.
    gnss_df['Epoch'] = pd.to_datetime(gnss_df['Epoch'])

    base_df = get_baseline(drive, phone)
    t_ref   = base_df['millisSinceGpsEpoch'].min()
    # Seconds since the first baseline sample, to keep the spline abscissa small.
    TIME    = 1e-3 * (base_df['millisSinceGpsEpoch'] - t_ref).values
    B       = np.deg2rad(base_df['latDeg'].values)
    L       = np.deg2rad(base_df['lngDeg'].values)
    # ext=3: outside the baseline time span the spline returns its boundary value.
    B_fn    = InterpolatedUnivariateSpline(TIME, B, k=3, ext=3)
    L_fn    = InterpolatedUnivariateSpline(TIME, L, k=3, ext=3)

    data = []
    for epoch, epoch_df in gnss_df.groupby('millisSinceGpsEpoch'):
        t = 1e-3 * (epoch - t_ref)
        rec_blh = transform.BLH(lat=B_fn(t),
                                lng=L_fn(t),
                                hgt=0.0)
        result = estimate_velocity_1epoch(name, epoch_df, rec_blh)
        if result is None:
            continue
        v_rec_ecef, v_rec_enu = result
        data.append((epoch,
                     v_rec_ecef[0], v_rec_ecef[1], v_rec_ecef[2],
                     v_rec_enu[0], v_rec_enu[1], v_rec_enu[2],
                     ))
    columns = ['millisSinceGpsEpoch', 'v_x', 'v_y', 'v_z', 'v_east', 'v_north', 'v_up']
    pred_df = pd.DataFrame(data, columns=columns).sort_values('millisSinceGpsEpoch')
    pred_df['collectionName'] = drive
    pred_df['phoneName'] = phone
    pred_df['phone'] = f'{drive}_{phone}'
    pred_df.to_csv(f'{DEST_PATH}/{prefix}_{drive}_{phone}.csv', index=False)
    return
    
def main():
    """
    Estimate velocities for every train/test phone, then merge the results.

    Each phone directory under `{INPUT_PATH}/train` and `{INPUT_PATH}/test`
    is processed by `estimate_velocity` in a process pool with one worker per
    CPU. Afterwards the per-phone CSVs in DEST_PATH are concatenated into
    `doppler_velocity_train.csv` and `doppler_velocity_test.csv`.
    """
    os.makedirs(DEST_PATH, exist_ok=True)

    phone_dirs = sorted(glob.glob(f'{INPUT_PATH}/train/*/*')
                        + glob.glob(f'{INPUT_PATH}/test/*/*'))
    # The last three path components are (prefix, drive, phone).
    args_list = [phone_dir.split('/')[-3:] for phone_dir in phone_dirs]

    n_workers = multiprocessing.cpu_count()
    with multiprocessing.Pool(processes=n_workers) as pool:
        progress = tqdm(pool.imap_unordered(estimate_velocity, args_list),
                        total=len(args_list))
        # The workers return nothing; the iterator is drained only to wait
        # for completion and advance the progress bar.
        for _ in progress:
            pass

    # Build both merged frames before writing either of them.
    merged = {
        split: pd.concat(
            [pd.read_csv(csv_path)
             for csv_path in sorted(glob.glob(f'{DEST_PATH}/{split}_*.csv'))],
            axis=0,
        )
        for split in ('train', 'test')
    }
    for split, frame in merged.items():
        frame.to_csv(f'{DEST_PATH}/doppler_velocity_{split}.csv', index=False)
    return None

## main

In [None]:
# Stage 1: run the preprocessing entry point. Presumably `preprocess` is the
# module written by an earlier %%writefile cell — verify it exists before running.
import preprocess
preprocess.main()

In [None]:
# Stage 2: run the velocity estimation written to estimate_velocity.py above.
# It reads per-phone CSVs from its FEATURE_PATH directory; presumably those are
# produced by the preprocessing stage, so that cell must run first — TODO confirm.
import estimate_velocity
estimate_velocity.main()