## Config

In [None]:
import pandas as pd
# Read the baseline result CSVs for the train and the test split and stack
# them row-wise into one DataFrame.  ignore_index is not set, so each part
# keeps the row labels it had in its own CSV.
BASELINE_DF = pd.concat([pd.read_csv('../input/gsdc-improved-raw-gnss-baseline-result/raw_gnss_train.csv'),
                         pd.read_csv('../input/gsdc-improved-raw-gnss-baseline-result/raw_gnss_test.csv'),
                        ], axis=0)

## Libraries

In [None]:
%%writefile constants.py
import json
import datetime
from collections import defaultdict
import numpy as np

# Origin of the GPS time scale (1980-01-06), once as a date and once as a
# naive datetime.
GPS_ORIGIN_DAY       = datetime.date(1980, 1, 6)
GPS_ORIGIN_DATETIME  = datetime.datetime(1980, 1, 6)
# Whole-second offsets associated with GLONASS and BeiDou time
# (presumably relative to GPS time -- confirm at the point of use).
GLONASS_LEAP_SECONDS = 18
BEIDOU_LEAP_SECONDS  = 14
# Fixed UTC+3 timezone object labelled 'MSK'.
TZ_MSK = datetime.timezone(datetime.timedelta(hours=+3), 'MSK')

# WGS84 reference ellipsoid: semi-axes (the values match the published
# WGS84 figures in metres) and squared eccentricities.
WGS84_SEMI_MAJOR_AXIS = 6378137.0
WGS84_SEMI_MINOR_AXIS = 6356752.314245
WGS84_SQUARED_FIRST_ECCENTRICITY  = 6.69437999013e-3
WGS84_SQUARED_SECOND_ECCENTRICITY = 6.73949674226e-3
# Plain eccentricities, derived from the squared values above.
WGS84_FIRST_ECCENTRICITY  = np.sqrt(WGS84_SQUARED_FIRST_ECCENTRICITY)
WGS84_SECOND_ECCENTRICITY = np.sqrt(WGS84_SQUARED_SECOND_ECCENTRICITY)

# Speed of light in vacuum [m/s].
LIGHT_SPEED = 299792458.0

# Earth rotation rate and gravitational parameter
# (presumably rad/s and m^3/s^2 -- units are not stated in this file).
OMEGA_EARTH = 7.2921151467e-5
MU_EARTH    = 3.986005e+14

# Carrier frequencies in Hz.  The Galileo E1/E5a and QZSS J1/J5 entries are
# defined as aliases of the GPS L1/L5 values.
FREQ_GPS_L1  = 1.575420e+09
FREQ_GPS_L5  = 1.176450e+09
FREQ_GAL_E1  = FREQ_GPS_L1
FREQ_GAL_E5A = FREQ_GPS_L5
FREQ_QZS_J1  = FREQ_GPS_L1
FREQ_QZS_J5  = FREQ_GPS_L5
FREQ_BDS_B1I = 1.561098e+09
# GLONASS G1 nominal frequency and a per-channel step
# (presumably frequency = NOMINAL + channel * DELTA -- confirm at the point of use).
FREQ_GLO_G1_NOMINAL = 1602.00 * 1e+6
FREQ_GLO_G1_DELTA   = 562.5 * 1e+3

# Constellation name -> integer constellation code
# (presumably the ConstellationType values found in the raw GNSS logs).
CONSTELLATION_TYPE_MAP = {
    'GPS'     : 1,
    'GLONASS' : 3,
    'QZSS'    : 4,
    'BEIDOU'  : 5,
    'GALILEO' : 6,
}

# Bit position -> human-readable name of a raw measurement state flag.
RAW_STATE_BIT_MAP = {
     0: "Code Lock",
     1: "Bit Sync",
     2: "Subframe Sync",
     3: "Time Of Week Decoded State",
     4: "Millisecond Ambiguity",
     5: "Symbol Sync",
     6: "GLONASS String Sync",
     7: "GLONASS Time Of Day Decoded",
     8: "BEIDOU D2 Bit Sync",
     9: "BEIDOU D2 Subframe Sync",
    10: "Galileo E1BC Code Lock",
    11: "Galileo E1C 2^nd^ Code Lock",
    12: "Galileo E1B Page Sync",
    13: "SBAS Sync",
    14: "Time Of Week Known",
    15: "GLONASS Time Of Day Known",
}
# Inverse lookup: flag name -> bit position.
RAW_STATE_BIT_INV_MAP = { value : key for key, value in RAW_STATE_BIT_MAP.items() }

# Constellation name -> one-letter system prefix (e.g. 'G' for GPS).
SYSTEM_NAME_MAP = {
    'GPS'     : 'G',
    'GLONASS' : 'R',
    'GALILEO' : 'E',
    'BEIDOU'  : 'C',
    'QZSS'    : 'J',
}

# GLONASS satellite number (1..24) -> frequency channel number.
# NOTE(review): the assignment is hard-coded; presumably valid for the
# period covered by the data set -- confirm before reusing elsewhere.
GLONASS_FREQ_CHANNEL_MAP = {
    1 : 1,
    2 : -4,
    3 : 5,
    4 : 6,
    5 : 1,
    6 : -4,
    7 : 5,
    8 : 6,
    9 : -2,
    10 : -7,
    11 : 0,
    12 : -1,
    13 : -2,
    14 : -7,
    15 : 0,
    16 : -1,
    17 : 4,
    18 : -3,
    19 : 3,
    20 : 2,
    21 : 4,
    22 : -3,
    23 : 3,
    24 : 2,
}

# QZSS PRN number -> satellite id.  Note that the mapping is not monotonic
# (199 -> 3, 195 -> 4).
QZSS_PRN_SVID_MAP = {
    193 : 1,
    194 : 2,
    199 : 3,
    195 : 4,
}

# Initial position guess: latitude / longitude converted from degrees to
# radians, and a height of zero.
INIT_B = np.deg2rad(  37.5)
INIT_L = np.deg2rad(-122.2)
INIT_H = 0.0

# Screening thresholds.  Their exact use is outside this section of the
# file; the names suggest a carrier-frequency tolerance, a minimum C/N0,
# a maximum received-time uncertainty and a maximum raw pseudorange
# (50_000 * 1e+3, i.e. 5e7) -- TODO confirm units at the point of use.
FREQ_TOL = 100.0
Cn0DbHz_THRESHOLD = 20.0
ReceivedSvTimeUncertaintyNanos_THRESHOLD = 100
RAW_PSEUDO_RANGE_THRESHOLD = 50_000 * 1e+3

# Time margins for the clock, orbit and ionosphere products.
CLOCK_TIME_MARGIN = datetime.timedelta(seconds=90)
ORBIT_TIME_MARGIN = datetime.timedelta(hours=3)
IONO_TIME_MARGIN  = datetime.timedelta(hours=2)

# Small length tolerance, elevation cut-off (7 degrees, stored in radians)
# and a default tropospheric delay (the _M suffix suggests metres).
EPSILON_M = 0.01
ELEVATION_CUTOFF = np.deg2rad(7.0)
DEFAULT_TROPO_DELAY_M = 2.48

# Sphere radius used by the haversine distance.
HAVERSINE_RADIUS = 6_371_000

# Magnetic declination: 10 degrees, stored in radians.
MAGNETIC_DECLINATION = np.deg2rad(10.0)

In [None]:
%%writefile io_f.py
import io
import datetime
from dataclasses import dataclass, asdict
import numpy as np
import pandas as pd
from scipy.interpolate import InterpolatedUnivariateSpline, RectBivariateSpline

# Milliseconds between 1970-01-01 and 1980-01-06, reduced by 18 s.
# read_GnssLog_sensors subtracts this from utcTimeMillis to obtain
# millisSinceGpsEpoch.  The 18 s is presumably the GPS-UTC leap-second
# offset for the data's period -- confirm for other date ranges.
UTC_TO_GPS_OFFSET_MS = ((datetime.date(1980, 1, 6) - datetime.date(1970, 1, 1)).days * 24 * 3600 - 18) * 1000

def read_GnssLog_sensors(filename):
    """Collect the inertial / orientation records of a GnssLog text file.

    Every line containing one of the tags 'UncalAccel', 'UncalGyro',
    'UncalMag' or 'OrientationDeg' (tested in that order, first hit wins)
    is stripped of trailing whitespace and leading '#' characters and parsed
    as CSV together with the other lines of the same tag.

    Returns a dict with the keys 'acce', 'gyro', 'magn' and 'orie'.  Each
    value is either a cleaned DataFrame with an added 'millisSinceGpsEpoch'
    column, or None when no line carried the tag or fewer than two rows
    survived the NaN / duplicate-timestamp removal.
    """
    # (tag searched for in the line, key in the returned dict)
    tag_to_key = (
        ('UncalAccel',     'acce'),
        ('UncalGyro',      'gyro'),
        ('UncalMag',       'magn'),
        ('OrientationDeg', 'orie'),
    )
    collected = {key: [] for _, key in tag_to_key}
    with open(filename, 'r') as f:
        for text in f:
            for tag, key in tag_to_key:
                if tag in text:
                    collected[key].append(text.rstrip().lstrip('#'))
                    break

    def clean(frame):
        if frame is None:
            return None
        frame = frame.dropna(axis=0)
        frame = frame.drop_duplicates(subset='utcTimeMillis')
        if len(frame) < 2:
            return None
        # Keep the first row and every row whose timestamp is larger than
        # the one of the row directly before it.
        utc = frame['utcTimeMillis'].values
        increasing = np.concatenate([[True], np.diff(utc) > 0])
        frame = frame[increasing].reset_index(drop=True)
        frame['millisSinceGpsEpoch'] = frame['utcTimeMillis'] - UTC_TO_GPS_OFFSET_MS
        return frame

    # Parse every sensor first, then clean, so a malformed section surfaces
    # before any cleaning takes place.
    parsed = {
        key: (pd.read_csv(io.StringIO('\n'.join(rows))) if rows else None)
        for key, rows in collected.items()
    }
    return {key: clean(frame) for key, frame in parsed.items()}

@dataclass
class IONEX:
    """Ionosphere map grid as produced by read_IONEX_file / concat_ionex."""
    # Values below describe what read_IONEX_file stores in each field.
    iono_height : float            # 1000 x the HGT1 header value
    base_radius : float            # 1000 x the BASE RADIUS header value
    lat_1       : float            # lower latitude bound, radians
    lat_2       : float            # upper latitude bound, radians
    lat_delta   : float            # latitude step (positive), radians
    lng_1       : float            # lower longitude bound, radians
    lng_2       : float            # upper longitude bound, radians
    lng_delta   : float            # longitude step (positive), radians
    time_1      : np.datetime64    # epoch of the first map
    time_2      : np.datetime64    # epoch of the last map
    time_delta  : np.timedelta64   # interval between consecutive maps
    iono_map    : np.array         # indexed [time, latitude, longitude]
    lat_range   : np.array         # latitude grid, lat_1 .. lat_2
    lng_range   : np.array         # longitude grid, lng_1 .. lng_2

def concat_sp3(sp3_df_list):
    """Stack several SP3 tables and keep only satellites present in all of them.

    A satellite whose 'SatName' is missing from at least one of the input
    frames is removed from the concatenated result.  The returned frame has
    a fresh 0..n-1 index.
    """
    merged = pd.concat(sp3_df_list, axis=0)
    names_per_frame = [frozenset(frame['SatName']) for frame in sp3_df_list]
    seen_anywhere   = frozenset.union(*names_per_frame)
    seen_everywhere = frozenset.intersection(*names_per_frame)
    # Satellites that appear in some, but not all, of the inputs.
    incomplete = seen_anywhere - seen_everywhere
    # print(f'concat_sp3: drop {sorted(incomplete)}')
    keep = ~merged['SatName'].isin(list(incomplete))
    return merged[keep].reset_index(drop=True)

def concat_ionex(ionex_list):
    """Join consecutive IONEX records along the time axis.

    All records must share the same grid description (asserted), and each
    record must end at the epoch where the next one starts (asserted).  The
    last time slice of every record but the final one is dropped so the
    shared epoch appears only once.  The result copies the fields of the
    first record, with 'time_2' taken from the last record and 'iono_map'
    replaced by the concatenated maps.
    """
    shared_fields = (
        'iono_height', 'base_radius',
        'lat_1', 'lat_2', 'lat_delta',
        'lng_1', 'lng_2', 'lng_delta',
        'time_delta',
    )
    for field_name in shared_fields:
        distinct = np.unique([getattr(ionex, field_name) for ionex in ionex_list])
        assert(len(distinct) == 1)

    map_pieces = []
    for current, following in zip(ionex_list[:-1], ionex_list[1:]):
        assert(current.time_2 == following.time_1)
        map_pieces.append(current.iono_map[0:-1, :, :])
    map_pieces.append(ionex_list[-1].iono_map)

    kw = asdict(ionex_list[0])
    kw['time_2']   = ionex_list[-1].time_2
    kw['iono_map'] = np.concatenate(map_pieces, axis=0)
    return IONEX(**kw)

def read_GnssLog_Raw(filename):
    """Parse the 'Raw' records of a GnssLog text file into a DataFrame.

    Every line containing the text 'Raw' is kept, stripped of trailing
    whitespace and leading '#' characters, and the kept lines are parsed
    together as one CSV document (the first kept line supplies the header).
    """
    with open(filename, 'r') as f:
        raw_rows = [text.rstrip().lstrip('#') for text in f if 'Raw' in text]
    return pd.read_csv(io.StringIO('\n'.join(raw_rows)))

def read_clock_file(filename):
    """Read the satellite clock records ('AS ' lines) of a clock file.

    The header is scanned up to 'END OF HEADER'; a 'TIME SYSTEM ID' line
    must name 'GPS' (asserted).  Each data line starting with 'AS ' yields
    the satellite name (token 1), the epoch (tokens 2-7, seconds truncated
    to an integer) and the clock value (token 9).

    Returns a DataFrame with the columns 'Epoch', 'SatName' and 'DeltaTSV',
    restricted to epochs less than one day after the first record.
    """
    with open(filename, 'r') as f:
        all_lines = f.readlines()

    for position, text in enumerate(all_lines):
        if 'TIME SYSTEM ID' in text:
            assert(text.strip().split()[0] == 'GPS')
        elif 'END OF HEADER' in text:
            body_start = position + 1
            break

    records = []
    for text in all_lines[body_start:]:
        if not text.startswith('AS '):
            continue
        fields = text.rstrip().split()
        year, month, day, hour, minute = (int(item) for item in fields[2:7])
        epoch = datetime.datetime(year, month, day, hour, minute,
                                  int(float(fields[7])))
        records.append((epoch, fields[1], float(fields[9])))

    df = pd.DataFrame({
        'Epoch'    : [rec[0] for rec in records],
        'SatName'  : [rec[1] for rec in records],
        'DeltaTSV' : [rec[2] for rec in records],
    })
    # Keep only the first 24 hours counted from the first record.
    df = df[df['Epoch'] < (df['Epoch'].values[0] + pd.Timedelta(1, unit='day'))]
    return df.reset_index(drop=True)

def read_sp3_file(filename):
    """Read satellite positions and clock values from an SP3 file.

    Header '%c ' lines must carry 'GPS' or the placeholder 'ccc' as their
    fourth token (asserted).  Parsing starts at the first epoch line
    ('* ...'); every following 'P...' line is attached to the most recent
    epoch.  X, Y, Z are multiplied by 1e+3 and the clock value by 1e-6.

    Returns a DataFrame with the columns 'Epoch', 'SatName', 'X', 'Y', 'Z'
    and 'DeltaTSV_SP3', restricted to epochs less than one day after the
    first one.  Satellites having any all-zero position record in that
    window are removed completely.
    """
    with open(filename, 'r') as f:
        all_lines = f.readlines()

    for position, text in enumerate(all_lines):
        if text.startswith('%c '):
            time_system = text.split()[3]
            assert((time_system == 'GPS') or (time_system == 'ccc'))
        elif text.startswith('* '):
            body_start = position
            break

    rows = []
    for text in all_lines[body_start:]:
        if text.startswith('* '):
            fields = text.rstrip().split()
            year, month, day, hour, minute = (int(item) for item in fields[1:6])
            epoch = datetime.datetime(year, month, day, hour, minute,
                                      int(float(fields[6])))
        elif text.startswith('P'):
            fields = text.rstrip().split()
            sat = fields[0][1:]
            x, y, z, clock = (float(item) for item in fields[1:5])
            rows.append([epoch, sat, x * 1e+3, y * 1e+3, z * 1e+3, clock * 1e-6])

    df = pd.DataFrame(rows, columns=['Epoch', 'SatName', 'X', 'Y', 'Z', 'DeltaTSV_SP3'])
    df = df[df['Epoch'] < (df['Epoch'].values[0] + pd.Timedelta(1, unit='day'))]
    # An all-zero position marks a bad record; drop such satellites entirely.
    is_zero = (df['X'] == 0) & (df['Y'] == 0) & (df['Z'] == 0)
    bad_sats = list(np.unique(df[is_zero]['SatName'].values))
    # print(f'read_sp3_file: drop {bad_sats}')
    df = df[~df['SatName'].isin(bad_sats)]
    return df.reset_index(drop=True)

def read_SINEX_TRO_file(filename):
    """Read the '+TROP/SOLUTION' block of a SINEX troposphere file.

    Records start two lines after the '+TROP/SOLUTION' marker and end at
    '-TROP/SOLUTION'.  Token 1 of each record is an epoch 'yy:ddd:sssss'
    (two-digit year counted from 2000, day of year, second of day); the
    tokens after it are multiplied by 1e-3.

    Returns a DataFrame with the columns 'Epoch', 'TROTOT', 'TROTOT_STD',
    'TGNTOT', 'TGNTOT_STD', 'TGETOT', 'TGETOT_STD', restricted to epochs
    less than one day after the first record.
    """
    with open(filename, 'r') as f:
        all_lines = f.readlines()

    for position, text in enumerate(all_lines):
        if '+TROP/SOLUTION' in text:
            # Skip the marker itself and the line that follows it.
            first_record = position + 2
            break

    rows = []
    for text in all_lines[first_record:]:
        if '-TROP/SOLUTION' in text:
            break
        fields = text.strip().split()
        year2, day_of_year, second_of_day = (int(part) for part in fields[1].split(':'))
        epoch = (datetime.datetime(year2 + 2000, 1, 1)
                 + datetime.timedelta(days=day_of_year - 1, seconds=second_of_day))
        rows.append([epoch] + [1e-3 * float(item) for item in fields[2:]])

    df = pd.DataFrame(rows, columns=[
        'Epoch',
        'TROTOT', 'TROTOT_STD',
        'TGNTOT', 'TGNTOT_STD',
        'TGETOT', 'TGETOT_STD',
    ])
    df = df[df['Epoch'] < (df['Epoch'].values[0] + pd.Timedelta(1, unit='day'))]
    return df.reset_index(drop=True)

def read_IONEX_file(filename):
    """Parse an IONEX ionosphere-map file into an ``IONEX`` record.

    The header is scanned line by line; a record is recognised by its label
    text being contained in the line, and its values are the leading
    whitespace-separated tokens of that line.  Latitudes and longitudes are
    converted from degrees to radians, the height and base radius are
    multiplied by 1000, and the TEC values are multiplied by
    10**EXPONENT.  If the file lists latitudes (or longitudes) in
    descending order, the bounds are swapped and the map is flipped so that
    the returned grid is ascending along that axis.

    The parser asserts the layout it supports: HGT1 == HGT2 with DHGT == 0,
    mapping function 'COSZ', map dimension 2, and the exact sequence of
    'START OF TEC MAP' / 'EPOCH OF CURRENT MAP' / 'LAT/LON1/LON2/DLON/H' /
    'END OF TEC MAP' records.  A warning is printed when a latitude row
    contains the value 9999.
    """
    with open(filename, 'r') as f:
        lines = f.readlines()
    kw = dict()
    #==============================
    # read header
    #==============================
    for index, line in enumerate(lines):
        tokens = line.strip().split()
        if 'EPOCH OF FIRST MAP' in line:
            kw['time_1'] = np.datetime64(datetime.datetime(
                year   = int(tokens[0]),
                month  = int(tokens[1]),
                day    = int(tokens[2]),
                hour   = int(tokens[3]),
                minute = int(tokens[4]),
                second = int(tokens[5]),
            ))
            continue
        if 'EPOCH OF LAST MAP' in line:
            kw['time_2'] = np.datetime64(datetime.datetime(
                year   = int(tokens[0]),
                month  = int(tokens[1]),
                day    = int(tokens[2]),
                hour   = int(tokens[3]),
                minute = int(tokens[4]),
                second = int(tokens[5]),                
            ))
            continue
        if 'INTERVAL' in line:
            kw['time_delta'] = np.timedelta64(datetime.timedelta(
                seconds=int(tokens[0]),
            ))
            continue
        if 'HGT1 / HGT2 / DHGT' in line:
            h1, h2, dh = [float(x) for x in tokens[0:3]]
            # Only a single-layer map is supported.
            assert(h1 == h2)
            assert(dh == 0.0)
            kw['iono_height'] = h1 * 1000
            continue
        if 'LAT1 / LAT2 / DLAT' in line:
            lat_1, lat_2, lat_delta = [float(x) for x in tokens[0:3]]
            # The step must point from the first bound towards the second.
            assert((lat_2 - lat_1) * lat_delta > 0)
            if (lat_1 > lat_2):
                # Descending grid: store it ascending and remember to flip
                # the data along the latitude axis afterwards.
                flip_lat = True
                lat_1, lat_2 = lat_2, lat_1
                lat_delta = - lat_delta
            else:
                flip_lat = False
            kw['lat_1']     = np.deg2rad(lat_1)
            kw['lat_2']     = np.deg2rad(lat_2)
            kw['lat_delta'] = np.deg2rad(lat_delta)
            continue
        if 'LON1 / LON2 / DLON' in line:
            lng_1, lng_2, lng_delta = [float(x) for x in tokens[0:3]]
            assert((lng_2 - lng_1) * lng_delta > 0)
            if (lng_1 > lng_2):
                # Same handling as for a descending latitude grid.
                flip_lng = True
                lng_1, lng_2 = lng_2, lng_1
                lng_delta = - lng_delta
            else:
                flip_lng = False
            kw['lng_1']     = np.deg2rad(lng_1)
            kw['lng_2']     = np.deg2rad(lng_2)
            kw['lng_delta'] = np.deg2rad(lng_delta)
            continue
        if 'MAPPING FUNCTION' in line:
            assert(tokens[0] == 'COSZ')
            continue
        if 'BASE RADIUS' in line:
            kw['base_radius'] = 1000 * float(tokens[0])
            continue
        if 'EXPONENT' in line:
            # Scale factor applied to every TEC value read below.
            # NOTE(review): TEC_coeff stays undefined if the header has no
            # EXPONENT record -- confirm that all input files carry one.
            TEC_coeff = 10**float(tokens[0])
            continue
        if 'MAP DIMENSION' in line:
            assert(int(tokens[0]) == 2)
            continue
        if 'END OF HEADER' in line:
            # line_count is the cursor into `lines` used by the data reader.
            line_count = index + 1
            break
    #==============================
    # read data
    #==============================
    # Grid sizes, both ends inclusive.
    # NOTE(review): int() truncates, so a quotient that lands just below an
    # integer because of floating-point rounding would under-count by one;
    # round() would be more robust -- confirm before changing.
    N_lat  = 1 + int((kw['lat_2'] - kw['lat_1']) / kw['lat_delta'])
    N_lng  = 1 + int((kw['lng_2'] - kw['lng_1']) / kw['lng_delta'])
    N_time = 1 + (kw['time_2'] - kw['time_1']) // kw['time_delta']
    iono_map = np.zeros((N_time, N_lat, N_lng), dtype=np.float64)

    # Each latitude row is spread over as many lines as needed at 16 values
    # per line (ceiling division).
    data_per_line = 16
    lines_per_data = (N_lng + data_per_line - 1) // data_per_line
    
    for time_count in range(N_time):
        # Map header: 'START OF TEC MAP' carries the 1-based map number.
        assert('START OF TEC MAP' in lines[line_count])
        assert(int(lines[line_count].strip().split()[0]) == time_count + 1)
        line_count += 1
        assert('EPOCH OF CURRENT MAP' in lines[line_count])
        line_count += 1
        for lat_count in range(N_lat):
            assert('LAT/LON1/LON2/DLON/H' in lines[line_count])
            line_count += 1
            # Gather the integer values of this latitude row.
            values = []
            for i in range(lines_per_data):
                values.extend([int(x) for x in lines[line_count+i].strip().split()])
            if 9999 in values:
                print('Warning: There is non-available TEC values.')
            iono_map[time_count, lat_count, :] = np.array(values).astype(float)
            line_count += lines_per_data
        assert('END OF TEC MAP' in lines[line_count])
        assert(int(lines[line_count].strip().split()[0]) == time_count + 1)
        line_count += 1

    # Bring the data into the ascending order announced in lat_1/lat_2 and
    # lng_1/lng_2.
    if flip_lat:
        iono_map = np.flip(iono_map, axis=1)
    if flip_lng:
        iono_map = np.flip(iono_map, axis=2)
    iono_map = iono_map * TEC_coeff
    kw['iono_map']  = iono_map
    kw['lat_range'] = np.linspace(kw['lat_1'], kw['lat_2'], N_lat)
    kw['lng_range'] = np.linspace(kw['lng_1'], kw['lng_2'], N_lng)
    return IONEX(**kw)

In [None]:
%%writefile transform.py
import numpy as np
from dataclasses import dataclass

import constants as C

@dataclass
class ECEF:
    """Cartesian coordinate triple (x, y, z), scalar or array valued."""
    x: np.array
    y: np.array
    z: np.array

    def to_numpy(self):
        """Stack x, y, z along a new leading axis (result shape (3, ...))."""
        components = [self.x, self.y, self.z]
        return np.stack(components, axis=0)

    @staticmethod
    def from_numpy(pos):
        """Build an ECEF from an array whose last axis holds x, y, z.

        The last axis is split into three equal parts; every part is
        squeezed, i.e. all of its length-1 axes are removed.
        """
        part_x, part_y, part_z = np.split(pos, 3, axis=-1)
        return ECEF(
            x=np.squeeze(part_x),
            y=np.squeeze(part_y),
            z=np.squeeze(part_z),
        )

@dataclass
class BLH:
    """Geodetic coordinate triple: latitude, longitude, height."""
    # The conversion functions of this module pass lat and lng directly to
    # np.sin / np.cos, i.e. both are handled as radians.
    lat : np.array
    lng : np.array
    hgt : np.array

@dataclass
class ENU:
    """Local east / north / up components, as returned by ECEF_to_ENU."""
    east  : np.array
    north : np.array
    up    : np.array

@dataclass
class AZEL:
    """Direction angles in radians, as returned by ENU_to_AZEL."""
    elevation : np.array   # arctan2(up, horizontal distance)
    azimuth   : np.array   # arctan2(east, north)
    zenith    : np.array   # pi/2 - elevation

def BLH_to_ECEF(blh):
    """Convert geodetic coordinates (radians, height) to ECEF on WGS84."""
    e2 = C.WGS84_SQUARED_FIRST_ECCENTRICITY
    sin_lat = np.sin(blh.lat)
    cos_lat = np.cos(blh.lat)
    sin_lng = np.sin(blh.lng)
    cos_lng = np.cos(blh.lng)
    # Radius of curvature in the prime vertical.
    n = C.WGS84_SEMI_MAJOR_AXIS / np.sqrt(1 - e2*sin_lat**2)
    # Distance from the rotation axis.
    axis_distance = (n + blh.hgt) * cos_lat
    return ECEF(
        x=axis_distance * cos_lng,
        y=axis_distance * sin_lng,
        z=((1 - e2) * n + blh.hgt) * sin_lat,
    )

def ECEF_to_BLH_approximate(ecef):
    """Convert ECEF coordinates to geodetic ones with a closed-form formula.

    No iteration is performed.  The height is computed as
    r / cos(lat) - n, which is singular where cos(lat) is zero.
    """
    semi_major = C.WGS84_SEMI_MAJOR_AXIS
    semi_minor = C.WGS84_SEMI_MINOR_AXIS
    e2_first   = C.WGS84_SQUARED_FIRST_ECCENTRICITY
    e2_second  = C.WGS84_SQUARED_SECOND_ECCENTRICITY

    # Distance from the rotation axis.
    r = np.sqrt(ecef.x**2 + ecef.y**2)
    # Auxiliary angle used by the closed-form latitude expression.
    t = np.arctan2(ecef.z * (semi_major/semi_minor), r)
    lat_num = ecef.z + (e2_second*semi_minor)*np.sin(t)**3
    lat_den = r - (e2_first*semi_major)*np.cos(t)**3
    lat = np.arctan2(lat_num, lat_den)
    lng = np.arctan2(ecef.y, ecef.x)
    # Radius of curvature in the prime vertical at the computed latitude.
    n = semi_major / np.sqrt(1 - e2_first*np.sin(lat)**2)
    hgt = (r / np.cos(lat)) - n
    return BLH(lat=lat, lng=lng, hgt=hgt)

# Default ECEF -> BLH conversion used by the rest of this module.
ECEF_to_BLH = ECEF_to_BLH_approximate

def ECEF_to_ENU(pos, base):
    """Express ``pos`` in the east/north/up frame anchored at ``base``.

    Both arguments are ECEF points; the frame orientation comes from the
    geodetic latitude and longitude of ``base``.
    """
    origin = ECEF_to_BLH(base)
    sin_lat, cos_lat = np.sin(origin.lat), np.cos(origin.lat)
    sin_lng, cos_lng = np.sin(origin.lng), np.cos(origin.lng)

    # Offset of pos from base in ECEF.
    dx = pos.x - base.x
    dy = pos.y - base.y
    dz = pos.z - base.z

    east  = -sin_lng*dx + cos_lng*dy
    north = -sin_lat*cos_lng*dx - sin_lat*sin_lng*dy + cos_lat*dz
    up    =  cos_lat*cos_lng*dx + cos_lat*sin_lng*dy + sin_lat*dz
    return ENU(east=east, north=north, up=up)

def ENU_to_AZEL(enu):
    """Convert east/north/up components to elevation, azimuth and zenith.

    All angles are in radians.  Azimuth is arctan2(east, north); zenith is
    pi/2 minus the elevation.
    """
    horizontal = np.sqrt(enu.east**2 + enu.north**2)
    elevation  = np.arctan2(enu.up, horizontal)
    return AZEL(
        elevation=elevation,
        azimuth=np.arctan2(enu.east, enu.north),
        zenith=(0.5 * np.pi) - elevation,
    )

def ECEF_to_AZEL(pos, base):
    """Return elevation/azimuth/zenith of ``pos`` as seen from ``base`` (both ECEF)."""
    local = ECEF_to_ENU(pos, base)
    return ENU_to_AZEL(local)

def haversine_distance(blh_1, blh_2):
    """Great-circle distance between two points on a sphere of radius C.HAVERSINE_RADIUS.

    Latitudes and longitudes are read in radians; heights are ignored.
    """
    half_dlat = (blh_2.lat - blh_1.lat) / 2
    half_dlng = (blh_2.lng - blh_1.lng) / 2
    a = np.sin(half_dlat)**2 + np.cos(blh_1.lat) * np.cos(blh_2.lat) * np.sin(half_dlng)**2
    return 2 * C.HAVERSINE_RADIUS * np.arcsin(np.sqrt(a))

def hubenys_distance(blh_1, blh_2):
    """Distance between two geodetic points by Hubeny's formula on WGS84.

    Latitudes and longitudes are read in radians; heights are ignored.  The
    curvature radii are evaluated at the mean latitude of the two points.
    """
    semi_major = C.WGS84_SEMI_MAJOR_AXIS
    e2 = C.WGS84_SQUARED_FIRST_ECCENTRICITY

    dlat = blh_1.lat - blh_2.lat
    dlng = blh_1.lng - blh_2.lng
    mean_lat = 0.5 * (blh_1.lat + blh_2.lat)

    w = np.sqrt(1 - e2 * np.sin(mean_lat)**2)
    meridian_radius = (semi_major * (1 - e2)) / w**3
    prime_vertical_radius = semi_major / w

    north_south = dlat * meridian_radius
    east_west   = dlng * prime_vertical_radius * np.cos(mean_lat)
    return np.sqrt(north_south**2 + east_west**2)

def jacobian_BLH_to_ECEF(blh):
    """Jacobian of the geodetic -> ECEF conversion (see BLH_to_ECEF).

    Parameters
    ----------
    blh : BLH
        Latitude and longitude in radians plus height; scalars or arrays.

    Returns
    -------
    np.ndarray
        Array whose last two axes are the 3x3 Jacobian: rows are X, Y, Z,
        columns are the derivatives with respect to latitude (B),
        longitude (L) and height (H).  Any axes of the inputs come first.
    """
    a  = C.WGS84_SEMI_MAJOR_AXIS
    e2 = C.WGS84_SQUARED_FIRST_ECCENTRICITY
    B = blh.lat
    L = blh.lng
    H = blh.hgt
    cos_B = np.cos(B)
    sin_B = np.sin(B)
    cos_L = np.cos(L)
    sin_L = np.sin(L)
    # Prime-vertical radius of curvature and its latitude derivative.
    N = a / np.sqrt(1 - e2*sin_B**2)
    dNdB = a * e2 * sin_B * cos_B * (1 - e2*sin_B**2)**(-3/2)
    N_plus_H = N + H
    cos_B_cos_L = cos_B * cos_L
    cos_B_sin_L = cos_B * sin_L
    sin_B_cos_L = sin_B * cos_L
    sin_B_sin_L = sin_B * sin_L

    dXdB = dNdB*cos_B_cos_L - N_plus_H*sin_B_cos_L
    dYdB = dNdB*cos_B_sin_L - N_plus_H*sin_B_sin_L
    # Z = ((1 - e2)*N + H) * sin(B): the (1 - e2) factor scales N only, not
    # the height, so the cos(B) term must use (1 - e2)*N + H.
    dZdB = (1-e2)*dNdB*sin_B + ((1-e2)*N + H)*cos_B

    dXdL = - N_plus_H * cos_B_sin_L
    dYdL =   N_plus_H * cos_B_cos_L
    dZdL = np.zeros_like(dXdL)

    dXdH = cos_B_cos_L
    dYdH = cos_B_sin_L
    dZdH = sin_B

    J = np.stack([[dXdB, dXdL, dXdH],
                  [dYdB, dYdL, dYdH],
                  [dZdB, dZdL, dZdH]], axis=0)
    # Move the two matrix axes behind the axes inherited from the inputs.
    axes = list(range(2, J.ndim)) + [0, 1]
    J = np.transpose(J, axes)
    return J

def jacobian_ECEF_to_ENU(blh):
    """Rotation from ECEF offsets to east/north/up at the given location.

    ``blh.lat`` and ``blh.lng`` are read in radians.  The last two axes of
    the result are the 3x3 matrix (rows E, N, U; columns X, Y, Z); any axes
    of the inputs come first.
    """
    sin_lat, cos_lat = np.sin(blh.lat), np.cos(blh.lat)
    sin_lng, cos_lng = np.sin(blh.lng), np.cos(blh.lng)

    east_row = [-sin_lng, cos_lng, np.zeros_like(-sin_lng)]
    north_row = [-sin_lat*cos_lng, -sin_lat*sin_lng, cos_lat]
    up_row = [cos_lat*cos_lng, cos_lat*sin_lng, sin_lat]

    J = np.stack([east_row, north_row, up_row], axis=0)
    # Move the two matrix axes behind the axes inherited from the inputs.
    trailing = list(range(2, J.ndim))
    return np.transpose(J, trailing + [0, 1])

def jacobian_BL_to_EN(BLH):
    """Jacobian of (east, north) with respect to (latitude, longitude).

    Chains the BLH -> ECEF Jacobian with the ECEF -> ENU rotation at the
    same points, keeping the east/north rows and the latitude/longitude
    columns.  Expects batched input (one leading axis), result (n, 2, 2).
    """
    ecef_wrt_bl = jacobian_BLH_to_ECEF(BLH)[:, :, 0:2]
    en_wrt_ecef = jacobian_ECEF_to_ENU(BLH)[:, 0:2, :]
    return np.einsum('nij,njk->nik', en_wrt_ecef, ecef_wrt_bl)

def pd_haversine_distance(df1, df2):
    """Row-wise haversine distance between two frames with 'latDeg' / 'lngDeg' columns."""
    def as_blh(frame):
        # Degrees -> radians; the height is not used by the haversine formula.
        return BLH(
            lat=np.deg2rad(frame['latDeg'].values),
            lng=np.deg2rad(frame['lngDeg'].values),
            hgt=0,
        )
    return haversine_distance(as_blh(df1), as_blh(df2))

In [None]:
%%writefile design_filter.py
import numpy as np
import scipy.signal
from scipy.special import sinc

def make_sinc_filter(F_cutoff, dt, n_sinc=3, n_gauss=2):
    """Design a Gaussian-windowed sinc FIR kernel with unit sum.

    The half width is round(n_sinc / (2 * F_cutoff * dt)) taps, so the
    kernel has 2 * half_width + 1 taps.  The Gaussian window has a standard
    deviation of half_width / n_gauss taps.
    """
    half_width = round(n_sinc / (2 * F_cutoff * dt))
    n_taps = 2*half_width + 1
    # Tap offsets -half_width .. +half_width, scaled into sinc arguments.
    offsets = np.arange(n_taps) - half_width
    kernel = sinc((n_sinc / (half_width + 1)) * offsets)
    window = scipy.signal.windows.gaussian(n_taps, std=half_width/n_gauss)
    taps = kernel * window
    # Normalise so that the taps sum to one.
    return taps * (1 / np.sum(taps))

def main():
    """Plot the taps and the magnitude response of an example filter.

    Builds a filter with a 2 Hz cut-off at a 2.5 ms sample interval, prints
    a few figures about it, and shows two matplotlib plots (taps, and
    magnitude over 0.1-100 Hz).  Nothing in this module calls it.
    """
    import scipy.signal
    import matplotlib.pyplot as plt

    dt = 2.5 * 1e-3
    F_cutoff = 2.0
    FLT = make_sinc_filter(F_cutoff=F_cutoff, dt=dt)

    f_nyquist = (1 / (2 * dt))
    print(f'dt = {1000*dt} [ms]')
    print(f'f_nyquist = {f_nyquist:.2f} [Hz]')
    
    # Express the FIR filter as a discrete transfer function: the taps form
    # the numerator and the denominator is [1, 0, ..., 0].
    num = FLT
    den = np.zeros_like(num)
    den[0] = 1
    sys = scipy.signal.TransferFunction(num, den, dt=dt)

    # Frequencies from 0.1 to 100 Hz, scaled by 2*pi*dt before being handed
    # to bode() (presumably its expected per-sample angular frequency --
    # confirm against the scipy documentation).
    frange = np.logspace(-1, 2, 1000)
    wrange = (2 * np.pi * dt) * frange
    _, mag, phase = sys.bode(wrange)

    assert(len(num) % 2 == 1)
    N_FLT = (len(num) - 1) // 2
    it = np.linspace(-N_FLT, N_FLT, 2*N_FLT+1)
    print(f'N_FLT = {N_FLT}')

    # Figure 1: filter taps against their offset from the centre tap.
    plt.figure()
    plt.plot(it, num)

    # Figure 2: magnitude response on a logarithmic frequency axis.
    plt.figure()
    plt.semilogx(frange, mag)
    plt.grid(True)
    plt.xlim([0.1, 100])
    plt.ylim([-80, 20])

    plt.show()
    return

In [None]:
%%writefile signal_f.py
from dataclasses import dataclass
import numpy as np
import pandas as pd
from scipy.interpolate import InterpolatedUnivariateSpline

import constants as C
import design_filter

@dataclass
class Trig:
    """An angle stored as its cosine and sine.

    '+', '-' and unary '-' apply the angle-sum, angle-difference and
    negation identities, so angles can be combined without wrap-around
    problems.
    """
    cos : np.array
    sin : np.array

    def __add__(self, other):
        c1, s1 = self.cos, self.sin
        c2, s2 = other.cos, other.sin
        return Trig(cos=(c1 * c2) - (s1 * s2),
                    sin=(s1 * c2) + (c1 * s2))

    def __sub__(self, other):
        c1, s1 = self.cos, self.sin
        c2, s2 = other.cos, other.sin
        return Trig(cos=(c1 * c2) + (s1 * s2),
                    sin=(s1 * c2) - (c1 * s2))

    def __neg__(self):
        return Trig(cos=self.cos, sin=-self.sin)

    def to_rad(self):
        """Return the angle in radians via arctan2(sin, cos)."""
        return np.arctan2(self.sin, self.cos)

    @staticmethod
    def from_data(x, y):
        """Build the angle of the vector (x, y) by normalising it to unit length."""
        length = np.sqrt(x**2 + y**2)
        return Trig(cos=x / length, sin=y / length)

    @staticmethod
    def from_rad(th):
        """Build from an angle given in radians."""
        return Trig(cos=np.cos(th), sin=np.sin(th))

def apply_filter_padding_edge(X, flt):
    """Filter ``X`` with the odd-length kernel ``flt``, keeping its length.

    The signal is extended on both sides by repeating its first and last
    sample (len(flt) - 1) // 2 times before a 'valid' convolution.
    """
    assert(len(flt) % 2 == 1)
    half_width = (len(flt) - 1) // 2
    head = np.full(half_width, X[0])
    tail = np.full(half_width, X[-1])
    padded = np.concatenate([head, X, tail], axis=0)
    return np.convolve(padded, flt, mode='valid')

def preprocess_sensor_data(sensor_dfs, t_ref, dt_up, dt_down, flt):
    """Resample, low-pass filter and decimate the accelerometer, gyro and
    magnetometer signals onto one common time grid.

    Parameters
    ----------
    sensor_dfs : dict
        Frames under the keys 'acce', 'gyro' and 'magn', each with a
        'millisSinceGpsEpoch' column and its three Uncal* axis columns.
    t_ref : number
        Reference time in the unit of 'millisSinceGpsEpoch'; output times
        are seconds relative to it.
    dt_up : float
        Step (seconds) of the intermediate grid all channels are linearly
        interpolated onto.
    dt_down : float
        Step (seconds) of the output grid; expected to be an integer
        multiple of dt_up.
    flt : array-like
        Odd-length FIR kernel applied on the intermediate grid.

    Returns
    -------
    pandas.DataFrame
        The nine filtered channels plus a 'Time' column, sampled at
        multiples of dt_down.
    """
    assert(len(flt) % 2 == 1)
    channel_spec = (
        ('acce', ['UncalAccelXMps2', 'UncalAccelYMps2', 'UncalAccelZMps2']),
        ('gyro', ['UncalGyroXRadPerSec', 'UncalGyroYRadPerSec', 'UncalGyroZRadPerSec']),
        ('magn', ['UncalMagXMicroT', 'UncalMagYMicroT', 'UncalMagZMicroT']),
    )
    # Per-sensor sample times in seconds relative to t_ref.
    sample_time = {
        key: 1e-3 * (sensor_dfs[key]['millisSinceGpsEpoch'] - t_ref).values
        for key, _ in channel_spec
    }
    # Common grid: multiples of dt_up inside the span covered by all sensors.
    t_up_min = np.ceil( np.max([t[ 0] for t in sample_time.values()]) / dt_up) * dt_up
    t_up_max = np.floor(np.min([t[-1] for t in sample_time.values()]) / dt_up) * dt_up
    # The dt_up / 10 margin keeps the end point despite rounding.
    time_up  = np.arange(t_up_min, t_up_max + dt_up / 10, dt_up)

    columns  = []
    filtered = []
    for key, names in channel_spec:
        frame = sensor_dfs[key]
        for name in names:
            # k=1: piecewise-linear interpolation onto the common grid.
            upsampled = InterpolatedUnivariateSpline(sample_time[key], frame[name].values, k=1)(time_up)
            filtered.append(np.convolve(upsampled, flt, mode='valid'))
            columns.append(name)
    sensor_flt = np.stack(filtered, axis=1)

    # 'valid' convolution drops n_flt samples at each end.  The end index is
    # written explicitly because time_up[n_flt:-n_flt] would be empty for a
    # single-tap kernel (n_flt == 0).
    n_flt    = (len(flt) - 1) // 2
    time_flt = time_up[n_flt:len(time_up) - n_flt]

    def roundint(value):
        return int(np.round(value))

    # Pick every dt_ratio-th filtered sample, starting at the first multiple
    # of dt_down inside the filtered span.
    dt_ratio    = roundint(dt_down / dt_up)
    t_down_min  = np.ceil( time_flt[ 0] / dt_down) * dt_down
    t_down_max  = np.floor(time_flt[-1] / dt_down) * dt_down
    N_down      = roundint((t_down_max - t_down_min) / dt_down) + 1
    idx_offset  = roundint((t_down_min - time_flt[ 0]) / dt_up)
    idx_down    = dt_ratio * np.arange(N_down) + idx_offset
    time_down   = time_flt[idx_down]
    sensor_down = sensor_flt[idx_down, :]

    df = pd.DataFrame(sensor_down, columns=columns)
    df['Time'] = time_down
    return df

def calibrate_magn_offset(sensor_df):
    """Fit a sphere to the magnetometer samples by linear least squares.

    Uses the columns 'UncalMagXMicroT', 'UncalMagYMicroT' and
    'UncalMagZMicroT'.  Writing |m|^2 = 2 c.m + 2 d makes the problem linear
    in the centre c and the auxiliary unknown d; the 4x4 normal equations
    are assembled from sums over the samples and solved directly.

    Returns (mx0, my0, mz0, mr): the sphere centre and its radius.
    """
    n_samples = sensor_df.shape[0]
    mx = sensor_df['UncalMagXMicroT']
    my = sensor_df['UncalMagYMicroT']
    mz = sensor_df['UncalMagZMicroT']
    mx_sq, my_sq, mz_sq = mx**2, my**2, mz**2
    norm_sq = mx_sq + my_sq + mz_sq

    # Second-order and first-order moments of the samples.
    sxx, syy, szz = np.sum(mx_sq), np.sum(my_sq), np.sum(mz_sq)
    sxy, sxz, syz = np.sum(mx * my), np.sum(mx * mz), np.sum(my * mz)
    sx, sy, sz = np.sum(mx), np.sum(my), np.sum(mz)

    # Symmetric normal-equation matrix (every entry carries the factor 2).
    A = np.array([[2 * sxx, 2 * sxy, 2 * sxz, 2 * sx],
                  [2 * sxy, 2 * syy, 2 * syz, 2 * sy],
                  [2 * sxz, 2 * syz, 2 * szz, 2 * sz],
                  [2 * sx,  2 * sy,  2 * sz,  2 * n_samples]])
    b = np.array([np.sum(mx * norm_sq),
                  np.sum(my * norm_sq),
                  np.sum(mz * norm_sq),
                  np.sum(norm_sq)])

    mx0, my0, mz0, d = np.linalg.solve(A, b)
    # 2 d = r^2 - |c|^2, hence r = sqrt(|c|^2 + 2 d).
    mr = np.sqrt(mx0**2 + my0**2 + mz0**2 + 2*d)
    return mx0, my0, mz0, mr

def remove_gyro_drift(omega, threshold):
    """Subtract a constant bias estimated from the small-magnitude samples.

    The bias is the mean of the samples whose absolute value is below
    ``threshold``.  When no sample qualifies, no bias can be estimated and
    the signal is returned unchanged (as a new array) instead of being
    turned into NaN by a 0/0 division.

    Parameters
    ----------
    omega : np.ndarray
        Angular-rate samples.
    threshold : float
        Magnitude below which a sample is used for the bias estimate.

    Returns
    -------
    np.ndarray
        ``omega`` with the estimated bias removed.
    """
    mask  = (np.abs(omega) < threshold).astype(float)
    count = np.sum(mask)
    if count == 0:
        return omega - 0.0
    drift = np.sum(omega * mask) / count
    return omega - drift

def trapezoidal_integration(V, dt):
    """Cumulative trapezoidal integral of ``V`` sampled at step ``dt``.

    The result has the same length as ``V`` and starts at 0.
    """
    half_step = 0.5 * dt
    increments = half_step * (V[:-1] + V[1:])
    running_sum = np.cumsum(increments)
    return np.concatenate([[0], running_sum], axis=0)

def central_difference(X, dt):
    """Central-difference derivative of ``X`` sampled at step ``dt``.

    Interior values are (X[i+1] - X[i-1]) / (2 dt); the first and the last
    value are set to 0.
    """
    scale = 1 / (2*dt)
    interior = scale * (X[2:] - X[:-2])
    return np.concatenate([[0], interior, [0]], axis=0)

def add_calibrated_signals(sensor_df, dt_down):
    """Add 'omega', 'theta', 'cos_th', 'sin_th' and 'dotV' columns to ``sensor_df``.

    The angle 'theta' combines the integrated, bias-corrected Y gyro rate
    with the magnetometer direction taken from the offset-corrected X and Z
    components: the difference between the two is split into its circular
    mean and a residual, and the residual is low-pass filtered (cut-off
    1/15 Hz) before being added back.  C.MAGNETIC_DECLINATION is subtracted
    from the result.  'dotV' is the negated, mean-removed Z acceleration.

    The frame is modified in place and also returned.
    """
    # Magnetometer direction after removing the fitted offset.
    offset_x, _, offset_z, _ = calibrate_magn_offset(sensor_df)
    mag_x = sensor_df['UncalMagXMicroT'].values - offset_x
    mag_z = sensor_df['UncalMagZMicroT'].values - offset_z
    magn_angle = Trig.from_data(-mag_x, -mag_z)

    # Gyro rate with the bias removed in two passes of decreasing threshold.
    omega = sensor_df['UncalGyroYRadPerSec'].values
    for threshold in (0.02, 0.01):
        omega = remove_gyro_drift(omega, threshold)
    gyro_angle = trapezoidal_integration(omega, dt_down)

    # Difference magnetometer - gyro, split into circular mean and residual.
    difference = magn_angle - Trig.from_rad(gyro_angle)
    difference_mean = Trig.from_data(np.mean(difference.cos), np.mean(difference.sin))
    residual = difference - difference_mean
    mean_rad = difference_mean.to_rad()

    lowpass = design_filter.make_sinc_filter(F_cutoff=1/15.0, dt=dt_down)
    residual_smooth = apply_filter_padding_edge(residual.to_rad(), lowpass)

    th = gyro_angle + residual_smooth + (mean_rad - C.MAGNETIC_DECLINATION)
    sensor_df['omega']  = omega
    sensor_df['theta']  = th
    sensor_df['cos_th'] = np.cos(th)
    sensor_df['sin_th'] = np.sin(th)
    accel_z = sensor_df[f'UncalAccelZMps2']
    sensor_df['dotV']   = - (accel_z - accel_z.mean())
    return sensor_df

def check_sensor_availability(sensor_dfs):
    """Decide whether the accelerometer, gyro and magnetometer logs are usable.

    Returns False when, for any of the three sensors ('acce', 'gyro',
    'magn'), the frame is missing or empty, the signal never changes, or
    the sample interval is irregular (standard deviation above 1 ms).  It
    also returns False when the mean acceleration deviates by more than
    20 degrees from the +Y axis.  Otherwise returns True.

    Parameters
    ----------
    sensor_dfs : dict
        Frames (or None) under the keys 'acce', 'gyro' and 'magn', each
        with a 'millisSinceGpsEpoch' column and its three Uncal* columns.

    Returns
    -------
    bool
    """
    XYZ = ['X', 'Y', 'Z']
    sensor_columns = (
        ('acce', [f'UncalAccel{axis}Mps2'     for axis in XYZ]),
        ('gyro', [f'UncalGyro{axis}RadPerSec' for axis in XYZ]),
        ('magn', [f'UncalMag{axis}MicroT'     for axis in XYZ]),
    )
    for key, _ in sensor_columns:
        if sensor_dfs[key] is None:
            return False
    for key, _ in sensor_columns:
        if sensor_dfs[key].shape[0] == 0:
            return False

    signals = {key: sensor_dfs[key][columns].values for key, columns in sensor_columns}
    # A signal that never changes indicates a dead sensor.  Each sensor is
    # tested on its own data.
    for key, _ in sensor_columns:
        if np.sum(np.abs(np.diff(signals[key], axis=0))) == 0:
            return False
    # The sample interval of each sensor must be regular.
    for key, _ in sensor_columns:
        dt = 1e-3 * np.diff(sensor_dfs[key]['millisSinceGpsEpoch'].values)
        if np.std(dt) > 1e-3:
            return False

    # Posture check: angle between the mean acceleration and the +Y axis.
    acce_mean     = np.mean(signals['acce'], axis=0)
    expected_acce = np.array([0, 9.8, 0])
    cos_angle_num = np.dot(acce_mean, expected_acce)
    cos_angle_den = np.linalg.norm(acce_mean) * np.linalg.norm(expected_acce)
    # Clip so that rounding cannot push the cosine outside arccos' domain.
    angle = np.arccos(np.clip(cos_angle_num / cos_angle_den, -1.0, 1.0))
    if angle > np.deg2rad(20):
        return False
    return True

def remove_different_posture(sensor_df):
    """Drop rows whose Y acceleration is far from its mean, plus their neighbours.

    A row is an outlier when 'UncalAccelYMps2' differs from the column mean
    by 5 or more.  Every row within 5 rows before or after an outlier is
    removed as well.  Returns a copy of the remaining rows.
    """
    ay = sensor_df['UncalAccelYMps2'] # gravitational acceleration
    near_mean = (np.abs(ay - np.mean(ay)) < 5).values
    keep = near_mean.copy()
    # Spread each outlier to the 5 rows on either side of it.  Rows shifted
    # in from beyond the ends count as valid.
    for shift in range(1, 6):
        filler = np.full(shift, True)
        keep &= np.concatenate([filler, near_mean[:-shift]], axis=0)
        keep &= np.concatenate([near_mean[shift:], filler], axis=0)
    return sensor_df[keep].copy()

In [None]:
%%writefile qpsolver.py
import numpy as np
import scipy.sparse
import scipy.sparse.linalg

def solve_qp(R, Q, q, A, B):
    """Solve an equality-constrained quadratic program.

    Problem::

        minimize   (1/2) u^T R u + (1/2) x^T Q x - q^T x
        subject to A x + B u = 0

    The input ``u`` is eliminated, which leaves a single sparse linear
    system in ``x`` and the multipliers of the equality constraint.

    Parameters
    ----------
    R, Q, A, B : scipy sparse matrices
        Problem data; ``R`` must be invertible.
    q : numpy.ndarray
        Linear term, one entry per column of ``A``.

    Returns
    -------
    numpy.ndarray
        The optimal ``x`` (the first ``A.shape[1]`` unknowns of the system).
    """
    n_eq, n_state = A.shape
    # B R^{-1} B^T: what remains of the input cost after eliminating u.
    input_block = B @ scipy.sparse.linalg.spsolve(R, B.T)
    kkt_matrix = scipy.sparse.bmat(
        [[Q, A.T],
         [A, -input_block]],
        format='csc',
    )
    kkt_rhs = np.concatenate([q, np.zeros(n_eq)], axis=0)
    kkt_solution = scipy.sparse.linalg.spsolve(kkt_matrix, kkt_rhs)
    return kkt_solution[0:n_state]

def make_newton_solver(Q, A, BRB, G, w):
    """Factorize the Newton system once and return a reusable solver.

    The returned ``solver(rx, ry, rz, rs)`` yields ``(dx, dy, dz, ds)`` with

        Q dx + A^T dy + G^T dz = rx
        A dx - BRB dy          = ry
        G dx + ds              = rz
        ds + w^2 * dz          = w * rs

    ``dz`` and ``ds`` are eliminated, so only one sparse LU factorization
    of the reduced (dx, dy) system is needed for every right-hand side.

    Parameters
    ----------
    Q, A, BRB, G : scipy sparse matrices
        Blocks of the system.
    w : numpy.ndarray
        Positive scaling vector, one entry per row of ``G``.
    """
    n_primal = A.shape[1]
    n_ineq = w.shape[0]
    inv_w_sq = w**(-2)
    inv_w_sq_diag = scipy.sparse.spdiags(inv_w_sq, [0], n_ineq, n_ineq)
    reduced_hessian = Q + (G.T @ inv_w_sq_diag @ G)
    reduced_kkt = scipy.sparse.bmat(
        [[reduced_hessian, A.T],
         [A, -BRB]],
        format='csc',
    )
    factorization = scipy.sparse.linalg.splu(reduced_kkt)

    def solver(rx, ry, rz, rs):
        # Fold the (dz, ds) equations into the right-hand side for dx.
        rz_reduced = rz - (w * rs)
        rx_reduced = rx + G.T @ (inv_w_sq * rz_reduced)
        dx_dy = factorization.solve(np.concatenate([rx_reduced, ry]))
        dx = dx_dy[0:n_primal]
        dy = dx_dy[n_primal:]
        # Back-substitute for the eliminated unknowns.
        dz = inv_w_sq * (G @ dx - rz_reduced)
        ds = w * (rs - w * dz)
        return dx, dy, dz, ds

    return solver

def calc_alpha(z, s, dz, ds):
    """Return the step length that keeps ``z`` and ``s`` strictly positive.

    Only components that decrease (negative direction entry) limit the step.
    For those, the largest step reaching zero is ``-x / dx``; 95 % of the
    smallest such value is used, capped at a full step of 1.0.

    Parameters
    ----------
    z, s : numpy.ndarray
        Current (positive) multipliers and slacks.
    dz, ds : numpy.ndarray
        Search directions for ``z`` and ``s``.

    Returns
    -------
    float
        Step length in (0, 1].
    """
    x  = np.concatenate([z, s], axis=0)
    dx = np.concatenate([dz, ds], axis=0)
    decreasing = (dx < 0)
    # No component moves toward zero (or there are no components at all):
    # the full step is safe.  Testing the mask *length* here would let an
    # empty selection reach np.min, which raises.
    if not np.any(decreasing):
        return 1.0
    return min(1.0, 0.95 * np.min(- x[decreasing] / dx[decreasing]))

def solve_qp_with_inequality(R, Q, q, A, B, G, h):
    """
    Solve a quadratic program with equality and inequality constraints
    using a primal-dual interior-point iteration.

    minimize (1/2) u^T R u + (1/2)x^T Q x - q^T x
    subject to Ax + Bu = 0
               Gx + s  = h, s >= 0

    The iteration starts from x = 0, y = 0, z = 1, s = 1 and performs at
    most LOOP_MAX (20) updates.  Each update solves the Newton system twice
    with the same factorization: once with rs = -lambda_ and once with a
    right-hand side built from the first step (this looks like a
    predictor-corrector scheme).

    Returns the primal solution x as a 1-D array of length A.shape[1].
    Raises RuntimeError('Not solved') when the convergence test was never
    satisfied.  Note that the test is evaluated at the top of each pass, so
    the iterate produced by the final update is never tested.
    """
    N_x = A.shape[1]
    N_y = A.shape[0]
    N_z = G.shape[0]
    N_s = N_z  # not used below

    # Starting point: zero primal/equality-dual, unit inequality-dual/slack.
    x = np.zeros((N_x, ))
    y = np.zeros((N_y, ))
    z = np.ones((N_z, ))
    s = np.ones((N_z, ))

    # Zero right-hand sides reused by the second Newton solve.
    X_ZERO = np.zeros_like(x)
    Y_ZERO = np.zeros_like(y)
    Z_ZERO = np.zeros_like(z)
    # B R^{-1} B^T, the block left after eliminating u.
    BRB = B @ scipy.sparse.linalg.spsolve(R, B.T)

    success  = False
    LOOP_MAX = 20
    for loop in range(LOOP_MAX):
        # Negated residuals of stationarity, the equality constraint and
        # the inequality constraint, plus the complementarity gap z^T s.
        rx  = - ( (Q @ x) - q + (A.T @ y) + (G.T @ z) )
        ry  = - ( (A @ x) - (BRB @ y) )
        rz  = - ( (G @ x) + s - h )
        gap = np.dot(z, s)
        # Converged when every residual RMS and the mean gap are below 1e-10.
        terminal_cond = ( (np.sqrt(np.mean(rx**2)) < 1e-10) and
                          (np.sqrt(np.mean(ry**2)) < 1e-10) and
                          (np.sqrt(np.mean(rz**2)) < 1e-10) and
                          (gap / N_z < 1e-10) )
        if terminal_cond:
            success = True
            break

        # Scaling w = sqrt(s / z) and scaled variable lambda_ = sqrt(s * z).
        sqrt_s = np.sqrt(s)
        sqrt_z = np.sqrt(z)
        w = sqrt_s / sqrt_z
        lambda_ = sqrt_s * sqrt_z
        newton_solver = make_newton_solver(Q, A, BRB, G, w)

        # First solve: full residuals with rs = -lambda_.
        dx1, dy1, dz1, ds1 = newton_solver(rx, ry, rz, rs=-lambda_)
        alpha = calc_alpha(z, s, dz1, ds1)
        # Target mean gap: the current one shrunk by the cube of the
        # reduction ratio the first step would achieve.
        mu_prev   = gap / N_z
        mu_hat    = np.dot(z + alpha*dz1, s + alpha*ds1) / N_z
        mu_target = mu_prev * (mu_hat / mu_prev)**3

        # Second solve: zero residuals, rs carries the gap target and the
        # product of the first-step directions.
        rs = (mu_target - (dz1 * ds1)) / lambda_
        dx2, dy2, dz2, ds2 = newton_solver(X_ZERO, Y_ZERO, Z_ZERO, rs)

        # Combine both directions and take a step that keeps z, s > 0.
        dx = dx1 + dx2
        dy = dy1 + dy2
        dz = dz1 + dz2
        ds = ds1 + ds2
        alpha = calc_alpha(z, s, dz, ds)
        x += alpha * dx
        y += alpha * dy
        z += alpha * dz
        s += alpha * ds

    if not success:
        raise RuntimeError('Not solved')
    X_star = x
    # print(np.max(G @ X_star))
    return X_star

In [None]:
%%writefile map_matching.py
import glob
import numpy as np
import pandas as pd

import transform

# Root directory of the competition dataset.
INPUT_PATH = '../input/google-smartphone-decimeter-challenge'

# Sorted names (last path component) of every entry found under train/.
COLLECTION_LIST_ALL = np.array(sorted(path.split('/')[-1] for path in glob.glob(f'{INPUT_PATH}/train/*')))
# The 24th, 27th and 29th entries (1-based) of the sorted list are used as
# the downtown collections.
# NOTE(review): these positions are hard-coded; indexing raises IndexError
# when fewer than 29 entries are found.
COLLECTION_LIST_DOWNTOWN = COLLECTION_LIST_ALL[np.array([24,27,29]) - 1]

def get_database_vectors():
    """Build the map database from downtown ground-truth tracks.

    Reads ``ground_truth.csv`` of every phone directory of every collection
    in ``COLLECTION_LIST_DOWNTOWN``, keeps the unique (latDeg, lngDeg)
    pairs and converts them to ECEF with the height set to zero.

    Returns
    -------
    tuple
        ``(gt_df, P, PP)``: the unique points as a DataFrame, their ECEF
        coordinates and the per-point squared norms of those coordinates.
    """
    gt_paths = []
    for collection in COLLECTION_LIST_DOWNTOWN:
        for phone_dir in glob.glob(f'{INPUT_PATH}/train/{collection}/*'):
            phone = phone_dir.split('/')[-1]
            gt_paths.append(f'{INPUT_PATH}/train/{collection}/{phone}/ground_truth.csv')

    points_df = pd.concat([pd.read_csv(gt_path) for gt_path in gt_paths], axis=0)
    points_df = points_df[['latDeg', 'lngDeg']].drop_duplicates(ignore_index=True)

    points_blh = transform.BLH(
        lat=np.deg2rad(points_df['latDeg']),
        lng=np.deg2rad(points_df['lngDeg']),
        hgt=np.zeros(points_df.shape[0]),
    )
    points_ecef = transform.BLH_to_ECEF(points_blh).to_numpy() # shape = (3, N_P)
    squared_norms = np.sum(points_ecef**2, axis=0) # shape = (N_P, )
    return points_df, points_ecef, squared_norms

# Build the map database once at import time; the module-level names are
# read by snap_to_nearest_neighbor() and distance_to_nearest_neighbor().
gt_df, P, PP = get_database_vectors()

def snap_to_nearest_neighbor(pred_df):
    """Replace every predicted position by its nearest map-database point.

    Positions are compared in ECEF with the height set to zero.

    Parameters
    ----------
    pred_df : pandas.DataFrame
        Predictions with ``latDeg`` and ``lngDeg`` columns (degrees).

    Returns
    -------
    pandas.DataFrame
        Copy of ``pred_df`` whose ``latDeg``/``lngDeg`` are taken from the
        nearest row of the module-level ``gt_df``.
    """
    pred_blh = transform.BLH(
        lat=np.deg2rad(pred_df['latDeg']),
        lng=np.deg2rad(pred_df['lngDeg']),
        hgt=np.zeros(pred_df.shape[0]),
    )
    Q = transform.BLH_to_ECEF(pred_blh).to_numpy() # shape=(3, N_Q)

    # Squared distance via |q|^2 - 2 p.q + |p|^2.
    QQ  = np.sum(Q**2, axis=0) # shape=(N_Q, )
    PQ  = P.T @ Q              # shape=(N_P, N_Q)
    d2  = (QQ - 2 * PQ).T + PP # shape=(N_Q, N_P)
    idx = np.argmin(d2, axis=1)

    pp_df = pred_df.copy()
    # Assign plain arrays: assigning a Series would align on index labels
    # and yield NaN / misplaced rows whenever pred_df does not carry a
    # default 0..N-1 index.
    pp_df['latDeg'] = gt_df['latDeg'].values[idx]
    pp_df['lngDeg'] = gt_df['lngDeg'].values[idx]
    return pp_df

def distance_to_nearest_neighbor(pred_df):
    """Return the distance from each prediction to its nearest map point.

    Positions are compared in ECEF with the height set to zero, against the
    module-level database ``P`` / ``PP``.

    Parameters
    ----------
    pred_df : pandas.DataFrame
        Predictions with ``latDeg`` and ``lngDeg`` columns (degrees).

    Returns
    -------
    numpy.ndarray
        One non-negative distance per row of ``pred_df``.
    """
    query_blh = transform.BLH(
        lat=np.deg2rad(pred_df['latDeg']),
        lng=np.deg2rad(pred_df['lngDeg']),
        hgt=np.zeros(pred_df.shape[0]),
    )
    query = transform.BLH_to_ECEF(query_blh).to_numpy() # shape=(3, N_Q)
    n_query = query.shape[1]

    # Squared distance via |q|^2 - 2 p.q + |p|^2, shape=(N_Q, N_P).
    query_sq = np.sum(query**2, axis=0)
    cross = P.T @ query
    sq_dist = (query_sq - 2 * cross).T + PP
    nearest = np.argmin(sq_dist, axis=1)

    # Round-off can push the smallest squared distance slightly below zero,
    # hence the clamp before the square root.
    nearest_sq = sq_dist[np.arange(n_query), nearest]
    d = np.zeros(n_query)
    d[:] = np.sqrt(np.maximum(0, nearest_sq))
    return d

In [None]:
%%writefile area_prediction.py
import numpy as np
import pandas as pd
from pathlib import Path
from glob import glob
from sklearn.neighbors import KNeighborsClassifier

# Root directory of the competition dataset.
BASE_DIR = Path('../input/google-smartphone-decimeter-challenge')

# Baseline positions of the training set, ordered by collection, phone, time.
train_base = pd.read_csv(BASE_DIR / 'baseline_locations_train.csv')
train_base = train_base.sort_values([
    "collectionName", "phoneName", "millisSinceGpsEpoch"
]).reset_index(drop=True)

# 'area' is the fifth '-'-separated token of the collection name.
train_base['area'] = train_base['collectionName'].map(lambda x: x.split('-')[4])

# Sorted names of every entry under train/, split into three groups by
# hard-coded 1-based positions in that sorted list.
# NOTE(review): the positions assume the listing has at least 29 entries.
train_name = np.array(sorted(path.split('/')[-1] for path in glob(f'{BASE_DIR}/train/*')))
train_highway  = train_name[np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21]) - 1]
train_tree     = train_name[np.array([22,23,25,26,28]) - 1]
train_downtown = train_name[np.array([24,27,29]) - 1]

# Class label per row: 0 = highway, 1 = tree, 2 = downtown, -1 = unassigned.
train_base['area_target'] = -1
train_base.loc[train_base['collectionName'].isin(train_highway),  'area_target'] = 0
train_base.loc[train_base['collectionName'].isin(train_tree),     'area_target'] = 1
train_base.loc[train_base['collectionName'].isin(train_downtown), 'area_target'] = 2

def processing_downtown(input_df: pd.DataFrame, is_train=False):
    """Build per-collection features for the downtown classifier.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Rows with ``collectionName``, ``latDeg``, ``lngDeg``, ``area`` and
        ``phoneName`` (plus ``area_target`` when ``is_train`` is true).
    is_train : bool
        When true, the first ``area_target`` of each collection is included.

    Returns
    -------
    pandas.DataFrame
        One row per collection: standard deviation of ``latDeg`` and
        ``lngDeg``, optionally ``area_target``, then the first ``area`` and
        the list of unique ``phoneName`` values.
    """
    by_collection = input_df.groupby('collectionName')

    # Right-hand tables to attach, in output column order.
    extras = []
    if is_train:
        extras.append(by_collection[['area_target']].first())
    extras.append(by_collection['area'].first())
    extras.append(by_collection['phoneName'].unique().apply(list))

    output_df = by_collection[['latDeg', 'lngDeg']].std()
    for extra in extras:
        output_df = output_df.merge(extra, on='collectionName')
    return output_df

# Per-collection features for the "downtown vs. everything else" classifier.
train = processing_downtown(train_base, is_train=True)
# Binary target: 1 for collections labelled downtown (area_target == 2).
train['downtown_target'] = (train['area_target']==2).astype(int)

# 1-nearest-neighbour classifier fitted at import time on the two
# per-collection features.
downtown_model_knn = KNeighborsClassifier(n_neighbors=1)
downtown_model_knn.fit(
    train[['latDeg', 'lngDeg']],
    train['downtown_target'],
)

def processing_highway_tree(input_df: pd.DataFrame, is_train=False):
    """Build per-collection features for the highway/tree classifier.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Rows with ``collectionName``, ``latDeg``, ``lngDeg``, ``area`` and
        ``phoneName`` (plus ``area_target`` when ``is_train`` is true).
    is_train : bool
        When true, the first ``area_target`` of each collection is included.

    Returns
    -------
    pandas.DataFrame
        One row per collection: minimum of ``latDeg`` and ``lngDeg``,
        optionally ``area_target``, then the first ``area`` and the list of
        unique ``phoneName`` values.
    """
    by_collection = input_df.groupby('collectionName')

    # Right-hand tables to attach, in output column order.
    extras = []
    if is_train:
        extras.append(by_collection[['area_target']].first())
    extras.append(by_collection['area'].first())
    extras.append(by_collection['phoneName'].unique().apply(list))

    output_df = by_collection[['latDeg', 'lngDeg']].min()
    for extra in extras:
        output_df = output_df.merge(extra, on='collectionName')
    return output_df

# Per-collection features for the highway/tree classifier.  This rebinds
# the module-level name `train` used for the downtown classifier above.
train = processing_highway_tree(train_base, is_train=True)

# 1-nearest-neighbour classifier fitted at import time on every collection
# that is not labelled downtown (area_target != 2).
highway_tree_model_knn = KNeighborsClassifier(n_neighbors=1)
highway_tree_model_knn.fit(
    train.loc[train['area_target']!=2, ['latDeg', 'lngDeg']],
    train.loc[train['area_target']!=2, 'area_target'],
)

def predict_area(test_base):
    """Classify every collection of ``test_base`` as highway, tree or downtown.

    The downtown classifier is applied first; collections it does not claim
    are passed to the highway/tree classifier.

    Parameters
    ----------
    test_base : pandas.DataFrame
        Baseline rows with ``collectionName``, ``phoneName``,
        ``millisSinceGpsEpoch``, ``latDeg`` and ``lngDeg``.  Not modified.

    Returns
    -------
    tuple of list
        ``(test_highway, test_tree, test_downtown)``, each a list of
        collection names.
    """
    test_base = test_base.copy()
    test_base = test_base.sort_values([
        "collectionName", "phoneName", "millisSinceGpsEpoch"
    ]).reset_index(drop=True)
    test_base['area'] = test_base['collectionName'].map(lambda x: x.split('-')[4])

    test = processing_downtown(test_base)
    downtown_pred = downtown_model_knn.predict(test[['latDeg', 'lngDeg']])

    test = processing_highway_tree(test_base)
    # NaN marks collections that have not been assigned a class yet.
    test['area_pred'] = np.nan
    test.loc[downtown_pred==1, 'area_pred'] = 2
    undecided = test['area_pred'].isnull()
    # The classifier rejects an input with zero rows, so only call it when
    # at least one collection was not classified as downtown.
    if undecided.any():
        test.loc[undecided, 'area_pred'] = highway_tree_model_knn.predict(
            test.loc[undecided, ['latDeg', 'lngDeg']])
    test['area_pred'] = test['area_pred'].astype(int)

    test_highway  = []
    test_tree     = []
    test_downtown = []
    # The index of `test` holds the collection names.
    for collection, area_pred in zip(test.index, test['area_pred']):
        if area_pred == 0:
            test_highway.append(collection)
        elif area_pred == 1:
            test_tree.append(collection)
        else:
            test_downtown.append(collection)
    return (test_highway, test_tree, test_downtown)

## main

In [None]:
import numpy as np
import pandas as pd
import scipy.sparse
import scipy.sparse.linalg
import multiprocessing
import glob
from tqdm.notebook import tqdm

import io_f
import signal_f
import design_filter
import transform
import qpsolver
import map_matching
import area_prediction

# Root directory of the competition dataset.
INPUT_PATH    = '../input/google-smartphone-decimeter-challenge'
# Directory holding the precomputed Doppler velocity tables.
VELOCITY_PATH = '../input/gsdc-vehicle-speed-estimation-result/_doppler_velocity'
# Spacing of the state knots in the optimization, in the unit of the 'Time'
# columns (seconds); also used as the down-sampled sensor period.
DT_X = 1.0

# Doppler velocities of the train and test sets stacked into one table;
# get_velocity() filters it by collection name.
VELOCITY_DF = pd.concat([pd.read_csv(f'{VELOCITY_PATH}/doppler_velocity_train.csv'),
                         pd.read_csv(f'{VELOCITY_PATH}/doppler_velocity_test.csv'),
                         ], axis=0)

def _hermite_interpolation_weights(time, dt):
    """Return knot indices and cubic Hermite weights for the given times.

    ``x_index`` is the knot at or before each time and ``alpha`` the
    fractional position towards the next knot.  ``coeff_y0``/``coeff_y1``
    weight the values at the two knots, ``coeff_v0``/``coeff_v1`` weight
    their first derivatives.
    """
    x_index  = np.floor(time / dt).astype(int)
    alpha    = (time / dt) - x_index
    coeff_y0 = 1 - 3*alpha**2 + 2*alpha**3
    coeff_y1 =     3*alpha**2 - 2*alpha**3
    coeff_v0 = dt * alpha * (alpha - 1)**2
    coeff_v1 = dt * alpha**2 * (alpha - 1)
    return x_index, coeff_y0, coeff_y1, coeff_v0, coeff_v1

def get_optimization_constants(base_df, velocity_df, sensor_df, params, use_sensor):
    """Assemble the matrices of the trajectory-smoothing quadratic program.

    The unknown is a sequence of ``N_x`` knots spaced ``DT_X`` apart.  Each
    knot holds six numbers: latitude with its first and second time
    derivative, followed by the same three for longitude.

    Parameters
    ----------
    base_df : pandas.DataFrame
        Position observations with ``Time``, ``latDeg`` and ``lngDeg``.
    velocity_df : pandas.DataFrame
        Velocity observations with ``Time``, ``v_east`` and ``v_north``.
    sensor_df : pandas.DataFrame or None
        Sensor-derived signals with ``Time`` and, when ``use_sensor`` is
        true, ``cos_th``, ``sin_th``, ``dotV`` and ``omega``.
    params : dict
        Reads ``sigma_u``, ``sigma_p``, ``sigma_d``; with sensors also
        ``use_not_go_back_constraint`` and, when ``use_sensor`` is true,
        ``vmin``, ``sigma_v`` and ``sigma_a``.
    use_sensor : bool
        Whether the sensor cost/constraint matrices are built.

    Returns
    -------
    dict
        Always: ``N_y``, ``N_d``, ``N_x``, ``A``, ``B``, ``R``, ``Cp_orig``,
        ``Lp_orig``, ``Yp_orig``, ``J``, ``Cd_orig``, ``Ld_orig``,
        ``Yd_orig``, ``use_sensor``, ``use_inquality``.  With a sensor
        frame: ``N_s`` and ``x_index_sensor``.  With ``use_sensor`` true
        additionally ``Cv``, ``Gv``, ``hv``, ``Lv``, ``Ca``, ``Ya``, ``La``.
    """
    const = dict()
    dt    = DT_X
    TIME_y = base_df['Time'].values
    TIME_d = velocity_df['Time'].values
    N_y = TIME_y.shape[0]
    N_d = TIME_d.shape[0]
    N_x = int(np.ceil(np.max(TIME_y) / dt) + 1)
    const['N_y'] = N_y
    const['N_d'] = N_d
    const['N_x'] = N_x

    # Dynamics constraint A x + B u = 0: each knot is propagated to the
    # next one by a constant-input transition over one interval.
    a = np.array([[1, dt, (1/2)*dt**2],
                  [0,  1,  dt],
                  [0,  0,  1]])
    e3 = scipy.sparse.eye(3)
    # Block grids use the builtin `object` dtype (the `np.object` alias no
    # longer exists in current NumPy).  Unset cells stay None, which
    # scipy.sparse.bmat treats as zero blocks.
    A = np.empty(shape=(2*(N_x-1), 2*N_x), dtype=object)
    for i_x in range(N_x-1):
        A[2*i_x  , 2*i_x  ] = a
        A[2*i_x+1, 2*i_x+1] = a
        A[2*i_x  , 2*i_x+2] = -e3
        A[2*i_x+1, 2*i_x+3] = -e3
    const['A'] = scipy.sparse.bmat(A, format='csr')

    b = np.array([[(1/6)*dt**3,
                   (1/2)*dt**2,
                   dt]]).T
    const['B'] = scipy.sparse.block_diag([b for _ in range(2*(N_x-1))], format='csr')

    # Weight of the input cost.
    diag_R  = np.full(2*N_x - 2, params['sigma_u']**(-2) * dt)
    const['R'] = scipy.sparse.spdiags(diag_R, [0], 2*N_x - 2, 2*N_x - 2, format='csc')

    # Position observations: interpolate lat/lng between the two
    # neighbouring knots at each observation time.
    x_index, coeff_y0, coeff_y1, coeff_v0, coeff_v1 = _hermite_interpolation_weights(TIME_y, dt)
    C = np.empty(shape=(2*N_y, 2*N_x), dtype=object)
    # Seed the first block row with empty blocks so every block column has
    # a known width even if no observation touches it.
    for i_x in range(N_x):
        C[0, 2*i_x  ] = scipy.sparse.coo_matrix((1, 3))
        C[0, 2*i_x+1] = scipy.sparse.coo_matrix((1, 3))
    for i_y in range(N_y):
        i_x = x_index[i_y]
        c_i = np.array([[coeff_y0[i_y], coeff_v0[i_y], 0]])
        C[2*i_y,   2*i_x]   = c_i
        C[2*i_y+1, 2*i_x+1] = c_i
        if i_x < N_x - 1:
            c_iplus = np.array([[coeff_y1[i_y], coeff_v1[i_y], 0]])
            C[2*i_y,   2*i_x+2] = c_iplus
            C[2*i_y+1, 2*i_x+3] = c_iplus
    const['Cp_orig']  = scipy.sparse.bmat(C, format='csr')

    diag_Lp = np.full(2*N_y, params['sigma_p']**(-2))
    const['Lp_orig'] = scipy.sparse.spdiags(diag_Lp, [0], 2*N_y, 2*N_y, format='csr')
    const['Yp_orig'] = base_df[['latDeg', 'lngDeg']].values.flatten()

    # Mean Jacobian converting (lat, lng) rates in degrees to east/north
    # rates; the diagonal entries are forced to zero.
    BLH = transform.BLH(
        lat=np.deg2rad(base_df['latDeg'].values),
        lng=np.deg2rad(base_df['lngDeg'].values),
        hgt=np.zeros(N_y),
    )
    DEG2RAD = np.pi / 180.0
    J = transform.jacobian_BL_to_EN(BLH) * DEG2RAD
    J = np.mean(J, axis=0)
    J[0, 0] = 0
    J[1, 1] = 0
    JJ = scipy.sparse.block_diag([J, J], format='csr')
    const['J'] = J

    # Parameters for the Doppler velocity observations: the same
    # interpolation applied to the first-derivative entries of the knots.
    x_index, coeff_y0, coeff_y1, coeff_v0, coeff_v1 = _hermite_interpolation_weights(TIME_d, dt)
    C = np.empty(shape=(N_d, N_x), dtype=object)
    for i_x in range(N_x):
        C[0, i_x] = scipy.sparse.coo_matrix((2, 6))
    for i_d in range(N_d):
        i_x = x_index[i_d]
        c = np.array([[0, coeff_y0[i_d], coeff_v0[i_d], 0, 0, 0],
                      [0, 0, 0, 0, coeff_y0[i_d], coeff_v0[i_d]]])
        C[i_d, i_x] = J @ c
        if i_x < N_x - 1:
            c = np.array([[0, coeff_y1[i_d], coeff_v1[i_d], 0, 0, 0],
                          [0, 0, 0, 0, coeff_y1[i_d], coeff_v1[i_d]]])
            C[i_d, i_x+1] = J @ c
    const['Cd_orig']  = scipy.sparse.bmat(C, format='csr')

    diag_Ld = np.full(2*N_d, params['sigma_d']**(-2))
    const['Ld_orig'] = scipy.sparse.spdiags(diag_Ld, [0], 2*N_d, 2*N_d, format='csr')
    const['Yd_orig'] = velocity_df[['v_east', 'v_north']].values.flatten()

    if sensor_df is None:
        const['use_sensor'] = False
        const['use_inquality'] = False
        return const

    TIME_s = sensor_df['Time'].values
    N_s = TIME_s.shape[0]
    const['N_s'] = N_s
    const['use_sensor'] = use_sensor
    const['use_inquality'] = (use_sensor and params['use_not_go_back_constraint'])
    # Sensor samples are attached to the nearest knot (no interpolation).
    x_index = np.round(TIME_s / dt).astype(int)
    const['x_index_sensor'] = x_index
    if not use_sensor:
        return const

    # Parameters for the velocity constraint and the velocity cost.
    COS_TH = sensor_df['cos_th'].values
    SIN_TH = sensor_df['sin_th'].values
    CV = np.empty(shape=(N_s, N_x), dtype=object)
    GV = np.empty(shape=(N_s, N_x), dtype=object)
    # Picks the lat and lng first-derivative entries of one knot.
    cv = np.array([[0, 1, 0, 0, 0, 0],
                   [0, 0, 0, 0, 1, 0]], dtype=np.float64)
    for i_x in range(N_x):
        CV[0, i_x] = scipy.sparse.coo_matrix((1, 6))
        GV[0, i_x] = scipy.sparse.coo_matrix((1, 6))
    for i_s in range(N_s):
        i_x = x_index[i_s]
        # Velocity component along (sin_th, -cos_th): penalized by the cost.
        k = np.array([[SIN_TH[i_s], -COS_TH[i_s]]])
        CV[i_s, i_x] = k @ J @ cv
        # Negated velocity component along (cos_th, sin_th): bounded above
        # by hv = -vmin in the inequality Gv x <= hv.
        k = np.array([[-COS_TH[i_s], -SIN_TH[i_s]]])
        GV[i_s, i_x] = k @ J @ cv
    const['Cv'] = scipy.sparse.bmat(CV, format='csr')
    const['Gv'] = scipy.sparse.bmat(GV, format='csr')
    const['hv'] = np.full((N_s, ), -params['vmin'])

    diag_Lv = np.full(N_s, params['sigma_v']**(-2))
    const['Lv'] = scipy.sparse.spdiags(diag_Lv, [0], N_s, N_s, format='csr')

    # Parameters for the acceleration cost.
    DOT_V_COS_TH = sensor_df['dotV'] * sensor_df['cos_th'].values
    DOT_V_SIN_TH = sensor_df['dotV'] * sensor_df['sin_th'].values
    OMEGA = sensor_df['omega'].values
    CA = np.empty(shape=(N_s, N_x), dtype=object)
    # Picks (lat rate, lng rate, lat acceleration, lng acceleration).
    ca = np.array([[0, 1, 0, 0, 0, 0],
                   [0, 0, 0, 0, 1, 0],
                   [0, 0, 1, 0, 0, 0],
                   [0, 0, 0, 0, 0, 1]], dtype=np.float64)
    for i_x in range(N_x):
        CA[0, i_x] = scipy.sparse.coo_matrix((2, 6))
    for i_s in range(N_s):
        i_x = x_index[i_s]
        # Combines the acceleration with an omega-weighted velocity term.
        k = np.array([[0,  OMEGA[i_s], 1, 0],
                      [-OMEGA[i_s], 0, 0, 1]])
        CA[i_s, i_x] = k @ JJ @ ca
    const['Ca'] = scipy.sparse.bmat(CA, format='csr')
    const['Ya'] = np.stack([DOT_V_COS_TH, DOT_V_SIN_TH], axis=1).flatten()

    diag_La = np.full(2*N_s, params['sigma_a']**(-2))
    const['La'] = scipy.sparse.spdiags(diag_La, [0], 2*N_s, 2*N_s, format='csr')

    return const

def solve_QP(const, p_valid, d_valid):
    """Solve the smoothing problem using only the accepted observations.

    Parameters
    ----------
    const : dict
        Matrices produced by ``get_optimization_constants``.
    p_valid : array of bool
        One flag per position observation; False rows are left out.
    d_valid : array of bool
        One flag per velocity observation; False rows are left out.

    Returns
    -------
    numpy.ndarray
        The optimal state vector returned by the ``qpsolver`` module.
    """
    def weighted_normal_terms(C_full, L_full, Y_full, valid):
        # Every observation owns two consecutive rows, so repeat each flag.
        rows = np.stack([valid, valid], axis=1).flatten()
        C = C_full[rows, :]
        L = L_full[np.ix_(rows, rows)]
        Y = Y_full[rows]
        return C.T @ (L @ C), C.T @ (L @ Y)

    quad_p, lin_p = weighted_normal_terms(
        const['Cp_orig'], const['Lp_orig'], const['Yp_orig'], p_valid)
    quad_d, lin_d = weighted_normal_terms(
        const['Cd_orig'], const['Ld_orig'], const['Yd_orig'], d_valid)

    Q = quad_p + quad_d
    q = lin_p + lin_d
    if const['use_sensor']:
        # The velocity cost has no linear term; the acceleration cost has.
        Cv, Lv = const['Cv'], const['Lv']
        Ca, La, Ya = const['Ca'], const['La'], const['Ya']
        Q = Q + Cv.T @ (Lv @ Cv) + Ca.T @ (La @ Ca)
        q = q + Ca.T @ (La @ Ya)

    A, B, R = const['A'], const['B'], const['R']
    if const['use_inquality']:
        return qpsolver.solve_qp_with_inequality(
            R=R, Q=Q, q=q, A=A, B=B, G=const['Gv'], h=const['hv'])
    return qpsolver.solve_qp(R=R, Q=Q, q=q, A=A, B=B)

def get_baseline(collection_name):
    """Return the baseline rows of one collection as an independent frame.

    The rows are copied out of ``BASELINE_DF`` and re-indexed from 0.
    """
    belongs = (BASELINE_DF['collectionName'] == collection_name)
    return BASELINE_DF[belongs].copy().reset_index(drop=True)

def get_velocity(collection_name):
    """Return the Doppler velocity rows of one collection as an independent frame.

    The rows are copied out of ``VELOCITY_DF`` and re-indexed from 0.
    """
    belongs = (VELOCITY_DF['collectionName'] == collection_name)
    return VELOCITY_DF[belongs].copy().reset_index(drop=True)

def apply_costmin(base_df, velocity_df, sensor_df, params, N_LOOP):
    """Smooth a track by repeated cost minimization with outlier rejection.

    Each pass solves the quadratic program with the currently accepted
    observations, then re-evaluates which position and velocity
    observations agree with the solution (within ``reject_p`` and
    ``reject_d``) for the next pass.

    Parameters
    ----------
    base_df : pandas.DataFrame
        Position observations; also the template of the output.
    velocity_df : pandas.DataFrame
        Velocity observations with ``v_east`` and ``v_north``.
    sensor_df : pandas.DataFrame or None
        Sensor-derived signals, or None when unavailable.
    params : dict
        Tuning parameters (``use_map``, ``vmax``, ``reject_p``,
        ``reject_d``, ... ).
    N_LOOP : int
        Number of solve/reject passes (callers pass at least 1).

    Returns
    -------
    pandas.DataFrame
        Copy of ``base_df`` with ``latDeg``/``lngDeg`` from the last pass.
    """
    const = get_optimization_constants(base_df, velocity_df, sensor_df, params, use_sensor=True)

    # Positions far from the map database are never used when the map is on.
    if not params['use_map']:
        default_p_valid = np.full(const['N_y'], True)
    else:
        map_distance = map_matching.distance_to_nearest_neighbor(base_df)
        default_p_valid = (map_distance < params['threshold_distance_to_nearest_neighbor'])
    p_valid = default_p_valid

    # Velocities with an implausible magnitude are never used.
    observed_velocity = velocity_df[['v_east', 'v_north']].values
    observed_speed = np.sqrt(np.sum(observed_velocity**2, axis=1))
    default_d_valid = (observed_speed < params['vmax'])
    d_valid = default_d_valid

    for _ in range(N_LOOP):
        X_star = solve_QP(const, p_valid, d_valid)

        # Smoothed positions at the observation times.
        smoothed = np.reshape(const['Cp_orig'] @ X_star, (-1, 2))
        pp_df = base_df.copy()
        pp_df['latDeg'] = smoothed[:, 0]
        pp_df['lngDeg'] = smoothed[:, 1]
        position_error = transform.pd_haversine_distance(pp_df, base_df)
        p_valid = default_p_valid & (position_error < params['reject_p'])

        # Smoothed velocities at the velocity observation times.
        smoothed_velocity = np.reshape(const['Cd_orig'] @ X_star, (-1, 2))
        velocity_error = smoothed_velocity - observed_velocity
        velocity_error = np.sqrt(np.sum(velocity_error**2, axis=1))
        d_valid = default_d_valid & (velocity_error < params['reject_d'])

    return pp_df

def recalibrate_sensor_by_vehicle_motion(base_df, velocity_df, sensor_df, params):
    """Align the sensor heading with the direction of travel.

    A sensor-free smoothing pass yields the vehicle velocity at the sensor
    sample times.  For the samples where the speed exceeds 20 km/h, the
    mean angular offset between ``theta`` and the direction of motion is
    estimated and subtracted from ``theta``; ``cos_th`` and ``sin_th`` are
    then recomputed.

    Parameters
    ----------
    base_df, velocity_df : pandas.DataFrame
        Position and velocity observations.
    sensor_df : pandas.DataFrame
        Sensor signals with a ``theta`` column.  Modified in place.
    params : dict
        Tuning parameters (``use_map``, ``vmax``, ... ).

    Returns
    -------
    pandas.DataFrame
        The same ``sensor_df`` object with ``theta``, ``cos_th`` and
        ``sin_th`` updated.
    """
    const = get_optimization_constants(base_df, velocity_df, sensor_df, params, use_sensor=False)

    if params['use_map']:
        distance = map_matching.distance_to_nearest_neighbor(base_df)
        p_valid  = (distance < params['threshold_distance_to_nearest_neighbor'])
    else:
        p_valid = np.full(const['N_y'], True)

    V = np.sqrt(np.sum(velocity_df[['v_east', 'v_north']].values**2, axis=1))
    d_valid = (V < params['vmax'])

    X_star = solve_QP(const, p_valid, d_valid)
    # One row per knot; columns 1 and 4 are the lat and lng rates.
    X_mat  = np.reshape(X_star, (-1, 6))
    dotB   = X_mat[const['x_index_sensor'], 1]
    dotL   = X_mat[const['x_index_sensor'], 4]
    dotXY  = const['J'] @ np.stack([dotB, dotL], axis=0) # shape = (2, N)
    dotX   = dotXY[0, :]
    dotY   = dotXY[1, :]
    V = np.sqrt(np.sum(dotXY**2, axis=0))
    # The direction of motion is only meaningful at sufficient speed.
    cond = (V > (20 / 3.6))
    if np.any(cond):
        trig_moving_direction = signal_f.Trig.from_data(dotX[cond], dotY[cond])
        trig_theta   = signal_f.Trig.from_rad(sensor_df['theta'].values[cond])
        trig_offset  = trig_theta - trig_moving_direction
        angle_offset = np.arctan2(np.mean(trig_offset.sin), np.mean(trig_offset.cos))
    else:
        # No sample is fast enough: averaging empty arrays would give a NaN
        # offset and turn the whole heading series into NaN, so the heading
        # is left uncorrected instead.
        angle_offset = 0.0
    sensor_df['theta']  = sensor_df['theta'] - angle_offset
    sensor_df['cos_th'] = np.cos(sensor_df['theta'].values)
    sensor_df['sin_th'] = np.sin(sensor_df['theta'].values)

    return sensor_df

def do_postprocess(args):
    """Post-process one collection and return its smoothed positions.

    Steps: load baseline positions and Doppler velocities, pick the phone
    with the longest usable sensor recording (if any), recalibrate its
    heading, run the cost minimization and, when ``use_map`` is set, one or
    more snap-to-map + re-smoothing rounds.

    Parameters
    ----------
    args : tuple
        ``(train_or_test, collection, params)``; packed into one argument so
        the function can be mapped over a process pool.

    Returns
    -------
    pandas.DataFrame
        Baseline rows of the collection with smoothed ``latDeg``/``lngDeg``.
    """
    train_or_test, collection, params = args

    base_df = get_baseline(collection)
    # All 'Time' columns are seconds since the first baseline epoch.
    t_ref   = base_df['millisSinceGpsEpoch'].min()
    base_df['Time'] = 1e-3 * (base_df['millisSinceGpsEpoch'] - t_ref).values
    t_begin = base_df['Time'].min()
    t_end   = base_df['Time'].max()

    velocity_df = get_velocity(collection)
    # Velocities of the 'Mi8' phone are shifted by a per-area time offset.
    velocity_df['Time'] = (1e-3 * (velocity_df['millisSinceGpsEpoch'] - t_ref).values
                           -  params['Mi8_velocity_timeshift'] * (velocity_df['phoneName'] == 'Mi8').astype(float)
                           )
    velocity_df = velocity_df[(  velocity_df['Time'] >= t_begin)
                              & (velocity_df['Time'] <= t_end)]
    velocity_df = velocity_df.reset_index(drop=True)

    phone_list = [path.split('/')[-1] for path in sorted(glob.glob(f'{INPUT_PATH}/{train_or_test}/{collection}/*'))]
    sensor_df_list   = []
    dt_up   = 2.5 * 1e-3
    dt_down = DT_X
    FLT = design_filter.make_sinc_filter(F_cutoff=2.0, dt=dt_up)
    for phone in phone_list:
        gnss_log_filename = f'{INPUT_PATH}/{train_or_test}/{collection}/{phone}/{phone}_GnssLog.txt'
        sensor_df_orig = io_f.read_GnssLog_sensors(gnss_log_filename)
        if not signal_f.check_sensor_availability(sensor_df_orig):
            continue
        sensor_df = signal_f.preprocess_sensor_data(sensor_df_orig, t_ref, dt_up, dt_down, FLT)
        sensor_df = signal_f.remove_different_posture(sensor_df)
        sensor_df = sensor_df[(  sensor_df['Time'] >= t_begin)
                              & (sensor_df['Time'] <= t_end)].copy()
        sensor_df.reset_index(drop=True, inplace=True)
        # An empty frame has a NaN time span, and np.argmax below would
        # select NaN over every real span, so empty frames are skipped.
        if sensor_df.shape[0] == 0:
            continue
        sensor_df_list.append(sensor_df)
    if len(sensor_df_list) > 0:
        # Use the phone whose sensor recording covers the longest time span.
        time_list = [df['Time'].max() - df['Time'].min() for df in sensor_df_list]
        idx = np.argmax(time_list)
        sensor_df = sensor_df_list[idx]
        sensor_df = signal_f.add_calibrated_signals(sensor_df, dt_down)
        sensor_df = recalibrate_sensor_by_vehicle_motion(base_df, velocity_df, sensor_df, params)
    else:
        sensor_df = None

    pp_df = base_df
    pp_df = apply_costmin(pp_df, velocity_df, sensor_df, params, N_LOOP=3)
    if params['use_map']:
        # Stage 2: snap to the map, then smooth again with a different
        # position sigma.
        params_stage2 = dict(params)
        params_stage2['sigma_p'] = params['sigma_p_stage2']
        for _ in range(params['num_stage2_iterations']):
            pp_df = map_matching.snap_to_nearest_neighbor(pp_df)
            pp_df = apply_costmin(pp_df, velocity_df, sensor_df, params_stage2, N_LOOP=1)
    return pp_df

def make_postprocessing_df(train_or_test, config):
    """Post-process every configured collection in parallel.

    Parameters
    ----------
    train_or_test : str
        Dataset split, forwarded to ``do_postprocess``.
    config : list of tuple
        ``(collection_list, params)`` pairs; every collection of a pair is
        processed with that pair's parameters.

    Returns
    -------
    pandas.DataFrame
        All results concatenated and sorted by ``phone`` and
        ``millisSinceGpsEpoch``.
    """
    jobs = [
        (train_or_test, collection_name, params)
        for collection_list, params in config
        for collection_name in collection_list
    ]
    n_workers = multiprocessing.cpu_count()
    with multiprocessing.Pool(processes=n_workers) as pool:
        # Results arrive in completion order; the final sort restores a
        # deterministic row order.
        results = pool.imap_unordered(do_postprocess, jobs)
        results = list(tqdm(results, total=len(jobs)))
    combined = pd.concat(results, axis=0)
    return combined.sort_values(['phone', 'millisSinceGpsEpoch'])

def print_score(output_df):
    """Print the training score of ``output_df``.

    For every ``phone`` group the haversine distance to the ground truth is
    computed; the group's score is the mean of the 50th and 95th percentile
    of that distance.  The printed value is the mean over all groups.
    """
    def score_of(gid, phone_df):
        # The group id has the form '<drive>_<phone>'.
        drive, phone = gid.split('_')
        truth_df = pd.read_csv(f'{INPUT_PATH}/train/{drive}/{phone}/ground_truth.csv')
        error = transform.pd_haversine_distance(phone_df, truth_df)
        return np.mean([np.quantile(error, 0.50), np.quantile(error, 0.95)])

    score_list = [score_of(gid, phone_df) for gid, phone_df in output_df.groupby('phone')]
    score = np.mean(score_list)
    print(f'train score: {score:.3f}')
    return

def main():
    """Run the pipeline on the train and test sets and write the outputs.

    Training collections are assigned to highway / treeway / downtown by
    fixed positions in the sorted directory listing; test collections are
    assigned by ``area_prediction.predict_area``.  Writes
    ``smoothing_2nd_train.csv``, ``smoothing_2nd_test.csv`` and
    ``submission.csv``.
    """
    # Settings shared by all three areas.
    params_common = {
        'sigma_u' : 1.0,
        'vmin'    : -0.05, # [m/s]
        'vmax'    : 50.0,  # [m/s]
        'use_not_go_back_constraint' : False,
    }
    params_highway = {
        **params_common,
        'sigma_p'  : 3.0,
        'sigma_a'  : 2.0 * 1e+5,
        'sigma_v'  : 4.0 * 1e+5,
        'sigma_d'  : 0.16 * 1e+5,
        'reject_p' : 7.0,   # [m]
        'reject_d' : 1.0,   # [m/s]
        'Mi8_velocity_timeshift' : 0.46,
        'use_map'  : False,
    }
    params_treeway = {
        **params_common,
        'sigma_p'  : 6.0,
        'sigma_a'  : 0.8 * 1e+5,
        'sigma_v'  : 3.0 * 1e+5,
        'sigma_d'  : 0.12 * 1e+5,
        'reject_p' : 12.0,  # [m]
        'reject_d' : 1.0,   # [m/s]
        'Mi8_velocity_timeshift' : 0.30,
        'use_map'  : False,
    }
    params_downtown = {
        **params_common,
        'sigma_p'  : 20.0,
        'sigma_a'  : 0.4 * 1e+5,
        'sigma_v'  : 1.0 * 1e+5,
        'sigma_d'  : 1.3 * 1e+5,
        'reject_p' : 20.0,  # [m]
        'reject_d' : 3.0,   # [m/s]
        'Mi8_velocity_timeshift' : 0.0,
        'use_map'  : True,
        'threshold_distance_to_nearest_neighbor' : 8.0,
        'sigma_p_stage2' : 3.0,
        'num_stage2_iterations' : 1,
    }

    def build_config(highway, treeway, downtown):
        # Pair every group of collections with its parameter set.
        return [
            (highway,  params_highway),
            (treeway,  params_treeway),
            (downtown, params_downtown),
        ]

    # Training set: groups are fixed 0-based positions in the sorted listing.
    train_dirs = glob.glob(f'{INPUT_PATH}/train/*')
    collection_list_all = np.array(sorted(path.split('/')[-1] for path in train_dirs))
    train_config = build_config(
        collection_list_all[np.arange(21)],
        collection_list_all[np.array([21, 22, 24, 25, 27])],
        collection_list_all[np.array([23, 26, 28])],
    )
    train_pp_df = make_postprocessing_df('train', train_config)
    print_score(train_pp_df)

    # Test set: groups come from the area classifiers.
    test_base = pd.read_csv(f'{INPUT_PATH}/baseline_locations_test.csv')
    test_highway, test_treeway, test_downtown = area_prediction.predict_area(test_base)
    test_config = build_config(test_highway, test_treeway, test_downtown)
    test_pp_df = make_postprocessing_df('test', test_config)

    train_pp_df.to_csv('smoothing_2nd_train.csv', index=False)
    test_pp_df.to_csv('smoothing_2nd_test.csv', index=False)

    submission_columns = ['phone', 'millisSinceGpsEpoch', 'latDeg', 'lngDeg']
    test_pp_df[submission_columns].to_csv('submission.csv', index=False)
    return

In [None]:
# Run the whole pipeline: smooth train and test sets, print the train score
# and write the CSV outputs.
main()