In [1]:
import numpy as np
import pandas as pd
import xml.etree.ElementTree as et
from pykalman import KalmanFilter

In [2]:
# Parse the GPX file 'walk1.gpx' into an ElementTree.
parse_result = et.parse('walk1.gpx')

In [3]:
# Show the parsed tree object as the cell output.
parse_result

<xml.etree.ElementTree.ElementTree at 0x122d4be50>

In [4]:
# Root element of the parsed document; later cells iterate over it.
root = parse_result.getroot()

In [5]:
# Inspect the root tag; it carries the XML namespace prefix in braces.
root.tag

'{http://www.topografix.com/GPX/1/0}gpx'

In [6]:
# Inspect the attributes set on the root element.
root.attrib

{'version': '1.0',
 'creator': 'GPSLogger 86 - http://gpslogger.mendhak.com/',
 '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation': 'http://www.topografix.com/GPX/1/0 http://www.topografix.com/GPX/1/0/gpx.xsd'}

In [7]:
# Holds the [lat, lon] pairs extracted from the GPX track points.
coord = []

In [8]:
# Build the list in a single assignment so that re-running this cell does not
# append the same track points a second time. Values stay as the attribute
# strings found on each <trkpt> element.
coord = [
    [trkpt.attrib['lat'], trkpt.attrib['lon']]
    for trkpt in root.iter('{http://www.topografix.com/GPX/1/0}trkpt')
]

In [9]:
# Tabulate the collected pairs and convert the attribute strings to floats.
df = pd.DataFrame(coord, columns=['lat', 'lon']).astype(float)
df

Unnamed: 0,lat,lon
0,49.280158,-123.005283
1,49.280222,-123.005437
2,49.280231,-123.005601
3,49.280391,-123.006407
4,49.280211,-123.005965
...,...,...
116,49.275590,-123.017481
117,49.275652,-123.017862
118,49.275778,-123.017727
119,49.275721,-123.018331


In [10]:
def distance(df):
    """
    Return the summed haversine distance between consecutive rows of df.

    df must provide 'lat' and 'lon' columns holding degrees (they are
    converted with np.deg2rad). Each row is paired with the row after it;
    the final row has no successor, so its entry is NaN and is skipped by
    the sum. The result is in the units of the radius constant used below
    (6371000, presumably Earth's mean radius in metres).
    """
    earth_radius = 6371000
    following = df.shift(-1, axis=0)  # row i now holds the point at i + 1
    delta = np.deg2rad(following - df)

    sin_half_dlat = np.sin(delta['lat'] / 2)
    sin_half_dlon = np.sin(delta['lon'] / 2)

    # Haversine formula: the two terms under the square root.
    lat_term = sin_half_dlat * sin_half_dlat
    lon_term = (
        np.cos(np.deg2rad(df['lat']))
        * np.cos(np.deg2rad(following['lat']))
        * sin_half_dlon
        * sin_half_dlon
    )

    segment_lengths = 2 * earth_radius * np.arcsin(np.sqrt(lat_term + lon_term))
    return segment_lengths.sum()

# Report the path length computed from the raw points.
print('Unfiltered distance: %0.2f' % distance(df))

Unfiltered distance: 3186.44


In [94]:
def smooth(df):
    """
    Run Kalman smoothing over the rows of df and return the smoothed states.

    The first row of df is used as the initial state mean, and the
    transition model is the identity matrix. The smoothed state means are
    returned as a new DataFrame with columns 'lat' and 'lon' and a fresh
    default index.
    """
    # Per-dimension standard deviations, squared into diagonal covariances.
    # NOTE(review): these are in the same units as df's columns (degrees if
    # df holds raw lat/lon) - confirm the magnitudes are intended.
    measurement_cov = np.diag([15, 20]) ** 2
    process_cov = np.diag([10, 9]) ** 2
    # Identity transition: the predicted next state equals the current state.
    identity_transition = [[1, 0], [0, 1]]

    kalman = KalmanFilter(
        initial_state_mean=df.iloc[0],
        observation_covariance=measurement_cov,
        transition_covariance=process_cov,
        transition_matrices=identity_transition,
    )
    smoothed_means, _ = kalman.smooth(df)
    return pd.DataFrame(smoothed_means, columns=['lat', 'lon'])


In [95]:
# smooth() already returns a DataFrame, so no extra pd.DataFrame() wrap is needed.
smoothed_points = smooth(df)
print('Filtered distance: %0.2f' % (distance(smoothed_points)))

Filtered distance: 1399.86


In [96]:
def output_gpx(points, output_filename):
    """
    Output a GPX file with latitude and longitude from the points DataFrame.

    Parameters:
        points: DataFrame with 'lat' and 'lon' columns; one <trkpt> element
            is written per row, in row order, with each value formatted to
            8 decimal places.
        output_filename: path of the file to create or overwrite.

    The document has the shape <gpx><trk><trkseg><trkpt .../>...</trkseg></trk></gpx>.
    An empty DataFrame produces an empty <trkseg/>.

    Raises:
        KeyError: if points lacks a 'lat' or 'lon' column.
    """
    from xml.dom.minidom import getDOMImplementation

    doc = getDOMImplementation().createDocument(None, 'gpx', None)
    trk = doc.createElement('trk')
    doc.documentElement.appendChild(trk)
    trkseg = doc.createElement('trkseg')
    trk.appendChild(trkseg)

    # Iterate explicitly rather than via DataFrame.apply(axis=1): apply is
    # meant for computing results, and on an empty frame pandas probes the
    # callback with a placeholder all-NaN row, which would emit a bogus
    # <trkpt lat="nan" lon="nan"/>.
    for lat, lon in zip(points['lat'], points['lon']):
        trkpt = doc.createElement('trkpt')
        trkpt.setAttribute('lat', '%.8f' % (lat))
        trkpt.setAttribute('lon', '%.8f' % (lon))
        trkseg.appendChild(trkpt)

    # The XML declaration names no encoding, so readers assume UTF-8; write
    # the file as UTF-8 instead of the platform default.
    with open(output_filename, 'w', encoding='utf-8') as fh:
        doc.writexml(fh, indent=' ')

In [97]:
# Write the smoothed track to 'out.gpx'.
output_gpx(smoothed_points, 'out.gpx')