In [18]:
import fiona

import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import shapefile

from descartes import PolygonPatch
from matplotlib import cm, colors
from shapely.geometry import Polygon, Point
from geopandas import GeoSeries
from pyproj import Proj

In [19]:
# Render every figure in this notebook with matplotlib's dark theme.
plt.style.use('dark_background')
# Register the KML driver as read/write ('rw') so the boundary file can be
# opened later. Go through the `fiona` module imported at the top of the
# notebook instead of geopandas' internal `gpd.io.file.fiona` handle: that
# attribute is not public API and newer geopandas releases only populate it
# lazily, so dereferencing it here can fail on a fresh kernel.
fiona.drvsupport.supported_drivers['KML'] = 'rw'

In [20]:
# --- Where things live on disk ---
# Directory holding the raw GPS log (and the KML boundary file).
LOG_PATH = '/home/yang/data/raw/'
# Directory intended for the per-field HDF5 output.
H5_PATH = '/home/yang/data/gps/'

# --- Pieces of the log file name: <EVENT>-<MACHINE_ID>-<DATE>-gps.log ---
EVENT = 'jkwh'
MACHINE_ID = '8250'
DATE = '072019'  # NOTE(review): format not evident from this notebook - confirm

# --- Geographic settings ---
# Time zone used when converting epoch timestamps to local wall-clock time.
TZONE = 'America/Denver'
# UTM zone number handed to pyproj for the lat/lon -> x/y projection.
UTMZONE = 13

In [21]:
# Column schema of the raw GPS log, in file order: the timestamp is read as
# text and every remaining field as a 64-bit float.
_float_cols = [
    'lat', 'lon', 'alt', 'track', 'speed', 'climb',
    'epx', 'epy', 'epv', 'epd', 'eps', 'epc',
]
dtypes = {'ts': str}
dtypes.update({name: np.float64 for name in _float_cols})

In [22]:
# Ordered column names, taken from the schema's keys (dicts keep insertion order)
cols = [name for name in dtypes]

In [23]:
# Assemble the file name as <EVENT>-<MACHINE_ID>-<DATE>-gps.log and read it.
ext = 'gps.log'
LOG_NAME = '{}-{}-{}-{}'.format(EVENT, MACHINE_ID, DATE, ext)
# The log is space-separated with no header row, so column names come from
# `cols`; the literal text 'None' is treated as a missing value.
gps_log = pd.read_csv(
    LOG_PATH + LOG_NAME,
    sep=' ',
    names=cols,
    dtype=dtypes,
    na_values='None',
    skip_blank_lines=True,
)

In [24]:
# Number of rows read from the raw log
print(gps_log.shape[0])

416886


In [25]:
# Preview the first five rows of the raw log
gps_log.head(5)

Unnamed: 0,ts,lat,lon,alt,track,speed,climb,epx,epy,epv,epd,eps,epc
0,1551908489,40.429708,-86.911978,224.3,0.0,0.011,,,,,,,
1,1551908489,40.429708,-86.911978,224.3,0.0,0.011,0.0,,,46.0,,,
2,1551908493,40.429702,-86.911973,223.398,27.444,0.074,-0.047,17.53,48.443,45.77,,0.53,91.54
3,1551908494,40.429701,-86.911974,223.22,261.6007,0.019,-0.043,17.53,48.443,45.77,,0.48,91.54
4,1551908495,40.4297,-86.911973,223.019,199.8861,0.057,-0.06,17.53,48.443,45.77,,0.5,91.54


In [26]:
# Project WGS84 longitude/latitude into UTM coordinates for zone UTMZONE and
# store them as the new columns 'x' and 'y'.
# NOTE(review): the rows previewed above have lon of about -86.9, which does
# not look like it falls inside UTM zone 13 - confirm the zone matches where
# the field data was actually recorded.
p = Proj(proj='utm', zone=UTMZONE, ellps='WGS84', preserve_units=False)
lon_vals = gps_log['lon'].values
lat_vals = gps_log['lat'].values
# pyproj takes longitude first, then latitude
x, y = p(lon_vals, lat_vals)
gps_log['x'], gps_log['y'] = x, y

In [27]:
# Turn the epoch-second timestamps into datetimes (timezone-naive).
gps_log['ts_datetime'] = pd.to_datetime(gps_log['ts'], unit='s')
# Label them as UTC, shift to TZONE, then strip the timezone again so that
# 'ts_local' holds naive local wall-clock times.
gps_log['ts_local'] = (
    gps_log['ts_datetime']
    .dt.tz_localize('UTC')
    .dt.tz_convert(TZONE)
    .dt.tz_localize(None)
)
# Working copy without the intermediate column
gps_log_c = gps_log.drop(columns=['ts_datetime']).copy()

In [28]:
# Preview the first five rows, now including x, y and ts_local
gps_log_c.head(5)

Unnamed: 0,ts,lat,lon,alt,track,speed,climb,epx,epy,epv,epd,eps,epc,x,y,ts_local
0,1551908489,40.429708,-86.911978,224.3,0.0,0.011,,,,,,,,2038035.0,4635795.0,2019-03-06 14:41:29
1,1551908489,40.429708,-86.911978,224.3,0.0,0.011,0.0,,,46.0,,,,2038035.0,4635795.0,2019-03-06 14:41:29
2,1551908493,40.429702,-86.911973,223.398,27.444,0.074,-0.047,17.53,48.443,45.77,,0.53,91.54,2038035.0,4635794.0,2019-03-06 14:41:33
3,1551908494,40.429701,-86.911974,223.22,261.6007,0.019,-0.043,17.53,48.443,45.77,,0.48,91.54,2038035.0,4635794.0,2019-03-06 14:41:34
4,1551908495,40.4297,-86.911973,223.019,199.8861,0.057,-0.06,17.53,48.443,45.77,,0.5,91.54,2038035.0,4635794.0,2019-03-06 14:41:35


In [29]:
# Keep only the first row seen for each timestamp value
is_repeat_ts = gps_log_c.duplicated(subset='ts', keep='first')
gps_log_c = gps_log_c[~is_repeat_ts]
print(len(gps_log_c))

277932


In [30]:
# Drop anything that is nan or inf.
# NaN never compares equal to anything (not even itself), so a test such as
# `col != np.nan` is True for every row and filters nothing. Use notna() for
# the timestamp and np.isfinite() for the projected coordinates; the latter
# rejects NaN, +inf and -inf in one pass.
has_ts = gps_log_c['ts'].notna()
finite_xy = np.isfinite(gps_log_c[['x', 'y']]).all(axis=1)
gps_log_cc = gps_log_c[has_ts & finite_xy].copy()
print(len(gps_log_cc))

277738


In [31]:
# Reset the indices: the filtering above leaves gaps in the row labels, so
# renumber them 0..n-1 and discard the old labels (drop=True).
gps_log_cc = gps_log_cc.reset_index(drop=True)

In [32]:
# Build a GeoDataFrame whose geometry column holds one Point per row, made
# from the geographic lon/lat columns (not the projected x/y columns).
lonlat_points = gpd.points_from_xy(gps_log_cc['lon'], gps_log_cc['lat'])
gdf = gpd.GeoDataFrame(gps_log_cc, geometry=lonlat_points)

In [33]:
# Load the field boundaries from the KML file <EVENT>.kml in LOG_PATH.
# The path is built from the configuration constants instead of being
# hard-coded, so changing EVENT or LOG_PATH in the config cell is enough to
# point the notebook at another data set.
bnds = gpd.read_file(LOG_PATH + EVENT + '.kml', driver='KML')

  for feature in features_lst:


In [34]:
# Split the GPS log by field: for every boundary polygon in `bnds`, select the
# points lying inside it and prepare a plain DataFrame for that field.
for idx, b in bnds.iterrows():
    print('Looking at {}'.format(b['Name']))
    # The point-in-polygon test is the costly step, so evaluate it once per
    # boundary and reuse the boolean mask for counting and for selection.
    in_field = gdf['geometry'].within(b['geometry'])
    num_pts = int(in_field.sum())  # vectorised count of True entries
    if num_pts == 0:
        print('No data found')
        continue
    print('Have data within this field, num. of pts: {}'.format(num_pts))
    # Output file suffix for this field, e.g. '<Name>-gps.h5'
    ext = b['Name'] + '-gps.h5'
    df = gdf[in_field].copy()
    # Discard the error-estimate columns and the geometry column
    df = df.drop(columns=['epx', 'epy', 'epv', 'epd', 'eps', 'epc', 'geometry'])
    df = df.reset_index(drop=True)
    dff = pd.DataFrame(df)
    # Timestamps were read as strings; store them as integers
    dff['ts'] = dff['ts'].astype('int64')
    # Saving is currently disabled; when enabled, only fields with more than
    # 500 points are written out.
#         if len(dff) > 500:
#             dff.to_hdf(H5_PATH + '-'.join([EVENT, MACHINE_ID, DATE, ext]), key='df', mode='w')

Looking at f1
Have data within this field, num. of pts: 12341
Looking at f2
Have data within this field, num. of pts: 2552
Looking at f3
Have data within this field, num. of pts: 17643
Looking at f4
No data found
Looking at f5
Have data within this field, num. of pts: 16
Looking at f6
Have data within this field, num. of pts: 1333
Looking at f7
No data found
Looking at f8
No data found
Looking at f9
No data found
Looking at f10
Have data within this field, num. of pts: 19
Looking at f11
No data found
Looking at f12
No data found
