In [5]:
# load bounding box from geojson file, created using geojson.io 
# https://gist.github.com/anonymous/3d160c1955a8d31fd5f8d5e298248cde

import json
from shapely.geometry import mapping, shape

# read the raw GeoJSON text for the Manila bounding box from the project folder
with open("manila.geojson", "r") as geojson_file:
    featureCollection = geojson_file.read()

# geojson.io exports a FeatureCollection; only the geometry of its first
# feature is used here
geoJsonData = json.loads(featureCollection)["features"][0]["geometry"]

# turn the GeoJSON geometry into a shapely shape
boundingBox = shape(geoJsonData)

# show the resulting shape (prints its WKT text)
print(boundingBox)

POLYGON ((120.926513671875 14.52642709481496, 121.0995483398437 14.52642709481496, 121.0995483398437 14.68456769548362, 120.926513671875 14.68456769548362, 120.926513671875 14.52642709481496))


In [14]:
# load the Quezon Ave polygon from geojson file, created using geojson.io
# https://gist.github.com/anonymous/3d160c1955a8d31fd5f8d5e298248cde

import json
from shapely.geometry import mapping, shape

# read the raw GeoJSON text for the Quezon Ave polygon from the project folder
with open("quezon_ave.geojson", "r") as geojson_file:
    featureCollection = geojson_file.read()

# geojson.io exports a FeatureCollection; only the geometry of its first
# feature is used here
geoJsonData = json.loads(featureCollection)["features"][0]["geometry"]

# turn the GeoJSON geometry into a shapely shape
quezonAve = shape(geoJsonData)

# show the resulting shape (prints its WKT text)
print(quezonAve)

POLYGON ((120.9887409210205 14.60360129181855, 121.0171508789062 14.62901550432417, 121.0452175140381 14.64753446629237, 121.0418701171875 14.6483648763837, 121.0154342651367 14.63266959370535, 120.9869384765625 14.60501326961862, 120.9887409210205 14.60360129181855))


In [16]:
# optional: visualize geoJson bounding box (requires ipyleaflet module)

from ipyleaflet import (
    Map,
    GeoJSON
)

# centre the map on the bounding box centroid, passed as [y, x]
# (the polygons in this notebook store x = longitude, y = latitude)
center = [boundingBox.centroid.y, boundingBox.centroid.x]

# create the map widget
mapObj = Map(center=center, zoom=11)

# Build the layer from boundingBox itself instead of the shared geoJsonData
# variable: geoJsonData is reassigned by every cell that loads a geojson file,
# so it holds whichever geometry was loaded last, not necessarily the box.
geoJsonLayer = GeoJSON(data=mapping(boundingBox))
mapObj.add_layer(geoJsonLayer)

# display map
mapObj

In [17]:
import os, csv
from shapely.geometry import Point

raw_data_dir = "/data/raw"
output_data_dir = "/data/manila/"

# Scan every pipe-delimited file in raw_data_dir and keep only the rows whose
# point lies inside both boundingBox and quezonAve. Kept rows are collected in
# a single CSV as ("time","uuid","lat","lon"), taken from input columns
# 0, 1, 9 and 10.
#
# The output file is managed by a `with` block so it is flushed and closed
# even if a malformed row raises part-way through the run.
with open(os.path.join(output_data_dir, "processed_quezon.csv"), 'w') as output_file:
    # lineterminator="\n" keeps the line endings of a plain write(... + "\n");
    # unlike a manual ",".join the writer also quotes fields containing commas
    output_writer = csv.writer(output_file, lineterminator="\n")

    # sorted() makes the processing order and the printed summary deterministic
    # (os.listdir returns entries in arbitrary order)
    for name in sorted(os.listdir(raw_data_dir)):

        csv_path = os.path.join(raw_data_dir, name)

        # skip sub-directories and anything else that is not a regular file
        if not os.path.isfile(csv_path):
            continue

        with open(csv_path, "rb") as csvfile:
            data_reader = csv.reader(csvfile, delimiter='|')

            row_count = 0
            for row in data_reader:
                # csv.reader yields an empty list for a blank line
                if not row:
                    continue

                lat = float(row[9])
                lon = float(row[10])
                point = Point(lon, lat)

                # filter by bounding box
                if not boundingBox.contains(point):
                    continue

                # optionally filter points within quezon ave
                if not quezonAve.contains(point):
                    continue

                # write rows within bounding box as ("time","uuid","lat","lon")
                output_writer.writerow([row[0], row[1], row[9], row[10]])
                row_count += 1

            # report only the files that contributed at least one row
            if row_count > 0:
                print(name + ": " + str(row_count))

2016_01_01_0000_part_00: 148
2016_01_01_0001_part_00: 9
2016_01_01_0002_part_00: 150
2016_01_01_0003_part_00: 252
2016_01_01_0004_part_00: 157
2016_01_01_0005_part_00: 24
2016_01_01_0006_part_00: 261
2016_01_01_0007_part_00: 2
2016_01_01_0008_part_00: 117
2016_01_01_0009_part_00: 177
2016_01_01_0010_part_00: 78
2016_01_01_0011_part_00: 71
2016_01_01_0012_part_00: 58
2016_01_01_0013_part_00: 8
2016_01_01_0014_part_00: 36
2016_01_01_0015_part_00: 91
2016_01_01_0016_part_00: 311
2016_01_01_0017_part_00: 282
2016_01_01_0018_part_00: 134
2016_01_01_0019_part_00: 72
2016_01_01_0021_part_00: 179
2016_01_01_0022_part_00: 284
2016_01_01_0023_part_00: 396
2016_01_01_0024_part_00: 59
2016_01_01_0025_part_00: 48
2016_01_01_0026_part_00: 219
2016_01_01_0027_part_00: 20
2016_01_01_0028_part_00: 212
2016_01_01_0029_part_00: 144
2016_01_01_0030_part_00: 249
2016_01_01_0031_part_00: 12
2016_01_01_0032_part_00: 160
2016_01_01_0033_part_00: 3
2016_01_01_0034_part_00: 118
2016_01_01_0035_part_00: 103
2016

In [22]:
import sys
import csv
import json
from operator import itemgetter
import time
import calendar

output_data_dir = "/data/manila/"

# Convert the filtered CSV into one JSON document per line, one per run of rows
# sharing a uuid: {"trace": [{"time", "lat", "lon"}, ...], "uuid": ...}.
# Both files are handled by the `with` statement so the JSON output is flushed
# and closed when the cell finishes (or fails).
with open(output_data_dir + "processed_quezon.csv", 'r') as csvfile, \
        open(output_data_dir + "processed_quezon.json", 'w') as output_file:
    columns = ("time","uuid","lat","lon")
    reader = csv.DictReader(csvfile, fieldnames=columns)
    trace = {'trace':[], 'uuid': None}

    # Sort by uuid, then time, so each vehicle's points are contiguous and in
    # chronological order (the timestamp format sorts correctly as text)
    for row in sorted(reader, key=itemgetter(columns[1], columns[0])):
        # Convert to epoch seconds (calendar.timegm treats the timestamp as UTC)
        row[columns[0]] = calendar.timegm(time.strptime(row.get(columns[0]),"%Y-%m-%d %H:%M:%S"))
        # These shouldn't be strings
        row['lon'] = float(row['lon'])
        row['lat'] = float(row['lat'])

        # The uuid is stored once per trace, not on every point
        row_uuid = row.pop(columns[1])

        # Continuation of same uuid
        if len(trace['trace']) and row_uuid == trace['uuid']:
            trace['trace'].append(row)
        # End the prior vehicle and start a new trace
        else:
            if len(trace['trace']):
                output_file.write(json.dumps(trace, separators=(',',':')) + "\n")
            trace['uuid'] = row_uuid
            trace['trace'] = [ row ]

    # Write the final vehicle: the loop only emits a trace when the *next*
    # uuid starts, so the last one would otherwise be dropped
    if len(trace['trace']):
        output_file.write(json.dumps(trace, separators=(',',':')) + "\n")
    
    
