In [2]:
import nlgeojson as nl
import mapkit as mk
import os
import time
import pandas as pd
import pipegeohash as p

In [3]:
# Input tables, one per geometry type exercised further down.
# NOTE(review): presumably each loader returns a pandas DataFrame -- confirm
# against mapkit / pipegeohash.
lines = mk.get_database('la_routes')
points = pd.read_csv('chiraq.csv')
counties = mk.get_normal_db('counties', tablename='flat')

# Blocks are derived from the points table.
# NOTE(review): 12 looks like a geohash precision -- confirm.
blocks = p.map_table(points, 12, map_only=True)

In [4]:
# helper for reading the size of a file on disk
def getsize(filename):
    """Return the size of `filename` in megabytes (1 MB = 1000 ** 2 bytes).

    The result is always a float. Raises OSError if the file cannot be
    stat'ed (for example, when it does not exist).
    """
    size_in_bytes = os.path.getsize(filename)
    return float(size_in_bytes) / 1e6

def speed_test(args,type,mbsize,function):
    """Time one call of `function` and return its throughput in MB per second.

    Parameters
    ----------
    args : the table handed to `function` as its first argument.
    type : one of 'points', 'lines' or 'blocks'; selects the output filename
        ('<type>_old.geojson'). The name shadows the builtin but is kept so
        existing callers are unaffected.
    mbsize : size in megabytes that the elapsed time is measured against.
    function : callable that does the work; it is invoked with `list=True`.

    Returns
    -------
    float : `mbsize` divided by the elapsed wall-clock seconds, or
        float('inf') when the clock reports no elapsed time.

    Raises
    ------
    ValueError : if `type` is not a supported geometry type. Previously an
        unsupported type (including 'blocks') skipped the call entirely and
        reported a meaningless speed.
    """
    filenames = {
        'points': 'points_old.geojson',
        'lines': 'lines_old.geojson',
        'blocks': 'blocks_old.geojson',
    }
    if type not in filenames:
        raise ValueError(
            'unsupported type %r; expected one of %s' % (type, sorted(filenames))
        )
    filename = filenames[type]

    s = time.time()
    if type == 'lines':
        # The lines writer has always been given the filename positionally.
        function(args,filename,list=True)
    else:
        # NOTE(review): 'blocks' reuses the keyword form used for 'points';
        # confirm the blocks writer accepts `filename=`.
        function(args,filename=filename,list=True)
    e = time.time() - s

    # A very fast call can finish within the clock's resolution; report an
    # unbounded speed instead of raising ZeroDivisionError.
    if e <= 0:
        return float('inf')
    return mbsize / e




### Lines
Lines accepts two forms of input column, one of which is required: either an st_asewkt field containing a string from a PostGIS database, or a field called coords that contains a literal string representation of the geojson coordinate list.

My algorithm heavily prefers input from coords fields, as nothing needs to be parsed to produce correct geojson. So if you are using st_asewkt, be aware that behind the scenes it's parsing your coords out into coord strings anyway, and it may be worth adding a field to your database for this.


In [5]:
# Wall-clock time for nlgeojson to write the lines table to disk.
s = time.time()
nl.make_lines(lines, 'lines.geojson')
e = time.time() - s

# Throughput: size of the file just written (MB) over the elapsed seconds.
mbsizel = getsize('lines.geojson')
speedline = mbsizel / e
print('Parsed lines geojson at %s mb / s' % speedline)

Wrote lines.geojson filename to geojson file.
Parsed lines geojson at 59.7069874057 mb / s


### Points

Points are quite simple: have a field with 'LAT' or 'LONG' in its name, denoting the lat / long fields to be parsed into points within the geojson structure.

In [6]:
# Wall-clock time for nlgeojson to write the points table to disk.
s = time.time()
nl.make_points(points, 'points.geojson')
e = time.time() - s

# Throughput: size of the file just written (MB) over the elapsed seconds.
mbsizep = getsize('points.geojson')
speedpoint = mbsizep / e
print('Parsed points geojson at %s mb / s' % speedpoint)

Wrote geojson file to points.geojson.
Parsed points geojson at 40.6154139762 mb / s


### Polygons 

Polygons, on the other hand, are the most complex to get into a field. They contain a structured string that denotes the multipolygons making up one area, for things like states. This lets you have complex geometries that can still be aggregated about a single field. However, it makes getting tables into this format something of a one-way street: my module's polygon logic parses shp or kml files into complex geometries (holes, multipolygons) and outputs a dataframe table.

The idea here is that once the table has been made, you can throw it in postgres; then, when you use something like ult to relate all points to a given polygon, you can group by your polygon area on the points df and have a super easy table join to the polygon dataframe.

Basically, it works, and that's all I care about currently.

In [7]:
# Wall-clock time for nlgeojson to write the counties table to disk.
s = time.time()
nl.make_polygons(counties, 'polygons.geojson')
e = time.time() - s

# Throughput: size of the file just written (MB) over the elapsed seconds.
mbsize = getsize('polygons.geojson')
speedpolygon = mbsize / e
print('Parsed polygons geojson at %s mb / s' % speedpolygon)

Wrote polygons.geojson filename to geojson file.
Parsed polygons geojson at 71.7193896661 mb / s


### Blocks
Blocks can accept either a geohash field or the four cardinal fields NORTH, SOUTH, EAST, WEST in the columns of the df.

In [8]:
# Wall-clock time for nlgeojson to write the blocks table to disk.
s = time.time()
nl.make_blocks(blocks, 'blocks.geojson')
e = time.time() - s

# Throughput: size of the file just written (MB) over the elapsed seconds.
mbsizeb = getsize('blocks.geojson')
speedblock = mbsizeb / e
print('Parsed blocks geojson at %s mb / s' % speedblock)

Wrote blocks.geojson filename to geojson file.
Parsed blocks geojson at 31.0193727936 mb / s


### Comparison against Old Modules (pipegeojson)
The following is a comparison of speed between this module and the old geojson parsing library, pipegeojson (imported as `pg` below). Polygons are omitted, as support like this was never fully implemented there.


In [10]:
import pipegeojson as pg

# creating dataframe for first speed
speeds = pd.DataFrame([speedpoint,speedline,speedblock],columns=['NLGEOJSON (mb/s)'])
speeds
print lines

pspeed = speed_test(points[:1000],'points',mbsizep,pg.make_points)
lspeed = speed_test(lines[:1000],'lines',mbsizel,pg.make_postgis_lines)
bspeed = speed_test(blocks[:1000],'blocks',mbsizeb,pg.make_blocks)
speeds['PIPEGEOJSON (mb/s)'] = [pspeed,lspeed,bspeed]
speeds

           gid       linearid              fullname rttyp  mtfcc  \
0           82  1104486642780          State Rte 14     S  S1200   
1        93775  1101583261345                  None  None  S1400   
2        93902  1101576692134          N Pannes Ave     M  S1400   
3        94047  1101583239809                  None  None  S1400   
4        94459  1101583265329                  None  None  S1400   
5        95145  1101583228688                  None  None  S1400   
6        95137  1101576712568           Reisner Way     M  S1400   
7        95326  1101583249549                  None  None  S1400   
8        95803  1101576668435              Arbor Ln     M  S1400   
9        95695  1101576730804        Saint Louis Ct     M  S1400   
10       95958  1101576674905             Menlo Ave     M  S1400   
11       96146  1101576700801           Carmona Ave     M  S1400   
12       98877  1101583242176                  None  None  S1400   
13       96719  1101576731603           Toberman

IndexError: list index out of range

In [None]:
# Disabled draft of the comparison: the calls sit inside a bare string
# literal, so none of them are executed.
# NOTE(review): these calls pass three arguments, while speed_test as defined
# in this notebook takes four (args, type, mbsize, function); update or delete
# this snippet rather than re-enabling it as is.
'''
pspeed = speed_test(points,'points',pg.make_points)
lspeed = speed_test(lines,'lines',pg.make_lines)
bspeed = speed_test(blocks,'blocks',pg.make_blocks)
[pspeed,lspeed,bspeed]
'''