In [2]:
import nlgeojson as nl
import mapkit as mk
import os
import time
import pandas as pd
import pipegeohash as p

In [3]:
lines = mk.get_database('la_routes')
points = pd.read_csv('chiraq.csv')
counties = mk.get_normal_db('counties',tablename='flat')
blocks = p.map_table(points,12,map_only=True)

In [4]:
# creating get size of function
def getsize(filename):
    statinfo = os.stat(filename)
    return float(statinfo.st_size) / (1000 ** 2)

def speed_test(args,type,mbsize,function):
    s = time.time()
    if type == 'points':
        filename = 'points_old.geojson'
        function(args,filename=filename,list=True)
    if type == 'lines':
        filename = 'lines_old.geojson'
        function(args,filename,list=True)

    
    e = time.time() - s
    
    speed = mbsize / e
    return speed




### Lines
Lines accepts two forms of input columns one of which is requred. Either a st_asewkt field is required with in a string from a posgis database, or a field called coords that contains literally a string representation of the geojson coordinate list.

My algorithm heavily prefers input from coords fields as nothing is required to parse into geojson correctly. So if using st_asekwt be aware that behind the scenes its parsing out your coords into coord strings anyway and it may be worth it to add a field in your database for this.


In [5]:
s = time.time()
nl.make_lines(lines,'lines.geojson')
e = time.time() - s

# getting size of geojson file
mbsizel = getsize('lines.geojson')
speedline = mbsizel / e
sizeline = float(len(lines)) / e

print 'Parsed lines geojson at %s mb / s' % (speedline)

Wrote lines.geojson filename to geojson file.
Parsed lines geojson at 52.2251602487 mb / s


### Points

Points are quite simple have a field with 'LAT' or 'LONG' in denoting lat / long fields to be parsed into points within the geojson struture.

In [6]:
s = time.time()
nl.make_points(points,'points.geojson')
e = time.time() - s

# getting size of geojson file
mbsizep = getsize('points.geojson')
speedpoint = mbsizep / e
sizepoint = float(len(points)) / e

print 'Parsed points geojson at %s mb / s' % (speedpoint)

Wrote geojson file to points.geojson.
Parsed points geojson at 36.5386411 mb / s


### Polygons 

On the other hand polygons are the most complex to get into a field. They contain a structured string to denote multipolygons about one area for things like states. This makes it so you can have complex geometries that can still be aggregated about a single field. However makes getting tables into this format sort of a one way street, my module polygon logic parses shp or kml files into complex geometries (holes,multipolygon) and outputs a dataframe table. 

The idea here is once the table is made once you can throw it in postgres and when you use something like ult to relate all points to a given polygon, group by your polygon area on the points df, and have a super easy table join to the polygon dataframe. 

Basically it works and thats all I care about currently.

In [7]:
s = time.time()
nl.make_polygons(counties,'polygons.geojson')
e = time.time() - s

# getting size of geojson file
mbsize = getsize('polygons.geojson')
speedpolygon = mbsize / e
sizepolygon = float(len(counties)) / e
print 'Parsed polygons geojson at %s mb / s' % (speedpolygon)

Wrote polygons.geojson filename to geojson file.
Parsed polygons geojson at 72.8088523952 mb / s


### Blocks
Blocks can accept either a geohash field or the for cardinal fields NORTH,SOUTH,EAST,WEST in the columns of the df.

In [8]:
s = time.time()
nl.make_blocks(blocks,'blocks.geojson')
e = time.time() - s

# getting size of geojson file
mbsizeb = getsize('blocks.geojson')
speedblock = mbsizeb / e
sizeblock = float(len(blocks)) / e
print 'Parsed blocks geojson at %s mb / s' % (speedblock)

Wrote blocks.geojson filename to geojson file.
Parsed blocks geojson at 29.0093918195 mb / s


In [13]:
pd.DataFrame([[speedpoint,sizepoint],[speedline,sizeline],[speedpolygon,sizepolygon],[speedblock,sizeblock]]
             ,columns=['(MB/s)','(row / s)'],
            index=['POINTS','LINES','POLYGONS','BLOCKS'])

Unnamed: 0,(MB/s),(row / s)
POINTS,36.538641,60714.133549
LINES,52.22516,94688.705795
POLYGONS,72.808852,9171.315837
BLOCKS,29.009392,39407.21613


### Comparison again Old Modules (pipegeohash)
The following are the comparison of speed between this and the old geojson parsing library pipegeohash. Polygons will be omitted as support like this was never implemented entirely aswell as blocks as differing implemenations means its to hard to compare the two accurately but points and liens should be pretty apples to apples.

Pipegeojson was json serialization created as dictionary object in python as you might have guessed this detail was what made it slow, as it would take almost no time to create teh structure but 10-100x more time just to serialize it into json.


In [14]:
import pipegeojson as pg

# creating dataframe for first speed
speeds = pd.DataFrame([speedpoint,speedline],columns=['NLGEOJSON (mb/s)'])
pspeed = speed_test(points,'points',mbsizep,pg.make_points)
lspeed = speed_test(lines,'lines',mbsizel,pg.make_postgis_lines)
speeds['PIPEGEOJSON (mb/s)'] = [pspeed,lspeed]


Wrote points_old.geojson to geojson.
Wrote lines_old.geojson to geojson.


In [15]:
speeds['FACTOR'] = speeds['NLGEOJSON (mb/s)'] / speeds['PIPEGEOJSON (mb/s)']
speeds

Unnamed: 0,NLGEOJSON (mb/s),PIPEGEOJSON (mb/s),FACTOR
0,36.538641,2.73689,13.350424
1,52.22516,1.708433,30.569038


# Required fields blocks (although NORTH,SOUTH,EAST,WEST are also taken)

In [28]:
blocks['GEOHASH']

0         dp3w7wguk7rt
1         dp3tx9bxgh5n
2         dp3wcfr8zcf8
3         dp3wk0x3137d
4         dp3w7pw2nfhb
5         dp3wktfq36mt
6         dp3tt2yjyv21
7         dp3tdv0536v2
8         dp3wjuxfbw52
9         dp3tmtbq9v4y
10        dp3whn8w0cre
11        dp3tnjffcbv5
12        dp3whnp92qmw
13        dp3wv4dw0wxy
14        dp3wk7vne7h4
15        dp3w9kseqwee
16        dp3tecrkbvtc
17        dp3wq10f53f5
18        dp3wmkb5dh0e
19        dp3wkjzmzh9h
20        dp3wmcuep2b0
21        dp3wugvqbj1x
22        dp3tdvcbusn6
23        dp3we6ygmz73
24        dp3wervemmwd
25        dp3tn4sfbxhk
26        dp3twjgxesbv
27        dp3w5pzb319e
28        dp3w7sgc3heg
29        dp3ws1b07p7c
              ...     
291262    dp3wefm01udy
291263    dp3twujt8e5x
291264    dp3trxg4xrhm
291265    dp3wn2504wb2
291266    dp3wtqumbbf4
291267    dp3ts0evcwf4
291268    dp3ttss3zmcj
291269    dp3wd33bvfp5
291270    dp3tx79gfhxy
291271    dp3twcyjgvfs
291272    dp3wdykde2gg
291273    dp3tu2k0zf8f
291274    d

# Required fields Points (will search columns for closest lat and long field)

In [27]:
points[['Latitude','Longitude']]

Unnamed: 0,Latitude,Longitude
0,41.917710,-87.731474
1,41.759013,-87.560216
2,41.980271,-87.803085
3,41.882808,-87.704829
4,41.921092,-87.749910
5,41.912554,-87.689720
6,41.753182,-87.651383
7,41.776085,-87.769714
8,41.861161,-87.627271
9,41.736831,-87.648504


# Required fields lines One or the other or both


In [23]:
lines[['coords','st_asewkt']]

Unnamed: 0,coords,st_asewkt
0,"[[-118.505568, 34.341925], [-118.505522, 34.34...",SRID=4326;MULTILINESTRING((-118.505568 34.3419...
1,"[[-118.30853, 33.920221], [-118.308523, 33.919...",SRID=4326;MULTILINESTRING((-118.30853 33.92022...
2,"[[-118.202971, 33.901611], [-118.203038, 33.90...",SRID=4326;MULTILINESTRING((-118.202971 33.9016...
3,"[[-118.359439, 33.898825], [-118.35889, 33.898...",SRID=4326;MULTILINESTRING((-118.359439 33.8988...
4,"[[-118.358253, 33.960056], [-118.358242, 33.95...",SRID=4326;MULTILINESTRING((-118.358253 33.9600...
5,"[[-118.331999, 33.935813], [-118.332001, 33.93...",SRID=4326;MULTILINESTRING((-118.331999 33.9358...
6,"[[-118.175445, 33.953186], [-118.174335, 33.95...",SRID=4326;MULTILINESTRING((-118.175445 33.9531...
7,"[[-118.463221, 33.994698], [-118.463183, 33.99...",SRID=4326;MULTILINESTRING((-118.463221 33.9946...
8,"[[-118.330036, 33.900339], [-118.330028, 33.89...",SRID=4326;MULTILINESTRING((-118.330036 33.9003...
9,"[[-118.388995, 34.010275], [-118.388736, 34.01...",SRID=4326;MULTILINESTRING((-118.388995 34.0102...


# Required field polygon (see polygon_logic repo to produce this for yourself on your shp or kml)

In [31]:
counties[['COORDS']]

Unnamed: 0,COORDS
0,"[[[-85.748032,31.619181],[-85.748251,31.618048..."
1,"[[[-88.473227,31.893856],[-88.472642,31.875153..."
2,"[[[-87.427204,31.26436],[-87.427455,31.260386]..."
3,"[[[-86.413335,32.750591],[-86.413116,32.707386..."
4,"[[[-87.870464,32.762442],[-87.870206,32.761434..."
5,"[[[-86.199408,31.807861],[-86.199378,31.79045]..."
6,"[[[-85.434697,32.317613],[-85.434631,32.31711]..."
7,"[[[-87.026846,33.246459],[-87.025976,33.231607..."
8,"[[[179.482464,51.982834],[179.475569,51.937456..."
9,"[[[-114.753196,36.089513],[-114.754798,36.0847..."
