# Safety on Public Transportation in Chicago

## Script for Creation of Grid Polygons

### Authors: Jainam Mehta, Julian Kleindiek, Lola Johnston, Peter Eusebio
### Date: 12/06/2019

In [2]:
!pip install geopandas

In [144]:
import geopandas as gpd # for polygon creation
from shapely.geometry import Point, Polygon # for polygon creation
from shapely import wkt # for creation of shapefile


from sodapy import Socrata # for API calls
import sqlalchemy as db # for SQL
import pymysql # for SQL
import pandas as pd # for data cleaning
import datetime # for data cleaning
import numpy as np #for grid generation math
import math #for grid generation math.  standard module, shouldn't need installation.

In [166]:
# create connection to CloudSQL
# The password is read from the environment so that no secret is stored in
# the notebook; user, host and database fall back to the values this
# notebook has always used.
import os
from urllib.parse import quote_plus

db_password = os.environ.get('CLOUDSQL_PASSWORD')
if not db_password:
    raise RuntimeError('Set the CLOUDSQL_PASSWORD environment variable before running this cell.')

db_user = os.environ.get('CLOUDSQL_USER', 'root')
db_host = os.environ.get('CLOUDSQL_HOST', '146.148.80.202')
db_name = os.environ.get('CLOUDSQL_DB', 'mydb')

# quote_plus keeps URL-special characters in the password (@, /, :) from
# corrupting the connection URL
engine = db.create_engine(
    'mysql+pymysql://{user}:{password}@{host}/{name}'.format(
        user=db_user,
        password=quote_plus(db_password),
        host=db_host,
        name=db_name,
    )
)
connection = engine.connect()
metadata = db.MetaData()

In [167]:
# pull grid
# Reflect the `grid` table definition from the database. `autoload_with`
# alone triggers reflection; the extra `autoload=True` flag is deprecated
# since SQLAlchemy 1.4 and removed in 2.0.
# The Table gets its own name so that `grid` only ever refers to the data frame.
grid_table = db.Table('grid', metadata, autoload_with=engine)

# query the table (select every column of every row)
query = grid_table.select()

# store query as data frame
grid = pd.read_sql(query, connection)

In [168]:
# look at the grid table
# Index by gridId only if that has not happened yet: after the first run
# `gridId` is no longer a column, so an unconditional set_index would raise
# a KeyError when the cell is re-run.
if grid.index.name != 'gridId':
    grid = grid.set_index('gridId')
grid.head()

Unnamed: 0_level_0,minlat,maxlat,minlong,maxlong
gridId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,41.6242,41.6387,-87.9444,-87.9255
2,41.6242,41.6387,-87.9255,-87.9067
3,41.6242,41.6387,-87.9067,-87.8878
4,41.6242,41.6387,-87.8878,-87.8689
5,41.6242,41.6387,-87.8689,-87.85


In [11]:
# TRY I: Unstack data frame

# grid_minlat = grid["minlat"]
# grid_maxlat = grid["maxlat"]
# grid_minlong = grid["minlong"]
# grid_maxlong = grid["maxlong"]

# grid_latitude = pd.DataFrame(grid_minlat.append(grid_maxlat))
# grid_latitude.columns = ["latitude"]

# grid_longitude = pd.DataFrame(grid_minlong.append(grid_maxlong))
# grid_longitude.columns = ["longitude"]

# df = pd.DataFrame({"latitude": grid_latitude.latitude, 
#                  "longitude": grid_longitude.longitude})

# df.sort_index().head()

In [154]:
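# TRY I (abandoned): corners given as (lat, lon) and listed in zig-zag order, which yields a self-intersecting polygon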

# geoms = []
# for index, row in gdf.iterrows():
#    maxlat = row.maxlat
#    maxlon = row.maxlon
#    minlat = row.minlat
#    minlon = row.minlon
#    geom = Polygon([(maxlat, minlon), (maxlat, maxlon), (minlat, minlon), (minlat, maxlon)])
#    geoms.append(geom)

In [155]:
# TRY II (abandoned): switched to (lon, lat), but the corners are still listed in zig-zag order (self-intersecting polygon)
# geoms = []
# for index, row in gdf.iterrows():
#    maxlat = row.maxlat
#    maxlon = row.maxlon
#    minlat = row.minlat
#    minlon = row.minlon
#    geom = Polygon([(minlon, maxlat), (maxlon, maxlat), (minlon, minlat), (maxlon, minlat)])
#    geoms.append(geom)

In [153]:
# pull the four bounding coordinates of every cell out of the grid table
maxlat = grid['maxlat']
maxlon = grid['maxlong']
minlat = grid['minlat']
minlon = grid['minlong']

# collect them in a single GeoDataFrame (longitude columns get the shorter *lon names)
gdf = gpd.GeoDataFrame({
    'maxlat': maxlat,
    'maxlon': maxlon,
    'minlat': minlat,
    'minlon': minlon,
})

In [156]:
# create polygons from lat/longs
# One rectangle per grid cell. Coordinates are given as (x, y) = (longitude,
# latitude), and the corners are listed as a ring (NW -> NE -> SE -> SW) so
# the outline does not cross itself.
# zip over the columns instead of iterrows(): no per-row Series is built, and
# the module-level maxlat/maxlon/minlat/minlon Series are no longer
# overwritten by the scalars of the last row.
geoms = [
    Polygon([(west, north), (east, north), (east, south), (west, south)])
    for north, east, south, west in zip(
        gdf['maxlat'], gdf['maxlon'], gdf['minlat'], gdf['minlon']
    )
]

In [157]:
# attach the polygons to the grid table as its `geometry` column
grid = grid.assign(geometry=geoms)

In [158]:
# keep only the geometry column of the grid table (the gridId index comes along)
df = grid['geometry']
df.head()

gridId
1    POLYGON ((-87.9444 41.6387, -87.9255 41.6387, ...
2    POLYGON ((-87.9255 41.6387, -87.9067 41.6387, ...
3    POLYGON ((-87.9067 41.6387, -87.8878 41.6387, ...
4    POLYGON ((-87.8878 41.6387, -87.8689 41.6387, ...
5    POLYGON ((-87.8689 41.6387, -87.84999999999999...
Name: geometry, dtype: object

In [159]:
# write the geometry column, with a header row, to gridPolygon.csv
df.to_csv(path_or_buf='gridPolygon.csv', header=True)

In [160]:
# read the exported .csv file back into a fresh data frame
df_new = pd.read_csv(filepath_or_buffer='gridPolygon.csv')
df_new.head()

Unnamed: 0,gridId,geometry
0,1,"POLYGON ((-87.9444 41.6387, -87.9255 41.6387, ..."
1,2,"POLYGON ((-87.9255 41.6387, -87.9067 41.6387, ..."
2,3,"POLYGON ((-87.9067 41.6387, -87.8878 41.6387, ..."
3,4,"POLYGON ((-87.8878 41.6387, -87.8689 41.6387, ..."
4,5,"POLYGON ((-87.8689 41.6387, -87.84999999999999..."


In [161]:
# make geoms a valid geometry object
# Parse each WKT string into a shapely geometry. Values that are not strings
# are passed through unchanged, so re-running the cell does not fail on
# geometries that were already converted.
df_new['geometry'] = df_new['geometry'].apply(
    lambda geom: wkt.loads(geom) if isinstance(geom, str) else geom
)

In [163]:
# create a GeoDataFrame from the original data frame
# NOTE(review): no CRS is assigned here; the coordinates look like lat/long
# degrees (presumably WGS84 / EPSG:4326) — confirm and set `crs` if the
# exported shapefile needs projection information.
gdf = gpd.GeoDataFrame(df_new, geometry='geometry')

# bare expression instead of print(): rich table display, consistent with
# the other .head() cells in this notebook
gdf.head()

   gridId                                           geometry
0       1  POLYGON ((-87.94440 41.63870, -87.92550 41.638...
1       2  POLYGON ((-87.92550 41.63870, -87.90670 41.638...
2       3  POLYGON ((-87.90670 41.63870, -87.88780 41.638...
3       4  POLYGON ((-87.88780 41.63870, -87.86890 41.638...
4       5  POLYGON ((-87.86890 41.63870, -87.85000 41.638...


In [164]:
# save the GeoDataFrame as an ESRI shapefile
gdf.to_file('gridGeometry.shp', driver='ESRI Shapefile')