In [36]:
import os

import pandas as pd
import numpy as np

import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
from sqlalchemy import func # library to use aggregate functions

# libraries to read json
import json
import requests
from pandas.io.json import json_normalize

#from flask import Flask, jsonify, render_template
#from flask_sqlalchemy import SQLAlchemy

In [50]:
# --- Notebook-wide settings ---

# SQLite database: containing folder and file name
dbPath = "trafficViolations/static/db"
dbName = "trafficViolations.sqlite"

# File name of the GeoJSON document this notebook writes out
geojson_fname = "geoLoc.json"

# Endpoint that serves the police-district polygon coordinates as JSON
police_dist_URL = "https://data.montgomerycountymd.gov/resource/vxy6-ve2e.json"

In [47]:
################### HELPER FUNCTIONS
def readJSON(url, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON payload.

    Parameters
    ----------
    url : str
        Address of the JSON resource.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30), so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    object
        Whatever ``resp.json()`` decodes from the response body.

    Raises
    ------
    requests.exceptions.ConnectionError
        If the request fails with a connection error.
    requests.exceptions.HTTPError
        If the response status code is anything other than 200.
    """
    try:
        resp = requests.get(url, timeout=timeout)
    except requests.exceptions.ConnectionError as err:
        # requests raises its own ConnectionError (not the builtin one);
        # re-raise the same type with a readable message, keeping the cause.
        raise requests.exceptions.ConnectionError(
            f"Error in Connection : {err}"
        ) from err

    # Anything other than 200 is treated as an unsuccessful request.
    if resp.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"Unsuccessful in obtaining JSON : HTTP {resp.status_code} for {url}",
            response=resp,
        )

    return resp.json()
        

# function that constructs the feature details for GEOJSON
def genFeatureDict(info):
    """Assemble one GeoJSON ``Feature`` dict from a single district record.

    ``info`` is any mapping-like object (a dict or a DataFrame row) that can be
    indexed by the keys 'the_geom.type', 'the_geom.coordinates', 'SubAgency',
    'PoliceDistrictID' and 'TotalViolations'.

    Returns a dict with "type", "geometry" and "properties" entries.
    """
    # geometry: shape type plus its coordinate array
    geometry = {
        "type": info['the_geom.type'],
        "coordinates": info['the_geom.coordinates'],
    }

    # properties: descriptive attributes attached to the shape
    properties = {
        "name": info['SubAgency'],
        "distID": info['PoliceDistrictID'],
        "total_traffic_violations": info['TotalViolations'],
    }

    return {"type": "Feature", "geometry": geometry, "properties": properties}
    

In [16]:
# Create the SQLAlchemy engine for the SQLite file located at dbPath/dbName.
# NOTE(review): dbPath is a relative path, so this presumably resolves against
# the notebook's working directory — confirm where the notebook is launched from.
eng = create_engine(f"sqlite:///{dbPath}/{dbName}")

In [17]:
# Create an automap base that will hold classes generated from the existing database
Base  = automap_base()

# Reflect the tables reachable through `eng` and generate mapped classes for them.
# NOTE(review): `reflect=True` looks like the pre-1.4 SQLAlchemy spelling; newer
# releases use `autoload_with=eng` — confirm against the installed version.
Base.prepare(eng, reflect = True)

In [18]:
# List the names of the classes that automap generated from the reflected tables
print(Base.classes.keys())

['traffic_violations']


In [22]:
# Mapped class for the reflected 'traffic_violations' table; used in the query below
V = Base.classes['traffic_violations']

In [21]:
# Open an ORM session bound to the SQLite engine
session = Session(eng)

In [31]:
# Sum ViolationCount per (SubAgency, PoliceDistrictID) pair and load the
# grouped rows into a DataFrame with readable column names
resDF = pd.DataFrame(
    (
        session.query(V.SubAgency, V.PoliceDistrictID, func.sum(V.ViolationCount))
        .group_by(V.SubAgency, V.PoliceDistrictID)
        .all()
    ),
    columns=['SubAgency', 'PoliceDistrictID', 'TotalViolations'],
)

In [39]:
# Print the column dtypes, then display the aggregated frame as the cell output
print(resDF.dtypes)
resDF

SubAgency           object
PoliceDistrictID     int64
TotalViolations      int64
dtype: object


Unnamed: 0,SubAgency,PoliceDistrictID,TotalViolations
0,"1st district, Rockville",1,108585
1,"2nd district, Bethesda",2,132846
2,"3rd district, Silver Spring",3,195723
3,"4th district, Wheaton",4,226657
4,"5th district, Germantown",5,98148
5,"6th district, Gaithersburg / Montgomery Village",6,125278
6,Headquarters and Special Operations,8,31595


In [40]:
# Download the police-district JSON and flatten its nested records into
# a flat table (nested keys become dotted column names)
resp = readJSON(url=police_dist_URL)
coordsDF = json_normalize(data=resp)

# preview the first five rows
coordsDF.head(5)

Unnamed: 0,dist,objectid,shape_area,shape_len,the_geom.coordinates,the_geom.type
0,2,2,1565751500.03924,248658.778723035,"[[[[-77.1457618498, 39.037860378201], [-77.145...",MultiPolygon
1,1,1,4182813700.92157,453658.955673688,"[[[[-77.187567669146, 39.110811579485], [-77.1...",MultiPolygon
2,4,4,2313490108.83212,418565.068792495,"[[[[-77.115896846336, 39.126723839894], [-77.1...",MultiPolygon
3,6,6,1133619164.7974,337359.39232825703,"[[[[-77.246867924553, 39.175370078619], [-77.2...",MultiPolygon
4,8,7,58130070.0200888,44856.0606935784,"[[[[-77.001499589417, 38.989799321823], [-77.0...",MultiPolygon


In [41]:
# Keep only the columns needed from the police-district JSON, rename the
# district key to match the SQLite result, and cast it to int for the merge.
# Chaining on the column selection returns a new DataFrame each step, which
# avoids mutating a slice in place (the source of SettingWithCopyWarning).
coordsDF = (
    coordsDF[['dist', 'the_geom.coordinates', 'the_geom.type']]
    .rename(columns={"dist": "PoliceDistrictID"})
    .astype({"PoliceDistrictID": int})
)

In [42]:
# Join the SQLite totals with the district geometry on the shared district id;
# the inner join keeps only districts present in both frames
coordsDF = resDF.merge(coordsDF, on="PoliceDistrictID", how="inner")

coordsDF

Unnamed: 0,SubAgency,PoliceDistrictID,TotalViolations,the_geom.coordinates,the_geom.type
0,"1st district, Rockville",1,108585,"[[[[-77.187567669146, 39.110811579485], [-77.1...",MultiPolygon
1,"2nd district, Bethesda",2,132846,"[[[[-77.1457618498, 39.037860378201], [-77.145...",MultiPolygon
2,"3rd district, Silver Spring",3,195723,"[[[[-77.05481528585, 39.013802780305], [-77.05...",MultiPolygon
3,"4th district, Wheaton",4,226657,"[[[[-77.115896846336, 39.126723839894], [-77.1...",MultiPolygon
4,"5th district, Germantown",5,98148,"[[[[-77.255447065893, 39.313587822258], [-77.2...",MultiPolygon
5,"6th district, Gaithersburg / Montgomery Village",6,125278,"[[[[-77.246867924553, 39.175370078619], [-77.2...",MultiPolygon
6,Headquarters and Special Operations,8,31595,"[[[[-77.001499589417, 38.989799321823], [-77.0...",MultiPolygon


In [44]:
# Convert the two numeric columns to strings for easy JSON creation
coordsDF = coordsDF.astype({'PoliceDistrictID': str, 'TotalViolations': str})

# confirm the resulting dtypes
coordsDF.dtypes

SubAgency               object
PoliceDistrictID        object
TotalViolations         object
the_geom.coordinates    object
the_geom.type           object
dtype: object

In [48]:
# Build the GeoJSON FeatureCollection: one feature per row of coordsDF,
# each produced by genFeatureDict
map_geojson = {
    "type": "FeatureCollection",
    "features": [genFeatureDict(row) for _, row in coordsDF.iterrows()],
}

In [51]:
# Serialise the feature collection to JSON text and save it under dbPath
with open(os.path.join(dbPath, geojson_fname), "w") as out_file:
    out_file.write(json.dumps(map_geojson))