# Import Libraries

In [1]:
#Dependencies
import pandas as pd
import numpy as np
import json
import os
import requests 
import pymongo

# Perform API CALL

In [2]:
#Base URL for grabbing data from City Of Chicago
base_url ="https://data.cityofchicago.org/resource/ijzp-q8t2.geojson"

#Specify the Test URL with limit and offset parameters
#test_url ="https://data.cityofchicago.org/resource/ijzp-q8t2.geojson?$limit=50000&$offset=0&$order=id&$where=year>2017"


### Define query paramerters to pull data from API

In [3]:
offset= 0
limit = 50000 #50000
year =2020

# Set empty lists to hold the combined data
crime_data = []

### Paging through the data using query parameters

In [4]:
#Paging through the data using query parameters - Loop through from pages
for i in range(5):
     query_url =f"{base_url}?$limit={limit}&$offset={offset}&$order=id&year={year}"
     #query_url =f"{base_url}?$limit={limit}&$offset={offset}&$order=id&$where=year=2018"
     #query_url =f"{base_url}?$limit={limit}&$offset={offset}&$order=id&$where=(year>2017&&year<2020)"
     print("Loading data from", query_url)
     response = requests.get(query_url)
     data = response.json()
     #page = requests.get(f"https://data.cityofchicago.org/resource/ijzp-q8t2.geojson?$limit={limit}&$offset={offset}&$order=id&$where=year > 2017")
     offset += limit
     #crime_data += data
     crime_data = crime_data + data['features']
     print("Data Load completed for ", year)

Loading data from https://data.cityofchicago.org/resource/ijzp-q8t2.geojson?$limit=50000&$offset=0&$order=id&year=2020
Data Load completed for  2020
Loading data from https://data.cityofchicago.org/resource/ijzp-q8t2.geojson?$limit=50000&$offset=50000&$order=id&year=2020
Data Load completed for  2020
Loading data from https://data.cityofchicago.org/resource/ijzp-q8t2.geojson?$limit=50000&$offset=100000&$order=id&year=2020
Data Load completed for  2020
Loading data from https://data.cityofchicago.org/resource/ijzp-q8t2.geojson?$limit=50000&$offset=150000&$order=id&year=2020
Data Load completed for  2020
Loading data from https://data.cityofchicago.org/resource/ijzp-q8t2.geojson?$limit=50000&$offset=200000&$order=id&year=2020
Data Load completed for  2020


### Print first crime

In [5]:
# Print first crime
#print(f"The first response is {json.dumps(crime_data[0], indent=2)}.")
crime_data[0]

{'type': 'Feature',
 'geometry': {'type': 'Point', 'coordinates': [-87.561272312, 41.764728045]},
 'properties': {'location_state': '',
  'location_zip': '',
  'x_coordinate': '1194878',
  'domestic': False,
  'latitude': '41.764728045',
  'updated_on': '2020-06-20T15:48:45.000',
  'description': 'FIRST DEGREE MURDER',
  'location_address': '',
  'arrest': True,
  'location_city': '',
  'year': '2020',
  'longitude': '-87.561272312',
  'block': '072XX S SOUTH SHORE DR',
  'fbi_code': '01A',
  'ward': '7',
  'id': '24889',
  'date': '2020-01-02T02:54:00.000',
  'beat': '0334',
  'y_coordinate': '1857803',
  'community_area': '43',
  'location_description': 'APARTMENT',
  'district': '003',
  'iucr': '0110',
  'case_number': 'JD101272',
  'primary_type': 'HOMICIDE'}}

In [6]:
print(f"We received {len(crime_data)} responses.")

We received 211349 responses.


### Data Preprocessing - JSON File

In [7]:
#Validation Columns
crime_data_pd = pd.json_normalize(crime_data)
crime_data_pd.head()
crime_data_pd.columns

Index(['type', 'geometry.type', 'geometry.coordinates',
       'properties.location_state', 'properties.location_zip',
       'properties.x_coordinate', 'properties.domestic', 'properties.latitude',
       'properties.updated_on', 'properties.description',
       'properties.location_address', 'properties.arrest',
       'properties.location_city', 'properties.year', 'properties.longitude',
       'properties.block', 'properties.fbi_code', 'properties.ward',
       'properties.id', 'properties.date', 'properties.beat',
       'properties.y_coordinate', 'properties.community_area',
       'properties.location_description', 'properties.district',
       'properties.iucr', 'properties.case_number', 'properties.primary_type',
       'geometry'],
      dtype='object')

In [8]:
#Test transform pd to json
data_json = json.loads(crime_data_pd.to_json(orient='records'))
data_json[1]

{'type': 'Feature',
 'geometry.type': 'Point',
 'geometry.coordinates': [-87.561272312, 41.764728045],
 'properties.location_state': '',
 'properties.location_zip': '',
 'properties.x_coordinate': '1194878',
 'properties.domestic': False,
 'properties.latitude': '41.764728045',
 'properties.updated_on': '2020-06-20T15:48:45.000',
 'properties.description': 'FIRST DEGREE MURDER',
 'properties.location_address': '',
 'properties.arrest': True,
 'properties.location_city': '',
 'properties.year': '2020',
 'properties.longitude': '-87.561272312',
 'properties.block': '072XX S SOUTH SHORE DR',
 'properties.fbi_code': '01A',
 'properties.ward': '7',
 'properties.id': '24890',
 'properties.date': '2020-01-02T03:17:00.000',
 'properties.beat': '0334',
 'properties.y_coordinate': '1857803',
 'properties.community_area': '43',
 'properties.location_description': 'APARTMENT',
 'properties.district': '003',
 'properties.iucr': '0110',
 'properties.case_number': 'JD101272',
 'properties.primary_typ

In [9]:
# Get rid of unnecessary columns
for property in crime_data:
    properties = (property['properties'])
    del (properties['iucr'])
    del (properties['updated_on'])
    del (properties['fbi_code'])
    del (properties['case_number'])
    del (properties['ward'])   
    del (properties['beat'])
    del (properties['community_area'])
    del (properties['location_zip'])
    del (properties['location_city'])

In [10]:
crime_data[0]

{'type': 'Feature',
 'geometry': {'type': 'Point', 'coordinates': [-87.561272312, 41.764728045]},
 'properties': {'location_state': '',
  'location_zip': '',
  'x_coordinate': '1194878',
  'domestic': False,
  'latitude': '41.764728045',
  'description': 'FIRST DEGREE MURDER',
  'location_address': '',
  'arrest': True,
  'location_city': '',
  'year': '2020',
  'longitude': '-87.561272312',
  'block': '072XX S SOUTH SHORE DR',
  'id': '24889',
  'date': '2020-01-02T02:54:00.000',
  'y_coordinate': '1857803',
  'location_description': 'APARTMENT',
  'primary_type': 'HOMICIDE'}}

### Loading Data to MONGODB  Database

In [11]:
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Define the database in Mongo
db = client.chicago_crime

In [12]:
collection = db.events

In [13]:
collection.insert_many(crime_data)    

<pymongo.results.InsertManyResult at 0x2ad58e25cc0>

In [14]:
# query the classroom collection in flask application
#classroom = db.classroom.find()
crime = collection.find()

In [15]:
crime.count()

  crime.count()


211349