In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

<h1 style="text-align: center;">Data gathering</h1>

<h2>1. Detroit neighborhoods info and polygons</h2>

In [None]:
import json
import urllib.request
from pathlib import Path

# Detroit neighborhoods GeoJSON endpoint (ArcGIS FeatureServer query that
# returns every attribute field together with the polygon geometry).
url = "https://services2.arcgis.com/qvkbeam7Wirps6zC/arcgis/rest/services/Neighborhoods_032015[shape]/FeatureServer/0/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryPolygon&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=*&returnGeometry=true&returnCentroid=false&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pgeojson&token="

# Create the output directory up front so the cell also works on a fresh
# checkout where "data/raw" does not exist yet.
raw_dir = Path("data/raw")
raw_dir.mkdir(parents=True, exist_ok=True)

# Download the neighborhood FeatureCollection.
# urllib (standard library, also used by the 911 download cells) is used here
# because `requests` is never imported in this notebook, which made this cell
# fail with a NameError on a fresh kernel.
with urllib.request.urlopen(url) as response:
    neighbors = json.load(response)

# Write json to file to be used later when making geo-heatmap
with open(raw_dir / "detroit_geo.json", "w") as outfile:
    json.dump(neighbors, outfile)

# Convert to a dataframe: one row per feature, one column per property, plus a
# trailing "geometry" column holding each feature's polygon dict.
# dtype=object keeps the property values exactly as parsed from the JSON
# (no int -> float upcasting when a value is missing in some rows).
features = neighbors["features"]
df = pd.DataFrame([feature["properties"] for feature in features], dtype=object)
df["geometry"] = [feature["geometry"] for feature in features]
print(df.shape)

# write neighborhood information to csv file
df.to_csv(raw_dir / "neighborhoods.csv", mode="w", header=True, index=False)

<hr style="border: 0.5px dashed;">
<h2>2. Detroit 911 calls</h2>

<h3>2.1 Data since Sep 20, 2016</h3>

In [None]:
import urllib.request

# Full export of Detroit 911 calls for service (records since Sep 20, 2016)
url = "https://opendata.arcgis.com/datasets/4f49eb825f564efa9a23cd103c4ba13b_0.csv"

print("Beginning file download...")
# Save the CSV locally; urlretrieve is the last expression of the cell, so its
# return value is displayed as the cell output.
urllib.request.urlretrieve(
    url,
    "data/raw/Detroit_911_calls/911_Calls_For_Service.csv",
)

In [None]:
%%bash
# Build the 2020 subset of the full 911 export: the header row (column labels)
# followed by every record whose 11th comma-separated field matches "2020".
#
# The comment is kept below the magic line on purpose: anything written after
# `%%bash` on the same line is handed to bash as command-line arguments.
#
# Header and data rows go to the SAME file (previously the header went to
# 911_Calls_2020.csv while the rows were appended to 911_Calls_2020_file0.csv,
# leaving file0 without column labels). Writing with `>` also means re-running
# the cell regenerates the file instead of appending duplicate rows.
#
# NOTE(review): awk splits on every comma, so a quoted field containing a comma
# would shift the column index -- confirm field 11 is the intended date column.
src=data/raw/Detroit_911_calls/911_Calls_For_Service.csv
dst=data/raw/Detroit_911_calls/911_Calls_2020_file0.csv
awk -F"," 'NR == 1 || $11 ~ /2020/' "$src" > "$dst"

<h3>2.2 Data last 30 days</h3>

In [1]:
import urllib.request

# Detroit city 911 calls, last 30 days
url = "https://opendata.arcgis.com/datasets/2901fec24266445588b4a3bf67098886_0.csv"
# File increment used as the suffix of the local file name
# (presumably bumped by hand before each new download -- confirm).
file_num = 4

print("Beginning file download...")
# Save csv to local machine; the value returned by urlretrieve is shown as the
# cell output.
urllib.request.urlretrieve(
    url,
    "data/raw/Detroit_911_calls/911_Calls_2020_file{}.csv".format(file_num),
)

Beginning file download...


('data/raw/Detroit_911_calls/911_Calls_2020_file4.csv',
 <http.client.HTTPMessage at 0x7fae25882d90>)

<hr style="border: 0.5px dashed;">
<h2>3. Gather tweets</h2>
<b>NOTE: Twitter API</b>
<ul>
    <li>Time stamp: Twitter time stamp in GMT</li>
    <li>Radius: "mi" or "km". Maximum 25mi.</li>
</ul>
<b>Current issues:</b>
<ul>
    <li>2020-09-23</li>
    <ul>
        <li>We can search for tweets coming from around a geo-coordinate; Twitter falls back to the user's profile location if the tweet's geotag is not found or not enabled. Strangely, however, when I queried at (42.437298,-82.951111) in Detroit, I got tweets by user_id=1308885595586953222, whose profile is in England.</li>
    </ul>
</ul>
<b>Tweets processing</b>
<ul>
    <li>Select only geo-tagged tweets</li>
    <li>Remove tweets related to advertising (e.g. jobs, traffic updates, internet bots). TODO: it is not yet known how to do this.</li>
</ul>

In [2]:
# BUILD THE TWITTER QUERY OBJECT
# Custom utility package
import utils
# Personal Twitter API secrets
from keys import my_api_secrets

# **CSV FILE INCREMENT**
num = 6
# Search radius passed along with the coordinates
r = "1mi"
# GPS coordinates (radius=1 mile each) covering Detroit.
# Used for the Twitter REST API.
coords = utils.Detroit_gps.coords_1mile
# Detroit bounding box. Used for the Twitter stream API.
box = utils.Detroit_gps.box

# NOTE: the Twitter_query object will write csv files into
# directory "data/raw/tweets/"
detroit_tweets = utils.Twitter_query.Twq(
    my_api_secrets.twitter_secrets,
    coords,
    r,
    box,
    num,
)

In [None]:
# If need to reimport the utils package
#import importlib
#importlib.reload(utils)

In [3]:
# METHOD 1: REST API, search (pull) request.
# Run the search a single time using the query object created above.
# The recorded output of this cell shows a summary line with the local time
# and the number of tweets, users and places.
detroit_tweets.search()
# To schedule the search to run automatically in the future instead,
# uncomment the line below (interval=1 means every 1 hour):
# detroit_tweets.repeated_search(interval=1)

Local time: 2020-11-25	10:4
Tweets: 2471	Users: 2471	Places: 1861
--------------------------------------------------


In [None]:
# METHOD 2: LIVE STREAM API, Twitter push request.
# Start the stream on the query object created above; it is stopped again by
# calling stop_stream() in the next cell.
detroit_tweets.start_stream()

In [None]:
# Stop the live stream that was started with start_stream().
detroit_tweets.stop_stream()

In [None]:
# Ask the search API client for its rate-limit status; as the last expression
# of the cell, the returned value is displayed as the cell output.
# NOTE(review): presumably this reports the remaining Twitter REST quota --
# confirm against the client wrapped by utils.Twitter_query.
detroit_tweets.search_api.rate_limit_status()