In [5]:
import time
import requests
from urllib.parse import urlparse
import sys
import json
import lxml.html
import csv
from utils import make_request, parse_html, make_link_absolute, page_grab
import datetime

In [9]:
def epochtime_yesterdaymidnight(tz=None):
    """
    Return the Unix epoch (whole seconds) of 00:00 yesterday.

    The incident-report archive keys each day by midnight in Chicago (the
    sample ``reportDate=1688360400`` used in this notebook is 2023-07-03
    00:00 CDT), so "yesterday" and "midnight" are evaluated in that zone
    rather than in whatever timezone the kernel's host happens to use.

    Parameters:
        * tz:  optional ``datetime.tzinfo`` in which to evaluate the date.
               When omitted, ``America/Chicago`` is used; if the ``zoneinfo``
               module or its timezone database is unavailable, the host's
               local timezone is used instead (the previous behaviour).

    Returns:
        int epoch seconds for the start of the previous calendar day.
    """
    if tz is None:
        try:
            from zoneinfo import ZoneInfo  # standard library from Python 3.9

            tz = ZoneInfo("America/Chicago")
        except (ImportError, KeyError):
            # ZoneInfoNotFoundError subclasses KeyError. Leaving tz as None
            # makes every call below operate on naive local time.
            tz = None

    yesterday = datetime.datetime.now(tz).date() - datetime.timedelta(days=1)
    # Build midnight from the calendar date (not "now minus 24h") so a DST
    # change cannot shift the hour.
    midnight = datetime.datetime.combine(
        yesterday, datetime.time(0, 0), tzinfo=tz
    )
    # timestamp() honours tzinfo when present and assumes local time otherwise.
    return int(midnight.timestamp())


def get_table(
    url="https://incidentreports.uchicago.edu/incidentReportArchive.php?reportDate=1688360400"
):
    """
    Scrape one page of the UCPD incident report archive.

    Parameters:
        * url:  URL of an archive page; passed unchanged to ``page_grab``.

    Returns:
        A tuple ``(inc_dict, pagenumber)``:
        * inc_dict maps each incident ID (the text of a row's 7th cell) to a
          dict of ``{column header: cell text}`` covering every column
          except the last header.
        * pagenumber is the integer before the "/" in the first
          ``span.page-link`` element, or 0 when that text contains no "/".

    Rows too short to hold an ID cell, and rows whose ID cell has no text,
    are skipped.
    """
    id_column = 6  # zero-based position of the incident-ID cell within a row

    response = page_grab(url)

    headers = response.cssselect("thead")[0].cssselect("th")
    # The final header is dropped: the ID is used as the outer key instead of
    # being repeated inside each record.
    field_names = [str(th.text) for th in headers[:-1]]

    rows = response.cssselect("tbody")[0].cssselect("tr")
    inc_dict = {}
    for row in rows:
        # Placeholder rows (e.g. a single spanning cell) have no ID cell.
        if len(row) <= id_column:
            continue
        incident_id = row[id_column].text
        if incident_id is None:
            continue
        # zip stops at the shorter sequence, so the trailing ID cell is left
        # out of the record.
        inc_dict[str(incident_id)] = dict(
            zip(field_names, (cell.text for cell in row))
        )

    # Report the current page number; an offset past the last page lands back
    # on the first page, so callers need this to notice the wrap-around.
    pages = response.cssselect("span.page-link")
    current, slash, _total = pages[0].text.partition("/")
    # Split on the slash instead of slicing a fixed width, so both "3 / 40"
    # and "12/40" parse to the full number.
    pagenumber = int(current.strip()) if slash else 0
    return inc_dict, pagenumber


def get_yesterday():
    """
    Fetch the UCPD incident table for yesterday.

    Appends the value of ``epochtime_yesterdaymidnight()`` to the archive's
    ``reportDate=`` URL and returns whatever ``get_table`` returns for it.
    """
    archive_page = "https://incidentreports.uchicago.edu/incidentReportArchive.php?reportDate="
    report_date = epochtime_yesterdaymidnight()
    return get_table(url=f"{archive_page}{report_date!s}")

# Run the scraper once for yesterday. As the cell's last expression, the
# returned (incidents, page number) tuple is what gets displayed below.
get_yesterday()

Fetching https://incidentreports.uchicago.edu/incidentReportArchive.php?reportDate=1688446800


({'2023-023552': {'Incident': 'Information / Aggravated Battery',
   'Location': '1320 E. Hyde Park (Apartment Building)',
   'Reported': '7/4/23 2:51 AM',
   'Occurred': '7/4/23 2:50 AM',
   'Comments / Nature of Fire': 'Two subjects struck each other with blunt objects and both transported to UCMED by CFD EMS for evaluation and treatment / CPD case JG326847',
   'Disposition': 'CPD'},
  '23-00637': {'Incident': 'Found Narcotics',
   'Location': '5815 S. Maryland (Mitchell Hospital)',
   'Reported': '7/4/23 10:02 AM',
   'Occurred': '7/4/23 10:00 AM',
   'Comments / Nature of Fire': "UCM staff found suspect narcotics in patient's room / Contraband turned over to UCPD for proper disposal",
   'Disposition': 'Closed'},
  '23-00638': {'Incident': 'Found Property',
   'Location': '1228 E. 53rd St. (Public Way)',
   'Reported': '7/4/23 12:45 PM',
   'Occurred': '7/4/23 12:45 PM',
   'Comments / Nature of Fire': "Driver's license, ID and credit cards found and turned over to UCPD for safeke

In [None]:
# Pages are walked with a loop rather than recursion: recursing once per page
# would go over the recursion limit.
# Query window: startDate=1293861600 (2011-01-01 00:00 US Central) through
# endDate=1688274000 (2023-07-02 00:00 US Central), beginning at offset 0.
initialurl = (
    "https://incidentreports.uchicago.edu/incidentReportArchive.php"
    "?startDate=1293861600&endDate=1688274000&offset=0"
)


def get_alltables(initialurl, step=5):
    """
    Collect every page of an archive query into one JSON document.

    Starting from ``initialurl``, the ``offset`` query value is advanced by
    ``step`` and each page is fetched with ``get_table``. Fetching stops when
    the reported page number is 1 again (the offset ran past the last page
    and wrapped around) or fails to increase.

    Parameters:
        * initialurl:  URL of the first page to fetch. ``offset=<int>`` must
                       be its final query parameter; if the URL has no
                       ``offset`` one is appended, counting from 0.
        * step:        amount added to ``offset`` per request (default 5).

    Returns:
        A JSON string encoding a dict of ``{incident ID: {field: value}}``
        merged across all fetched pages.
    """
    # Derive follow-up URLs from the caller's URL so the query window that was
    # asked for is the one that gets paged through.
    head, marker, tail = initialurl.rpartition("offset=")
    if marker:
        page_url = head + marker
        offset = int(tail)
    else:
        joiner = "&" if "?" in initialurl else "?"
        page_url = initialurl + joiner + "offset="
        offset = 0

    incid_dict, last_page = get_table(url=initialurl)

    while True:
        offset += step
        rev_dict, pagenumber = get_table(url=page_url + str(offset))
        # Page 1 means the offset wrapped around. A page number that does not
        # advance means the pager could not be read. Either way stop, rather
        # than loop forever or merge pages that were already collected.
        if pagenumber == 1 or pagenumber <= last_page:
            break
        incid_dict.update(rev_dict)
        last_page = pagenumber

    # json.dumps (not str) so the result is valid JSON: double-quoted strings
    # and null instead of None.
    return json.dumps(incid_dict)


def export_string(jsonString, path="output.json"):
    """
    Write a JSON string to a file, reporting the outcome on stdout.

    Parameters:
        * jsonString:  the text to write.
        * path:        destination file (default "output.json", relative to
                       the current working directory); overwritten if it
                       already exists.

    Returns:
        None. I/O failures are caught and printed rather than raised.
    """
    try:
        # Pin the encoding: JSON files are conventionally UTF-8, and without
        # this the platform's default encoding would be used.
        with open(path, "w", encoding="utf-8") as file:
            file.write(jsonString)
        print("JSON has been saved to file successfully.")
    except OSError as e:  # IOError is an alias of OSError in Python 3
        print("Error writing JSON to file:", str(e))


# Scrape the whole archive query and save the result to disk. The log below
# shows one fetch per offset, so expect this cell to take a while to re-run.
j = get_alltables(initialurl)
export_string(j)

Fetching https://incidentreports.uchicago.edu/incidentReportArchive.php?startDate=1293861600&endDate=1688274000&offset=0
Fetching https://incidentreports.uchicago.edu/incidentReportArchive.php?startDate=1293861600&endDate=1688274000&offset=5
Fetching https://incidentreports.uchicago.edu/incidentReportArchive.php?startDate=1293861600&endDate=1688274000&offset=10
Fetching https://incidentreports.uchicago.edu/incidentReportArchive.php?startDate=1293861600&endDate=1688274000&offset=15
Fetching https://incidentreports.uchicago.edu/incidentReportArchive.php?startDate=1293861600&endDate=1688274000&offset=20
Fetching https://incidentreports.uchicago.edu/incidentReportArchive.php?startDate=1293861600&endDate=1688274000&offset=25
Fetching https://incidentreports.uchicago.edu/incidentReportArchive.php?startDate=1293861600&endDate=1688274000&offset=30
Fetching https://incidentreports.uchicago.edu/incidentReportArchive.php?startDate=1293861600&endDate=1688274000&offset=35
Fetching https://incidentr