In [88]:
import re
import datetime as dt
import logging
from urllib import request
from urllib.error import HTTPError, URLError

import pandas as pd
from bs4 import BeautifulSoup as bs

import boto3
from botocore.exceptions import ClientError

def upload_file(file_name, bucket, object_name=None, ExtraArgs={'ACL': 'public-read'}):
    """Upload a file to an S3 bucket

    :param file_name: File to upload
    :param bucket: Bucket to upload to
    :param object_name: S3 object name. If not specified then file_name is used
    :param ExtraArgs: Extra arguments forwarded to the S3 client's
        ``upload_file`` call. Defaults to ``{'ACL': 'public-read'}``; pass
        ``None`` or ``{}`` to upload without any extra arguments.
    :return: True if file was uploaded, else False
    """

    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = file_name

    # Forward a private copy so the shared default dict can never be mutated.
    extra_args = dict(ExtraArgs) if ExtraArgs else None

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        s3_client.upload_file(file_name, bucket, object_name, ExtraArgs=extra_args)
    except (ClientError, boto3.exceptions.S3UploadFailedError) as e:
        # boto3's transfer layer re-raises client errors hit during the upload
        # as S3UploadFailedError, so both types must map to "return False".
        logging.error(e)
        return False
    return True

# The notebook always processes the article for the previous calendar day.
YESTERDAY = dt.datetime.now() + dt.timedelta(days = -1)
# strftime format used for status messages and output file names.
MONTHDAYYEAR = "%m-%d-%Y"

# "<NAME> COUNTY ..." heading; group 1 is the county name.
county = re.compile(r'^(\w*[\s\w]*)\sCOUNTY.*')
# "<bullet> <Town Name>: <count> ..." line; group 1 is the town name and
# group 2 the case count. The name may span several words ("Fort Lee") and
# contain '.', "'" or '-' ("Ho-Ho-Kus"). The count must have at least one
# digit so callers can always pass it to int(). The lookahead keeps
# "<NAME> COUNTY: ..." headings from being read as towns.
town = re.compile(r"^\W*(?![^:]*\sCOUNTY\b)([A-Za-z][A-Za-z .'\-]*):\s*(\d+).*")
# Death count mentioned after the colon; group 1 is the whole number.
deaths = re.compile(
            r'.*:.*\b(\d+)\s(?:death|fatalitie|fatality|who died)s?.*',
            re.IGNORECASE
        )
# Recovery count; the \b stops the greedy prefix from swallowing the leading
# digits of a multi-digit number, so group 1 is the whole number.
recovered = re.compile(
            r'.*\b(\d+)\s(?:cleared from quarantine|who recovered'
            r'|have recovered|recovered).*',
            re.IGNORECASE
            )

zipcode_url = 'https://www.zipcodestogo.com/New%20Jersey/'
# Article slug ends in "<month name>-<day>-<year>". The day comes from
# YESTERDAY.day because the "%-d"/"%-m" no-padding strftime flags are
# platform-specific.
url = (
    f"https://www.nj.com/coronavirus/{YESTERDAY.strftime('%Y')}/{YESTERDAY.strftime('%m')}/"
    "where-is-the-coronavirus-in-nj-latest-map-update-on-county-by-county-cases-"
    f"{YESTERDAY.strftime('%B').lower()}-{YESTERDAY.day}-{YESTERDAY.strftime('%Y')}.html"
)

# Fetch and parse the article page. A failure is reported but not re-raised,
# so `html` is left unset (or stale from an earlier run) when the fetch fails.
try:
    # The context manager closes the HTTP response once the page is parsed.
    with request.urlopen(url) as response:
        html = bs(response)
    print(f'Date: {YESTERDAY.strftime(MONTHDAYYEAR)}\nSuccessfully opened {url}')
except URLError as e:
    # HTTPError is a subclass of URLError, so this single handler covers both.
    print(f'Date: {YESTERDAY.strftime(MONTHDAYYEAR)}\nError trying to open {url}:\n{e}')

# Fetch the zip-code table: first table on the page, last column dropped,
# 'Zip Code' read as text.
try:
    zips = pd.read_html(
            zipcode_url,
            skiprows=0,
            header=1,
            converters={'Zip Code':str}
            )[0].iloc[:, :-1]
    print(f'Date: {YESTERDAY.strftime(MONTHDAYYEAR)}\nSuccessfully opened {zipcode_url}')
except URLError as e:
    # Covers HTTPError as well (subclass of URLError).
    print(f'Date: {YESTERDAY.strftime(MONTHDAYYEAR)}\nError trying to open {zipcode_url}:\n{e}')

In [61]:
# Walk the article's paragraphs in order. A county heading sets the county
# for every town line that follows it, until the next heading.
current_county = ''
town_rows = []

for p in html.findAll('p'):
    text = p.text

    county_match = county.match(text)
    if county_match:
        current_county = county_match.group(1).title()
        print(current_county)

    town_match = town.match(text)
    # Skip anything that is not a "<town>: <count>" line; an empty count
    # capture cannot be converted to a number, so it is skipped too.
    if not town_match or not town_match.group(2):
        continue

    death_ct = 0
    recovered_ct = 0

    death_match = deaths.match(text)
    if death_match:
        # Store an int (not the matched string) so the column stays numeric.
        death_ct = int(death_match.group(1) or 0)
        print(text)

    recovered_match = recovered.match(text)
    if recovered_match:
        recovered_ct = int(recovered_match.group(1) or 0)
        print(text)

    town_rows.append(
        [current_county,
         town_match.group(1),
         int(town_match.group(2)),
         death_ct,
         recovered_ct]
    )

towns = pd.DataFrame(town_rows,
                     columns=['County', 'City', 'Cases',
                              'Deaths', 'Recoveries']
                    )
print(towns.head())

# Zip-code lookup table; 'Zip Code' is read as text.
zips = pd.read_csv('s3://athenedyne-covid-19/NJzips.csv',
                   converters = {'Zip Code': str})
print(zips.head())

# Left join keeps every parsed town even when no zip code matches it.
output = towns.merge(zips, how='left', on=['City', 'County'])

print(output.head())

date_label = YESTERDAY.strftime(MONTHDAYYEAR)
complete_csv = f'{date_label}-complete.csv'
cases_csv = f'{date_label}-cases.csv'
zips_csv = f'{date_label}-zips.csv'

output.to_csv(complete_csv)
output[['Zip Code', 'City', 'Cases']].to_csv(cases_csv)
# numeric_only=True sums only the numeric columns per zip code, regardless of
# the installed pandas version's default for non-numeric columns.
output.groupby('Zip Code').sum(numeric_only=True).to_csv(zips_csv)

# upload_file returns False on failure; report it instead of ignoring it.
for csv_name in (complete_csv, cases_csv, zips_csv):
    if not upload_file(csv_name, 'athenedyne-covid-19'):
        print(f'Upload failed for {csv_name}')

Atlantic
Bergen
Bergen Allendale 21
Bergen Alpine 11
Bergen Bergenfield 252
Bergen Bogota 65
Bergen Carlstadt 28
Bergen Closter 25
Bergen Cresskill 33
Bergen Demarest 19
Bergen Dumont 91
Bergen Edgewater 52
Bergen Emerson 29
Bergen Englewood 229
Bergen Fairview 72
Bergen Garfield 149
Bergen Hackensack 304
Bergen Haworth 10
Bergen Hillsdale 41
Bergen Leonia 39
Bergen Lodi 152
Bergen Lyndhurst 87
Bergen Mahwah 79
Bergen Maywood 40
Bergen Montvale 34
Bergen Moonachie 23
Bergen Northvale 16
Bergen Norwood 25
Bergen Oakland 58
Bergen Oradell 48
Bergen Paramus 148
Bergen Ramsey 50
Bergen Ridgefield 49
Bergen Ridgewood 103
Bergen Rockleigh 2
Bergen Rutherford 49
Bergen Teaneck 421
Bergen Tenafly 50
Bergen Teterboro 1
Bergen Waldwick 45
Bergen Wallington 35
Bergen Westwood 46
Bergen Wyckoff 74
Burlington
Burlington Beverly 4
Burlington Chesterfield 1
• Chesterfield: 1 (1 cleared from quarantine)
Burlington Cinnaminson 6
• Cinnaminson: 6 (and 1 death)
Burlington Delanco 4
Burlington Delran 17
B