In [27]:
import gzip
import os
import requests
import urllib
from helper_functions import create_session

In [2]:
# Build the session via the project-local helper (helper_functions.create_session).
# NOTE(review): presumably returns a boto3 Session with credentials/region set — confirm in helper_functions.
session = create_session()
# S3 client used by the later cells for delete_object / put_object calls.
s3 = session.client('s3')

In [3]:
# Mapping of source URL -> local file name for every dataset pulled from the
# public covid19-lake bucket. All URLs share the same bucket prefix, so only
# the object path is spelled out per entry; insertion order is preserved.
files = {
    f'https://covid19-lake.s3.us-east-2.amazonaws.com/{object_path}': local_name
    for object_path, local_name in (
        # Enigma JHU (gzip-compressed CSV)
        ('enigma-jhu/csv/Enigma-JHU.csv.gz', 'enigma-jhu.csv'),
        # NYT Data US Counties
        ('enigma-nytimes-data-in-usa/csv/us_county/us_county.csv', 'enigma-nyt-usa-counties.csv'),
        # NYT Data US States
        ('enigma-nytimes-data-in-usa/csv/us_states/us_states.csv', 'enigma-nyt-usa-states.csv'),
        # Rearc USA States Daily Testing
        ('rearc-covid-19-testing-data/csv/states_daily/states_daily.csv', 'rearc-states-daily-test.csv'),
        # Rearc USA Total Latest Testing
        ('rearc-covid-19-testing-data/csv/us-total-latest/us.csv', 'rearc-usa-latest-total.csv'),
        # Rearc USA Daily Testing
        ('rearc-covid-19-testing-data/csv/us_daily/us_daily.csv', 'rearc-usa-daily-test.csv'),
        # Rearc USA Hospital Beds JSON
        ('rearc-usa-hospital-beds/json/usa-hospital-beds.geojson', 'rearc-usa-hospital-beds.json'),
        # Static Dataset Country Code
        ('static-datasets/csv/countrycode/CountryCodeQS.csv', 'static-country-codes.csv'),
        # Static Dataset County Population
        ('static-datasets/csv/CountyPopulation/County_Population.csv', 'static-county-codes.csv'),
        # Static Dataset State Abbreviation
        ('static-datasets/csv/state-abv/states_abv.csv', 'static-state-codes.csv'),
    )
}

In [None]:
# Deleting files if necessary
# The upload cell stores each object under 'raw/<name>/<name>', so the same
# key has to be used here. S3 reports success (204) even when the key does
# not exist, so a mismatched key would silently delete nothing.
for file in files:
    name = files[file].split('.')[0]  # local file name without its extension
    res = s3.delete_object(
        Bucket='kc-covid-project',
        Key=f'raw/{name}/{name}'
    )
    print(f'Status Code: {res["ResponseMetadata"]["HTTPStatusCode"]}')

In [None]:
# Downloading files to data directory
# Each source is fetched once. Gzip archives (URL ending in '.gz') are
# decompressed in memory; everything else (.csv, .geojson) is stored as-is.
# The exact bytes written to ./data are also the bytes uploaded to S3.
os.makedirs('./data', exist_ok=True)  # the open() below fails if the directory is missing
for file_key in files:
    res = requests.get(file_key)
    print(f'File: {file_key}')
    print(f'Status Code: {res.status_code}')
    # Stop on HTTP errors instead of saving/uploading an error page as data
    res.raise_for_status()
    file_content = res.content
    # The magic-byte check guards against the payload having already been
    # transparently decoded by the HTTP layer (Content-Encoding: gzip).
    if file_key.endswith('.gz') and file_content[:2] == b'\x1f\x8b':
        file_content = gzip.decompress(file_content)
    with open(f'./data/{files[file_key]}', 'wb') as f:
        f.write(file_content)
    # Write files to S3 as objects
    name = files[file_key].split(".")[0]
    s3.put_object(
        Bucket="kc-covid-project",
        Key=f'raw/{name}/{name}',
        Body=file_content
    )

In [14]:
# Sanity check: show every entry currently present in the data directory,
# one name per line, in the order os.listdir returns them.
data_entries = os.listdir('./data/')
for entry_name in data_entries:
    print(entry_name)

enigma-jhu.csv
enigma-nyt-usa-states.csv
enigma-usa-counties.csv
rearc-usa-daily-test.csv
rearc-usa-daily-testing.csv
rearc-usa-hospital-beds.json
rearc-usa-latest-total.csv
static-country-codes.csv
static-county-codes.csv
static-state-codes.csv
