In [8]:
#Import packages and secrets
import sys
import requests
import pandas as pd
import json
import boto3
import botocore
import pyarrow
import os
import snowflake.connector
from snowflake.connector.pandas_tools import write_pandas
from datetime import datetime
from config import api_key_secret, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
from aws_secretsmanager_caching import SecretCache, SecretCacheConfig 

############################################################################
###### SET API TARGET, PARAMETERS, DESTINATION BUCKET AND LOGGING ##########
############################################################################

# Set API Target, Parameters and number of api calls
api_name = 'WAPolice'
base_url = "https://www.police.wa.gov.au/apiws/CrimeStatsApi/GetLocalityCrimeStats/"
num_api_calls = 2 #API limited to 500 per day

# Set Destination S3 bucket variables
s3 = boto3.client('s3')
jsonbucket = 'sbx-wapolice-injest-json'

# Create empty list of lists to capture details for each successful loop run
loop_log = []

############################################################################
############### GET LIST OF SEEDS FROM SNOWFLAKE ##############
############################################################################

# Get secrets from AWS Secrets Manager
client = botocore.session.get_session().create_client('secretsmanager')
cache_config = SecretCacheConfig()
cache = SecretCache( config = cache_config, client = client)
secrets = cache.get_secret_string('snowflake-creds')
secrets = (json.loads(secrets)) #Put secrets into a dictionary

# Initialize connection to Snowflake
ctx = snowflake.connector.connect(
    user='NICKLILLEYMAN',
    password=(secrets['password']),
    account=(secrets['account']),
    region =(secrets['region']),
    warehouse=(secrets['warehouse']),
    database='SBX_RAW',
    schema='PUBLIC'
    )

# Create a cursor object.
cur = ctx.cursor()

# Execute SQL to get list of seeds from dim_suburb_geography, minus any seeds already executed in previous runs (see api_call_log table)
seed_table = "SBX_ANALYTICS.DBT_NLILLEYMAN_COMMON.DIM_SUBURB_GEOGRAPHY"
sql = ("""
SELECT
    DIM_SUBURB_SK
    ,SUBURB_ID
    ,SUBURB
    ,POSTCODE
    ,STATE
FROM %(seed_table)s
WHERE
    STATE = 'WA'
    AND  CONCAT('%(api_name)s','-',DIM_SUBURB_SK) NOT IN (SELECT SEED_KEY FROM SBX_RAW.PUBLIC.API_CALL_LOG)
  --AND SUBURB in ('Willetton','Brentwood','Harrisdale','Leeming')
ORDER BY DIM_SUBURB_SK
""" % {"seed_table": seed_table,"api_name": api_name})
cur.execute(sql)
if cur.rowcount == 0:
    print("No seeds to process")

# Put query results into a dataframe and restrict dataframe to limit API calls
df = cur.fetch_pandas_all()
df['api_call_target_id'] = df.reset_index().index #Create row number for iterator
start_iterating = 0
stop_iterating = start_iterating + num_api_calls
df = df.loc[df["api_call_target_id"].between(start_iterating,stop_iterating)] #restict dataframe

#Convert dataframe to lists
api_call_target_ids = ((df["api_call_target_id"]).astype(str).tolist())  #convert df to string and then convert to list
dim_suburb_sks = (df["DIM_SUBURB_SK"]).tolist()
states = (df["STATE"]).tolist()
suburbs = (df["SUBURB"]).tolist()
postcodes = ((df["POSTCODE"]).astype(str).tolist()) #convert df to string and then convert to list


for api_call_target_id,state,suburb,postcode,dim_suburb_sk in zip(api_call_target_ids,states,suburbs,postcodes,dim_suburb_sks):    
    try:
        #Define metadata variables
        api_call_datetime = datetime.now()
        seed_key = api_name+'-'+dim_suburb_sk
        full_url = base_url+str(suburb)
        
        api_status_code = 200
        api_status_reason = 'Success'

        #
        response = requests.get(full_url)
        data = response.json()

        #Define filename
        file_name = state+"_"+suburb+"_"+postcode+"_"+str(api_status_code)+"_"+api_name
        #dump data to json file
        with open(file_name+".json", 'w') as f:
            json.dump(data, f)
        
        #Upload json file to S3
        s3.upload_file(file_name+".json", jsonbucket, file_name+".json")

        #Upload json response into S3 bucket
        #s3.put_object(Key=file_name+".json",Body=json.dumps(response), Bucket=jsonbucket)
        #os.remove(file_name+".json")

        #Set loop outcome to Success
        loop_outcome = "Success"
        #Logging - append each loop to list of lists
        loop_log.append([api_name,seed_table,seed_key,full_url,api_status_code,api_status_reason,loop_outcome,api_call_datetime])            
    except:
        loop_outcome = "Error"
        loop_log.append([api_name,seed_table,seed_key,full_url,api_status_code,api_status_reason,loop_outcome,api_call_datetime])            

#Convert loop_log to dataframe and adjust datatypes
loop_log_df = pd.DataFrame(loop_log, columns = ['API_NAME', 'SEED_TABLE', 'SEED_KEY', 'TARGET_URL', 'API_STATUS_CODE', 'API_STATUS_REASON','LOOP_OUTCOME','API_CALL_DATETIME'])
loop_log_df['API_CALL_DATETIME'] = (loop_log_df["API_CALL_DATETIME"]).astype(str)

#Write loop_log_df to Snowflake API_CALL_LOG table
success, nchunks, nrows, _ = write_pandas(ctx, loop_log_df, 'API_CALL_LOG')
print(loop_log_df)

   API_NAME                                         SEED_TABLE  \
0  WAPolice  SBX_ANALYTICS.DBT_NLILLEYMAN_COMMON.DIM_SUBURB...   
1  WAPolice  SBX_ANALYTICS.DBT_NLILLEYMAN_COMMON.DIM_SUBURB...   
2  WAPolice  SBX_ANALYTICS.DBT_NLILLEYMAN_COMMON.DIM_SUBURB...   

                                    SEED_KEY  \
0  WAPolice-0185201a4c039520f12a57066d489b88   
1  WAPolice-01a2834048b36cccd552a0feccaf2cde   
2  WAPolice-01a4e651942fe19a7aaf7b6f67ff47dc   

                                          TARGET_URL  API_STATUS_CODE  \
0  https://www.police.wa.gov.au/apiws/CrimeStatsA...              200   
1  https://www.police.wa.gov.au/apiws/CrimeStatsA...              200   
2  https://www.police.wa.gov.au/apiws/CrimeStatsA...              200   

  API_STATUS_REASON LOOP_OUTCOME           API_CALL_DATETIME  
0           Success      Success  2022-10-15 18:35:13.937358  
1           Success      Success  2022-10-15 18:35:17.728871  
2           Success      Success  2022-10-15 18:35:19.877414

In [5]:
############################################################################
###### SET API TARGET, PARAMETERS, DESTINATION BUCKET AND LOGGING ##########
############################################################################


open_data_url = "https://www.police.wa.gov.au/apiws/CrimeStatsApi/GetLocalityCrimeStats/WILLETTON"

#save open_data_url as json
response = requests.get(open_data_url)
data = response.json()

dog = 'emily'

#dump data to json file
with open(dog, 'w') as f:
    json.dump(data, f)