In [51]:
#Import packages and secrets
import sys
import requests
import pandas as pd
import json
import boto3
import botocore
import pyarrow
import snowflake.connector
from datetime import datetime
from config import api_key_secret, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
from aws_secretsmanager_caching import SecretCache, SecretCacheConfig 


############### SET API TARGET, PARAMETERS, DESTINATION BUCKET AND LOGGING ###############

#Set API Target and Parameters
base_url = "https://api.domain.com.au"
version = "v2"
year = "2016"
api_types = "suburbPerformanceStatistics"
property_types = ["house"]
period_size = 'Quarters'
total_periods = '100'
#Authentication
header = {"X-API-Key" : api_key_secret}

#Set Destination S3 bucket variables
s3 = boto3.client('s3')
jsonbucket = 'sbx-apidomainonline-injest-json'
parquetbucket = 'sbx-apidomainonline-injest-parquet'

#Create empty list of lists to capture details for each successful loop run
loop_run_log = []

############### GET LIST OF SEEDS FOR API CALL FROM SNOWFLAKE ##############

# Get secrets from AWS Secrets Manager
client = botocore.session.get_session().create_client('secretsmanager')
cache_config = SecretCacheConfig()
cache = SecretCache( config = cache_config, client = client)
secrets = cache.get_secret_string('snowflake-creds')
secrets = (json.loads(secrets)) #Put secrets into a dictionary

# Initialize connection to Snowflake
ctx = snowflake.connector.connect(
    user='NICKLILLEYMAN',
    password=(secrets['password']),
    account=(secrets['account']),
    region =(secrets['region']),
    warehouse=(secrets['warehouse']),
    database='SBX_RAW',
    schema='PUBLIC'
    )

# Create a cursor object.
cur = ctx.cursor()

# Execute a statement that will generate a result set.
sql = ("""
SELECT
     DIM_SUBURB_SK
    ,SUBURB_ID
    ,SUBURB
    ,POSTCODE
    ,STATE
FROM SBX_ANALYTICS.DBT_NLILLEYMAN_COMMON.DIM_SUBURB_GEOGRAPHY
WHERE
     STATE = 'WA'
AND  DIM_SUBURB_SK NOT IN (SELECT SEED_KEY FROM SBX_RAW.PUBLIC.API_CALL_LOG)
AND SUBURB = 'Willetton'
AND POSTCODE = '6955'

ORDER BY DIM_SUBURB_SK
 """)
cur.execute(sql)

#Put query results into a dataframe and then into lists
df = cur.fetch_pandas_all()
df['api_call_target_id'] = df.reset_index().index
api_call_target_ids = ((df["api_call_target_id"]).astype(str).tolist())  #convert df to string and then convert to list
states = (df["STATE"]).tolist()
suburbs = (df["SUBURB"]).tolist()
postcodes = ((df["POSTCODE"]).astype(str).tolist()) #convert df to string and then convert to list
#api_types = (df["api_type"]).tolist()
api_types = "suburbPerformanceStatistics"
print(df)

#Set API call targets
#API calls limited to 500 a day, need to iterate through API call targets day by day
#ToDo: Get start and stop from a logging table
start_iterating = 0
stop_iterating = start_iterating + 20

#import api_call_target_id, state, suburb and postcodes into dataframe
df = df.loc[df["api_call_target_id"].between(start_iterating,stop_iterating)] #filter by api_call_target_id
print(df)

############################################################################
############### LOOP THROUGH SEEDS AND CALL API FOR EACH SEED ##############
############################################################################

#Loop through each item in lists, construct request URL, dump json response and errors to directory 
#Loop through each item in lists, construct request URL, dump json response and errors to directory 
for i in property_types:
    property_types = i
    for api_call_target_id, state, suburb, postcode in zip(api_call_target_ids, states, suburbs, postcodes):    
        #Define metadata variables
        looprundatetime = datetime.now()
        #full_url = base_url + "/" + version + "/" + api_type + "/" + str(state) + "/" + str(suburb) + "/" + str(postcode) + "?types=" + str(api_sub_type)  + "=" + "&year" + str(year)
        #full_url = base_url + "/" + version + "/" + api_type + "/" + str(state) + "/" + str(suburb) + "/" + str(postcode)
        full_url = base_url+"/"+version+"/"+api_types+"/"+str(state)+"/"+str(suburb)+"/"+str(postcode)+"?propertyCategory="+str(property_types)+"&periodSize="+str(period_size)+"&totalPeriods="+str(total_periods)
        print(full_url)

        #Call API and put errors and responses into variables
        response = requests.get(full_url, headers=header)
        api_status_code, api_status_reason = response.status_code, response.reason #Get API call status & reasons for error
        print(api_status_code)
        response = response.json()
        print(response)
        

        #Define filename
        file_name = state+"_"+suburb+"_"+postcode+"_"+year+"_"+str(api_status_code)+"_"+api_types+"_"+property_types
        
        #Convert json response to parquet file
        df = pd.DataFrame(response)
        print(df)
        df.to_parquet(file_name+".parquet")
        
        #Upload parquet files into S3 bucket
        s3.upload_file(file_name+".parquet", parquetbucket, file_name+".parquet")
        #os.remove(file_name+".parquet")
        
        #Upload json response into S3 bucket
        df.to_json(file_name+".json")
        #s3.put_object(Key=file_name+".json",Body=json.dumps(response), Bucket=jsonbucket)
        
        #Logging - append each loop to list of lists
     #   loop_run_log.append([api_call_target_id, state, suburb, postcode, api_type, api_status_code, api_status_reason, looprundatetime])
#Get loop_run_log results in a dataframe
df_loop_run_log = pd.DataFrame(loop_run_log, columns = ['api_call_target_id','state','suburb','postcode','api_type', 'api_status_code', 'apistatusreason','looprundatetime'])
print(df_loop_run_log)        


                      DIM_SUBURB_SK          SUBURB_ID     SUBURB POSTCODE  \
0  ef838f6fb3ed416b5f2e8cb50f557e7d  Willetton-6955-WA  Willetton     6955   

  STATE  api_call_target_id  
0    WA                   0  
                      DIM_SUBURB_SK          SUBURB_ID     SUBURB POSTCODE  \
0  ef838f6fb3ed416b5f2e8cb50f557e7d  Willetton-6955-WA  Willetton     6955   

  STATE  api_call_target_id  
0    WA                   0  
https://api.domain.com.au/v2/suburbPerformanceStatistics/WA/Willetton/6955?propertyCategory=house&periodSize=Quarters&totalPeriods=100
200


JSONDecodeError: Expecting value: line 1 column 1 (char 0)