In [99]:
#Import packages and secrets
import sys
import requests
import pandas as pd
import json
import boto3
from datetime import datetime
from config import api_key_secret

#Set API call targets
#API calls limited to 500 a day, need to iterate through API call targets day by day
#ToDo: Get start and stop from a logging table
daily_call_limit = 500
start_iterating = 1
#Series.between() is inclusive on both ends, so a batch of 500 ids ends at start + 499
stop_iterating = start_iterating + daily_call_limit - 1

#Set Destination S3 bucket variables
s3 = boto3.client('s3')
bucket = 'sbx-suburbproject-api-responses'

#Create empty list of lists to capture details for each loop run
loop_run_log = []

#import api_call_target_id, state, suburb and postcodes into dataframe
df = pd.read_csv('suburbdatatest.csv')
#Keep only today's batch: ids from start_iterating to stop_iterating, both inclusive
df = df.loc[df["api_call_target_id"].between(start_iterating, stop_iterating)]

#Convert dataframe columns into parallel lists (one entry per API call target)
#ids and postcodes are cast to str because they are concatenated into file names
api_call_target_ids = df["api_call_target_id"].astype(str).tolist()
states = df["state"].tolist()
suburbs = df["suburb"].tolist()
postcodes = df["postcode"].astype(str).tolist()
api_types = df["api_type"].tolist()

#Other URL Parameters
base_url = "https://api.domain.com.au"
version = "v2"
#NOTE: a single sub-type held as a plain string (it is concatenated into the file name);
#iterating over it, e.g. inside zip(), would yield single characters
api_sub_types = "Rent"
year = "2016"

#Authentication
header = {"X-API-Key" : api_key_secret}

#Loop through each call target: construct the request URL, upload the response to S3
#and record the outcome (success or failure) in loop_run_log.
#api_sub_types is a single string shared by every call, so it is NOT part of the zip:
#zipping a string iterates its characters, which would cap the loop at len("Rent")
#iterations and send one letter per request.
for api_call_target_id, state, suburb, postcode, api_type in zip(api_call_target_ids, states, suburbs, postcodes, api_types):
    #Define metadata variables
    looprundatetime = datetime.now()
    full_url = f"{base_url}/{version}/{api_type}/{state}/{suburb}/{postcode}?types={api_sub_types}&year={year}"
    #Key layout (including the doubled underscore before the postcode) is kept as-is
    #so keys written by earlier runs still match
    file_name = f"{api_type}_{state}_{suburb}__{postcode}_{api_sub_types}_{year}"

    #Call API; a timeout stops one stalled connection from hanging the whole batch
    try:
        response = requests.get(full_url, headers=header, timeout=30)
    except requests.RequestException as err:
        #No HTTP response at all (DNS failure, timeout, ...): log it and move on
        #so the remaining targets are still processed and the log is not lost
        loop_run_log.append([api_call_target_id, state, suburb, postcode, api_type, api_sub_types, None, str(err), looprundatetime])
        continue

    #Get API call status & reason for error
    api_status_code, api_status_reason = response.status_code, response.reason

    #Error responses are not guaranteed to be JSON; fall back to the raw text so the
    #body is still stored instead of raising
    try:
        response_body = response.json()
    except ValueError:
        response_body = response.text

    #Upload response to S3 bucket, serialised as JSON
    s3.put_object(Key=file_name, Body=json.dumps(response_body), Bucket=bucket)

    #Logging - append each loop to list of lists
    loop_run_log.append([api_call_target_id, state, suburb, postcode, api_type, api_sub_types, api_status_code, api_status_reason, looprundatetime])

#Turn the per-iteration log rows into a dataframe (one row per API call) and display it
df_loop_run_log = pd.DataFrame(
    loop_run_log,
    columns=[
        'api_call_target_id',
        'state',
        'suburb',
        'postcode',
        'api_type',
        'api_sub_type',
        'api_status_code',
        'apistatusreason',
        'looprundatetime',
    ],
)
print(df_loop_run_log)

  api_call_target_id state     suburb postcode      api_type api_sub_type  \
0                  1   NSW    Pyrmont     2009  demographics         Rent   
1                  2    WA  Willetton     6155  demographics         Rent   

   api_status_code apistatusreason            looprundatetime  
0              200              OK 2022-08-21 12:00:45.003191  
1              200              OK 2022-08-21 12:00:47.465886  
