In [20]:
#Import packages and secrets
import json
import os
import sys
from datetime import datetime

import boto3
import pandas as pd
import pyarrow
import requests

from config import api_key_secret, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY

#Set API call targets
#API calls limited to 500 a day, need to iterate through API call targets day by day
#ToDo: Get start and stop from a logging table
start_iterating = 1
stop_iterating = start_iterating + 500 #exclusive upper bound, so exactly 500 ids fall in the window

#Set Destination S3 bucket variables
#NOTE(review): AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY are imported but not passed to the client;
#presumably credentials are resolved from the environment - confirm
s3 = boto3.client('s3')
jsonbucket = 'sbx-apidomainonline-injest-json'
parquetbucket = 'sbx-apidomainonline-injest-parquet'

#Create empty list of lists to capture details for each loop run
loop_run_log = []

#import api_call_target_id, state, suburb and postcodes into dataframe
df = pd.read_csv("target_data//suburbdatatest.csv")
#Filter by api_call_target_id using a half-open range [start_iterating, stop_iterating).
#Series.between() is inclusive on both ends and would select 501 ids, one more than the daily limit.
df = df.loc[(df["api_call_target_id"] >= start_iterating) & (df["api_call_target_id"] < stop_iterating)]
print(df)

#Convert dataframe columns into parallel lists (one entry per API call target)
api_call_target_ids = df["api_call_target_id"].astype(str).tolist() #ids as strings
states = df["state"].tolist()
suburbs = df["suburb"].tolist()
postcodes = df["postcode"].astype(str).tolist() #postcodes as strings so they can be joined into URLs and file names
api_types = df["api_type"].tolist()

#Other URL Parameters
base_url = "https://api.domain.com.au"
version = "v2"
#api_sub_types = "house"
year = "2016"

#Authentication - the API key is sent as a request header
header = {"X-API-Key" : api_key_secret}

#Loop through each target: build the request URL, call the API, store the response as parquet in S3,
#dump a JSON copy to the working directory and log the outcome of every attempt.
#Failed requests and unusable response bodies are logged and skipped instead of aborting the whole run.
request_timeout_seconds = 30 #stops a single stalled connection from hanging the run

for api_call_target_id, state, suburb, postcode, api_type in zip(
    api_call_target_ids, states, suburbs, postcodes, api_types
):
    #Define metadata variables
    looprundatetime = datetime.now()
    log_row = [api_call_target_id, state, suburb, postcode, api_type]
    #NOTE(review): year is only used in the file name below, it is not sent to the API - confirm the label is accurate
    full_url = "/".join([base_url, version, api_type, str(state), str(suburb), str(postcode)])

    #Call API
    try:
        response = requests.get(full_url, headers=header, timeout=request_timeout_seconds)
    except requests.RequestException as exc:
        #No HTTP status exists when the request itself fails, so the error text is logged as the reason
        print(f"Request failed for {full_url}: {exc}")
        loop_run_log.append(log_row + [None, str(exc), looprundatetime])
        continue
    api_status_code, api_status_reason = response.status_code, response.reason #Get API call status & reasons for error

    #Convert json response to a dataframe
    #ValueError covers both a body that is not JSON and a payload pandas cannot tabulate (e.g. all scalar values)
    try:
        response_df = pd.DataFrame(response.json())
    except ValueError as exc:
        print(f"Unusable response body for {full_url} (status {api_status_code}): {exc}")
        loop_run_log.append(log_row + [api_status_code, api_status_reason, looprundatetime])
        continue

    #Define filename
    file_name = "_".join([state, suburb, postcode, year, str(api_status_code), api_type, "a"])
    parquet_file = file_name + ".parquet"

    #Write a temporary parquet file, upload it into the S3 bucket, and always remove the local copy
    response_df.to_parquet(parquet_file)
    try:
        s3.upload_file(parquet_file, parquetbucket, parquet_file)
    finally:
        os.remove(parquet_file)

    #Dump json copy to the working directory
    #NOTE(review): the upload to jsonbucket is currently disabled, only the local file is written
    response_df.to_json(file_name + ".json")
    #s3.put_object(Key=file_name+".json",Body=json.dumps(response), Bucket=jsonbucket)

    #Logging - append each loop to list of lists
    loop_run_log.append(log_row + [api_status_code, api_status_reason, looprundatetime])

#Turn the accumulated run log (one list per loop run) into a labelled dataframe and show it
log_columns = [
    'api_call_target_id',
    'state',
    'suburb',
    'postcode',
    'api_type',
    'api_status_code',
    'apistatusreason',
    'looprundatetime',
]
df_loop_run_log = pd.DataFrame(data=loop_run_log, columns=log_columns)
print(df_loop_run_log)


   api_call_target_id state  postcode      suburb                     api_type
0                   1   NSW      2009     Pyrmont                 demographics
1                   2    WA      6155   Willetton                 demographics
2                   3    WA      6112  Harrisdale                 demographics
3                   4   NSW      2009     Pyrmont  suburbPerformanceStatistics
4                   5    WA      6155   Willetton  suburbPerformanceStatistics
5                   6    WA      6112  Harrisdale  suburbPerformanceStatistics
  api_call_target_id state      suburb postcode                     api_type  \
0                  1   NSW     Pyrmont     2009                 demographics   
1                  2    WA   Willetton     6155                 demographics   
2                  3    WA  Harrisdale     6112                 demographics   
3                  4   NSW     Pyrmont     2009  suburbPerformanceStatistics   
4                  5    WA   Willetton     6155