In [1]:
#importing relevant modules 
#Important note: this code may require multiple API keys to run because of the request limits on the Domain API
import json
import requests 
import pandas as pd
from config import Api_key
from config import Api_key2
from config import Api_key3
from config import Api_key4

The code below makes numerous API calls using different Domain API URLs. The first call retrieves each suburb's ID from the Domain API so that it can be used to interact with the other URLs. The second call obtains the median price in January, the median price in April, the number sold in January and the number sold in April. The last three calls use three different URLs to obtain different demographic information (age group, occupation and how many people were born in Australia). Multiple API keys are used because of the request limits on each Domain API key.

In [2]:
#Suburb.csv lists the suburbs of the Melbourne Metropolitan area; only its "Suburb" column is kept
#An empty 'ID' column is added straight away - it will hold the Domain suburb ID that the other Domain APIs are queried with
Suburb_name = pd.read_csv("Suburb.csv")[["Suburb"]].assign(ID="")

In [3]:
#Look up the Domain suburb ID of every suburb. A lookup that fails for any reason is
#recorded as "No ID" so that one bad suburb never stops the whole run.
#NOTE(review): postcode=3052 is sent for every suburb - confirm the API ignores it for suburb-level searches
LOCATOR_URL = "https://api.domain.com.au/v1/addressLocators?searchLevel=Suburb&suburb={0}&state=VIC&postcode=3052"
LOCATOR_TIMEOUT = 30  #seconds; without a timeout a stalled connection would block the loop forever
locator_headers = {"X-Api-key": Api_key}  #identical for every call, so it is built once

for index, row in Suburb_name.iterrows():

    suburb = row["Suburb"]

    try:
        response = requests.get(LOCATOR_URL.format(suburb), headers=locator_headers, timeout=LOCATOR_TIMEOUT)
        matches = response.json()

        #the ID of the first match is the one stored
        Suburb_name.loc[index, 'ID'] = matches[0]['ids'][0]['id']

    #KeyError / IndexError / TypeError: the response does not have the expected shape (no match, or an error payload)
    #ValueError: the body was not valid JSON
    #RequestException: timeout or connection failure
    except (KeyError, IndexError, TypeError, ValueError, requests.exceptions.RequestException):
        Suburb_name.loc[index, 'ID'] = "No ID"



In [4]:
#Preview the suburbs with their Domain IDs (a suburb whose lookup failed shows "No ID")
Suburb_name

Unnamed: 0,Suburb,ID
0,Abbotsford,627
1,Aberfeldie,647
2,Airport West,757
3,Albanvale,787
4,Albert Park,797
...,...,...
309,Wonga Park,44437
310,Wyndham Vale,45307
311,Yallambie,45427
312,Yarra Glen,45657


In [5]:
#Save the suburb/ID table to Suburb2.csv; the later steps re-read this file and use the ID to query the Domain API
#index=True is the pandas default: the row index is written as the first (unnamed) column of the file
Suburb_name.to_csv(path_or_buf="Suburb2.csv", index=True)


In [6]:
#Step 2; Using the ID to obtain the Median price and Number sold for each suburb.
Median_suburb1=pd.read_csv("Suburb2.csv")

#Keep only the suburbs whose ID lookup succeeded.
#The comparison is done on text: when every lookup succeeded read_csv parses ID as integers,
#and comparing integers with the string "No ID" can raise an elementwise-comparison warning
has_id = Median_suburb1["ID"].astype(str) != "No ID"
#.copy() makes the filtered frame independent, so adding columns below never writes into a slice
Median_suburb1 = Median_suburb1.loc[has_id, :].copy()

#Empty placeholder columns; they are filled in by the API loop that follows
for column in ('Jan 2020 Median', 'April 2020 Median', 'Jan 2020 Number Sold', 'April 2020 Number Sold'):
    Median_suburb1[column] = ""

  res_values = method(rvalues)


In [7]:
#Fetch the suburb performance statistics of every suburb and store the median sold price and the
#number sold of the first two returned periods (kept as the Jan 2020 and April 2020 columns).
#A suburb whose statistics cannot be fetched or read gets 'none' in all four columns.
STATS_URL = "https://api.domain.com.au/v1/suburbPerformanceStatistics?state=VIC&suburbId={0}&propertyCategory=house&chronologicalSpan=3&tPlusFrom=1&tPlusTo=2"
STATS_COLUMNS = ('Jan 2020 Median', 'April 2020 Median', 'Jan 2020 Number Sold', 'April 2020 Number Sold')
STATS_TIMEOUT = 30  #seconds; without a timeout a stalled connection would block the loop forever
#built once, outside the try block, so a missing API key fails loudly instead of filling the table with 'none'
stats_headers = {"X-Api-key": Api_key}

for index, row in Median_suburb1.iterrows():

    suburb_id = row["ID"]  #not called "id", which would shadow the Python builtin

    try:
        response = requests.get(STATS_URL.format(suburb_id), headers=stats_headers, timeout=STATS_TIMEOUT)
        periods = response.json()['series']['seriesInfo']
        first_period = periods[0]['values']
        second_period = periods[1]['values']

        #obtaining the values needed from the API - Jan 2020 Median, Apr 2020 Median, Jan 2020 Number sold and Apr 2020 number sold
        Median_suburb1.loc[index, 'Jan 2020 Median'] = first_period['medianSoldPrice']
        Median_suburb1.loc[index, 'April 2020 Median'] = second_period['medianSoldPrice']
        Median_suburb1.loc[index, 'Jan 2020 Number Sold'] = first_period['numberSold']
        Median_suburb1.loc[index, 'April 2020 Number Sold'] = second_period['numberSold']

    #allows the code to keep running:
    #KeyError / IndexError / TypeError - the response does not have the expected shape
    #ValueError - the body was not valid JSON; RequestException - timeout or connection failure
    except (KeyError, IndexError, TypeError, ValueError, requests.exceptions.RequestException):
        for column in STATS_COLUMNS:
            Median_suburb1.loc[index, column] = 'none'



In [8]:
#Preview the medians and numbers sold ('none' marks a suburb whose statistics could not be read)
Median_suburb1

Unnamed: 0.1,Unnamed: 0,Suburb,ID,Jan 2020 Median,April 2020 Median,Jan 2020 Number Sold,April 2020 Number Sold
0,0,Abbotsford,627,1110000,1115000,18,16
1,1,Aberfeldie,647,,1520000,8,11
2,2,Airport West,757,840000,785000,32,25
3,3,Albanvale,787,530000,540000,14,11
4,4,Albert Park,797,2423000,1679000,26,30
...,...,...,...,...,...,...,...
309,309,Wonga Park,44437,1715000,,10,5
310,310,Wyndham Vale,45307,490000,473000,69,58
311,311,Yallambie,45427,,,9,7
312,312,Yarra Glen,45657,838000,771000,12,11


In [9]:
#First analysis: save the medians and numbers sold to Jan_Apr_2020_data.csv
#index=True is the pandas default: the row index is written as the first (unnamed) column of the file
Median_suburb1.to_csv(path_or_buf="Jan_Apr_2020_data.csv", index=True)

In [10]:
#Third analysis: demographic data per suburb, starting again from the suburb/ID file saved earlier
Age_group = pd.read_csv("Suburb2.csv")

#One empty placeholder column per age band, in the order they appear in the final table
for age_band in ('0 to 4', '5 to 19', '20 to 39', '40 to 59', '60+'):
    Age_group[age_band] = ""


In [11]:
#getting Age group data from the domain API
#Every suburb is handled on its own: the age groups of one response are indexed by label and only
#the labels actually present are written. Nothing is carried over from the previous suburb, so a
#label missing from one response can no longer be filled with another suburb's value.
AGE_URL = "https://api.domain.com.au/v1/demographics?level=Suburb&id={0}&types=AgeGroupOfPopulation&year=2016"
AGE_COLUMNS = ('0 to 4', '5 to 19', '20 to 39', '40 to 59', '60+')  #the API labels are identical to the column names
AGE_TIMEOUT = 30  #seconds; without a timeout a stalled connection would block the loop forever
age_headers = {"X-Api-key": Api_key4}  #identical for every call, so it is built once
age_skipped = []  #suburbs that were left blank, reported once the loop has finished

for index, row in Age_group.iterrows():

    suburb_id = row["ID"]

    #a suburb without a Domain ID cannot be queried, so no API call is spent on it
    if str(suburb_id) == "No ID":
        age_skipped.append(row["Suburb"])
        continue

    try:
        response = requests.get(AGE_URL.format(suburb_id), headers=age_headers, timeout=AGE_TIMEOUT)
        items = response.json()['demographics'][0]['items']

        #This API sometimes puts the age groups in different orders, so they are looked up by label, not by position
        counts = {item['label']: item['value'] for item in items}

    #KeyError / IndexError / TypeError - the response does not have the expected shape
    #ValueError - the body was not valid JSON; RequestException - timeout or connection failure
    except (KeyError, IndexError, TypeError, ValueError, requests.exceptions.RequestException):
        age_skipped.append(row["Suburb"])
        continue

    #an age group absent from the response keeps its empty placeholder
    for age_label in AGE_COLUMNS:
        if age_label in counts:
            Age_group.loc[index, age_label] = counts[age_label]

if age_skipped:
    print("Age group data missing for {0} suburb(s): {1}".format(len(age_skipped), age_skipped))
    
    


In [12]:
#Preview the age-group counts (the age columns stay blank for a suburb whose API response could not be used)
Age_group

Unnamed: 0.1,Unnamed: 0,Suburb,ID,0 to 4,5 to 19,20 to 39,40 to 59,60+
0,0,Abbotsford,627,319,535,4740,1582,997
1,1,Aberfeldie,647,179,897,850,1281,684
2,2,Airport West,757,463,1012,2240,1920,1932
3,3,Albanvale,787,371,1000,1621,1373,1129
4,4,Albert Park,797,367,938,1469,1943,1493
...,...,...,...,...,...,...,...,...
309,309,Wonga Park,44437,157,883,724,1291,750
310,310,Wyndham Vale,45307,2448,5348,8079,5207,2212
311,311,Yallambie,45427,313,832,1207,1068,699
312,312,Yarra Glen,45657,176,604,682,773,632


In [13]:
#Intermediate step: save the age-group demographics to Age_group.csv
#index=True is the pandas default: the row index is written as the first (unnamed) column of the file
Age_group.to_csv(path_or_buf="Age_group.csv", index=True)

In [14]:
#The occupation step starts from the age-group file saved above, re-read from disk
Test_occ = pd.read_csv(filepath_or_buffer="Age_group.csv")

In [15]:
#One empty placeholder column per occupation group, created in the order used by the final table
for occupation_column in (
    'Not Stated',
    'Machinery',
    'Labourers',
    'Community',
    'Technicians',
    'Sales',
    'Clerical',
    'Managers',
    'Professionals',
):
    Test_occ[occupation_column] = ""



In [16]:
#getting occupation data from the demographics domain API
#Every suburb is handled on its own: the occupations of one response are indexed by label and only
#the labels actually present are written. Nothing is carried over from the previous suburb, so a
#label missing from one response can no longer be filled with another suburb's value.
OCC_URL = "https://api.domain.com.au/v1/demographics?level=Suburb&id={0}&types=Occupation&year=2016"
#label used by the API -> column of Test_occ that stores its value
OCC_COLUMNS = {
    "Inadequately Described/Not Stated": 'Not Stated',
    "Machinery Operators and Drivers": 'Machinery',
    "Labourers": 'Labourers',
    "Community and Personal Service Workers": 'Community',
    "Technicians and Trade Workers": 'Technicians',
    "Sales Workers": 'Sales',
    "Clerical and Administrative Workers": 'Clerical',
    "Managers": 'Managers',
    "Professionals": 'Professionals',
}
OCC_TIMEOUT = 30  #seconds; without a timeout a stalled connection would block the loop forever
occ_headers = {"X-Api-key": Api_key2}  #identical for every call, so it is built once
occ_skipped = []  #suburbs that were left blank, reported once the loop has finished

for index, row in Test_occ.iterrows():

    suburb_id = row["ID"]

    #a suburb without a Domain ID cannot be queried, so no API call is spent on it
    if str(suburb_id) == "No ID":
        occ_skipped.append(row["Suburb"])
        continue

    try:
        response = requests.get(OCC_URL.format(suburb_id), headers=occ_headers, timeout=OCC_TIMEOUT)
        items = response.json()['demographics'][0]['items']

        #once again the API does not return the occupations in a fixed order, so they are looked up by label
        counts = {item['label']: item['value'] for item in items}

    #KeyError / IndexError / TypeError - the response does not have the expected shape
    #ValueError - the body was not valid JSON; RequestException - timeout or connection failure
    except (KeyError, IndexError, TypeError, ValueError, requests.exceptions.RequestException):
        occ_skipped.append(row["Suburb"])
        continue

    #an occupation absent from the response keeps its empty placeholder
    for occ_label, occ_column in OCC_COLUMNS.items():
        if occ_label in counts:
            Test_occ.loc[index, occ_column] = counts[occ_label]

if occ_skipped:
    print("Occupation data missing for {0} suburb(s): {1}".format(len(occ_skipped), occ_skipped))


In [17]:
#Save the combined age-group and occupation table to Occupations_Age.csv
#index=True is the pandas default: the row index is written as the first (unnamed) column of the file
Test_occ.to_csv(path_or_buf="Occupations_Age.csv", index=True)

In [18]:
#Two more empty placeholder columns, filled in by the country-of-birth step
for birth_column in ("Born in Australia", "Total"):
    Test_occ[birth_column] = ""

In [19]:
#obtaining the country of birth data: the number of people born in Australia and the total of the response
#A suburb whose data cannot be fetched or read keeps its empty placeholders and is reported after the loop.
BIRTH_URL = "https://api.domain.com.au/v1/demographics?level=Suburb&id={0}&types=CountryOfBirth&year=2016"
BIRTH_TIMEOUT = 30  #seconds; without a timeout a stalled connection would block the loop forever
birth_headers = {"X-Api-key": Api_key3}  #identical for every call, so it is built once
birth_skipped = []  #suburbs that were left blank, reported once the loop has finished

for index, row in Test_occ.iterrows():

    suburb_id = row["ID"]

    #a suburb without a Domain ID cannot be queried, so no API call is spent on it
    if str(suburb_id) == "No ID":
        birth_skipped.append(row["Suburb"])
        continue

    try:
        response = requests.get(BIRTH_URL.format(suburb_id), headers=birth_headers, timeout=BIRTH_TIMEOUT)
        birth_data = response.json()['demographics'][0]
        countries = birth_data['items']

        #Pick the Australian entry by label rather than by position, as the other demographic steps do.
        #NOTE(review): the label is assumed to be exactly "Australia" - confirm against a live response.
        #When no item carries that label the last item is used, which is what this step always did before.
        australia = next(
            (country for country in countries if country.get('label') == "Australia"),
            countries[-1],
        )
        born_in_australia = australia['value']
        total = birth_data['total']

    #KeyError / IndexError / TypeError - the response does not have the expected shape (including an empty item list)
    #ValueError - the body was not valid JSON; RequestException - timeout or connection failure
    except (KeyError, IndexError, TypeError, ValueError, requests.exceptions.RequestException):
        birth_skipped.append(row["Suburb"])
        continue

    Test_occ.loc[index, 'Born in Australia'] = born_in_australia
    Test_occ.loc[index, 'Total'] = total

if birth_skipped:
    print("Country of birth data missing for {0} suburb(s): {1}".format(len(birth_skipped), birth_skipped))


In [20]:
#Save the final demographics table (age groups, occupations, country of birth) to Demographics.csv
#index=True is the pandas default: the row index is written as the first (unnamed) column of the file
Test_occ.to_csv(path_or_buf="Demographics.csv", index=True)
