In [30]:

# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): the CSV files read later in this notebook are loaded from /content directly,
# not from /content/drive - confirm whether this mount is still required.
from google.colab import drive
drive.mount('/content/drive')




Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# 1. Imports

In [31]:
import getpass
import json
import os
import sys
import time
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from google.colab import drive, files # google colab specific
from requests.sessions import codes


In [32]:
# Raise the interpreter's maximum recursion depth to 10,000,000 frames.
# NOTE(review): a higher limit does not prevent infinite recursion - it only postpones the
# RecursionError, and a limit this large can let runaway recursion crash the interpreter
# instead of raising. No recursive code is visible in this notebook; confirm this call is needed.
sys.setrecursionlimit(10000000) # https://www.geeksforgeeks.org/python-sys-setrecursionlimit-method/

# 2. API Key
API key goes here, add your own key. <br>
Found here: https://estated.com/developers/docs/v4

In [33]:
# When testing use your personal API key so that we don't run out of calls so fast.
# The key is deliberately NOT stored in the notebook: it is read from the ESTATED_API_KEY
# environment variable, falling back to a non-echoing interactive prompt when that is unset.
# NOTE(review): the key previously hard-coded in this cell was committed in plain text and
# should be rotated.
estated_api_key = os.environ.get('ESTATED_API_KEY') or getpass.getpass('Estated API key: ')

# 3. Estated Request and Response
`get_estated_detail()` implementation

Credit: https://www.youtube.com/watch?v=HxdGS48cdSU&t=506s&ab_channel=AnalyticsAriel





In [50]:
def get_estated_detail(api_key,
                       street_address,
                       city,
                       state,
                       zip_code,
                       timeout=30):
  """Request the Estated v4 property record for a single address.

  Args:
    api_key: Estated API token sent as the ``token`` query parameter.
    street_address: Street portion of the address.
    city: City name.
    state: State portion of the address.
    zip_code: ZIP code.
    timeout: Seconds to wait for the server before ``requests`` gives up.
      Without a timeout a stalled connection would block the notebook forever.

  Returns:
    The ``requests.Response`` returned by the GET request; callers decode it
    with ``.json()``.

  Raises:
    requests.exceptions.RequestException: on connection errors or timeouts.
  """
  base_url = 'https://apis.estated.com/v4/property'

  # Use the key that was passed in. The previous version ignored `api_key`
  # and always sent the module-level `estated_api_key`.
  params = {
      'token': api_key,
      'street_address': street_address,
      'city': city,
      'state': state,
      'zip_code': zip_code,
  }

  return requests.get(base_url, params=params, timeout=timeout)

# 4. Create Dataframe using Address Data

In [54]:
# Read in the addresses from the csv files and store them into a dataframe

df_addresses_clean = pd.read_csv('/content/zillow_addresses_stPete.csv')
df_addresses_clean = df_addresses_clean.rename(columns={"stPete_addresses" : "address"})

# The below portion is unnecessary if you manually clean the csv data before using
#********************************************************************
# clear null values
df_addresses_clean = df_addresses_clean[df_addresses_clean['address'].notnull()]
# drop some invalid entries, identified by their row label in the raw csv.
# errors='ignore' keeps this cell runnable when a label is absent (for example when the
# csv was already cleaned by hand, or the null filter above removed that row).
df_addresses_clean = df_addresses_clean.drop([70, 77, 99], errors='ignore')
# reset index
df_addresses_clean = df_addresses_clean.reset_index(drop=True)
#********************************************************************


In [55]:
# Manual Testing since API calls are limited and expensive
# Every line below except the last is commented out. To test against a small hand-written
# address list, uncomment one `d = ...` line together with the `pd.DataFrame(data=d)` line;
# that replaces the frame loaded from the csv.

# create a dataframe
#d = {'address': ['9307 crandon lane, tampa, FL 33635', '310 72nd Ave N, Saint Petersburg, FL 33702', '6990 18th St S, Saint Petersburg, FL 33712', '5718 18th Ave N, Saint Petersburg, FL 33710', '115 112th Ave NE APT 1027, Saint Petersburg, FL 33716', '4600 Dr Martin Luther King Jr St N, Saint Petersburg, FL 33703', '6420 6th Ave N, Saint Petersburg, FL 33710', '5386 Alhambra Way S, Saint Petersburg, FL 33712', '7403 46th Ave N LOT 274, Saint Petersburg, FL 33709', '3125 54th Ave N, Saint Petersburg, FL 33714'   ]}
#d = {'address': ['9307 crandon lane, tampa, FL 33635']}#, '310 72nd Ave N, Saint Petersburg, FL 33702']}#, '310 72nd Ave N, Saint Petersburg, FL 33702', '135 NW Monroe Cir N, Saint Petersburg, FL 33702' ]}
#d = {'address': ['135 NW Monroe Cir N, Saint Petersburg, FL 33702']}
#df_addresses_clean = pd.DataFrame(data=d)
# Display the address frame that the following cells will use.
df_addresses_clean

Unnamed: 0,address
0,"1162 37th Ave N, Saint Petersburg, FL 33704"
1,"7913 9th Ave S, Saint Petersburg, FL 33707"
2,"10263 Gandy Blvd N APT 102, Saint Petersburg, ..."
3,"1769 38th Ave N, Saint Petersburg, FL 33713"
4,"5262 3rd Ave S, Saint Petersburg, FL 33707"
...,...
82,"3301 58th Ave N #110, Saint Petersburg, FL 33714"
83,"6857 49th Ave N, Saint Petersburg, FL 33709"
84,"555 5th Ave NE #1302, Saint Petersburg, FL 33701"
85,"22 Franklin Ct S #C, Saint Petersburg, FL 33711"


# 5. Supply Address to Dataframe
Credit: AnalyticsAriel

In [65]:
# split dataframe into sub address elements
def _split_address(full_address):
    """Split 'street, city, ST zip' into (street, city, state, zip_code).

    The string is split from the right so that a comma inside the street part
    (e.g. '12 Main St, Unit 4, Tampa, FL 33601') stays in the street instead of
    shifting the city/state/zip fields. Raises ValueError when the address has
    fewer than three comma-separated parts or no zip after the state.
    """
    street, city, state_zip = full_address.rsplit(',', 2)
    # Only the first two whitespace-separated tokens are used: state, then zip.
    state, zip_code = state_zip.split()[:2]
    return street.strip(), city.strip(), state, zip_code

# Parse each address once, then attach the four pieces as new columns.
# assign() returns a new frame, so the frame displayed by earlier cells is not mutated.
address_parts = [_split_address(full_address) for full_address in df_addresses_clean['address']]
df_addresses_clean = df_addresses_clean.assign(
    street=[parts[0] for parts in address_parts],
    city=[parts[1] for parts in address_parts],
    state=[parts[2] for parts in address_parts],
    zip_code=[parts[3] for parts in address_parts],
)
df_addresses_clean

Unnamed: 0,address,street,city,state,zip_code
0,"1162 37th Ave N, Saint Petersburg, FL 33704",1162 37th Ave N,Saint Petersburg,FL,33704
1,"7913 9th Ave S, Saint Petersburg, FL 33707",7913 9th Ave S,Saint Petersburg,FL,33707
2,"10263 Gandy Blvd N APT 102, Saint Petersburg, ...",10263 Gandy Blvd N APT 102,Saint Petersburg,FL,33702
3,"1769 38th Ave N, Saint Petersburg, FL 33713",1769 38th Ave N,Saint Petersburg,FL,33713
4,"5262 3rd Ave S, Saint Petersburg, FL 33707",5262 3rd Ave S,Saint Petersburg,FL,33707
...,...,...,...,...,...
82,"3301 58th Ave N #110, Saint Petersburg, FL 33714",3301 58th Ave N #110,Saint Petersburg,FL,33714
83,"6857 49th Ave N, Saint Petersburg, FL 33709",6857 49th Ave N,Saint Petersburg,FL,33709
84,"555 5th Ave NE #1302, Saint Petersburg, FL 33701",555 5th Ave NE #1302,Saint Petersburg,FL,33701
85,"22 Franklin Ct S #C, Saint Petersburg, FL 33711",22 Franklin Ct S #C,Saint Petersburg,FL,33711


# 6. Declare Shiller Data Dataframe 


In [66]:
# Load the Shiller index csv and turn it into a daily series.
shiller_filePath = "/content/TPXRSA.csv"

shiller_data = (
    pd.read_csv(shiller_filePath, header = 0)
    # parse the DATE column and use it as the index
    .assign(DATE=lambda frame: pd.to_datetime(frame['DATE']))
    .set_index('DATE')
    # convert the index to daily periods
    .to_period("1D")
    # upsample to one row per day and fill the gaps by interpolation
    .resample('1D')
    .interpolate()
)

# 7. Declare Mother Dataframe

In [67]:
# Build the MDF ("mother dataframe") as an independent copy of the Shiller data.
# Run this cell only once: re-running it resets the MDF and discards any house columns added later.
m_df = shiller_data.copy(deep=True)

# 8. Single-Execution-Pipeline 

In [68]:
def automated_sep(response, index_num, address=None):
    """Scale the Shiller index to one property's price level and add it to ``m_df``.

    For every deed whose recording date also appears in ``shiller_data``, the
    ratio ``sale price / TPXRSA`` is computed; the mean of those ratios is the
    property's coefficient. The column ``TPXRSA * coefficient`` is then written
    to the module-level ``m_df`` under the property's address.

    Args:
        response: Decoded JSON body of an Estated property request. Deeds are
            read from ``response['data']['deeds']``.
        index_num: Row label of the property in ``df_addresses_clean``. Used to
            look up the address when ``address`` is not given.
        address: Optional column label to use in ``m_df``. When omitted, the
            label is ``df_addresses_clean.loc[index_num, 'address']`` rather
            than whatever a surrounding loop last stored in a global variable.

    Returns:
        None. The function prints its progress and, when at least one deed can
        be matched to the Shiller index, adds or overwrites one column of ``m_df``.
    """
    # ****************** STAGE 1 *********************************
    print('Entering Stage 1')

    # Resolve the output column label from the arguments, not from loop state.
    if address is None:
        address = df_addresses_clean.loc[index_num, 'address']
    house = address

    # The API answers with 'data': None when no property is found, so every
    # level of the payload is checked before it is indexed.
    data = response.get('data') if isinstance(response, dict) else None
    deeds = data.get('deeds') if isinstance(data, dict) else None
    if not deeds:
        print('Response contains no deed records. Exit and return to loop for next address.')
        return

    # Normalize and keep only the two fields the pipeline uses. Selecting them
    # (instead of dropping every other column by name) does not break when the
    # payload lacks one of the optional deed fields.
    df5 = pd.json_normalize(deeds)
    if not {'recording_date', 'sale_price'}.issubset(df5.columns):
        print('Deed records have no recording date or sale price. Exit and return to loop for next address.')
        return
    df5 = df5[['recording_date', 'sale_price']].rename(
        columns={"recording_date": "Recording Date",
                 "sale_price": "Sale Price"})

    # Change column types for alignment with the Shiller index; unparsable
    # values become NaT/NaN and are discarded with the other incomplete rows.
    df5['Recording Date'] = pd.to_datetime(df5['Recording Date'], errors='coerce')
    df5['Sale Price'] = pd.to_numeric(df5['Sale Price'], errors='coerce')
    df5 = df5.dropna(subset=['Recording Date', 'Sale Price'])

    # Set index
    df5 = df5.set_index('Recording Date')
    df5.index = df5.index.to_period("1D")

    # Several deeds can share a recording date, and a duplicated index cannot
    # be aligned against the Shiller index. Keep the last deed listed per day.
    sale_prices = df5['Sale Price'].groupby(level=0).last()

    print('Entering Stage 2')
    # ******************************** STAGE 2 *********************************
    # Initiate Automatic Interpolation Coefficient Extraction Process (A.I.C.E.P)

    # Keep only the dates where there is both Shiller Index Data and a Sale Price.
    new_df = pd.concat([shiller_data['TPXRSA'], sale_prices], axis = 1, join = 'inner')
    coef_df = new_df.dropna(subset=['TPXRSA', 'Sale Price'])

    # Compute the interpolation coefficient for each matched date.
    # NOTE(review): each ratio is truncated to an integer before averaging, as
    # in the previous version - confirm the truncation is intended.
    coef_arr = [int(house_val / shiller_val)
                for shiller_val, house_val in zip(coef_df['TPXRSA'], coef_df['Sale Price'])]

    # The mean of all coefficients is the main coefficient used to interpolate
    # the house prices based on Shiller's index.
    if (len(coef_arr) == 0):
        print('Current address has no available Sale Prices. Exit and return to loop for next address.')
        return
    average_coef = sum(coef_arr) / len(coef_arr)

    # ************************************** STAGE 3 *****************************************
    print('Entering Stage 3')
    print('The average coefficient computed is:', average_coef)

    # Scale the Shiller index by the coefficient and store the result in the
    # 'Mother Dataframe' under this house's address. Assignment aligns on the
    # index, so dates missing from the Shiller data stay null.
    m_df[house] = shiller_data['TPXRSA'] * average_coef


# 9. Automation begins here! 


In [69]:
# get estated property detail per address
response_list = []

# iterate through rows in the dataframe (table)
for index, row in df_addresses_clean.iterrows():

  # get parameters
  address = row['address']
  street = row['street']
  city = row['city']
  state = row['state']
  zip_code = row['zip_code']

  # get api response
  print('Getting data for address: {0}'.format(address))
  print('Current Index is:', index)

  # Delay added to prevent API call bottleneck.
  time.sleep(10)
  try:
    response = get_estated_detail(estated_api_key,
                                  street,
                                  city,
                                  state,
                                  zip_code)

    # Call automation function
    print('Passing the json response to the automation pipeline')

    automated_sep(response.json(), index)
  except Exception as e:
    # Keep going with the next address, but say what went wrong: without the
    # exception details every failure (network, bad JSON, missing data) looks the same.
    print("Error in  index ", index, ' Moving on')
    print('  Reason: {0}: {1}'.format(type(e).__name__, e))

Getting data for address: 1162 37th Ave N, Saint Petersburg, FL 33704
Current Index is: 0
Passing the json response to the automation pipeline
Entering Stage 1
Entering Stage 2
Current address has no available Sale Prices. Exit and return to loop for next address.
Getting data for address: 7913 9th Ave S, Saint Petersburg, FL 33707
Current Index is: 1
Passing the json response to the automation pipeline
Entering Stage 1
Entering Stage 2
Entering Stage 3
The average coefficient computed is: 4592.0
Getting data for address: 10263 Gandy Blvd N APT 102, Saint Petersburg, FL 33702
Current Index is: 2
Passing the json response to the automation pipeline
Entering Stage 1
Entering Stage 2
Entering Stage 3
The average coefficient computed is: 847.0
Getting data for address: 1769 38th Ave N, Saint Petersburg, FL 33713
Current Index is: 3
Passing the json response to the automation pipeline
Entering Stage 1
Entering Stage 2
Entering Stage 3
The average coefficient computed is: 577.0
Getting data 

KeyboardInterrupt: ignored

In [None]:
# Show the decoded JSON of the most recent request.
# NOTE: `response` is the variable left over from the automation loop, so this cell only
# works after that loop has made at least one request.
response.json()

{'data': None,
 'metadata': {'timestamp': '2022-03-21T06:36:21.434146Z',
  'version': '0.11.0-2.23.8'},
   'description': 'No property was found for the given input',
   'metadata': {},

# 10. Review the MDF output

In [70]:
# Display the MDF: the TPXRSA column plus one scaled price column per processed address.
m_df

Unnamed: 0_level_0,TPXRSA,"7913 9th Ave S, Saint Petersburg, FL 33707","10263 Gandy Blvd N APT 102, Saint Petersburg, FL 33702","1769 38th Ave N, Saint Petersburg, FL 33713","5262 3rd Ave S, Saint Petersburg, FL 33707","5631 Venetian Blvd NE, Saint Petersburg, FL 33703","6165 58th St N APT 6A, Saint Petersburg, FL 33709","626 35th Ave S, Saint Petersburg, FL 33705","6140 Sun Blvd #7, Saint Petersburg, FL 33715","721 83rd Ave N APT 105, Saint Petersburg, FL 33702"
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2002-01-01,120.395793,5.528575e+05,101975.236655,69468.372550,58151.168010,130147.852212,27811.428179,85962.596188,175175.878787,65736.102967
2002-01-02,120.405607,5.529025e+05,101983.549291,69474.035350,58155.908274,130158.461374,27813.695261,85969.603535,175190.158464,65741.461527
2002-01-03,120.415421,5.529476e+05,101991.861928,69479.698149,58160.648537,130169.070536,27815.962344,85976.610881,175204.438140,65746.820086
2002-01-04,120.425236,5.529927e+05,102000.174564,69485.360949,58165.388801,130179.679698,27818.229427,85983.618228,175218.717817,65752.178645
2002-01-05,120.435050,5.530377e+05,102008.487201,69491.023749,58170.129065,130190.288860,27820.496509,85990.625574,175232.997494,65757.537204
...,...,...,...,...,...,...,...,...,...,...
2021-11-27,321.037986,1.474206e+06,271919.173968,185238.917803,155061.347139,347042.062644,74159.774718,229221.121857,467110.269331,175286.740244
2021-11-28,321.231846,1.475097e+06,272083.373816,185350.775315,155154.981763,347251.625850,74204.556495,229359.538258,467392.336366,175392.588080
2021-11-29,321.425707,1.475987e+06,272247.573664,185462.632826,155248.616387,347461.189056,74249.338272,229497.954659,467674.403401,175498.435915
2021-11-30,321.619567,1.476877e+06,272411.773512,185574.490338,155342.251011,347670.752262,74294.120049,229636.371059,467956.470436,175604.283751


# 11. Download Dataframe CSV

In [62]:
# Write the MDF to a CSV file in the working directory, then hand that file to the
# browser for download (Colab only).
output_csv_name = 'estated_output_MDF.csv'

m_df.to_csv(output_csv_name)
files.download(output_csv_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>