# 1. Imports

In [1]:
from google.colab import drive, files # google colab specific
import requests
import pandas as pd
import os
import warnings
import sys
import matplotlib.pyplot as plt
import json

In [2]:
# Raise the interpreter's maximum recursion depth to 10,000,000 frames.
# NOTE(review): this does not prevent infinite recursion; it only postpones the
# RecursionError, and the sys documentation warns that a too-high limit can
# crash the interpreter. No cell visible in this notebook recurses, so confirm
# whether this call is still needed.
# Background: https://www.geeksforgeeks.org/python-sys-setrecursionlimit-method/
sys.setrecursionlimit(10000000)

# 2. API Key
API key goes here, add your own key. <br>
Found here: https://estated.com/developers/docs/v4

In [3]:
# Read the Estated API token from the ESTATED_API_KEY environment variable so
# the secret is not saved in the notebook source. Falls back to an empty
# string, matching the previous placeholder, when the variable is not set.
estated_api_key = os.environ.get('ESTATED_API_KEY', '')

# 3. Estated Request and Response
`get_estated_detail()` implementation

Credit: https://www.youtube.com/watch?v=HxdGS48cdSU&t=506s&ab_channel=AnalyticsAriel





In [4]:
def get_estated_detail(api_key,
                       street_address,
                       city,
                       state,
                       zip_code):
  """Request the Estated v4 property record for a single address.

  Args:
    api_key: Estated API token; sent as the ``token`` query parameter.
    street_address: Street portion of the address.
    city: City name.
    state: State portion of the address (the notebook passes values such
      as ``'FL'``).
    zip_code: ZIP code.

  Returns:
    The ``requests.Response`` returned by the GET call. The HTTP status is
    not checked here; that is left to the caller.
  """
  base_url = 'https://apis.estated.com/v4/property'

  # Send the key the caller passed in. Previously the notebook-level
  # ``estated_api_key`` global was used and the ``api_key`` argument ignored.
  params = {
      'token': api_key,
      'street_address': street_address,
      'city': city,
      'state': state,
      'zip_code': zip_code,
  }

  return requests.get(base_url, params=params)

# 4. Create Dataframe using Address Data

In [None]:
# Seed table of properties to look up: one row per property, with the whole
# address held in a single 'address' column.
d = dict(address=['9307 crandon lane, tampa, FL 33635'])
_df_addresses = pd.DataFrame(d)
_df_addresses

# 5. Supply Address to Dataframe
Credit: AnalyticsAriel

In [None]:
# Split each full address ("street, city, ST zip") into its components.
# The string is split once with the vectorised .str accessor instead of being
# re-split four times per row through DataFrame.apply.
df_addresses = _df_addresses.copy()
address_parts = df_addresses['address'].str.split(',')

# Fail early, naming the offending rows, rather than with a bare IndexError.
too_short = address_parts.str.len() < 3
if too_short.any():
  raise ValueError('Expected "street, city, ST zip" but got: {0}'.format(
      df_addresses.loc[too_short, 'address'].tolist()))

# The third comma-separated piece holds "<state> <zip>", split on whitespace.
state_zip_parts = address_parts.str[2].str.split()
incomplete = state_zip_parts.str.len() < 2
if incomplete.any():
  raise ValueError('Missing state or zip code in: {0}'.format(
      df_addresses.loc[incomplete, 'address'].tolist()))

# Street is stripped as well, so leading whitespace in the address is not
# forwarded to the API (city was already stripped).
df_addresses['street'] = address_parts.str[0].str.strip()
df_addresses['city'] = address_parts.str[1].str.strip()
df_addresses['state'] = state_zip_parts.str[0]
df_addresses['zip_code'] = state_zip_parts.str[1]
df_addresses

# 6. Call `get_estated_detail()`

In [20]:
# Fetch the Estated property record for every address row.
response_list = []

for index, row in df_addresses.iterrows():
  address = row['address']
  print('Getting data for address: {0}'.format(address))

  response = get_estated_detail(estated_api_key,
                                row['street'],
                                row['city'],
                                row['state'],
                                row['zip_code'])

  # Report failed calls here instead of letting them surface later as a
  # confusing error when the JSON body is read.
  if not response.ok:
    print('Request for {0} failed with HTTP status {1}'.format(
        address, response.status_code))

  response_list.append(response)

# `response` stays bound to the last response after the loop; the data
# cleaning cell reads it. The bare `response.json()` that used to sit inside
# the loop was removed because its result was discarded.

Getting data for address: 9307 crandon lane, tampa, FL 33635


# 7. Data Cleaning


1. Normalize required columns.
2. Drop unnecessary columns.
3. Rename remaining columns.
4. Configure date to support monthly or yearly increments.
5. Change types to work with resampling.
6. Mitigate null values.



In [15]:
# Parse the JSON body of the most recent API response.
d = response.json()

# Flatten the deed records, drop the columns that are not used afterwards,
# and give the remaining ones display names.
df5 = pd.json_normalize(d['data']['deeds'])
df5 = df5.drop(columns=['document_type',
                        'buyer_unit_number',
                        'buyer_unit_type',
                        'buyer_city',
                        'buyer_address',
                        'buyer2_last_name',
                        'buyer2_first_name',
                        'buyer_last_name',
                        'buyer_first_name',
                        'seller_zip_plus_four_code',
                        'seller_zip_code',
                        'seller_state',
                        'seller_city',
                        'seller_unit_number',
                        'seller_address',
                        'seller2_last_name',
                        'seller2_first_name',
                        'seller_last_name',
                        'seller_first_name',
                        'real_estate_owned',
                        'original_contract_date',
                        'deed_book',
                        'deed_page',
                        'document_id',
                        'sale_price_description',
                        'transfer_tax',
                        'distressed_sale',
                        'buyer_state',
                        'buyer_zip_code',
                        'buyer_zip_plus_four_code',
                        'lender_name',
                        'lender_type',
                        'loan_amount',
                        'loan_type',
                        'loan_due_date',
                        'loan_finance_type',
                        'loan_interest_rate'])
df5 = df5.rename(columns={"recording_date": "Recording Date", "sale_price": "Sale Price"})

# Parse the dates and roll each one forward to the end of its month so it
# lands on a monthly resample label (dates already on a month end are left
# unchanged). This replaces the previous hand-written dates for rows 0 and 1,
# which only suited one property and were assigned through chained indexing
# (`df5[col][i] = ...`), a pattern pandas does not guarantee will write back.
# NOTE(review): assumes the hand-written dates were the month ends of the
# original recording months; confirm against the sample response.
df5['Recording Date'] = pd.to_datetime(df5['Recording Date']) + pd.offsets.MonthEnd(0)

# Resample to monthly frequency and interpolate to fill the months in between.
# NOTE(review): newer pandas releases prefer the 'ME' alias over 'M'; confirm
# against the pandas version in use before changing it.
df5 = df5.set_index('Recording Date').resample('1M').interpolate()

# 8. Plot Clean Data (example)

In [None]:
# Plot the resampled deed data on an explicit Axes. The old
# `print(plt.plot(df5))` printed the list of Line2D objects and left the
# figure without labels.
fig, ax = plt.subplots()
ax.plot(df5)
ax.set_xlabel(df5.index.name or '')
ax.set_title('Deed data over time (monthly, interpolated)')
ax.legend(df5.columns)
plt.show()

# 9. Repeat section 7 for other Dataframes

In [None]:
# Flatten the remaining sections of the response into one frame each, drop
# unused columns, and apply display names. The bare `pd.json_normalize(...)`
# expressions that used to sit between these steps were removed: mid-cell
# expressions are not displayed, so each one was computed and thrown away.

# Assessments.
df1 = pd.json_normalize(d['data']['assessments']).rename(columns={
    "year": "Tax year",
    "land_value": "Land value",
    "improvement_value": "Improvement value",
    "total_value": "Total tax value",
})

# Address.
df2 = (
    pd.json_normalize(d['data']['address'])
    .drop(columns=[
        'street_number',
        'street_post_direction',
        'street_pre_direction',
        'street_name',
        'street_suffix',
        'carrier_code',
        'latitude',
        'longitude',
        'geocoding_accuracy',
        'census_tract',
    ])
    .rename(columns={
        "unit_type": "Unit Type",
        "unit_number": "Unit Number",
        "formatted_street_address": "Street Address",
        "city": "City",
        "state": "State",
        "zip_code": "Zip Code",
        "zip_plus_four_code": "Zip Plus Four Code",
    })
)

# Valuation.
df3 = pd.json_normalize(d['data']['valuation']).rename(columns={
    "value": "Estated current property value",
    "high": "Estated highest probable value",
    "low": "Estated lowest probable value",
    "forecast_standard_deviation": "Forecast standard deviation",
    "date": "Date",
})

# Structure.
df4 = (
    pd.json_normalize(d['data']['structure'])
    .drop(columns=[
        'plumbing_fixtures_count',
        'interior_wall_type',
        'water_type',
        'sewer_type',
        'other_areas',
        'other_rooms',
        'other_features',
        'other_improvements',
        'amenities',
        'basement_type',
        'fireplaces',
        'air_conditioning_type',
        'heating_fuel_type',
        'heating_type',
        'roof_style_type',
        'roof_material_type',
        'foundation_type',
        'exterior_wall_type',
        'pool_type',
    ])
    .rename(columns={
        "year_built": "Year built",
        "effective_year_built": "Effective year built",
        "stories": "Stories",
        "rooms_count": "Rooms count",
        "beds_count": "Beds",
        "baths": "Baths",
        "partial_baths_count": "Partial baths",
        "units_count": "Units count",
        "parking_type": "Parking type",
        "parking_spaces_count": "Parking spaces",
        "architecture_type": "Architecture type",
        "construction_type": "Construction type",
        "quality": "Quality",
        "condition": "Condition",
        "flooring_types": "Flooring types",
        "total_area_sq_ft": "Total area sq ft.",
    })
)


# 10. Concat Dataframes and Set Axis

In [None]:
# Place the per-section frames side by side.
# df5 is indexed by recording date, while the other frames come straight from
# json_normalize and carry a default positional index. Concatenating along
# axis=1 aligns on the index, so df5's index is turned back into an ordinary
# column first. That keeps its rows positionally aligned with the other frames
# and keeps the dates in the result when it is later written with index=False.
frames = [df2, df3, df4, df1, df5.reset_index()]
new_df = pd.concat(frames, axis=1)

# Display the combined dataframe.
new_df

In [None]:
# Write the combined table to a CSV file without the index column, then pass
# the same file name to Colab's files.download helper.
csv_name = 'estated_output.csv'
new_df.to_csv(csv_name, index=False)
files.download(csv_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>