In [3]:
import os
from dotenv import load_dotenv
import requests
import json
import pandas as pd

In [4]:
# Accompanying Notion Doc:
# https://www.notion.so/AirDNA-API-c4d49bd11c6642adba6cf23470425e66

In [5]:
# Load API Key
# load_dotenv() is called first so the token can come from a .env file;
# the lookup yields None when the variable is not set.
load_dotenv()
AIRDNA_TOKEN = os.environ.get("AIRDNA_TOKEN")

In [6]:
# Same for All API Calls
# NOTE(review): HEADERS is defined here but none of the requests visible in
# this notebook pass it; each call sends the token as the "access_token"
# query parameter instead.
HEADERS = {"access_token": AIRDNA_TOKEN}
# Base URL; endpoint paths are appended directly, so the trailing slash matters.
HOST = "https://api.airdna.co/client/v1/"

# Market Data

## /MarketSearch

In [7]:
# MARKET SEARCH
# {{HOST}}/client/{{VERSION_CLIENT}}/market/search?
# access_token={{CLIENT_TOKEN}}&term='https://www.airbnb.com/rooms/15807599'
# Country
# City
# Neighbourhood
# State (US Only)
# Metropolitan Statistical Area (US Only)
# Zip Code (US Only)

In [8]:
# Test Connection HERE <- Search Calls are Free
# API CALL - Market Search - Find Gatlinburg Market ID
endpoint = "market/search"
url = HOST + endpoint

params = {
    "access_token": AIRDNA_TOKEN,
    "term": "gatlinburg"
}

# Bound the wait so an unresponsive server cannot hang the cell forever.
response = requests.get(url, params=params, timeout=30)
# Fail here on a 4xx/5xx reply (bad token, rate limit, ...) instead of
# later with a confusing KeyError while indexing into the JSON body.
response.raise_for_status()

In [9]:
# Get Market ID: the city id of the first item in the search response
search_results = response.json()
gatlinburg_id = search_results['items'][0]['city']['id']

# Other information Available from This Response:
# - name, country, region, state, type, zipcode

In [10]:
gatlinburg_id

79116

In [11]:
# Turn this Into Function

# Market Search

def get_city_id(city_name: str) -> int:
    """Look up a city id through the market/search endpoint.

    Args:
        city_name: Search term sent as the ``term`` query parameter,
            e.g. ``"gatlinburg"``.

    Returns:
        The ``['city']['id']`` value of the first item in the search
        response (79116 for ``"gatlinburg"`` in this notebook).

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        KeyError: If the response body has no ``items`` key.
        IndexError: If the search returned no items for ``city_name``.
    """
    url = HOST + "market/search"
    params = {
        "access_token": AIRDNA_TOKEN,
        "term": str(city_name),
    }

    # Bound the wait so an unresponsive server cannot hang the caller.
    response = requests.get(url, params=params, timeout=30)
    # Surface HTTP-level failures before trying to read the payload.
    response.raise_for_status()

    items = response.json()["items"]
    if not items:
        # Same exception type the bare items[0] lookup would raise,
        # but with a message that names the failing search term.
        raise IndexError(f"market/search returned no items for term {city_name!r}")

    return items[0]["city"]["id"]

In [12]:
city_id = get_city_id("gatlinburg")
city_id

79116

## /MarketSummary

In [65]:
# MARKET SUMMARY
# {{HOST}}/client/{{VERSION}}/market/summary?
# access_token={{CLIENT_TOKEN}}&city_id=59380&region_id=12341&currency=usd

# PARAMS
# city_id
# region_id
# currency

In [66]:
# ---- API CALL ----
# API CALL - Market Summary - Get Info on Gatlinburg
endpoint = "market/summary"
url = HOST + endpoint

params = {
    "access_token": AIRDNA_TOKEN,
    "city_id": gatlinburg_id
}

# Bound the wait so an unresponsive server cannot hang the cell forever.
response = requests.get(url, params=params, timeout=30)
# Fail here on a 4xx/5xx reply rather than while parsing the body below.
response.raise_for_status()
# ---- API CALL ----

In [67]:
# Parse the JSON body of the market/summary response.
# NOTE(review): this rebinds the name `json`, shadowing the `json` module
# imported in the first cell; the following cells read this dict as json[...].
json = response.json()

### Parsing Through Available Data at /MarketSummary Endpoint

In [68]:
# Data Frame Dictionary: flat mapping of column name -> value.
# The cells below fill it in with dfd.update(...); it is later passed to pd.Series.
dfd = {}

In [69]:
# Market - Summary - Request_Info
# COLS: city_id
# Takes the city id from the response's request_info and stores it as the
# first column of the row being assembled.
city_id = json['request_info']['city_id']
dfd['city_id'] = city_id
city_id

79116

In [70]:
# Market - Summary - Data - Calendar Months (room_type: entire_place)
# adr -> Average Daily Rate
# COLS: occ, adr, revenue -- each taken from its '50th_percentile' entry
ent_stats = json['data']['calendar_months']['room_type']['entire_place']
ent_stats_dict = {
    metric: ent_stats[metric]['50th_percentile']
    for metric in ('occ', 'adr', 'revenue')
}
dfd.update(ent_stats_dict)
ent_stats

{'occ': {'50th_percentile': 0.741935483870968},
 'adr': {'50th_percentile': 262.958333333333},
 'revenue': {'50th_percentile': 4743.5}}

In [71]:
# Market - Summary - Data - Host_Info - Hosts
# COLS: total_hosts, superhosts, multi_unit_hosts, single_unit_hosts
# Every key of this sub-dict is copied into the row under the same name.
host_counts = json['data']['host_info']['hosts']
dfd.update(**host_counts)
host_counts

{'total_hosts': 641,
 'superhosts': 340,
 'multi_unit_hosts': 198,
 'single_unit_hosts': 443}

In [72]:
# Market - Summary - Data - Host_Info - Host_Properties
# COLS: multi_host_properties, total_properties, single_host_properties
# Every key of this sub-dict is copied into the row under the same name.
property_counts = json['data']['host_info']['host_properties']
dfd.update(**property_counts)
property_counts

{'multi_host_properties': 3106,
 'total_properties': 3549,
 'single_host_properties': 443}

In [73]:
# Market - Summary - Data - Rental_Activity - Available
# COLS: available_10_12, available_1_3, available_7_9, available_4_6
# TODO: ?- What is '10-12', '1-3', and such? -?
# They are either length-of-stay or they are n-people
available = json['data']['rental_activity']['available']
available_dict = {
    column: available[bucket]
    for column, bucket in (
        ('available_10_12', '10-12'),
        ('available_1_3', '1-3'),
        ('available_7_9', '7-9'),
        ('available_4_6', '4-6'),
    )
}

dfd.update(available_dict)
available

{'10-12': 2055, '1-3': 1065, '7-9': 668, '4-6': 816}

In [74]:
# Market - Summary - Data - Rental_Activity - Booked
# COLS: booked_10_12, booked_1_3, booked_7_9, booked_4_6
# TODO: ?- What is '10-12', '1-3', and such? -?
# They are either length-of-stay or they are n-people
booked = json['data']['rental_activity']['booked']
booked_dict = {
    column: booked[bucket]
    for column, bucket in (
        ('booked_10_12', '10-12'),
        ('booked_1_3', '1-3'),
        ('booked_7_9', '7-9'),
        ('booked_4_6', '4-6'),
    )
}
dfd.update(booked_dict)
booked

{'10-12': 435, '1-3': 1763, '7-9': 1231, '4-6': 1175}

In [75]:
# Market - Summary - Data - Rental Counts - Counts - Private_Room
# COLS: n_private_rooms (the 'all' entry of the private_room counts)
private_room_counts = json['data']['rental_counts']['counts']['private_room']
n_private_rooms = private_room_counts['all']
dfd['n_private_rooms'] = n_private_rooms
private_room_counts

{'0': 14.0, '1': 72.0, '2': 15.0, '3': 2.0, 'all': 103.0}

In [76]:
# Market - Summary - Data - Rental Counts - Counts - Entire Place by n_rooms
# COLS: n_rooms_0, n_rooms_1, n_rooms_2, n_rooms_3, n_rooms_4,
#       n_rooms_5plus, tot_count_entire_place
count_by_n_rooms = json['data']['rental_counts']['counts']['entire_place']
count_by_n_rooms_dict = {
    column: count_by_n_rooms[bucket]
    for column, bucket in (
        ('n_rooms_0', '0'),
        ('n_rooms_1', '1'),
        ('n_rooms_2', '2'),
        ('n_rooms_3', '3'),
        ('n_rooms_4', '4'),
        ('n_rooms_5plus', '5'),
        ('tot_count_entire_place', 'all'),
    )
}

dfd.update(count_by_n_rooms_dict)
count_by_n_rooms

{'0': 113.0,
 '1': 816.0,
 '2': 1214.0,
 '3': 642.0,
 '4': 317.0,
 '5': 343.0,
 'all': 3445.0}

In [77]:
# Market - Summary - Data - Rental Counts - Average
# COLS: avg_n_rooms (from 'bedrooms'), avg_accommodates (from 'accommodates')
average = json['data']['rental_counts']['average']
average_dict = {}
average_dict['avg_n_rooms'] = average['bedrooms']
average_dict['avg_accommodates'] = average['accommodates']
dfd.update(average_dict)

In [78]:
dfd

{'city_id': 79116,
 'occ': 0.741935483870968,
 'adr': 262.958333333333,
 'revenue': 4743.5,
 'total_hosts': 641,
 'superhosts': 340,
 'multi_unit_hosts': 198,
 'single_unit_hosts': 443,
 'multi_host_properties': 3106,
 'total_properties': 3549,
 'single_host_properties': 443,
 'available_10_12': 2055,
 'available_1_3': 1065,
 'available_7_9': 668,
 'available_4_6': 816,
 'booked_10_12': 435,
 'booked_1_3': 1763,
 'booked_7_9': 1231,
 'booked_4_6': 1175,
 'n_private_rooms': 103.0,
 'n_rooms_0': 113.0,
 'n_rooms_1': 816.0,
 'n_rooms_2': 1214.0,
 'n_rooms_3': 642.0,
 'n_rooms_4': 317.0,
 'n_rooms_5plus': 343.0,
 'tot_count_entire_place': 3445.0,
 'avg_n_rooms': 2.5,
 'avg_accommodates': 7.6}

In [79]:
import pandas as pd

In [82]:
# Turn the accumulated column -> value dict into one labelled row.
# NOTE(review): the resulting Series is displayed as all-float (city_id shows
# as 79116.000000), so integer fields are no longer ints when they are
# written to the database further down.
gat_df_row = pd.Series(dfd)
gat_df_row

city_id                   79116.000000
occ                           0.741935
adr                         262.958333
revenue                    4743.500000
total_hosts                 641.000000
superhosts                  340.000000
multi_unit_hosts            198.000000
single_unit_hosts           443.000000
multi_host_properties      3106.000000
total_properties           3549.000000
single_host_properties      443.000000
available_10_12            2055.000000
available_1_3              1065.000000
available_7_9               668.000000
available_4_6               816.000000
booked_10_12                435.000000
booked_1_3                 1763.000000
booked_7_9                 1231.000000
booked_4_6                 1175.000000
n_private_rooms             103.000000
n_rooms_0                   113.000000
n_rooms_1                   816.000000
n_rooms_2                  1214.000000
n_rooms_3                   642.000000
n_rooms_4                   317.000000
n_rooms_5plus            

In [103]:
# Connect to DB
import os

import psycopg2
from dotenv import load_dotenv

# Connection settings are read from the environment after load_dotenv();
# any variable that is not set comes back as None.
load_dotenv()
RDS_HOSTNAME = os.environ.get("RDS_HOSTNAME")
RDS_PORT = os.environ.get("RDS_PORT")
RDS_DB_NAME = os.environ.get("RDS_DB_NAME")
RDS_USERNAME = os.environ.get("RDS_USERNAME")
RDS_PASSWORD = os.environ.get("RDS_PASSWORD")

conn = psycopg2.connect(
    host=RDS_HOSTNAME,
    port=RDS_PORT,
    dbname=RDS_DB_NAME,
    user=RDS_USERNAME,
    password=RDS_PASSWORD,
)

In [108]:
conn.close()

In [104]:
curs = conn.cursor()

In [105]:
VALUES = str(tuple(gat_df_row.values))
Q_INSERT_COL_NAMES = """
INSERT INTO market_summary (
city_id, occ, adr, revenue,
total_hosts, superhosts, 
multi_unit_hosts, single_unit_hosts,
multi_host_properties, total_properties,
single_host_properties, available_10_12,
available_1_3, available_7_9, available_4_6,
booked_10_12, booked_1_3, booked_7_9, 
booked_4_6, n_private_rooms, n_rooms_0,
n_rooms_1, n_rooms_2, n_rooms_3, n_rooms_4,
n_rooms_5plus, tot_count_entire_place,
avg_n_rooms, avg_accommodates
)
VALUES """
Q_INSERT_FINAL = Q_INSERT_COL_NAMES + VALUES + ";"

In [106]:
curs.execute(Q_INSERT_FINAL)

In [107]:
conn.commit()

# Listing Data

## /ComparableProperties

In [28]:
# ---- API CALL ----
# API CALL - Comparable Properties

# Get City ID
gatlinburg_id = get_city_id("gatlinburg")

endpoint = "market/property/list"
url = HOST + endpoint

params = {
    "access_token": AIRDNA_TOKEN,
    "city_id": gatlinburg_id,
    "order": "revenue",
    "room_types": "entire_home",
    "show_amenities": "True",
    "show_images": "True",
    "show_location": "True"
}

# Bound the wait so an unresponsive server cannot hang the cell forever.
response = requests.get(url, params=params, timeout=30)
# Fail here on a 4xx/5xx reply rather than while reading the body below.
response.raise_for_status()

comp_list = response.json()

# ---- API CALL ----

In [29]:
# Includes 3 Components from the Top:
# Request Info -> Lists out the Request Parameters
# Properties -> 25 Properties
# Area Info -> Name and Location of City
# NOTE(review): same assignment as at the end of the previous cell; it parses
# the same response a second time.
comp_list = response.json()

In [31]:
# ?- How are these 25 chosen? -?
# Answer: I think it is by most recently scraped entries.
# NOTE(review): the request above sets "order": "revenue", and df['revenue']
# further down is listed in descending order, so these may instead be the
# top 25 by revenue -- TODO confirm against the API docs.
len(comp_list['properties'])

25

In [32]:
# Area Info -> Name and Location of City
# Market - Property - List - Area_Info
# COLS: state, city
comp_list['area_info']

{'geom': {'code': {'country': 'us',
   'state': 'tennessee',
   'city': 'gatlinburg'},
  'name': {'country': 'United States',
   'state': 'Tennessee',
   'city': 'Gatlinburg'},
  'id': {'country': 1, 'state': 44, 'city': 79116}}}

In [33]:
# Tabulate the 'properties' list from the response; the shape printed in the
# next cell is (25, 43).
df = pd.DataFrame(comp_list['properties'])

In [34]:
print(df.shape)
df.head(2)

(25, 43)


Unnamed: 0,instant_book,rating_overall,bathrooms,revenue,listing_url,longitude,extra_person_charge,property_type,listed_dt,price_monthly,...,days_b_ltm,last_calendar_update,title,location,accommodates,room_type,img_cover,cleaning_fee,response_rate,cancellation
0,True,7.8,7.0,287435,https://www.airbnb.com/rooms/36135973,-83.46441,,House,2019-07-15,,...,9,2019-08-17,"Big Sky Lodge II, 7 Bedrooms, Sleeps 28, Theat...","{'city': 'Gatlinburg', 'state': 'Tennessee', '...",16,E,https://a0.muscache.com/im/pictures/397cac0a-2...,295,96,super_strict_30
1,True,10.0,6.0,277154,https://www.airbnb.com/rooms/29344064,-83.34223,0.0,Cabin,2018-10-29,39816.0,...,0,2019-08-21,Dancing Bear Lodge,"{'city': 'Gatlinburg', 'state': 'Tennessee', '...",16,E,https://a0.muscache.com/im/pictures/e789be62-0...,496,100,super_strict_30


In [35]:
df['scraped_dt'].value_counts()

2021-01-18    22
2021-01-02     1
2021-01-06     1
2021-01-03     1
Name: scraped_dt, dtype: int64

In [36]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 43 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   instant_book          25 non-null     bool   
 1   rating_overall        24 non-null     float64
 2   bathrooms             25 non-null     float64
 3   revenue               25 non-null     int64  
 4   listing_url           25 non-null     object 
 5   longitude             25 non-null     float64
 6   extra_person_charge   14 non-null     float64
 7   property_type         25 non-null     object 
 8   listed_dt             25 non-null     object 
 9   price_monthly         14 non-null     float64
 10  reviews               25 non-null     int64  
 11  check_in              25 non-null     object 
 12  airbnb_host_id        25 non-null     int64  
 13  business_ready        25 non-null     bool   
 14  price_weekly          14 non-null     float64
 15  occ                   25 

In [37]:
df['revenue']

0     287435
1     277154
2     267959
3     243132
4     233808
5     226434
6     223679
7     222552
8     215896
9     212437
10    207241
11    206060
12    201504
13    200053
14    197586
15    197533
16    197055
17    191561
18    190599
19    188342
20    180690
21    180036
22    177814
23    171756
24    170447
Name: revenue, dtype: int64

In [38]:
df.iloc[23]

instant_book                                                         True
rating_overall                                                   9.900000
bathrooms                                                        4.000000
revenue                                                            171756
listing_url                         https://www.airbnb.com/rooms/13028680
longitude                                                      -83.497130
extra_person_charge                                              0.000000
property_type                                                       Cabin
listed_dt                                                      2016-05-14
price_monthly                                                11200.000000
reviews                                                               124
check_in                                                    After 4:00 PM
airbnb_host_id                                                   22965698
business_ready                        

In [39]:
df['title']

0     Big Sky Lodge II, 7 Bedrooms, Sleeps 28, Theat...
1                                    Dancing Bear Lodge
2     $950/nt Oct 11-16~Privacy on 7 Acres, Hiking T...
3                              Gatlinburg Amazing Grace
4                                      Among The Clouds
5     Majestic Mtn Getaway: Game Room, Decks & Hot Tub!
6     All About The View, 6 Bedrooms, Theater, Mount...
7           Smoky Mountain Ridge Swimming Pool Cabin 21
8                                      Splash ''N Views
9         Nov 9-13 Open! Stunning 7BR w/Honeymoon Suite
10                                   Gatlinburg Mansion
11    Feb 19-24 Open! Huge 6BR Cabin, Gorgeous Fire Pit
12                           Mountain View POOL Lodge!"
13    Jan 18-26 Open! Huge 6BR Cabin, Mtn View, Fire...
14                              Pool And A View Mansion
15    Huge Private Luxury Log Lodge On CREEK w Fire Pit
16    Brand New! 6 bedroom cabin w/ Indoor heated pool!
17    A+Views This 9BR/6BATH Gatlinburg Cabin ha

In [40]:
df.iloc[4]

instant_book                                                         True
rating_overall                                                  10.000000
bathrooms                                                        4.500000
revenue                                                            233808
listing_url                         https://www.airbnb.com/rooms/28622510
longitude                                                      -83.361650
extra_person_charge                                              0.000000
property_type                                                       Cabin
listed_dt                                                      2018-09-19
price_monthly                                                27888.000000
reviews                                                                 1
check_in                                                    After 4:00 PM
airbnb_host_id                                                  197358896
business_ready                        

In [41]:
df['scraped_dt'].value_counts()

2021-01-18    22
2021-01-02     1
2021-01-06     1
2021-01-03     1
Name: scraped_dt, dtype: int64

In [55]:
# Lift the 'zipcode' entry out of each row's location dict into its own column.
df['zip_code'] = list(map(lambda loc: loc['zipcode'], df['location']))

In [58]:
# Combine each row's location 'city' and 'state' into one "City, State" label.
df['city_state'] = list(map(lambda loc: f"{loc['city']}, {loc['state']}", df['location']))

In [61]:
df['instant_book'].head(2)

0    True
1    True
Name: instant_book, dtype: bool

In [59]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 45 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   instant_book          25 non-null     bool   
 1   rating_overall        24 non-null     float64
 2   bathrooms             25 non-null     float64
 3   revenue               25 non-null     int64  
 4   listing_url           25 non-null     object 
 5   longitude             25 non-null     float64
 6   extra_person_charge   14 non-null     float64
 7   property_type         25 non-null     object 
 8   listed_dt             25 non-null     object 
 9   price_monthly         14 non-null     float64
 10  reviews               25 non-null     int64  
 11  check_in              25 non-null     object 
 12  airbnb_host_id        25 non-null     int64  
 13  business_ready        25 non-null     bool   
 14  price_weekly          14 non-null     float64
 15  occ                   25 