In [165]:
import os
from dotenv import load_dotenv
import requests
import numpy
import json
import pandas as pd

In [4]:
# Accompanying Notion Doc:
# https://www.notion.so/AirDNA-API-c4d49bd11c6642adba6cf23470425e66

In [5]:
# Load API Key
load_dotenv()
AIRDNA_TOKEN = os.getenv("AIRDNA_TOKEN")

In [6]:
# Same for All API Calls
HEADERS = {"access_token": AIRDNA_TOKEN}
HOST = "https://api.airdna.co/client/v1/"

# Market Data

## /MarketSearch

In [7]:
# MARKET SEARCH
# {{HOST}}/client/{{VERSION_CLIENT}}/market/search?
# access_token={{CLIENT_TOKEN}}&term='https://www.airbnb.com/rooms/15807599'
# Country
# City
# Neighbourhood
# State (US Only)
# Metropolitan Statistical Area (US Only)
# Zip Code (US Only)

In [8]:
# Test Connection HERE <- Search Calls are Free
# API CALL - Market Search - Find Gatlinburg Market ID
endpoint = "market/search"
url = HOST + endpoint

params = {
    "access_token": AIRDNA_TOKEN,
    "term": "gatlinburg"
}


response = requests.request("GET", url,  
                            params=params)

In [9]:
# Get Market ID
gatlinburg_id = response.json()['items'][0]['city']['id']

# Other information Available from This Response:
# - name, country, region, state, type, zipcode

In [10]:
gatlinburg_id

79116

In [11]:
# Turn this Into Function

# Market Search

def get_city_id(city_name: str) -> str:
    
    endpoint = "market/search"
    url = HOST + endpoint

    params = {
        "access_token": AIRDNA_TOKEN,
        "term": str(city_name)
    }


    response = requests.request("GET", url,  
                                params=params)

    # city_id
    city_id = response.json()['items'][0]['city']['id']
    
    return city_id

In [12]:
city_id = get_city_id("gatlinburg")
city_id

79116

## /MarketSummary

In [65]:
# MARKET SUMMARY
# {{HOST}}/client/{{VERSION}}/market/summary?
# access_token={{CLIENT_TOKEN}}&city_id=59380&region_id=12341&currency=usd

# PARAMS
# city_id
# region_id
# currency

In [66]:
# ---- API CALL ----
# API CALL - Market Summary - Get Info on Gatlinburg
endpoint = "market/summary"
url = HOST + endpoint

params = {
    "access_token": AIRDNA_TOKEN,
    "city_id": gatlinburg_id
}


response = requests.request("GET", url,  
                            params=params)
# ---- API CALL ----

In [67]:
# Jasonify Results
json = response.json()

### Parcing Through Available Data at /MarketSummary Enpoint

In [68]:
# Data Frame Dictionary
dfd = {}

In [69]:
# Market - Summary - Request_Info
# COLS: city_id
city_id = json['request_info']['city_id']
dfd.update({'city_id': city_id})
city_id

79116

In [70]:
# Market - Summary - Data - Calendar Months
# adr -> Average Daily Rate
# COLS: occ, adr, revenue
ent_stats = json['data']['calendar_months']['room_type']['entire_place']
ent_stats
ent_stats_dict = {
    'occ': ent_stats['occ']['50th_percentile'],
    'adr': ent_stats['adr']['50th_percentile'],
    'revenue': ent_stats['revenue']['50th_percentile']
                 }
dfd.update(ent_stats_dict)
ent_stats

{'occ': {'50th_percentile': 0.741935483870968},
 'adr': {'50th_percentile': 262.958333333333},
 'revenue': {'50th_percentile': 4743.5}}

In [71]:
# Market - Summary - Data - Host_Info - Hosts
# COLS: total_hosts, superhosts, multi_unit_hosts, single_unit_hosts
host_counts = json['data']['host_info']['hosts']
dfd.update(host_counts)
host_counts

{'total_hosts': 641,
 'superhosts': 340,
 'multi_unit_hosts': 198,
 'single_unit_hosts': 443}

In [72]:
# Market - Summary - Data - Host_Info - Host_Properties
# COLS: multi_host_properties, total_properties, single_host_properties
property_counts = json['data']['host_info']['host_properties']
dfd.update(property_counts)
property_counts

{'multi_host_properties': 3106,
 'total_properties': 3549,
 'single_host_properties': 443}

In [73]:
# Market - Summary - Data - Rental_Activity - Available
# COLS:
# TODO: ?- What is '10-12', '1-3', and such? -?
# They are either length-of-stay or they are n-people
available = json['data']['rental_activity']['available']
available_dict = {
    'available_10_12': available['10-12'],
    'available_1_3': available['1-3'],
    'available_7_9': available['7-9'],
    'available_4_6': available['4-6']
                 }

dfd.update(available_dict)
available

{'10-12': 2055, '1-3': 1065, '7-9': 668, '4-6': 816}

In [74]:
# Market - Summary - Data - Rental_Activity - Booked
# COLS:
# TODO: ?- What is '10-12', '1-3', and such? -?
# They are either length-of-stay or they are n-people
booked = json['data']['rental_activity']['booked']
booked_dict = {
    'booked_10_12': booked['10-12'],
    'booked_1_3': booked['1-3'],
    'booked_7_9': booked['7-9'],
    'booked_4_6': booked['4-6']
                 }
dfd.update(booked_dict)
booked

{'10-12': 435, '1-3': 1763, '7-9': 1231, '4-6': 1175}

In [75]:
# Market - Summary - Data - Rental Counts - Counts - Private_Room
# COLS: n_private_rooms
n_private_rooms = json['data']['rental_counts']['counts']['private_room']['all']
dfd.update({'n_private_rooms': n_private_rooms})
json['data']['rental_counts']['counts']['private_room']

{'0': 14.0, '1': 72.0, '2': 15.0, '3': 2.0, 'all': 103.0}

In [76]:
# Market - Summary - Data - Rental Counts - Counts - Entire Place by n_rooms
# COLS: rms0_rntl_cnt, rms1_rntl_cnt, rms3_rntl_cnt
#       rms4_rntl_cnt, rms5plus_rntl_cnt, n_entire_place
count_by_n_rooms = json['data']['rental_counts']['counts']['entire_place']
count_by_n_rooms_dict = {
    'n_rooms_0': count_by_n_rooms['0'],
    'n_rooms_1': count_by_n_rooms['1'],
    'n_rooms_2': count_by_n_rooms['2'],
    'n_rooms_3': count_by_n_rooms['3'],
    'n_rooms_4': count_by_n_rooms['4'],
    'n_rooms_5plus': count_by_n_rooms['5'],
    'tot_count_entire_place': count_by_n_rooms['all'],
                        }

dfd.update(count_by_n_rooms_dict)
count_by_n_rooms

{'0': 113.0,
 '1': 816.0,
 '2': 1214.0,
 '3': 642.0,
 '4': 317.0,
 '5': 343.0,
 'all': 3445.0}

In [77]:
# Market - Summary - Data - Rental Counts - Average
# COLS: average_n_bedrooms, average_accomodates
average = json['data']['rental_counts']['average']
average_dict = {
    'avg_n_rooms': average['bedrooms'],
    'avg_accommodates': average['accommodates']
}
dfd.update(average_dict)

In [78]:
dfd

{'city_id': 79116,
 'occ': 0.741935483870968,
 'adr': 262.958333333333,
 'revenue': 4743.5,
 'total_hosts': 641,
 'superhosts': 340,
 'multi_unit_hosts': 198,
 'single_unit_hosts': 443,
 'multi_host_properties': 3106,
 'total_properties': 3549,
 'single_host_properties': 443,
 'available_10_12': 2055,
 'available_1_3': 1065,
 'available_7_9': 668,
 'available_4_6': 816,
 'booked_10_12': 435,
 'booked_1_3': 1763,
 'booked_7_9': 1231,
 'booked_4_6': 1175,
 'n_private_rooms': 103.0,
 'n_rooms_0': 113.0,
 'n_rooms_1': 816.0,
 'n_rooms_2': 1214.0,
 'n_rooms_3': 642.0,
 'n_rooms_4': 317.0,
 'n_rooms_5plus': 343.0,
 'tot_count_entire_place': 3445.0,
 'avg_n_rooms': 2.5,
 'avg_accommodates': 7.6}

In [79]:
import pandas as pd

In [82]:
gat_df_row = pd.Series(dfd)
gat_df_row

city_id                   79116.000000
occ                           0.741935
adr                         262.958333
revenue                    4743.500000
total_hosts                 641.000000
superhosts                  340.000000
multi_unit_hosts            198.000000
single_unit_hosts           443.000000
multi_host_properties      3106.000000
total_properties           3549.000000
single_host_properties      443.000000
available_10_12            2055.000000
available_1_3              1065.000000
available_7_9               668.000000
available_4_6               816.000000
booked_10_12                435.000000
booked_1_3                 1763.000000
booked_7_9                 1231.000000
booked_4_6                 1175.000000
n_private_rooms             103.000000
n_rooms_0                   113.000000
n_rooms_1                   816.000000
n_rooms_2                  1214.000000
n_rooms_3                   642.000000
n_rooms_4                   317.000000
n_rooms_5plus            

In [103]:
# Connect to DB
import psycopg2
import os
from dotenv import load_dotenv
import os


load_dotenv()
RDS_HOSTNAME = os.getenv("RDS_HOSTNAME")
RDS_PORT = os.getenv("RDS_PORT")
RDS_DB_NAME = os.getenv("RDS_DB_NAME")
RDS_USERNAME = os.getenv("RDS_USERNAME")
RDS_PASSWORD = os.getenv("RDS_PASSWORD")

conn = psycopg2.connect(
    dbname=RDS_DB_NAME,
    user=RDS_USERNAME,
    password=RDS_PASSWORD,
    host=RDS_HOSTNAME,
    port=RDS_PORT
    )

In [108]:
conn.close()

In [104]:
curs = conn.cursor()

In [105]:
VALUES = str(tuple(gat_df_row.values))
Q_INSERT_COL_NAMES = """
INSERT INTO market_summary (
city_id, occ, adr, revenue,
total_hosts, superhosts, 
multi_unit_hosts, single_unit_hosts,
multi_host_properties, total_properties,
single_host_properties, available_10_12,
available_1_3, available_7_9, available_4_6,
booked_10_12, booked_1_3, booked_7_9, 
booked_4_6, n_private_rooms, n_rooms_0,
n_rooms_1, n_rooms_2, n_rooms_3, n_rooms_4,
n_rooms_5plus, tot_count_entire_place,
avg_n_rooms, avg_accommodates
)
VALUES """
Q_INSERT_FINAL = Q_INSERT_COL_NAMES + VALUES + ";"

In [106]:
curs.execute(Q_INSERT_FINAL)

In [107]:
conn.commit()

# Listing Data

## /ComparableProperties

In [421]:
# ---- API CALL ----
# API CALL - Comparable Properties

# Get City ID
gatlinburg_id = get_city_id("gatlinburg")

endpoint = "market/property/list"
url = HOST + endpoint

params = {
    "access_token": AIRDNA_TOKEN,
    "city_id": gatlinburg_id,
    "order": "revenue",
    "room_types": "entire_home",
    "show_amenities": "True",
    "show_images": "True",
    "show_location": "True"
}


response = requests.request("GET", url,  
                            params=params)

comp_list = response.json()

# ---- API CALL ----

In [479]:
# Includes 3 Components from the Top:
# Request Info -> Lists out the Request Parameters
# Properties -> 25 Properties
# Area Info -> Name and Location of City
comp_list = response.json()

In [480]:
# ?- How are these 25 chosen? -?
# Answer: I think it is by most recently scraped entries.
len(comp_list['properties'])

25

In [481]:
# Area Info -> Name and Location of City
# Market - Property - List - Area_Info
# COLS: state, city
# comp_list['properties']

In [2]:
comp_list = response.json()
df = pd.DataFrame(comp_list['properties'])
df.shape

NameError: name 'response' is not defined

In [539]:
# df.insert(3, 'new', new)
area_ids = [str(comp_list['area_info']['geom']['id'])] * len(comp_list['properties'])
df.insert(43, 'area_id', area_ids)
zip_codes = [d['zipcode'] for d in df['location']]
df.insert(44, 'zip_code', zip_codes)
city_states = [f"{d['city']}, {d['state']}" for d in df['location']]
df.insert(45, 'city_state', city_states)
location = [str(i) for i in df['location']]
df.shape

(25, 46)

In [540]:
# # Reformat Bool Cols
# assert type(df['instant_book'][0]) == numpy.bool_
# assert type(df['business_ready'][0]) == numpy.bool_
# assert type(df['superhost'][0]) == numpy.bool_

# df['instant_book'] = [str(b).lower() for b in df['instant_book']]
# df['business_ready'] = [str(b).lower() for b in df['business_ready']]
# df['superhost'] = [str(b).lower() for b in df['superhost']]

# assert type(df['instant_book'][0]) == str
# assert type(df['business_ready'][0]) == str
# assert type(df['superhost'][0]) == str

In [541]:
type(df['rating_overall'][0])

numpy.float64

In [586]:
# Connect to DB
import psycopg2
import os
from dotenv import load_dotenv
import os


load_dotenv()
RDS_HOSTNAME = os.getenv("RDS_HOSTNAME")
RDS_PORT = os.getenv("RDS_PORT")
RDS_DB_NAME = os.getenv("RDS_DB_NAME")
RDS_USERNAME = os.getenv("RDS_USERNAME")
RDS_PASSWORD = os.getenv("RDS_PASSWORD")

conn = psycopg2.connect(
    dbname=RDS_DB_NAME,
    user=RDS_USERNAME,
    password=RDS_PASSWORD,
    host=RDS_HOSTNAME,
    port=RDS_PORT
    )
curs = conn.cursor()
data_list = [tuple(row) for row in df.itertuples(index=False)] 

In [587]:
for row in data_list:    
    Q_INSERT_STATEMENT = f"""
        INSERT INTO comp_props (
        instant_book, rating_overall, bathrooms,
        revenue, listing_url, longitude, 
        extra_person_charge, property_type, listed_dt, 
        price_monthly, reviews, check_in, 
        airbnb_host_id, business_ready, price_weekly,
        occ, days_r_ltm, scraped_dt, 
        img_prop, amenities, superhost,
        price_nightly, num_res_ltm, img_count, 
        security_deposit, airbnb_property_id, bedrooms, 
        days_a_ltm, latitude, minimum_stay,
        adr, check_out, response_time,
        days_b_lmt, last_calendar_update, title, 
        loc, accommodates, room_type,  
        img_cover, cleaning_fee, response_rate, 
        cancellation. area_id, zip_code,
        city_state)
        VALUES ({str(row[0]).lower()}, {float(row[1])}, {float(row[2])}, 
                {float(row[3])}, {str(row[4])}, {float(row[5])}, 
                {float(row[6])}, {str(row[7])}, {str(row[8])},           
                {float(row[9])}, {int(row[10])}, '{str(row[11])}',          
                {int(row[12])}, {str(row[13]).lower()}, {float(row[14])}, 
                {float(row[15])}, {int(row[16])}, {str(row[17])}, 
                ARRAY {str(row[18])}, ARRAY {str(row[19])}, '{str(row[20]).lower}', 
                {float(row[21])}, {int(row[22])}, {int(row[23])}, 
                {float(row[24])}, {int(row[25])}, {float(row[26])},
                {int(row[27])}, {float(row[28])}, {int(row[29])},  
                {float(row[30])}, '{str(row[31])}', {float(row[32])}, 
                {int(row[33])}, {str(row[34])}, '{str(row[35])}',
                "{str(row[36])}", {int(row[37])}, {str(row[38])},
                '{str(row[39])}', {float(row[40])}, {float(row[41])},     
                {str(row[42])}, "{str(row[43])}", {int(row[44])},
                {str(row[45])});
        """
    
    curs.execute(Q_INSERT_STATEMENT)

UndefinedColumn: column "house" does not exist
LINE 21:                 'nan', House, 2019-07-15,           
                                ^


In [560]:
#conn.commit()

In [1]:
conn.close()

NameError: name 'conn' is not defined

In [511]:
# a = 45
# b = a + 3
# print(f'--{a}-{a+1}-{b-1}--')
# print(df.dtypes[a:b])
# print('--------------')
# print(data_list[0][a:b])

In [514]:
for i, value in enumerate(data_list[0]):
    print(i)
    print(value)

0
True
1
7.8
2
7.0
3
287435
4
https://www.airbnb.com/rooms/36135973
5
-83.46441
6
nan
7
House
8
2019-07-15
9
nan
10
23
11
After 4:00 PM
12
225787808
13
False
14
nan
15
0.736695
16
263
17
2021-01-18
18
['https://a0.muscache.com/im/pictures/397cac0a-2553-4b12-8725-6339f380642f.jpg', 'https://a0.muscache.com/im/pictures/4fd0f059-244e-4e58-9542-2c8f544c7e08.jpg', 'https://a0.muscache.com/im/pictures/46d38823-df76-4624-8162-e56066ded0d8.jpg', 'https://a0.muscache.com/im/pictures/c6ef99ba-8f5b-4b45-81b7-e9dbb43c99e9.jpg', 'https://a0.muscache.com/im/pictures/62a0b984-f3e4-4939-8e19-ff9a35545f0c.jpg', 'https://a0.muscache.com/im/pictures/35787ed3-2c09-4c6d-99da-1ce5bbbaf368.jpg', 'https://a0.muscache.com/im/pictures/8fde0455-debf-4ba8-927c-44a119c73a10.jpg', 'https://a0.muscache.com/im/pictures/4810c511-f738-4f6f-8185-d2e1d65aa41b.jpg', 'https://a0.muscache.com/im/pictures/77566077-4bf4-46e6-8e1a-50ab1276d68d.jpg', 'https://a0.muscache.com/im/pictures/b83667d3-a9c7-4982-934b-fe19dc42a934.jpg'