In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import folium
import numpy as np
import seaborn as sns
import os
import fastparquet
import warnings
import geopy
from geopy.point import Point
import time
from pandas.core.common import SettingWithCopyWarning
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

#df = pd.read_parquet('Downloads/airbnb_parquets')
#df.to_csv('csv_printout.csv')

In [2]:
def append_fig_to_html(list_of_figs):
    for fig in list_of_figs:
        with open("reports/report_draft.html",'a') as f:
            f.write(fig.to_html(full_html=False, include_plotlyjs='cdn'))

# write this later
"""
def overwrite_html(list_of_figs):
    
    with open("reports/report_draft.html",'w') as f:
    f.write(listing_fig.to_html(full_html=False, include_plotlyjs='cdn'))
    f.write(price_fig.to_html(full_html=False, include_plotlyjs='cdn'))
    f.write(price_v_numlist_fig.to_html(full_html=False, include_plotlyjs='cdn'))
"""

def calculate_mortgage(home_value, interest_rate, num_years, down_payment_pct=0):
    if home_value is None:
        return None
    
    per_payment_interest = 0
    loan_value = 0


    if down_payment_pct >= 1:
        down_payment = down_payment_pct/100 * home_value
        loan_value = home_value - down_payment
    else:
        down_payment = down_payment_pct * home_value
        loan_value = home_value - down_payment
    
    if loan_value/home_value < 0.80: 
        # insert pmi calc here
        pmi = 0.01  # using near average value here
        pmi_cost = 0.0007*home_value    # shot in the dark after interpolating nerdwallet calculator
        if interest_rate >= 1:
            per_payment_interest = interest_rate/100/12
        else:
            per_payment_interest = interest_rate/12
        num_months = num_years*12
        mortgage = loan_value*(per_payment_interest*(1+per_payment_interest)**num_months)/((1+per_payment_interest)**num_months-1) + pmi_cost

        mortgage = np.round(mortgage, 2)
        return mortgage
    else:
        if interest_rate >= 1:
            per_payment_interest = interest_rate/100/12
        else:
            per_payment_interest = interest_rate/12
        num_months = num_years*12
        mortgage = loan_value*(per_payment_interest*(1+per_payment_interest)**num_months)/((1+per_payment_interest)**num_months-1)

        mortgage = np.round(mortgage, 2)
        return mortgage

def calculate_roi(airbnb_daily_price, occupancy_rate, monthly_mortgage, monthly_maintenence=0, monthly_taxes=0):
    gross_rev = airbnb_daily_price * occupancy_rate * 365/12
    net_rev = gross_rev - monthly_maintenence - monthly_taxes
    profit = net_rev - monthly_mortgage
    roi = profit/monthly_mortgage
    roi = roi
    return roi

def set_location_desc(lat,long,geolocator):
    lat = str(lat)
    long = str(long)
    location = geolocator.reverse(lat+","+long)
    address = location.raw['address']
    city = address.get('city', '')
    state = address.get('state', '')
    country = address.get('country')
    country_code = address.get('country_code')
    zipcode = address.get('postcode', '')
    return city, state, country, country_code, zipcode

def set_city(lat, long, geolocator):
    start_time = time.time()
    lat = str(lat)
    print("---Latitude casted to string at %s seconds ---" % (time.time() - start_time))

    long = str(long)
    print("---Longitude casted at %s seconds ---" % (time.time() - start_time))

    location = geolocator.reverse(lat+","+long)
    print("---Get location json from geopy at %s seconds ---" % (time.time() - start_time))

    address = location.raw['address']
    print("---Get address json at %s seconds ---" % (time.time() - start_time))

    city = address.get('city', '')
    print("---Get city value at %s seconds ---" % (time.time() - start_time))
    return city

def set_state(lat, long):
    from geopy.geocoders import Nominatim
    geolocator = Nominatim(user_agent="geoapiExercises")
    lat = str(lat)
    long = str(long)
    location = geolocator.reverse(Point(lat,long))
    address = location.raw['address']
    state = address.get('state', '')
    return state

def set_country(lat, long):
    from geopy.geocoders import Nominatim
    geolocator = Nominatim(user_agent="geoapiExercises")
    lat = str(lat)
    long = str(long)
    location = geolocator.reverse(Point(lat,long))
    address = location.raw['address']
    country = address.get('country')
    return country

def set_country_code(lat, long):
    from geopy.geocoders import Nominatim
    geolocator = Nominatim(user_agent="geoapiExercises")
    lat = str(lat)
    long = str(long)
    location = geolocator.reverse(lat+","+long)
    address = location.raw['address']
    country_code = address.get('country_code')
    return country_code

def set_zipcode(lat, long):
    from geopy.geocoders import Nominatim
    geolocator = Nominatim(user_agent="geoapiExercises")
    lat = str(lat)
    long = str(long)
    location = geolocator.reverse(Point(lat,long))
    address = location.raw['address']
    zipcode = address.get('postcode', '')
    return zipcode

def set_interpolated_state(top_lat,bottom_lat,left_long,right_long):
    lat = (top_lat+bottom_lat)/2
    long = (left_long+right_long)/2
    state = set_state(lat, long)
    return state


def set_avg_home_val(home_values, city, state, num_beds):
    if num_beds <= 5:
        desired_row = home_values[(home_values['RegionName'] == city) & (home_values['State'] == state) & (home_values['num_beds'] == num_beds)]
        avg_value = desired_row['2022-04-30']
        if len(avg_value) == 0:
            return None
        else:
            return avg_value.iloc[0]
    else:
        desired_row = home_values[(home_values['RegionName'] == city) & (home_values['State'] == state) & (home_values['num_beds'] == 5)]
        avg_value = desired_row['2022-04-30']
        if len(avg_value) == 0:
            return None
        else:
            return avg_value.iloc[0]*num_beds/5  # <-- improve this with linear regression later on

def list_options_for_dash(df_series):
    options = []
    value = 0
    for i in df_series:
        if value == 0:
            value = i
        town = {'label':i, 'value':i}
        dict_copy = town.copy()
        options.append(dict_copy)
    return options, value


In [176]:
listing_data_static['state_id'].head()

0    SC
1    SC
2    SC
3    SC
4    SC
Name: state_id, dtype: object

In [3]:
# Load in listing data
nc_dir = 'C:/Users/mattg/Desktop/Hobbies/airbnb_reports/bucket_data/listings/north_carolina'
first_file = ''
for file in os.listdir(nc_dir):
    first_file = os.path.join(nc_dir,file)
    break
listing_data = pd.read_parquet(first_file)
print(listing_data.shape)
for file in os.listdir(nc_dir):
    next_file = os.path.join(nc_dir,file)
    if next_file != first_file:
        next_listing = pd.read_parquet(next_file)
        listing_data = listing_data.append(next_listing)
print(listing_data.shape)

ne_dir = 'C:/Users/mattg/Desktop/Hobbies/airbnb_reports/bucket_data/listings/vt_nh'
for file in os.listdir(ne_dir):
    next_file = os.path.join(ne_dir,file)
    next_listing = pd.read_parquet(next_file)
    listing_data = listing_data.append(next_listing)
print(listing_data.shape)

miami_dir = 'C:/Users/mattg/Desktop/Hobbies/airbnb_reports/bucket_data/listings/miami'
for file in os.listdir(miami_dir):
    next_file = os.path.join(miami_dir,file)
    next_listing = pd.read_parquet(next_file)
    listing_data = listing_data.append(next_listing)
print(listing_data.shape)

# Load in occupancy data
occ_data = pd.read_parquet('C:/Users/mattg/Desktop/Hobbies/airbnb_reports/bucket_data/occupancy')

# Load in price data
price_data = pd.read_parquet('C:/Users/mattg/Desktop/Hobbies/airbnb_reports/bucket_data/prices')

(1, 25)
(2468, 25)
(4224, 25)
(5535, 25)


In [103]:
top_lats = listing_data.top_lat.unique()
bottom_lats = listing_data.bottom_lat.unique()
left_longs = listing_data.left_long.unique()
right_longs = listing_data.right_long.unique()
partitions = {'top_lat': top_lats, 'bottom_lat': bottom_lats, 'left_long': left_longs, 'right_long': right_longs}
partition_df = pd.DataFrame(data=partitions)

ValueError: All arrays must be of the same length

In [106]:
# Clean up listing_data
conditions = [
    listing_data['baths'].str.contains('shared', na=False),
    listing_data['baths'].str.contains('Shared', na=False)
]

values = ['Shared', 'Shared']

listing_data['baths_type'] = np.select(conditions, values, default='Private')

conditions_loc = [
    listing_data['top_lat'] > 35.5,
    listing_data['top_lat'] < 28
]

values_loc = ['New England', 'Miami']

listing_data['Region'] = np.select(conditions_loc, values_loc, default='Carolinas')


conditions_halfbath = [
    listing_data['baths'].str.contains('Half-bath', na=False),
    listing_data['baths'].str.contains('Shared half-bath', na=False),
    listing_data['baths'].str.contains('Private half-bath', na=False)
]

values_halfbath = [0.5,0.5,0.5]

listing_data['baths'] = np.select(conditions_halfbath, values_halfbath, default=listing_data['baths'])

listing_data['baths_no'] = listing_data['baths'].str.split(' ').str[0]
listing_data['beds_no'] = listing_data['beds'].str.split(' ').str[0]
listing_data['guest_no'] = listing_data['title'].str.split(' ').str[0]
listing_data['guest_no'] = listing_data['guest_no'].astype('float')
listing_data['id'] = listing_data['id'].astype('float64')
listing_data['id'] = listing_data['id'].astype('str')
listing_data['town'] = listing_data['town'].str.lower()
listing_data = listing_data[listing_data.lat.isna() == False]
#listing_data['city_geopy'] = listing_data.apply(lambda row: set_city(row['lat'], row['lng']), axis=1)
#listing_data['state_geopy'] = listing_data.apply(lambda row: set_state(row['lat'], row['lng']), axis=1)
#listing_data['country_geopy'] = listing_data.apply(lambda row: set_country(row['lat'], row['lng']), axis=1)
#listing_data['country_code_geopy'] = listing_data.apply(lambda row: set_country_code(row['lat'], row['lng']), axis=1)
#listing_data['zipcode_geopy'] = listing_data.apply(lambda row: set_zipcode(row['lat'], row['lng']), axis=1)

# Clean up occ_data
#occ_data['id'] = occ_data['id'].astype('int')
occ_data['id'] = occ_data['id'].astype('str')

# Clean up price_data
#price_data['id'] = price_data['id'].astype('int')
price_data['id'] = price_data['id'].astype('str')

In [107]:
start_time = time.time()
listing_data['state'] = listing_data.apply(lambda row: set_state(row['lat'], row['lng']), axis=1)
end_time = time.time()
print('Time to run was ' + str(start_time-end_time))

Time to run was -1388.3272874355316


In [109]:
listing_data.to_csv('listings_w_states.csv')

In [160]:
listing_data_static = pd.read_csv('listings_w_states.csv')
city_data = pd.read_csv('resource_data/uscities.csv')
state_convert = city_data[['state_name','state_id']]
state_convert = state_convert.drop_duplicates()
listing_data_static = listing_data_static.merge(state_convert, left_on='state', right_on='state_name')
#home_value = pd.read_csv('resource_data/home_value_zillow.csv')
home_value_1bed = pd.read_csv('resource_data/zillow_city_onebed.csv')
home_value_2bed = pd.read_csv('resource_data/zillow_city_twobed.csv')
home_value_3bed = pd.read_csv('resource_data/zillow_city_threebed.csv')
home_value_4bed = pd.read_csv('resource_data/zillow_city_fourbed.csv')
home_value_5plusbed = pd.read_csv('resource_data/zillow_city_fiveplusbed.csv')

home_value_1bed['RegionName'] = home_value_1bed['RegionName'].str.lower()
home_value_1bed_reduced = home_value_1bed[['RegionName','CountyName', 'Metro', 'StateName','State', '2022-04-30']]
home_value_1bed_reduced['num_beds'] = 1

home_value_2bed['RegionName'] = home_value_2bed['RegionName'].str.lower()
home_value_2bed_reduced = home_value_2bed[['RegionName','CountyName', 'Metro', 'StateName','State', '2022-04-30']]
home_value_2bed_reduced['num_beds'] = 2

home_value_3bed['RegionName'] = home_value_3bed['RegionName'].str.lower()
home_value_3bed_reduced = home_value_3bed[['RegionName','CountyName', 'Metro', 'StateName','State', '2022-04-30']]
home_value_3bed_reduced['num_beds'] = 3

home_value_4bed['RegionName'] = home_value_4bed['RegionName'].str.lower()
home_value_4bed_reduced = home_value_4bed[['RegionName','CountyName', 'Metro', 'StateName','State', '2022-04-30']]
home_value_4bed_reduced['num_beds'] = 4

home_value_5plusbed['RegionName'] = home_value_5plusbed['RegionName'].str.lower()
home_value_5plusbed_reduced = home_value_5plusbed[['RegionName','CountyName', 'Metro', 'StateName','State', '2022-04-30']]
home_value_5plusbed_reduced['num_beds'] = 5

home_values = home_value_1bed_reduced.append(home_value_2bed_reduced)
home_values = home_values.append(home_value_3bed_reduced)
home_values = home_values.append(home_value_4bed_reduced)
home_values = home_values.append(home_value_5plusbed_reduced)

home_values_lookup = home_values[['RegionName','State', 'num_beds', '2022-04-30']]
home_values_lookup = home_values_lookup.drop_duplicates()
#listing_data_static = listing_data_static.merge(home_values, how='inner', left_on = ['state_id','beds_no'], right_on=['State','num_beds'])  # <-- Join to give avg house value for bedroom size
#home_values = [['RegionName', 'CountyName', 'Metro', 'State', 'state_name', '2022-04-30', 'num_beds']] 

In [252]:
listing_data_static['avg_home_value'] = listing_data_static.apply(lambda row: set_avg_home_val(home_values, row['town'], row['state_id'], row['beds_no']),axis=1)
#lambda row: set_avg_home_val(home_values, listing_data_static['town'], listing_data_static['state_id'], listing_data_static['beds_no'], axis=1)

In [228]:
value = set_avg_home_val(home_values, 'clinton', 'NC', 2)


2122    87152.0
Name: 2022-04-30, dtype: float64


In [240]:
listing_data_static.head(5)

Unnamed: 0.1,Unnamed: 0,index,id,name,price,displayPrice,monthlyPriceFactor,weeklyPriceFactor,avgRating,reviewsCount,...,url,baths_type,Region,baths_no,beds_no,guest_no,state,state_name,state_id,avg_home_value
0,0,0,6.303498e+17,Pawleys Island Retreat,100.0,$100,1.0,1.0,,0.0,...,https://www.airbnb.com/s/homes?refinement_path...,Private,Carolinas,3.0,5.0,8.0,South Carolina,South Carolina,SC,1019368.0
1,1,1,5.744629e+17,NEW! Pawleys Paradise Family Beach Home w/Golf...,492.0,$492,0.8,0.86,5.0,8.0,...,https://www.airbnb.com/s/homes?refinement_path...,Private,Carolinas,3.5,8.0,14.0,South Carolina,South Carolina,SC,1630988.8
2,2,2,36442400.0,Peaceful home nestled on golf course,281.0,$281,1.0,0.95,4.64,28.0,...,https://www.airbnb.com/s/homes?refinement_path...,Private,Carolinas,2.0,5.0,8.0,South Carolina,South Carolina,SC,1019368.0
3,3,3,52108690.0,"Golf, Beach and Pool! Open 3Bd 2Bath in True Blue",185.0,$185,0.8,0.9,4.92,13.0,...,https://www.airbnb.com/s/homes?refinement_path...,Private,Carolinas,2.0,6.0,7.0,South Carolina,South Carolina,SC,1223241.6
4,4,4,49887130.0,Hampton,164.0,$164,1.0,1.0,5.0,6.0,...,https://www.airbnb.com/s/homes?refinement_path...,Private,Carolinas,1.0,1.0,2.0,South Carolina,South Carolina,SC,205198.0


In [253]:
listing_data = listing_data_static
listing_data['id'] = listing_data['id'].astype('str')

occ_rate = occ_data.groupby('id')['available'].apply(lambda row: np.sum(row)/len(row))
combined_data = listing_data.merge(occ_rate, on = 'id')
combined_data.rename(columns = {'available':'occupancy_rate'}, inplace = True)

cleaning_fee = price_data.groupby(['id'])['cleaning_fee'].median().reset_index()
cleaning_fee.rename(columns = {'cleaning_fee':'median_cleaning_fee'}, inplace = True)
service_fee = price_data.groupby(['id'])['service_fee'].median().reset_index()
service_fee.rename(columns = {'service_fee':'median_service_fee'}, inplace = True)
combined_data = combined_data.merge(cleaning_fee, on='id')
combined_data = combined_data.merge(service_fee, on='id')
combined_data['median_total_price'] = combined_data['price'] + combined_data['median_cleaning_fee'] + combined_data['median_service_fee']

#home_value['RegionName'] = home_value['RegionName'].str.lower()
#home_value_reduced = home_value[['RegionName','CountyName', 'Metro', 'State', '2022-04-30']]
#combined_data = combined_data.merge(home_value, left_on='town', right_on='RegionName')


In [255]:
# Adding avg mortgage and median ROI
combined_data['avg_30_yr_mort'] = combined_data.apply(lambda row: calculate_mortgage(row['avg_home_value'], 5, 30), axis=1)
combined_data['median_ROI'] = combined_data.apply(lambda row: calculate_roi(row['median_total_price'], row['occupancy_rate'], row['avg_30_yr_mort']), axis=1)

In [257]:
combined_data.head(5)

Unnamed: 0.1,Unnamed: 0,index,id,name,price,displayPrice,monthlyPriceFactor,weeklyPriceFactor,avgRating,reviewsCount,...,state,state_name,state_id,avg_home_value,occupancy_rate,median_cleaning_fee,median_service_fee,median_total_price,avg_30_yr_mort,median_ROI
0,6,6,41953607.0,"DropAnchor, 3BR/2BA Creek Front Beach House",100.0,$100,1.0,1.0,,1.0,...,South Carolina,South Carolina,SC,445710.0,0.384715,180.0,55.0,335.0,2392.67,0.638374
1,13,13,23525446.0,Pawleys Island- Beautiful True Blue Golf Condo,175.0,$175,0.8,0.9,4.82,50.0,...,South Carolina,South Carolina,SC,628440.0,0.042826,185.0,51.0,411.0,3373.6,-0.841303
2,1,1,2095742.0,True Blue Golf and Tennis Resort,125.0,$125,0.48,0.8,4.95,22.0,...,South Carolina,South Carolina,SC,628440.0,0.473973,100.0,35.0,260.0,3373.6,0.111078
3,10,10,19143185.0,Relaxing family beach house in gated commuity,175.0,$175,1.0,1.0,4.91,32.0,...,South Carolina,South Carolina,SC,510785.0,0.041096,150.0,42.0,367.0,2742.0,-0.832695
4,16,16,38490169.0,"Oceanfront, Incredible Water's Edge Views with...",203.0,$203,0.83,1.0,,2.0,...,South Carolina,South Carolina,SC,229353.0,0.658544,170.0,60.0,433.0,1231.22,6.044471


In [None]:
# Table for region and property type
region_proptype_roi = combined_data.groupby(['Region','property_type'])['median_ROI'].median().reset_index()

In [42]:
len(listing_data.id.unique())

2420

In [53]:
combined_data_grouped.head(5)

Unnamed: 0,index,id,name,price,displayPrice,monthlyPriceFactor,weeklyPriceFactor,avgRating,reviewsCount,isNewListing,...,2021-09-30,2021-10-31,2021-11-30,2021-12-31,2022-01-31,2022-02-28,2022-03-31,2022-04-30,avg_30_yr_mort,median_ROI
0,6,41953607.0,"DropAnchor, 3BR/2BA Creek Front Beach House",100.0,$100,1.00,1.0,,1.0,0.0,...,394175.0,404367.0,413364.0,422399.0,430296.0,437008.0,445186.0,454096.0,2437.69,0.608116
1,13,23525446.0,Pawleys Island- Beautiful True Blue Golf Condo,175.0,$175,0.80,0.9,4.82,50.0,0.0,...,394175.0,404367.0,413364.0,422399.0,430296.0,437008.0,445186.0,454096.0,2437.69,-0.780373
2,1,2095742.0,True Blue Golf and Tennis Resort,125.0,$125,0.48,0.8,4.95,22.0,0.0,...,394175.0,404367.0,413364.0,422399.0,430296.0,437008.0,445186.0,454096.0,2437.69,0.537658
3,10,19143185.0,Relaxing family beach house in gated commuity,175.0,$175,1.00,1.0,4.91,32.0,0.0,...,309942.0,321135.0,329764.0,339075.0,347724.0,353249.0,358775.0,366214.0,1965.92,-0.766649
4,16,38490169.0,"Oceanfront, Incredible Water's Edge Views with...",203.0,$203,0.83,1.0,,2.0,0.0,...,309942.0,321135.0,329764.0,339075.0,347724.0,353249.0,358775.0,366214.0,1965.92,3.411824
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
574,1,18975805.0,Luxurious Town Home Steps to the Shore | * Dhalia,217.0,$217,0.80,0.9,4.81,37.0,0.0,...,401843.0,407483.0,413791.0,420294.0,428052.0,434398.0,442541.0,450764.0,2419.80,1.719785
575,1,30378679.0,MODERN AMAZING BEACH GETAWAY! RIGHT ON THE BEACH,95.0,$95,0.90,1.0,4.74,69.0,0.0,...,401843.0,407483.0,413791.0,420294.0,428052.0,434398.0,442541.0,450764.0,2419.80,0.251393
576,16,51669928.0,Stunning 1 BR Beachfront APT @ The Carillon Hotel,367.0,$367,0.95,1.0,4.96,27.0,0.0,...,401843.0,407483.0,413791.0,420294.0,428052.0,434398.0,442541.0,450764.0,2419.80,2.386977
577,13,48435649.0,Unit 102 Byron is a walking distance to the beach,134.0,$134,0.80,0.9,4.63,24.0,0.0,...,401843.0,407483.0,413791.0,420294.0,428052.0,434398.0,442541.0,450764.0,2419.80,2.801078


In [262]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

roi_subplots = make_subplots(rows=3, cols=2)

combined_data_vt = combined_data[(combined_data['state_id'] == 'VT')]
combined_data_nh = combined_data[(combined_data['state_id'] == 'NH')]
combined_data_nc = combined_data[(combined_data['state_id'] == 'NC')]
combined_data_sc = combined_data[(combined_data['state_id'] == 'SC')]
combined_data_fl = combined_data[(combined_data['state_id'] == 'FL')]
combined_data_me = combined_data[(combined_data['state_id'] == 'ME')]

combined_data_vt_mean_roi = combined_data_vt.groupby(['town'])['median_ROI'].mean().reset_index()
combined_data_nh_mean_roi = combined_data_nh.groupby(['town'])['median_ROI'].mean().reset_index()
combined_data_nc_mean_roi = combined_data_nc.groupby(['town'])['median_ROI'].mean().reset_index()
combined_data_sc_mean_roi = combined_data_sc.groupby(['town'])['median_ROI'].mean().reset_index()
combined_data_fl_mean_roi = combined_data_fl.groupby(['town'])['median_ROI'].mean().reset_index()
combined_data_me_mean_roi = combined_data_me.groupby(['town'])['median_ROI'].mean().reset_index()


roi_subplots.append_trace(
    go.Bar(x=combined_data_vt_mean_roi['town'],
    y=combined_data_vt_mean_roi['median_ROI'], name="VT"
), row=1, col=1)

roi_subplots.append_trace(go.Bar(
    x=combined_data_nh_mean_roi['town'],
    y=combined_data_nh_mean_roi['median_ROI'], name="NH"
), row=1, col=2)

roi_subplots.append_trace(go.Bar(
    x=combined_data_nc_mean_roi['town'],
    y=combined_data_nc_mean_roi['median_ROI'], name="NC"
), row=2, col=1)

roi_subplots.append_trace(go.Bar(
    x=combined_data_sc_mean_roi['town'],
    y=combined_data_sc_mean_roi['median_ROI'], name="SC"
), row=2, col=2)

roi_subplots.append_trace(go.Bar(
    x=combined_data_fl_mean_roi['town'],
    y=combined_data_fl_mean_roi['median_ROI'], name="FL"
), row=3, col=1)

roi_subplots.append_trace(go.Bar(
    x=combined_data_me_mean_roi['town'],
    y=combined_data_me_mean_roi['median_ROI'], name="ME"
), row=3, col=2)


roi_subplots.update_layout(height = 1100, title_text="Mean ROI for Cities in each State", barmode='group')
roi_subplots.show()

In [265]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

roi_guest_subplots = make_subplots(rows=3, cols=2)

combined_data_vt = combined_data[(combined_data['state_id'] == 'VT')]
combined_data_nh = combined_data[(combined_data['state_id'] == 'NH')]
combined_data_nc = combined_data[(combined_data['state_id'] == 'NC')]
combined_data_sc = combined_data[(combined_data['state_id'] == 'SC')]
combined_data_fl = combined_data[(combined_data['state_id'] == 'FL')]
combined_data_me = combined_data[(combined_data['state_id'] == 'ME')]

combined_data_vt_mean_roi_guest = combined_data_vt.groupby(['guest_no'])['median_ROI'].mean().reset_index()
combined_data_nh_mean_roi_guest = combined_data_nh.groupby(['guest_no'])['median_ROI'].mean().reset_index()
combined_data_nc_mean_roi_guest = combined_data_nc.groupby(['guest_no'])['median_ROI'].mean().reset_index()
combined_data_sc_mean_roi_guest = combined_data_sc.groupby(['guest_no'])['median_ROI'].mean().reset_index()
combined_data_fl_mean_roi_guest = combined_data_fl.groupby(['guest_no'])['median_ROI'].mean().reset_index()
combined_data_me_mean_roi_guest = combined_data_me.groupby(['guest_no'])['median_ROI'].mean().reset_index()


roi_guest_subplots.append_trace(
    go.Bar(x=combined_data_vt_mean_roi_guest['guest_no'],
    y=combined_data_vt_mean_roi_guest['median_ROI'], name="VT"
), row=1, col=1)

roi_guest_subplots.append_trace(go.Bar(
    x=combined_data_nh_mean_roi_guest['guest_no'],
    y=combined_data_nh_mean_roi_guest['median_ROI'], name="NH"
), row=1, col=2)

roi_guest_subplots.append_trace(go.Bar(
    x=combined_data_nc_mean_roi_guest['guest_no'],
    y=combined_data_nc_mean_roi_guest['median_ROI'], name="NC"
), row=2, col=1)

roi_guest_subplots.append_trace(go.Bar(
    x=combined_data_sc_mean_roi_guest['guest_no'],
    y=combined_data_sc_mean_roi_guest['median_ROI'], name="SC"
), row=2, col=2)

roi_guest_subplots.append_trace(go.Bar(
    x=combined_data_fl_mean_roi_guest['guest_no'],
    y=combined_data_fl_mean_roi_guest['median_ROI'], name="FL"
), row=3, col=1)

roi_guest_subplots.append_trace(go.Bar(
    x=combined_data_me_mean_roi_guest['guest_no'],
    y=combined_data_me_mean_roi_guest['median_ROI'], name="ME"
), row=3, col=2)


roi_guest_subplots.update_layout(height = 1100, title_text="Mean ROI by Guest Number in each State", barmode='group')
roi_guest_subplots.show()

In [266]:
append_fig_to_html([roi_guest_subplots])

In [68]:
combined_data_fl_guests = combined_data_fl.groupby(['guest_no'])['median_total_price'].median().reset_index()
combined_data_fl_occ = combined_data_fl.groupby(['guest_no'])['occupancy_rate'].median().reset_index()

miami_fig = go.Figure(data=[
    go.Bar(name='Total Price', x=combined_data_fl_guests['guest_no'], y=combined_data_fl_guests['median_total_price'], yaxis='y', offsetgroup=1),
    go.Bar(name='Occupancy Rate', x=combined_data_fl_occ['guest_no'], y=combined_data_fl_occ['occupancy_rate'], yaxis='y2', offsetgroup=2),
],
    layout={
        'xaxis': {'title': '# of Guests'},
        'yaxis': {'title': 'Total Price'},
        'yaxis2': {'title': 'Occupancy Rate', 'overlaying': 'y', 'side': 'right'}
    }
)

# Change the bar mode
miami_fig.update_layout(title_text='Median Price and Occupancy by # of Guests in Miami', barmode='group')
miami_fig.show()

In [73]:


towns = combined_data_nc['town'].unique()
towns_for_dash = list_options_for_dash(towns)

In [None]:
lists_per_city = listing_data.groupby(['town'])['id'].count().reset_index()
lists_per_city.rename(columns = {'id':'num_lists_in_town'}, inplace = True)
median_price_per_city = listing_data.groupby(['town'])['price'].median().reset_index()
median_price_per_city.rename(columns = {'price':'median_price_per_city'}, inplace = True)

# Calculate occupancy rate
occ_rate = occ_data.groupby('id')['available'].apply(lambda row: np.sum(row)/len(row))
listing_occ_rate = listing_data.merge(occ_rate, on = 'id')


# Calculate pricing avgs
cleaning_fee = price_data.groupby(['id'])['cleaning_fee'].median().reset_index()
cleaning_fee.rename(columns = {'cleaning_fee':'median_cleaning_fee'}, inplace = True)
service_fee = price_data.groupby(['id'])['service_fee'].median().reset_index()
service_fee.rename(columns = {'service_fee':'median_service_fee'}, inplace = True)
#total_price = price_data.groupby(['id'])['total_price'].median().reset_index()
#total_price.rename(columns = {'total_price':'median_total_price'}, inplace = True)

listing_occ_rate_fees = listing_occ_rate.merge(cleaning_fee, on='id')
listing_occ_rate_fees = listing_occ_rate_fees.merge(service_fee, on='id')
#listing_occ_rate_fees = listing_occ_rate_fees.merge(total_price, on='id')

median_occ_rate_per_city = listing_occ_rate_fees.groupby(['town'])['available'].median().reset_index()
median_occ_rate_per_city.rename(columns = {'available':'occupancy_rate'}, inplace = True)

median_cleaning_fee_per_city = listing_occ_rate_fees.groupby(['town'])['median_cleaning_fee'].median().reset_index()
median_service_fee_per_city = listing_occ_rate_fees.groupby(['town'])['median_service_fee'].median().reset_index()
#median_total_price_per_city = listing_occ_rate_fees.groupby(['town'])['median_total_price'].median().reset_index()
#fees_and_price = cleaning_fee.merge(service_fee, on = 'id')
#fees_and_price = fees_and_price.merge(total_price, on = 'id')

lists_town_price = lists_per_city.merge(median_price_per_city, left_on = 'town', right_on = 'town')
lists_town_price = lists_town_price.merge(median_cleaning_fee_per_city, left_on = 'town', right_on = 'town')
lists_town_price = lists_town_price.merge(median_service_fee_per_city, left_on = 'town', right_on = 'town')
#lists_town_price = lists_town_price.merge(median_total_price_per_city, left_on = 'town', right_on = 'town')
lists_town_price['median_total_price'] = lists_town_price['median_price_per_city'] + lists_town_price['median_cleaning_fee'] + lists_town_price['median_service_fee']
lists_town_price_occ = lists_town_price.merge(median_occ_rate_per_city, left_on = 'town', right_on = 'town')
lists_town_home_price_occ = lists_town_price_occ.merge(city_home, left_on = 'town', right_on = 'city')

lists_town_home_price_occ_vt = lists_town_home_price_occ[(lists_town_home_price_occ['state_id'] == 'VT')]
lists_town_home_price_occ_nh = lists_town_home_price_occ[(lists_town_home_price_occ['state_id'] == 'NH')]
lists_town_home_price_occ_nc = lists_town_home_price_occ[(lists_town_home_price_occ['state_id'] == 'NC')]
lists_town_home_price_occ_sc = lists_town_home_price_occ[(lists_town_home_price_occ['state_id'] == 'SC')]

lists_town_home_price_occ_all = lists_town_home_price_occ_vt.append(lists_town_home_price_occ_nh)
lists_town_home_price_occ_all = lists_town_home_price_occ_all.append(lists_town_home_price_occ_nc)
lists_town_home_price_occ_all = lists_town_home_price_occ_all.append(lists_town_home_price_occ_sc)

lists_town_home_price_occ_all['avg_30_yr_mort'] = lists_town_home_price_occ_all.apply(lambda row: calculate_mortgage(row['2022-04-30'], 5, 30), axis=1)
lists_town_home_price_occ_all['median_ROI'] = lists_town_home_price_occ_all.apply(lambda row: calculate_roi(row['median_price_per_city'], row['occupancy_rate'], row['avg_30_yr_mort']), axis=1)

In [None]:
# grouping by town and guests

city_home['city'] = city_home['city'].str.lower()
#print("city home shape: " + str(city_home.shape))
lists_per_city_guest = listing_data.groupby(['town', 'guest_no'])['id'].count().reset_index()
lists_per_city_guest.rename(columns = {'id':'num_lists_in_town'}, inplace = True)
#print("lists_per_city_guest shape: " + str(lists_per_city_guest.shape))
median_price_per_city_guest = listing_data.groupby(['town', 'guest_no'])['price'].median().reset_index()
median_price_per_city_guest.rename(columns = {'price':'median_price_per_city'}, inplace = True)
#print("median_price_per_city_guest shape: " + str(median_price_per_city_guest.shape))


# Calculate occupancy rate
occ_rate = occ_data.groupby('id')['available'].apply(lambda row: np.sum(row)/len(row))
listing_occ_rate = listing_data.merge(occ_rate, on = 'id')
#print("listing_occ_rate shape: " + str(listing_occ_rate.shape))



# Calculate pricing avgs
cleaning_fee = price_data.groupby(['id'])['cleaning_fee'].median().reset_index()
cleaning_fee.rename(columns = {'cleaning_fee':'median_cleaning_fee'}, inplace = True)
service_fee = price_data.groupby(['id'])['service_fee'].median().reset_index()
service_fee.rename(columns = {'service_fee':'median_service_fee'}, inplace = True)
#total_price = price_data.groupby(['id'])['total_price'].median().reset_index()
#total_price.rename(columns = {'total_price':'median_total_price'}, inplace = True)

listing_occ_rate_fees = listing_occ_rate.merge(cleaning_fee, on='id')
#print("listing_occ_rate_fees shape: " + str(listing_occ_rate_fees.shape))
listing_occ_rate_fees = listing_occ_rate_fees.merge(service_fee, on='id')
#print("listing_occ_rate_fees shape: " + str(listing_occ_rate_fees.shape))

#listing_occ_rate_fees = listing_occ_rate_fees.merge(total_price, on='id')

median_occ_rate_per_city_guest = listing_occ_rate_fees.groupby(['town', 'guest_no'])['available'].median().reset_index()
median_occ_rate_per_city_guest.rename(columns = {'available':'occupancy_rate'}, inplace = True)
#print("median_occ_rate_per_city_guest shape: " + str(median_occ_rate_per_city_guest.shape))


median_cleaning_fee_per_city_guest = listing_occ_rate_fees.groupby(['town', 'guest_no'])['median_cleaning_fee'].median().reset_index()
#print("median_occ_rate_per_city_guest shape: " + str(median_occ_rate_per_city_guest.shape))

median_service_fee_per_city_guest = listing_occ_rate_fees.groupby(['town', 'guest_no'])['median_service_fee'].median().reset_index()
#print("median_service_fee_per_city_guest shape: " + str(median_service_fee_per_city_guest.shape))


lists_town_price_guest = lists_per_city_guest.merge(median_price_per_city_guest, on = ['town', 'guest_no'])
#print("lists_town_price_guest shape: " + str(lists_town_price_guest.shape))

lists_town_price_guest = lists_town_price_guest.merge(median_cleaning_fee_per_city_guest, on = ['town', 'guest_no'])
#print("lists_town_price_guest shape: " + str(lists_town_price_guest.shape))

lists_town_price_guest = lists_town_price_guest.merge(median_service_fee_per_city_guest, on = ['town', 'guest_no'])
#print("lists_town_price_guest shape: " + str(lists_town_price_guest.shape))

#lists_town_price = lists_town_price.merge(median_total_price_per_city, left_on = 'town', right_on = 'town')
lists_town_price_guest['median_total_price'] = lists_town_price_guest['median_price_per_city'] + lists_town_price_guest['median_cleaning_fee'] + lists_town_price_guest['median_service_fee']
#print("lists_town_price_guest shape: " + str(lists_town_price_guest.shape))

lists_town_price_occ_guest = lists_town_price_guest.merge(median_occ_rate_per_city_guest, on = ['town', 'guest_no'])
#print("lists_town_price_guest shape: " + str(lists_town_price_guest.shape))

city_home_needed_cols = city_home[['city','state_id','population','2022-04-30']]
#print("city_home_needed_cols shape: " + str(city_home_needed_cols.shape))

lists_town_home_price_occ_guest = lists_town_price_occ_guest.merge(city_home_needed_cols, how='inner', left_on = 'town', right_on = 'city')
#print("lists_town_home_price_occ_guest shape: " + str(lists_town_home_price_occ_guest.shape))

# Calculate 30 year mortgage for town
lists_town_home_price_occ_guest['avg_30_yr_mort'] = lists_town_home_price_occ_guest.apply(lambda row: calculate_mortgage(row['2022-04-30'], 5, 30), axis=1)
# Calculate median ROI for town
lists_town_home_price_occ_guest['median_ROI'] = lists_town_home_price_occ_guest.apply(lambda row: calculate_roi(row['median_price_per_city'], row['occupancy_rate'], row['avg_30_yr_mort']), axis=1)


wb_data = lists_town_home_price_occ_guest[lists_town_home_price_occ_guest['town'] == 'wrightsville beach']
#print("wb_data shape: " + str(wb_data.shape))

# Create pivot table for nc towns
lists_town_home_price_occ_guest_nc = lists_town_home_price_occ_guest[(lists_town_home_price_occ_guest['state_id'] == 'NC')]
num_lists_pivot = lists_town_home_price_occ_guest_nc.pivot(index='town', columns='guest_no', values='num_lists_in_town')

In [268]:
import dash
import dash_html_components as html
import plotly.graph_objects as go
import dash_core_components as dcc
import plotly.express as px
from dash.dependencies import Input, Output


app = dash.Dash()

df = px.data.stocks()

towns = combined_data_nc['town'].unique()
options_list, value_for_dropdown = list_options_for_dash(towns)


app.layout = html.Div(id = 'parent', children = [
    html.H1(id = 'H1', children = 'Styling using html components', style = {'textAlign':'center',\
                                            'marginTop':40,'marginBottom':40}),

        dcc.Dropdown( id = 'dropdown',
        options = options_list,
        value = value_for_dropdown),
        dcc.Graph(id = 'bar_plot')
    ])
    
    
@app.callback(Output(component_id='bar_plot', component_property= 'figure'),
              [Input(component_id='dropdown', component_property= 'value')])
def graph_update(dropdown_value):
    print(dropdown_value)
    fig = go.Figure([go.Bar(x = combined_data_nc['guest_no'], y = df['{}'.format(dropdown_value)],\
                     line = dict(color = 'firebrick', width = 4))
                     ])
    
    fig.update_layout(title = 'Stock prices over time',
                      xaxis_title = 'Dates',
                      yaxis_title = 'Prices'
                      )
    return fig  



if __name__ == '__main__': 
    app.run_server()

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:8050 (Press CTRL+C to quit)
127.0.0.1 - - [24/May/2022 01:47:53] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [24/May/2022 01:47:54] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [24/May/2022 01:47:54] "GET /_favicon.ico?v=2.4.1 HTTP/1.1" 200 -
127.0.0.1 - - [24/May/2022 01:47:54] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [24/May/2022 01:47:54] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 304 -
127.0.0.1 - - [24/May/2022 01:47:54] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 304 -
127.0.0.1 - - [24/May/2022 01:47:54] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 304 -


bald head island
Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "c:\Users\mattg\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\core\indexes\base.py", line 3621, in get_loc
    return self._engine.get_loc(casted_key)
  File "pandas\_libs\index.pyx", line 136, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\index.pyx", line 163, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\hashtable_class_helper.pxi", line 5198, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas\_libs\hashtable_class_helper.pxi", line 5206, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'bald head island'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "c:\Users\mattg\AppData\Local\Programs\Python\Python310\lib\site-packages\flask\app.py", line 2077, in wsgi_app
    response = self.full_dispatch_request()
  File "c:\Users\mattg\AppData\

127.0.0.1 - - [24/May/2022 01:47:54] "POST /_dash-update-component HTTP/1.1" 500 -


carolina beach
Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "c:\Users\mattg\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\core\indexes\base.py", line 3621, in get_loc
    return self._engine.get_loc(casted_key)
  File "pandas\_libs\index.pyx", line 136, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\index.pyx", line 163, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\hashtable_class_helper.pxi", line 5198, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas\_libs\hashtable_class_helper.pxi", line 5206, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'carolina beach'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "c:\Users\mattg\AppData\Local\Programs\Python\Python310\lib\site-packages\flask\app.py", line 2077, in wsgi_app
    response = self.full_dispatch_request()
  File "c:\Users\mattg\AppData\Loca

127.0.0.1 - - [24/May/2022 01:47:58] "POST /_dash-update-component HTTP/1.1" 500 -


: 

: 