<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Scraping-Analysis" data-toc-modified-id="Scraping-Analysis-1">Scraping Analysis</a></span><ul class="toc-item"><li><span><a href="#Imports" data-toc-modified-id="Imports-1.1">Imports</a></span></li><li><span><a href="#Read-Data" data-toc-modified-id="Read-Data-1.2">Read Data</a></span></li><li><span><a href="#Filters" data-toc-modified-id="Filters-1.3">Filters</a></span></li><li><span><a href="#Create-Map" data-toc-modified-id="Create-Map-1.4">Create Map</a></span><ul class="toc-item"><li><span><a href="#Generate-Info-Boxes" data-toc-modified-id="Generate-Info-Boxes-1.4.1">Generate Info Boxes</a></span></li><li><span><a href="#Generate-Figure" data-toc-modified-id="Generate-Figure-1.4.2">Generate Figure</a></span></li></ul></li></ul></li></ul></div>

# Scraping Analysis

## Imports

In [1]:
import html
import locale
import math
import os

import gmaps
import ipywidgets as widgets
import pandas as pd
from IPython.display import display

from preparing_scraped_data import df, isNaN

# Set every locale category (LC_ALL) of this process to US English / UTF-8.
# NOTE(review): no locale-aware formatting is visible in the cells shown here —
# confirm this call is still needed.
locale.setlocale(locale.LC_ALL, "en_US.UTF-8")

'en_US.UTF-8'

## Read Data

In [2]:
# Preview the first rows of the frame imported from preparing_scraped_data.
display(df.head())

Unnamed: 0.1,Unnamed: 0,_type,avg_daily_svg,avg_occupancy_svg,avg_revenue_svg,city,current_active_listings,duplicate_error,entire_home,max_monthly_revenue_last_12_months,...,url,zip_meta_rates,city_c,state_city,latitude,longitude,state,avg_monthly_revenue,avg_nightly_revenue,avg_monthly_occupancy
0,0,dict,"M0,NaNC0,NaN,9.916666666666666,NaN,9.916666666...","M0,NaNC0,NaN,10.583333333333332,NaN,10.5833333...","M0,NaNC0,NaN,8.25,NaN,8.25,NaNC8.25,NaN,16.5,N...","Wrightstown,Search neighborhoods",0.0,,,,...,https://www.airdna.co/vacation-rental-data/app...,,Wrightstown,new-jersey/wrightstown,40.06,-74.59,NJ,,,
1,2,dict,"M0,NaNC0,NaN,9.916666666666666,NaN,9.916666666...","M0,NaNC0,NaN,10.583333333333332,NaN,10.5833333...","M0,NaNC0,NaN,8.25,NaN,8.25,NaNC8.25,NaN,16.5,N...","Woodbury Heights,Search neighborhoods",0.0,,,,...,https://www.airdna.co/vacation-rental-data/app...,,Woodbury Heights,new-jersey/woodbury-heights,39.81,-75.15,NJ,,,
2,3,dict,"M0,40L49.5,0","M0,0L71,40","M0,0L49.5,40","Wyckoff,Search neighborhoods",7.0,,190.571429,25000.0,...,https://www.airdna.co/vacation-rental-data/app...,,Wyckoff,new-jersey/wyckoff,40.99,-74.16,NJ,,,
3,4,dict,"M0,0C0,0,6.28662047792926,39.819416484184465,9...","M0,20.50478677110527C0,20.50478677110527,6.878...","M0,25.907692307692308C0,25.907692307692308,5.8...","Woodbridge,Search neighborhoods",23.0,,174.0,2563.0,...,https://www.airdna.co/vacation-rental-data/app...,,Woodbridge,new-jersey/woodbridge,40.55,-74.28,NJ,1379.55,120.26,75.3
4,6,dict,"M0,40C0,40,7.212121212121213,40,10.81818181818...","M0,40C0,40,6.763652180994598,21.56916360518929...","M0,40C0,40,4.700159727382945,18.56768347140979...","Woodstown,Search neighborhoods",2.0,,667.0,3730.0,...,https://www.airdna.co/vacation-rental-data/app...,,Woodstown,new-jersey/woodstown,39.65,-75.32,NJ,2091.0,171.13,63.79


## Filters

In [3]:
# Row filters for the scraped frame
def filter_by_has_city(df):
    """Return the rows of ``df`` whose ``city_c`` string is non-empty.

    Rows where the length of ``city_c`` is not greater than zero are dropped.
    """
    has_city = df["city_c"].str.len() > 0
    return df[has_city]


def filter_by_has_listings(df, my_min, my_max):
    """Return the rows whose ``current_active_listings`` lies strictly between the bounds.

    Both bounds are exclusive: a row is kept only when
    ``my_min < current_active_listings < my_max``.
    """
    listings = df["current_active_listings"]
    above_min = listings > my_min
    below_max = listings < my_max
    return df[above_min & below_max]


def filter_by_has_revenue(df, my_min, my_max):
    """Return the rows whose ``avg_monthly_revenue`` lies strictly between the bounds.

    Missing or non-numeric revenue values are counted as 0 before the
    comparison, so they are kept only when ``my_min < 0 < my_max``.

    Args:
        df: DataFrame with an ``avg_monthly_revenue`` column.
        my_min: Exclusive lower bound.
        my_max: Exclusive upper bound.

    Returns:
        The matching rows of ``df`` with all columns preserved, including
        when ``df`` has no rows.
    """
    revenue = pd.to_numeric(df["avg_monthly_revenue"], errors="coerce").fillna(0)
    # Index with a boolean Series rather than a plain list: an empty list
    # would be read as "select no columns" and silently drop every column.
    in_range = (revenue > my_min) & (revenue < my_max)
    return df[in_range]


# Apply the baseline filters: a city name, more than zero active listings,
# and more than zero average monthly revenue.
df = (
    df.pipe(filter_by_has_city)
    .pipe(filter_by_has_listings, 0, math.inf)
    .pipe(filter_by_has_revenue, 0, math.inf)
)

## Create Map

### Generate Info Boxes

In [4]:
def get_info_boxes(df):
    """Build one HTML info-box string per row of ``df``.

    Each box is a ``<dl>`` listing the row's ``city_c``,
    ``current_active_listings`` and ``avg_monthly_revenue``.

    Args:
        df: DataFrame containing the three columns named above.

    Returns:
        A list of HTML strings in row order; empty when ``df`` has no rows.
    """
    records = df[[
        "avg_monthly_revenue", "current_active_listings", "city_c"
    ]].to_dict(orient="records")

    info_box_template = """
    <dl>
    <dt>City</dt><dd>{city_c}</dd>
    <dt>Number of Listings</dt><dd>{current_active_listings}</dd>
    <dt>Avg Monthly Revenue (Last 12 Months)</dt><dd>{avg_monthly_revenue}</dd>
    </dl>
    """

    # The values are interpolated into HTML, so escape them: a city name
    # containing "&" or "<" would otherwise corrupt or inject markup.
    return [
        info_box_template.format(
            **{field: html.escape(str(value)) for field, value in record.items()})
        for record in records
    ]

### Generate Figure

In [5]:
# Read the Google Maps API key from the environment so the secret is never
# stored in the notebook; set GOOGLE_MAPS_API_KEY before starting Jupyter.
# NOTE(review): the key that used to be hard-coded here was saved with the
# notebook and should be revoked and replaced.
gmaps.configure(api_key=os.environ["GOOGLE_MAPS_API_KEY"])


# Weighted heatmap (the original note says active listings are used as the weight)
def generate_heat(locations, weight, max_intensity):
    """Display a gmaps figure containing a single weighted heatmap layer.

    The figure is centered on the fixed coordinates (39.833851, -74.871826)
    at zoom level 5. ``locations``, ``weight`` and ``max_intensity`` are
    passed straight through to ``gmaps.heatmap_layer``. Nothing is returned;
    the figure is shown with ``display``.
    """
    figure = gmaps.figure(center=(39.833851, -74.871826), zoom_level=5)
    figure.add_layer(
        gmaps.heatmap_layer(locations,
                            weights=weight,
                            max_intensity=max_intensity))
    display(figure)


def generate_marker(locations, info_boxes):
    """Display a gmaps figure with one blue symbol per location.

    The figure is centered on the fixed coordinates (39.833851, -74.871826)
    at zoom level 5. ``info_boxes`` is passed to ``gmaps.symbol_layer`` as
    ``info_box_content``. Nothing is returned; the figure is shown with
    ``display``.
    """
    center = (39.833851, -74.871826)
    marker_style = dict(fill_color="blue", stroke_color="blue", scale=2)

    figure = gmaps.figure(center=center, zoom_level=5)
    figure.add_layer(
        gmaps.symbol_layer(locations,
                           info_box_content=info_boxes,
                           **marker_style))
    display(figure)


def filter_widget(df, min_monthly, max_monthly, min_listing, max_listing):
    """Filter ``df`` by revenue and listing bounds, then draw the marker map.

    The revenue filter is applied first, then the listings filter. The
    surviving rows supply both the latitude/longitude pairs and the info
    boxes handed to ``generate_marker``.
    """
    in_revenue_range = filter_by_has_revenue(df, min_monthly, max_monthly)
    selected = filter_by_has_listings(in_revenue_range, min_listing, max_listing)
    generate_marker(selected[['latitude', 'longitude']],
                    get_info_boxes(selected))





'Filter map by min and max average monthly revenue and number of listings'

interactive(children=(FloatText(value=1000.0, description='min_monthly'), FloatText(value=10000.0, description…

<function __main__.filter_widget(df, min_monthly, max_monthly, min_listing, max_listing)>

'generate list of places sorted by selected column'

interactive(children=(Dropdown(description='sorted_by', options=('current_active_listings', 'avg_monthly_reven…

<function __main__.list_widget(df, sorted_by)>