# Exploring the Data Available from Mashvisor API

In [1]:
import os
from dotenv import load_dotenv
import requests
import json
import matplotlib.pyplot as plt
import pandas as pd

## Mashvisor

In [2]:
# Authentication
# Define Authentication Headers
# --> Referenced Throughout this Notebook

# Load API Keys from the environment (load_dotenv() is called first so that
# values from a local .env file are available to os.getenv).
load_dotenv()
X_RAPID_API_KEY = os.getenv("X_RAPID_API_KEY")
X_RAPID_API_HOST_MASHVISOR = os.getenv("X_RAPID_API_HOST_MASHVISOR")

# os.getenv returns None for an unset variable. Stop here with a clear message
# rather than letting a later request cell fail on a missing credential.
_required_settings = {
    "X_RAPID_API_KEY": X_RAPID_API_KEY,
    "X_RAPID_API_HOST_MASHVISOR": X_RAPID_API_HOST_MASHVISOR,
}
_missing_settings = sorted(
    key for key, value in _required_settings.items() if not value
)
if _missing_settings:
    raise RuntimeError(
        "Missing environment variable(s): {}. "
        "Set them in the environment or in a .env file.".format(
            ", ".join(_missing_settings)
        )
    )

# Headers include Authentication Keys

headers = {
    'x-rapidapi-key': X_RAPID_API_KEY,
    'x-rapidapi-host': X_RAPID_API_HOST_MASHVISOR,
}

## Short Term Rentals

### Get Listings
#### List all active short term rentals - Airbnb listings - for a specific location: city, zip code, or a neighborhood

In [29]:
# SHORT TERM RENTALS
# Build Query URL - Get Listings
# Free Limit of 4 Rows

url_beginning = "https://mashvisor-api.p.rapidapi.com"
url_ending = "/airbnb-property/active-listings"

url = url_beginning + url_ending

querystring = {
    "state": "TN",
    "zip_code": "37738",
}

# Request
# The timeout (seconds) stops the cell from hanging on a stalled connection.
# raise_for_status() reports HTTP error responses (4xx/5xx) here instead of
# as a KeyError on 'content' further down.

response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()
json_response = response.json()

# Build Dataframe

df = pd.DataFrame(json_response['content']['properties'])

# Format the message explicitly: print("label", value) prints a tuple when
# print is a statement (Python 2), while .format() reads the same on 2 and 3.
print("Shape: {}".format(df.shape))
print("Columns: {}".format(df.columns.tolist()))
df.head()

('Shape:', (4, 36))
('Columns:', Index([u'address', u'airbnb_city', u'airbnb_neighborhood',
       u'airbnb_neighborhood_id', u'amenities', u'capacity_of_people',
       u'cleaning_fee_native', u'created_at', u'id', u'image', u'lat', u'lon',
       u'monthly_price', u'name', u'night_price', u'night_priceـnative',
       u'nights_booked', u'num_of_baths', u'num_of_beds', u'num_of_rooms',
       u'occupancy', u'property_id', u'property_type', u'rental_income',
       u'reviews', u'reviews_count', u'room_type', u'room_type_category',
       u'source', u'start_rating', u'state', u'status', u'updated_at', u'url',
       u'weekly_price', u'zip'],
      dtype='object'))


Unnamed: 0,address,airbnb_city,airbnb_neighborhood,airbnb_neighborhood_id,amenities,capacity_of_people,cleaning_fee_native,created_at,id,image,...,room_type,room_type_category,source,start_rating,state,status,updated_at,url,weekly_price,zip
0,"Gatlinburg, TN, United States",Gatlinburg,,18258,,14,190,2021-01-05T16:05:00.000Z,25506065,https://a0.muscache.com/im/pictures/fec13df4-7...,...,Entire home/apt,entire_home,Airbnb,5,TN,ACTIVE,2021-01-05T16:05:00.000Z,,0,37738
1,"Gatlinburg, TN, United States",Gatlinburg,,18258,,6,85,2021-01-05T16:05:51.000Z,25506217,https://a0.muscache.com/im/pictures/83d35763-4...,...,Entire home/apt,entire_home,Airbnb,5,TN,ACTIVE,2021-01-05T16:05:51.000Z,,0,37738
2,"Gatlinburg, TN, United States",Gatlinburg,,18258,,6,90,2021-01-05T16:05:19.000Z,25506122,https://a0.muscache.com/im/pictures/50b50423-3...,...,Entire home/apt,entire_home,Airbnb,5,TN,ACTIVE,2021-01-05T16:05:19.000Z,,0,37738
3,"Gatlinburg, TN, United States",Gatlinburg,,18258,,16,325,2021-01-05T16:05:06.000Z,25506080,https://a0.muscache.com/im/pictures/9e0164fb-c...,...,Entire home/apt,entire_home,Airbnb,5,TN,ACTIVE,2021-01-05T16:05:06.000Z,,0,37738


In [30]:
# Nightly price of every listing currently held in df.
df.loc[:, 'night_price']

0    384
1    155
2    202
3    854
Name: night_price, dtype: int64

In [31]:
# Guest capacity of every listing currently held in df.
df.loc[:, 'capacity_of_people']

0    14
1     6
2     6
3    16
Name: capacity_of_people, dtype: int64

In [32]:
# Capacity and nightly price side by side for each listing.
df.loc[:, ['capacity_of_people', 'night_price']]

Unnamed: 0,capacity_of_people,night_price
0,14,384
1,6,155
2,6,202
3,16,854


- Why only 4 Rows? -> Data Limitation or Account Limitation?

In [5]:
# TN Airbnb Neighborhood ID
# Taken from the row labelled 0. This expects df to still be the frame built
# by the Get Listings cell, which is the one with 'airbnb_neighborhood_id'.
tn_nb_id = df.loc[0, 'airbnb_neighborhood_id']
tn_nb_id

18258

In [6]:
# Check for Cloud9
# Flags, per listing, whether its name contains the (case-sensitive) text 'judy'.
df.loc[:, 'name'].str.contains('judy')

0    False
1    False
2    False
3    False
Name: name, dtype: bool

### Get Market Summary
#### Get a summary and overview for a specific Airbnb market location: city, zip code, or a neighborhood

In [7]:
# SHORT TERM RENTALS
# Build Query URL - Market Summary

url_beginning = "https://mashvisor-api.p.rapidapi.com"
url_ending = "/airbnb-property/market-summary"

url = url_beginning + url_ending

querystring = {
    "state": "TN",
    "zip_code": "37738",
}

# Request
# The timeout (seconds) stops the cell from hanging on a stalled connection.
# raise_for_status() reports HTTP error responses (4xx/5xx) here instead of
# as a KeyError on 'content' further down.

response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()
json_response = response.json()

# Kept in `content` because the following cells read from it.
content = json_response['content']

# Seems Like this is a Series of Histograms
# that describe a market
print('Availale Histograms for Gberg TN:')
list(content.keys())

Availale Histograms for Gberg TN:


[u'occupancy_histogram',
 u'listings_count',
 u'listings_ids',
 u'night_price_histogram',
 u'rental_income_histogram',
 u'property_types']

In [8]:
# Disabled: histogram of the 'property_types' entry of the market-summary content.
# NOTE(review): the layout of content['property_types']['histogram'] has not
# been checked in this notebook - confirm it before re-enabling the line below.
# plt.hist(content['property_types']['histogram'])

**TODO:** Find n datapoints for each histogram.
- How is this count determined?
- What qualifies an example to be an observation in the histo?
    - Is it just all the data they have for a given market?

### Get Occupancy Rates
#### For each Airbnb listing, we calculate its occupancy rate, month per month, and an annual rate, and we offer our clients a 12-month historical performance for the occupancy rates. Market occupancy rates for a zip code or a neighborhood.

In [9]:
# SHORT TERM RENTALS
# Build Query URL - Get Airbnb Occupancy Rates

url_beginning = "https://mashvisor-api.p.rapidapi.com"
url_ending = "/airbnb-property/occupancy-rates"

url = url_beginning + url_ending

querystring = {
    "state": "TN",
    "zip_code": "37738",
}

# Request
# The timeout (seconds) stops the cell from hanging on a stalled connection.
# raise_for_status() reports HTTP error responses (4xx/5xx) here instead of
# as a KeyError on 'content' further down.

response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()
json_response = response.json()

# Kept in `content` because the plotting cell below reads from it.
content = json_response['content']

# Seems Like this is a Series of Histograms
# that display Occupancy Rates against Bedroom Count
print("TODO: Look at Other Keys")
list(content.keys())

TODO: Look at Other Keys


[u'occupancy_rates', u'detailed', u'sample_count']

In [10]:
# Mashvisor's description of the occupancy-rates endpoint:
#   For each Airbnb listing, we calculate its occupancy rate, month per month,
#   and an annual rate, and we offer our clients a 12-month historical
#   performance for the occupancy rates. Market occupancy rates for a zip code
#   or a neighborhood.
# Plot the distribution of the values stored for two-bedroom listings.
two_bedroom_values = content['detailed']['two_bedrooms_histogram']
plt.hist(two_bedroom_values)

(array([21.,  5.,  7.,  8.,  2.,  5.,  3.,  2.,  1.,  4.]),
 array([ 1. , 10.3, 19.6, 28.9, 38.2, 47.5, 56.8, 66.1, 75.4, 84.7, 94. ]),
 <a list of 10 Patch objects>)

- Again, find sizes of histo data.

### Get Airbnb Super Hosts
#### Obtain a list of all Airbnb market super hosts for a zip code or a city.

In [11]:
# SHORT TERM RENTALS
# Build Query URL - Get Airbnb Super Hosts

url_beginning = "https://mashvisor-api.p.rapidapi.com"
url_ending = "/airbnb-property/super-hosts"

url = url_beginning + url_ending

# Tennessee
querystring = {
    "state": "TN",
    "zip_code": "37738",
}

# Request
# The timeout (seconds) stops the cell from hanging on a stalled connection.
# raise_for_status() reports HTTP error responses (4xx/5xx) here instead of
# as a KeyError on 'content' further down.

response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()
json_response = response.json()

content = json_response['content']['super_hosts']

# Build Dataframe

df = pd.DataFrame(content)

# Format the message explicitly: print("label", value) prints a tuple when
# print is a statement (Python 2), while .format() reads the same on 2 and 3.
print("Shape: {}".format(df.shape))
print("Columns: {}".format(df.columns.tolist()))
df.head()

('Shape:', (10, 40))
('Columns:', Index([u'about', u'acceptance_rate', u'all_active_phone_numbers',
       u'created_at', u'first_name', u'friends_count', u'guidebooks_count',
       u'has_available_payout_info', u'has_profile_pic', u'id',
       u'identity_mt_verified', u'identity_v2_verified', u'identity_verified',
       u'is_generated_user', u'is_marketplace_cohost', u'is_superhost',
       u'is_trip_host', u'languages', u'listings_count', u'location',
       u'neighborhood', u'picture_large_url', u'picture_url',
       u'recent_recommendation', u'recent_review', u'recommendation_count',
       u'response_rate', u'response_time', u'reviewee_count', u'school',
       u'show_travel_for_work', u'signup_method', u'smart_name',
       u'thumbnail_medium_url', u'thumbnail_url', u'total_listings_count',
       u'user_flag', u'verification_labels', u'verifications', u'work'],
      dtype='object'))


Unnamed: 0,about,acceptance_rate,all_active_phone_numbers,created_at,first_name,friends_count,guidebooks_count,has_available_payout_info,has_profile_pic,id,...,show_travel_for_work,signup_method,smart_name,thumbnail_medium_url,thumbnail_url,total_listings_count,user_flag,verification_labels,verifications,work
0,Please feel free to ask any questions.,,[],2016-02-21T04:44:06Z,Michael,0,0.0,True,True,59668096,...,True,0,Michael,https://a0.muscache.com/im/pictures/user/84e48...,https://a0.muscache.com/im/pictures/user/84e48...,1,,"[Email address, Phone number, Reviewed, Govern...","[email, phone, reviews, offline_government_id,...",
1,,,[],2015-01-16T01:16:29Z,Vivian,0,1.0,True,True,26179760,...,False,0,Vivian,https://a0.muscache.com/im/pictures/user/a2ad9...,https://a0.muscache.com/im/pictures/user/a2ad9...,4,,"[Email address, Phone number, Reviewed, Govern...","[email, phone, reviews, jumio, government_id]",
2,,,,2016-08-09T20:23:14Z,Darron,0,,True,True,88972889,...,,1,Darron,https://a2.muscache.com/im/pictures/29dd07d2-8...,https://a2.muscache.com/im/pictures/29dd07d2-8...,2,,"[Email address, Phone number, Facebook, Review...","[email, phone, facebook, reviews, jumio, gover...",
3,"Splitting time between Austin, Texas and Tenne...",,[],2014-03-04T13:41:09Z,CP + Rachel,0,,True,True,12789161,...,False,0,CP + Rachel,https://a0.muscache.com/im/pictures/user/64b34...,https://a0.muscache.com/im/pictures/user/64b34...,15,,"[Email address, Phone number, Reviewed, Govern...","[email, phone, reviews, jumio, offline_governm...",
4,,0%,,2014-03-23T01:33:56Z,Charles,0,,,True,13423160,...,,0,,https://a2.muscache.com/im/users/13423160/prof...,https://a2.muscache.com/im/users/13423160/prof...,4,,"[Email address, Phone number, Google, LinkedIn...","[email, phone, google, linkedin, reviews, kba]",


- What qualifies someone for this?
- Why can't I find Cloud9 Cabins?
- How can I search and find these users on AirBnB?

In [12]:
# Find Rows with Least Nans
# Counts the missing values in each row; the smallest counts mark the most
# complete host records.
df.isnull().sum(axis='columns')

0     3
1     3
2     7
3     3
4    12
5    12
6     4
7     4
8     3
9     3
dtype: int64

In [13]:
# Show every field of the second row (position 1) as a Series.
df.iloc[1, :]

about                                                                         
acceptance_rate                                                            N/A
all_active_phone_numbers                                                    []
created_at                                                2015-01-16T01:16:29Z
first_name                                                              Vivian
friends_count                                                                0
guidebooks_count                                                             1
has_available_payout_info                                                 True
has_profile_pic                                                           True
id                                                                    26179760
identity_mt_verified                                                      True
identity_v2_verified                                                      True
identity_verified                                   

### Get Neighborhood Historical Performance
#### Get an Airbnb submarket (neighborhood) short term historical performance for its listings as an array
? -> Does this endpoint belong in the Short Term Rentals category? <- ?

In [14]:
# SHORT TERM RENTALS
# Build Query URL - Get Airbnb Historical Performance
# NOTE(review): this URL is identical to the one used by the
# "Get Airbnb Comparable Listings" cell further down - confirm against the
# Mashvisor docs that it is the intended historical-performance endpoint.

url_beginning = "https://mashvisor-api.p.rapidapi.com"
url_ending = "/neighborhood/{}/airbnb/details".format(tn_nb_id)

url = url_beginning + url_ending

# Tennessee
querystring = {
    "state": "TN",
    "id": tn_nb_id,
}

# Request
# The timeout (seconds) stops the cell from hanging on a stalled connection.
# raise_for_status() reports HTTP error responses (4xx/5xx) here instead of
# as a KeyError on 'content' further down.

response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()
json_response = response.json()

content = json_response['content']['properties']

# Build Dataframe

df = pd.DataFrame(content)

# Format the message explicitly: print("label", value) prints a tuple when
# print is a statement (Python 2), while .format() reads the same on 2 and 3.
print("Shape: {}".format(df.shape))
print("Columns: {}".format(df.columns.tolist()))
df.head()

('Shape:', (3, 41))
('Columns:', Index([u'address', u'airbnbCity', u'airbnbNeighborhood',
       u'airbnbNeighborhoodId', u'airbnbZIP', u'amenities',
       u'capacityOfPeople', u'cleaningFeeNative', u'createdAt', u'id',
       u'image', u'lat', u'lon', u'monthlyPrice', u'name', u'neighborhood',
       u'nightPrice', u'nightPriceـnative', u'nightRate', u'nightsBooked',
       u'numOfBaths', u'numOfBeds', u'numOfRooms', u'occupancy', u'propertyId',
       u'propertyType', u'property_id', u'rentalIncome', u'rental_income',
       u'reviews', u'reviewsCount', u'roomType', u'roomTypeCategory',
       u'source', u'startRating', u'state', u'status', u'updatedAt', u'url',
       u'weeklyPrice', u'zip'],
      dtype='object'))


Unnamed: 0,address,airbnbCity,airbnbNeighborhood,airbnbNeighborhoodId,airbnbZIP,amenities,capacityOfPeople,cleaningFeeNative,createdAt,id,...,roomType,roomTypeCategory,source,startRating,state,status,updatedAt,url,weeklyPrice,zip
0,"Gatlinburg, TN, United States",Gatlinburg,,18258,37738,,14,190,2021-01-05T16:05:00.000Z,25506065,...,Entire home/apt,entire_home,Airbnb,5,TN,ACTIVE,2021-01-05T16:05:00.000Z,,0,37738
1,"Gatlinburg, TN, United States",Gatlinburg,,18258,37738,,6,85,2021-01-05T16:05:51.000Z,25506217,...,Entire home/apt,entire_home,Airbnb,5,TN,ACTIVE,2021-01-05T16:05:51.000Z,,0,37738
2,"Gatlinburg, TN, United States",Gatlinburg,,18258,37738,,6,90,2021-01-05T16:05:19.000Z,25506122,...,Entire home/apt,entire_home,Airbnb,5,TN,ACTIVE,2021-01-05T16:05:19.000Z,,0,37738


- What is the limit on _this_ data?

### Get Airbnb Newly Listed Homes
#### List all Airbnb homes that are recently listed for a specific location: city, or a zip code.

In [15]:
# SHORT TERM RENTALS
# Build Query URL - Get Airbnb Newly Listed Homes

url_beginning = "https://mashvisor-api.p.rapidapi.com"
url_ending = "/airbnb-property/newly-listed"

url = url_beginning + url_ending

# Tennessee
querystring = {
    "state": "TN",
    "zip_code": "37738",
}

# Request
# The timeout (seconds) stops the cell from hanging on a stalled connection.
# raise_for_status() reports HTTP error responses (4xx/5xx) here instead of
# as a KeyError on 'content' further down.

response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()
json_response = response.json()

content = json_response['content']['list']

# Build Dataframe

df = pd.DataFrame(content)

# Format the message explicitly: print("label", value) prints a tuple when
# print is a statement (Python 2), while .format() reads the same on 2 and 3.
print("Shape: {}".format(df.shape))
print("Columns: {}".format(df.columns.tolist()))
df.head()

('Shape:', (10, 104))
('Columns:', Index([u'access', u'address', u'airbnb_id', u'amenities', u'bathrooms',
       u'bed_type', u'bed_type_category', u'bedrooms', u'beds',
       u'cancel_policy',
       ...
       u'thumbnail_url', u'time_zone_name', u'toto_opt_in', u'transit',
       u'user_id', u'weekly_price_factor', u'weekly_price_native',
       u'wireless_info', u'xl_picture_url', u'zipcode'],
      dtype='object', length=104))


Unnamed: 0,access,address,airbnb_id,amenities,bathrooms,bed_type,bed_type_category,bedrooms,beds,cancel_policy,...,thumbnail_url,time_zone_name,toto_opt_in,transit,user_id,weekly_price_factor,weekly_price_native,wireless_info,xl_picture_url,zipcode
0,,"Gatlinburg, TN, United States",47433118,"[TV, Wifi, Air conditioning, Pool, Kitchen, Fr...",3.0,Real Bed,real_bed,3,3,3,...,https://a0.muscache.com/im/pictures/e67ab20c-d...,America/New_York,,,362047291,,,,https://a0.muscache.com/im/pictures/e67ab20c-d...,37738
1,,"Gatlinburg, TN, United States",45398081,"[TV, Cable TV, Wifi, Air conditioning, Kitchen...",4.0,Real Bed,real_bed,5,5,4,...,https://a0.muscache.com/im/pictures/a7ab732f-c...,America/New_York,,,58616802,,,,https://a0.muscache.com/im/pictures/a7ab732f-c...,37738
2,,"Gatlinburg, TN, United States",45487662,"[TV, Cable TV, Wifi, Air conditioning, Pool, K...",2.0,Real Bed,real_bed,2,2,6,...,https://a0.muscache.com/im/pictures/e678f00e-8...,America/New_York,,,368084741,,,,https://a0.muscache.com/im/pictures/e678f00e-8...,37738
3,,"Gatlinburg, TN, United States",45487646,"[TV, Cable TV, Wifi, Air conditioning, Pool, K...",2.0,Real Bed,real_bed,2,2,6,...,https://a0.muscache.com/im/pictures/cee54fab-9...,America/New_York,,,368084741,,,,https://a0.muscache.com/im/pictures/cee54fab-9...,37738
4,,"Gatlinburg, TN, United States",45487648,"[TV, Cable TV, Wifi, Air conditioning, Pool, K...",3.0,Real Bed,real_bed,3,4,6,...,https://a0.muscache.com/im/pictures/35d1b1cc-0...,America/New_York,,,368084741,,,,https://a0.muscache.com/im/pictures/35d1b1cc-0...,37738


- What is the limit on _this_ data?

## Trends

### Get Top Airbnb Cities
#### Top Airbnb Cities: this endpoint retrieves the cities with the highest occupancy rates, along with their total active Airbnb listings, in a specific state.

In [16]:
# TRENDS
# Build Query URL - Airbnb Top Cities

url_beginning = "https://mashvisor-api.p.rapidapi.com"
url_ending = "/trends/cities"

url = url_beginning + url_ending

querystring = {
    "state": "TN",
}

# Request
# The timeout (seconds) stops the cell from hanging on a stalled connection.
# raise_for_status() reports HTTP error responses (4xx/5xx) here instead of
# as a KeyError on 'content' further down.

response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()
json_response = response.json()
content = json_response['content']['cities']

# Build Dataframe

df = pd.DataFrame(content)

# Format the message explicitly: print("label", value) prints a tuple when
# print is a statement (Python 2), while .format() reads the same on 2 and 3.
print("Shape: {}".format(df.shape))
print("Columns: {}".format(df.columns.tolist()))
df.head()

('Shape:', (5, 5))
('Columns:', Index([u'city', u'occ_listing', u'occupancy', u'state', u'total_listing'], dtype='object'))


Unnamed: 0,city,occ_listing,occupancy,state,total_listing
0,Nashville,258682.660898,49.823317,TN,5192
1,Chattanooga,111890.991495,66.522587,TN,1682
2,Knoxville,28231.181569,68.689006,TN,411
3,Memphis,24808.749609,61.560173,TN,403
4,Sugar Mountain,23804.9934,59.9622,TN,397


- What does OCC stand for (as in the `occ_listing` column)?

## Rental Rates

### Get AirBnb Rental Rates
#### This endpoint retrieves rental income rates, for Airbnb or traditional rentals, for a city, zip code, or neighborhood. You can fetch short-term (Airbnb) or long-term rental rates, calculated based on the location's Airbnb occupancy rates.

In [17]:
# RENTAL RATES
# Build Query URL - Get AirBnb Rental Rates

url_beginning = "https://mashvisor-api.p.rapidapi.com"
url_ending = "/rental-rates"

url = url_beginning + url_ending

querystring = {
    "source": 'airbnb',
    "state": "TN",
    "zip_code": "37738",
}

# Request
# The timeout (seconds) stops the cell from hanging on a stalled connection.
# raise_for_status() reports HTTP error responses (4xx/5xx) here instead of
# as a KeyError on 'content' further down.

response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()
json_response = response.json()

# Build Dataframe

df = pd.DataFrame(json_response['content']['detailed'])

# Format the message explicitly: print("label", value) prints a tuple when
# print is a statement (Python 2), while .format() reads the same on 2 and 3.
print("Shape: {}".format(df.shape))
print("Columns: {}".format(df.columns.tolist()))
df.head()

('Shape:', (12, 13))
('Columns:', Index([u'adjusted_rental_income', u'avg', u'beds', u'city', u'count', u'max',
       u'median', u'median_night_rate', u'median_occupancy', u'min',
       u'neighborhood', u'state', u'zipcode'],
      dtype='object'))


Unnamed: 0,adjusted_rental_income,avg,beds,city,count,max,median,median_night_rate,median_occupancy,min,neighborhood,state,zipcode
0,2701.101389,2638.833333,0,,6,4071,2517.0,133.0,62.0,1842,,TN,37738
1,2068.941667,2231.7,1,,20,5539,2346.0,124.0,51.0,455,,TN,37738
2,1190.052083,1471.482759,2,,58,4777,1215.0,155.0,25.5,81,,TN,37738
3,1903.475,2368.195122,3,,41,10099,1772.0,242.0,27.0,0,,TN,37738
4,2615.985417,4376.125,4,,24,12136,3011.5,390.5,24.0,90,,TN,37738


- Where does income data come from?

## Investment Analysis

### Get Airbnb Comparable Listings
#### This endpoint retrieves the Airbnb neighborhood's listing data set in Mashvisor database with similarity and distance regarding the target MLS property.

? -> What is the Target MLS Property? <- ?

In [18]:
# INVESTMENT ANALYSIS
# Build Query URL - Get Airbnb Comparable Listings

url_beginning = "https://mashvisor-api.p.rapidapi.com"
url_ending = "/neighborhood/{}/airbnb/details".format(tn_nb_id)

url = url_beginning + url_ending

# Tennessee
querystring = {
    "state": "TN",
    "id": tn_nb_id,
}

# Request
# The timeout (seconds) stops the cell from hanging on a stalled connection.
# raise_for_status() reports HTTP error responses (4xx/5xx) here instead of
# as a KeyError on 'content' further down.

response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()
json_response = response.json()

content = json_response['content']['properties']

# Build Dataframe

df = pd.DataFrame(content)

# Format the message explicitly: print("label", value) prints a tuple when
# print is a statement (Python 2), while .format() reads the same on 2 and 3.
print("Shape: {}".format(df.shape))
print("Columns: {}".format(df.columns.tolist()))
df.head()

('Shape:', (3, 41))
('Columns:', Index([u'address', u'airbnbCity', u'airbnbNeighborhood',
       u'airbnbNeighborhoodId', u'airbnbZIP', u'amenities',
       u'capacityOfPeople', u'cleaningFeeNative', u'createdAt', u'id',
       u'image', u'lat', u'lon', u'monthlyPrice', u'name', u'neighborhood',
       u'nightPrice', u'nightPriceـnative', u'nightRate', u'nightsBooked',
       u'numOfBaths', u'numOfBeds', u'numOfRooms', u'occupancy', u'propertyId',
       u'propertyType', u'property_id', u'rentalIncome', u'rental_income',
       u'reviews', u'reviewsCount', u'roomType', u'roomTypeCategory',
       u'source', u'startRating', u'state', u'status', u'updatedAt', u'url',
       u'weeklyPrice', u'zip'],
      dtype='object'))


Unnamed: 0,address,airbnbCity,airbnbNeighborhood,airbnbNeighborhoodId,airbnbZIP,amenities,capacityOfPeople,cleaningFeeNative,createdAt,id,...,roomType,roomTypeCategory,source,startRating,state,status,updatedAt,url,weeklyPrice,zip
0,"Gatlinburg, TN, United States",Gatlinburg,,18258,37738,,14,190,2021-01-05T16:05:00.000Z,25506065,...,Entire home/apt,entire_home,Airbnb,5,TN,ACTIVE,2021-01-05T16:05:00.000Z,,0,37738
1,"Gatlinburg, TN, United States",Gatlinburg,,18258,37738,,6,85,2021-01-05T16:05:51.000Z,25506217,...,Entire home/apt,entire_home,Airbnb,5,TN,ACTIVE,2021-01-05T16:05:51.000Z,,0,37738
2,"Gatlinburg, TN, United States",Gatlinburg,,18258,37738,,6,90,2021-01-05T16:05:19.000Z,25506122,...,Entire home/apt,entire_home,Airbnb,5,TN,ACTIVE,2021-01-05T16:05:19.000Z,,0,37738


- ? -> What is the Target MLS Property? <- ?

In [19]:
# 'num_of_properties' as reported in the content of the last response.
json_response["content"]["num_of_properties"]

219

In [20]:
# 'avg_occupancy' as reported in the content of the last response.
json_response["content"]["avg_occupancy"]

41.5114