In [2]:
import pandas as pd
import numpy as np
from datetime import datetime

In [48]:
def get_booked_listings(prev_listings_path, prev_calendar_path, post_listings_path, post_calendar_path, output_path):
    """Write the listing/date rows whose availability went from 't' to 'f' between two snapshots.

    Each snapshot is a listings CSV joined to its calendar CSV
    (listings ``id`` == calendar ``listing_id``). The two joined snapshots are
    then matched on ``id`` and ``date``; every other column is kept twice, with
    ``_prev`` / ``_post`` suffixes. Only rows where ``available_prev`` is ``'t'``
    and ``available_post`` is ``'f'`` are kept -- presumably dates that were
    booked between the two scrapes.

    Parameters
    ----------
    prev_listings_path, prev_calendar_path :
        CSV paths of the earlier snapshot's listings and calendar files.
    post_listings_path, post_calendar_path :
        CSV paths of the later snapshot's listings and calendar files.
    output_path :
        Destination CSV path; written without the index column.

    Returns
    -------
    None
        The result is only written to ``output_path``.
    """
    source_paths = (prev_listings_path, prev_calendar_path, post_listings_path, post_calendar_path)
    listings_before, calendar_before, listings_after, calendar_after = [
        pd.read_csv(csv_path) for csv_path in source_paths
    ]

    # These columns are removed from the listings side before joining
    # (presumably so they do not clash with calendar columns -- TODO confirm).
    listing_only_columns = ["price", "minimum_nights", "last_review"]
    listings_before = listings_before.drop(columns=listing_only_columns)
    listings_after = listings_after.drop(columns=listing_only_columns)

    # Attach every calendar row to its listing (default inner join).
    snapshot_before = listings_before.merge(calendar_before, left_on="id", right_on="listing_id")
    snapshot_after = listings_after.merge(calendar_after, left_on="id", right_on="listing_id")

    # `listing_id` duplicates `id` after the join.
    snapshot_before = snapshot_before.drop(columns=["listing_id"])
    snapshot_after = snapshot_after.drop(columns=["listing_id"])

    # Pair up the same listing/date across both snapshots.
    paired = snapshot_before.merge(snapshot_after, on=["id", "date"], suffixes=("_prev", "_post"))

    open_before = paired["available_prev"].eq("t")
    closed_after = paired["available_post"].eq("f")
    paired.loc[open_before & closed_after].to_csv(output_path, index=False)

In [49]:
# Compare the July and August exports and write the resulting rows to a single CSV.
get_booked_listings(
    prev_listings_path="july_listings.csv",
    prev_calendar_path="july_calendar.csv",
    post_listings_path="aug_listings.csv",
    post_calendar_path="aug_calendar.csv",
    output_path="boston_jul_aug_bookings.csv",
)

In [55]:
# Load the calendar CSV from the raw-data folder; the next cells inspect its `date` column.
t = pd.read_csv("raw_boston_data/jul_calendar.csv")

In [64]:
# Parse the `date` column into datetimes so it can be compared against datetime objects.
t["date"] = pd.to_datetime(t["date"])

In [66]:
# Display the parsed date column.
t["date"]

0         2019-07-14
1         2019-07-15
2         2019-07-16
3         2019-07-17
4         2019-07-18
             ...    
2286355   2020-07-08
2286356   2020-07-09
2286357   2020-07-10
2286358   2020-07-11
2286359   2020-07-12
Name: date, Length: 2286360, dtype: datetime64[ns]

In [65]:
# Boolean mask: True where the calendar date is on or before 2019-07-15.
t["date"].le(datetime(2019, 7, 15))

0           True
1           True
2          False
3          False
4          False
           ...  
2286355    False
2286356    False
2286357    False
2286358    False
2286359    False
Name: date, Length: 2286360, dtype: bool

In [2]:
# Load the reservations table; the cells below look at its id / reservation_period / date columns and its row count.
reservations = pd.read_csv("boston_reservations.csv")

In [5]:
# Preview the first 20 rows, restricted to the three columns of interest.
reservations[["id", "reservation_period", "date"]].head(20)

Unnamed: 0,id,reservation_period,date
0,3781,jan_2019_feb_2019,2019-02-28
1,3781,jan_2019_feb_2019,2019-03-01
2,3781,jan_2019_feb_2019,2019-03-02
3,3781,jan_2019_feb_2019,2019-03-03
4,3781,jan_2019_feb_2019,2019-03-04
5,3781,jan_2019_feb_2019,2019-03-05
6,3781,jan_2019_feb_2019,2019-03-06
7,3781,jan_2019_feb_2019,2019-03-07
8,3781,jan_2019_feb_2019,2019-03-08
9,3781,jan_2019_feb_2019,2019-03-09


In [9]:
# Count the reservation rows dated on or after 2020-01-01.
# `datetime` comes from the notebook's import cell, so it is not re-imported here.
# Series.sum() counts the True values in one vectorized pass, where builtin sum()
# would iterate the Series element by element; int() keeps the displayed value a plain integer.
int((pd.to_datetime(reservations.date) >= datetime(2020, 1, 1)).sum())

185706

In [10]:
# Total number of reservation rows, for comparison with the count above.
len(reservations)

896298

In [3]:
# Load the cutoff dataset; the cells below inspect its columns and first rows.
a = pd.read_csv("boston_july_cutoff_data.csv")

In [5]:
# List every column name of the loaded frame.
a.columns

Index(['index', 'id', 'name_prev', 'host_id_prev', 'host_name_prev',
       'neighbourhood_group_prev', 'neighbourhood_prev', 'latitude_prev',
       'longitude_prev', 'room_type_prev', 'number_of_reviews_prev',
       'reviews_per_month_prev', 'calculated_host_listings_count_prev',
       'availability_365_prev', 'date', 'available_prev', 'price_prev',
       'adjusted_price_prev', 'minimum_nights_prev', 'maximum_nights_prev',
       'name_post', 'host_id_post', 'host_name_post',
       'neighbourhood_group_post', 'neighbourhood_post', 'latitude_post',
       'longitude_post', 'room_type_post', 'number_of_reviews_post',
       'reviews_per_month_post', 'calculated_host_listings_count_post',
       'availability_365_post', 'available_post', 'price_post',
       'adjusted_price_post', 'minimum_nights_post', 'maximum_nights_post',
       'reservation_period'],
      dtype='object')

In [8]:
# Show the date next to the reservation_period label for the first five rows.
a[["date", "reservation_period"]].head()

Unnamed: 0,date,reservation_period
0,2019-02-28,jan_2019_feb_2019
1,2019-03-01,jan_2019_feb_2019
2,2019-03-02,jan_2019_feb_2019
3,2019-03-03,jan_2019_feb_2019
4,2019-03-04,jan_2019_feb_2019


In [7]:
# Preview the first rows across all columns.
a.head()

Unnamed: 0,index,id,name_prev,host_id_prev,host_name_prev,neighbourhood_group_prev,neighbourhood_prev,latitude_prev,longitude_prev,room_type_prev,...,number_of_reviews_post,reviews_per_month_post,calculated_host_listings_count_post,availability_365_post,available_post,price_post,adjusted_price_post,minimum_nights_post,maximum_nights_post,reservation_period
0,19,3781,HARBORSIDE-Walk to subway,4804,Frank,,East Boston,42.365241,-71.029361,Entire home/apt,...,14,0.32,1,122,f,$125.00,$125.00,30.0,1125.0,jan_2019_feb_2019
1,20,3781,HARBORSIDE-Walk to subway,4804,Frank,,East Boston,42.365241,-71.029361,Entire home/apt,...,14,0.32,1,122,f,$150.00,$150.00,30.0,1125.0,jan_2019_feb_2019
2,21,3781,HARBORSIDE-Walk to subway,4804,Frank,,East Boston,42.365241,-71.029361,Entire home/apt,...,14,0.32,1,122,f,$150.00,$150.00,30.0,1125.0,jan_2019_feb_2019
3,22,3781,HARBORSIDE-Walk to subway,4804,Frank,,East Boston,42.365241,-71.029361,Entire home/apt,...,14,0.32,1,122,f,$125.00,$125.00,30.0,1125.0,jan_2019_feb_2019
4,23,3781,HARBORSIDE-Walk to subway,4804,Frank,,East Boston,42.365241,-71.029361,Entire home/apt,...,14,0.32,1,122,f,$125.00,$125.00,30.0,1125.0,jan_2019_feb_2019
