In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

In [2]:
df = pd.read_csv("./data/calendar.csv", low_memory=False)

In [3]:
len(df.listing_id.unique())

41519

In [4]:
df.head()

Unnamed: 0,listing_id,date,available,price,adjusted_price,minimum_nights,maximum_nights
0,5136,2022-12-04,f,$275.00,$275.00,21.0,1125.0
1,2595,2022-12-05,t,$175.00,$175.00,30.0,1125.0
2,2595,2022-12-06,t,$175.00,$175.00,30.0,1125.0
3,2595,2022-12-07,t,$175.00,$175.00,30.0,1125.0
4,2595,2022-12-08,t,$175.00,$175.00,30.0,1125.0


In [5]:
df['date'] = pd.to_datetime(df['date'])


In [6]:
available_listings = df[df['available'] == 't']


In [9]:
available_days = available_listings.groupby('listing_id')['date'].nunique().reset_index()

In [11]:
available_days.rename(columns={'date': 'available_days'}, inplace=True)

In [7]:
available_days.head()

Unnamed: 0,listing_id,available_days
0,2595,365
1,5121,322
2,5136,267
3,5178,79
4,5803,163


In [12]:
total_days = df.groupby('listing_id')['date'].nunique().reset_index()
total_days.rename(columns={'date': 'total_days'}, inplace=True)

In [16]:
occupancy = pd.merge(total_days, available_days, on='listing_id')

In [19]:
occupancy['occupancy_rate'] = ((occupancy['total_days'] - occupancy['available_days']) / occupancy['total_days']) * 100

In [46]:
df = pd.read_csv("./data/listings_eda", low_memory=False)

In [47]:
df.columns

Index(['id', 'name', 'borough', 'neighbourhood', 'room_type_encoded', 'price',
       'bathrooms', 'accommodates', 'bedrooms', 'beds', 'minimum_nights',
       'availability_365'],
      dtype='object')

In [48]:
df_avg_price = df.groupby(['neighbourhood', 'beds'])['price'].mean().reset_index()

df2 = pd.merge(df, df_avg_price, on=['neighbourhood', 'beds'], how='left')
df2.rename(columns={'price_y': 'price_naive', 'price_x':'price'}, inplace=True)


In [49]:
df2.head()

Unnamed: 0,id,name,borough,neighbourhood,room_type_encoded,price,bathrooms,accommodates,bedrooms,beds,minimum_nights,availability_365,price_naive
0,5136,"Spacious Brooklyn Duplex, Patio + Garden",Brooklyn,Sunset Park,0,275,1.0,4,2.0,2.0,21,267,161.134328
1,5203,Cozy Clean Guest Room - Family Apt,Manhattan,Upper West Side,1,75,1.0,1,1.0,1.0,2,0,162.774818
2,5121,BlissArtsSpace!,Brooklyn,Bedford-Stuyvesant,1,60,1.0,2,1.0,1.0,30,322,95.148779
3,5178,Large Furnished Room Near B'way,Manhattan,Midtown,1,68,1.0,2,1.0,1.0,2,79,303.371162
4,6872,Uptown Sanctuary w/ Private Bath (Month to Month),Manhattan,East Harlem,1,65,1.0,1,1.0,1.0,30,300,112.69379


In [50]:
merged_data = pd.merge(occupancy[['occupancy_rate','listing_id']], df2, left_on='listing_id', right_on='id')


In [51]:
merged_data.shape

(25105, 15)

In [52]:
merged_data.head()

Unnamed: 0,occupancy_rate,listing_id,id,name,borough,neighbourhood,room_type_encoded,price,bathrooms,accommodates,bedrooms,beds,minimum_nights,availability_365,price_naive
0,11.780822,5121,5121,BlissArtsSpace!,Brooklyn,Bedford-Stuyvesant,1,60,1.0,2,1.0,1.0,30,322,95.148779
1,26.849315,5136,5136,"Spacious Brooklyn Duplex, Patio + Garden",Brooklyn,Sunset Park,0,275,1.0,4,2.0,2.0,21,267,161.134328
2,78.356164,5178,5178,Large Furnished Room Near B'way,Manhattan,Midtown,1,68,1.0,2,1.0,1.0,2,79,303.371162
3,55.342466,5803,5803,"Lovely Room 1 in BEST AREA; Legal Rental, Spot...",Brooklyn,South Slope,1,124,1.0,2,1.0,1.0,4,163,131.292683
4,17.808219,6872,6872,Uptown Sanctuary w/ Private Bath (Month to Month),Manhattan,East Harlem,1,65,1.0,1,1.0,1.0,30,300,112.69379


In [53]:
merged_data.columns

Index(['occupancy_rate', 'listing_id', 'id', 'name', 'borough',
       'neighbourhood', 'room_type_encoded', 'price', 'bathrooms',
       'accommodates', 'bedrooms', 'beds', 'minimum_nights',
       'availability_365', 'price_naive'],
      dtype='object')

In [54]:
merged_data = merged_data.drop(['listing_id','id','neighbourhood','borough', 'name'], axis=1)

In [55]:
merged_data.head()

Unnamed: 0,occupancy_rate,name,room_type_encoded,price,bathrooms,accommodates,bedrooms,beds,minimum_nights,availability_365,price_naive
0,11.780822,BlissArtsSpace!,1,60,1.0,2,1.0,1.0,30,322,95.148779
1,26.849315,"Spacious Brooklyn Duplex, Patio + Garden",0,275,1.0,4,2.0,2.0,21,267,161.134328
2,78.356164,Large Furnished Room Near B'way,1,68,1.0,2,1.0,1.0,2,79,303.371162
3,55.342466,"Lovely Room 1 in BEST AREA; Legal Rental, Spot...",1,124,1.0,2,1.0,1.0,4,163,131.292683
4,17.808219,Uptown Sanctuary w/ Private Bath (Month to Month),1,65,1.0,1,1.0,1.0,30,300,112.69379
