In [1]:
# Import dependencies
import pandas as pd
import numpy as np

# Turn off warning messages
# NOTE(review): with no category/module argument this suppresses every warning for the
# rest of the session, including pandas diagnostics such as SettingWithCopyWarning and
# FutureWarning — consider narrowing the filter so real problems stay visible.
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the two input tables from the local Resources folder:
#   calendar         - per-listing, per-date availability and price
#   listings_cleaned - one row per listing with its attributes
calendar_path = './Resources/calendar.csv'
listings_path = './Resources/listings_cleaned.csv'

calendar = pd.read_csv(calendar_path)
listings_cleaned = pd.read_csv(listings_path)

In [3]:
# Preview the first rows of the calendar table (listing_id, date, available, price)
calendar.head()

Unnamed: 0,listing_id,date,available,price
0,241032,2016-01-04,t,$85.00
1,241032,2016-01-05,t,$85.00
2,241032,2016-01-06,f,
3,241032,2016-01-07,f,
4,241032,2016-01-08,f,


In [4]:
# Preview the first two rows of listings_cleaned to check the loaded columns
listings_cleaned.head(2)

Unnamed: 0,id,name,host_id,street,neighbourhood,zipcode,property_type,room_type,accommodates,bathrooms,...,cleaning_fee,guests_included,extra_people,minimum_nights,maximum_nights,instant_bookable,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,avg_availability
0,241032,Stylish Queen Anne Apartment,956883,"Gilman Dr W, Seattle, WA 98119, United States",Queen Anne,98119,Apartment,Entire home/apt,4,1.0,...,-1.0,2,5.0,1,365,True,moderate,True,True,0.721709
1,953595,Bright & Airy Queen Anne Apartment,5177328,"7th Avenue West, Seattle, WA 98119, United States",Queen Anne,98119,Apartment,Entire home/apt,4,1.0,...,40.0,1,0.0,2,90,True,strict,True,True,0.40626


In [5]:
# Summarise listings_cleaned: row count, per-column non-null counts and dtypes
listings_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3818 entries, 0 to 3817
Data columns (total 28 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   id                                3818 non-null   int64  
 1   name                              3818 non-null   object 
 2   host_id                           3818 non-null   int64  
 3   street                            3818 non-null   object 
 4   neighbourhood                     3402 non-null   object 
 5   zipcode                           3818 non-null   int64  
 6   property_type                     3817 non-null   object 
 7   room_type                         3818 non-null   object 
 8   accommodates                      3818 non-null   int64  
 9   bathrooms                         3818 non-null   float64
 10  bedrooms                          3818 non-null   int64  
 11  beds                              3818 non-null   int64  
 12  bed_ty

In [6]:
# Keep only the listing columns that are used in the further analysis:
# the listing id (join key) plus the two attributes prices are later grouped by.
analysis_columns = ['id', 'neighbourhood', 'property_type']
listing_dataset = listings_cleaned.loc[:, analysis_columns]
listing_dataset.head()

Unnamed: 0,id,neighbourhood,property_type
0,241032,Queen Anne,Apartment
1,953595,Queen Anne,Apartment
2,3308979,Queen Anne,House
3,7421966,Queen Anne,Apartment
4,278830,Queen Anne,House


In [7]:
# Keep only the calendar rows that carry a price (in the preview above, the dates that
# are not available have an empty price).
has_price = calendar['price'].notnull()

# .copy() gives available_df its own data: later cells add and overwrite columns on it,
# and doing that on a filtered slice of `calendar` is chained assignment
# (SettingWithCopyWarning in pandas versions without Copy-on-Write).
available_df = calendar[has_price].copy()

# Label the preserved original row numbers so the index is self-describing.
available_df.index.names = ['calendar_no']
available_df.head()

Unnamed: 0_level_0,listing_id,date,available,price
calendar_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,241032,2016-01-04,t,$85.00
1,241032,2016-01-05,t,$85.00
9,241032,2016-01-13,t,$85.00
10,241032,2016-01-14,t,$85.00
14,241032,2016-01-18,t,$85.00


In [8]:
# Change the data type of the "date" column to datetime and derive "year" and "month".
# assign() builds a new frame instead of writing columns onto a filtered slice, so no
# chained-assignment warning is triggered and the cell can be re-run safely.
# The keyword arguments are evaluated in order, so the year/month lambdas already see
# the converted datetime column.
available_df = available_df.assign(
    date=lambda df: pd.to_datetime(df['date']),
    year=lambda df: df['date'].dt.year,
    month=lambda df: df['date'].dt.month,
)
available_df.head()

Unnamed: 0_level_0,listing_id,date,available,price,year,month
calendar_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,241032,2016-01-04,t,$85.00,2016,1
1,241032,2016-01-05,t,$85.00,2016,1
9,241032,2016-01-13,t,$85.00,2016,1
10,241032,2016-01-14,t,$85.00,2016,1
14,241032,2016-01-18,t,$85.00,2016,1


In [9]:
# Summarise available_df: confirms date is datetime64 and year/month were added;
# price is still an object (string) column here
available_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 934542 entries, 0 to 1393213
Data columns (total 6 columns):
 #   Column      Non-Null Count   Dtype         
---  ------      --------------   -----         
 0   listing_id  934542 non-null  int64         
 1   date        934542 non-null  datetime64[ns]
 2   available   934542 non-null  object        
 3   price       934542 non-null  object        
 4   year        934542 non-null  int32         
 5   month       934542 non-null  int32         
dtypes: datetime64[ns](1), int32(2), int64(1), object(2)
memory usage: 42.8+ MB


In [10]:
# Extract the price column and convert its strings (e.g. "$85.00") to float64.
price = available_df['price']

# Strip the currency symbol and thousands separators, then cast.
# regex=False states explicitly that "$" and "," are literal characters; the default
# for `regex` differs between pandas versions and "$" is an end-of-string anchor when
# interpreted as a regular expression.
# The conversion is done once (the original evaluated the whole chain twice and
# discarded the first result).
price_cleaned = (
    price
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype('float64')
)

# Replace the string column with the numeric one on a new frame rather than writing
# into a possibly-sliced DataFrame.
available_df = available_df.assign(price=price_cleaned)

In [11]:
# Check the column dtypes after cleaning (price should now be float64)
available_df.dtypes

listing_id             int64
date          datetime64[ns]
available             object
price                float64
year                   int32
month                  int32
dtype: object

In [12]:
# Restrict the priced calendar rows to calendar year 2016 and preview the result
is_2016 = available_df['year'].eq(2016)
oneyear_df = available_df.loc[is_2016]
oneyear_df.head()

Unnamed: 0_level_0,listing_id,date,available,price,year,month
calendar_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,241032,2016-01-04,t,85.0,2016,1
1,241032,2016-01-05,t,85.0,2016,1
9,241032,2016-01-13,t,85.0,2016,1
10,241032,2016-01-14,t,85.0,2016,1
14,241032,2016-01-18,t,85.0,2016,1


In [13]:
# Monthly average price per listing: one value for every (listing_id, month) pair
monthly_keys = ['listing_id', 'month']
grouped = oneyear_df.groupby(monthly_keys)['price'].mean()

# Wrap the Series in a single-column DataFrame (still indexed by listing_id, month)
grouped_df = grouped.to_frame()
grouped_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,price
listing_id,month,Unnamed: 2_level_1
3335,2,120.0
3335,3,120.0
3335,4,120.0
3335,5,120.0
3335,6,120.0


In [14]:
# Save CSV
# Flatten the (listing_id, month) index into ordinary columns. The frame is rebuilt from
# the `grouped` Series instead of being reset in place, so re-running this cell always
# produces the same three columns; a repeated in-place reset would push the RangeIndex
# into a stray "index" column and write it to the file.
grouped_df = grouped.to_frame().reset_index()

# to_csv keeps its default index=True, so the row number is written as a leading
# unnamed column, exactly as before.
grouped_df.to_csv('./Resources/calendar_average pricing.csv')

grouped_df.head()

Unnamed: 0,listing_id,month,price
0,3335,2,120.0
1,3335,3,120.0
2,3335,4,120.0
3,3335,5,120.0
4,3335,6,120.0


In [15]:
# Part 2: average price by property type and neighbourhood

In [16]:
# Attach neighbourhood and property_type to every priced 2016 calendar row.
# merge() defaults to an inner join, so calendar rows whose listing_id has no matching
# listing id are dropped.
merge_df = oneyear_df.merge(listing_dataset, left_on="listing_id", right_on="id")

# Keep only the columns needed for the pricing breakdown
kept_columns = ['listing_id', 'price', 'month', 'neighbourhood', 'property_type']
cleaned_merge_df = merge_df[kept_columns]
cleaned_merge_df.head()

Unnamed: 0,listing_id,price,month,neighbourhood,property_type
0,241032,85.0,1,Queen Anne,Apartment
1,241032,85.0,1,Queen Anne,Apartment
2,241032,85.0,1,Queen Anne,Apartment
3,241032,85.0,1,Queen Anne,Apartment
4,241032,85.0,1,Queen Anne,Apartment


In [17]:
# Average price for every (property_type, neighbourhood) combination, rounded to one
# decimal place. groupby() leaves out rows whose key is NaN by default, so listings
# without a neighbourhood or property_type do not appear in the result.
property_keys = ['property_type', 'neighbourhood']
grouped_property = (
    cleaned_merge_df
    .groupby(property_keys)['price']
    .mean()
    .round(1)
)

# Single-column DataFrame for clearer display
grouped_property_df = grouped_property.to_frame()
grouped_property_df

Unnamed: 0_level_0,Unnamed: 1_level_0,price
property_type,neighbourhood,Unnamed: 2_level_1
Apartment,Alki,163.9
Apartment,Atlantic,89.4
Apartment,Ballard,119.8
Apartment,Belltown,206.6
Apartment,Bitter Lake,82.7
...,...,...
Townhouse,University District,79.5
Townhouse,Wedgewood,95.7
Treehouse,Dunlap,48.0
Treehouse,Montlake,200.0


In [18]:
# Save the average price per property type and neighbourhood; the index levels
# (property_type, neighbourhood) are written as the leading columns of the CSV
grouped_property_df.to_csv('./Resources/property_avg_pricing.csv')