In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd

from shapely.geometry import Point, Polygon
from datetime import datetime
from colour import Color

ModuleNotFoundError: No module named 'geopandas'

In [None]:
# Load the listings table (prepped) and the calendar table (raw) from CSV
lis_csv_path = './data/prepped/lis.csv'
cal_csv_path = './data/raw/calendar.csv'
lis = pd.read_csv(lis_csv_path)
cal = pd.read_csv(cal_csv_path)

In [None]:
# Preview the first rows of the calendar table as loaded
cal.head()

In [None]:
# Encode availability as an integer flag: 1 where `available` is 't', otherwise 0
cal['is_available'] = cal['available'].eq('t').astype(int)

In [None]:
# Parse the `date` strings (YYYY-MM-DD) into a datetime column.
# pd.to_datetime converts the whole column in one vectorised call instead of
# invoking datetime.strptime once per row through .apply.
cal['datetime'] = pd.to_datetime(cal['date'], format='%Y-%m-%d')

In [None]:
# Preview the calendar table again, now including the derived
# `is_available` and `datetime` columns
cal.head()

In [None]:
# Total number of available listings per calendar day.
# Only `is_available` is summed: `cal` also carries string columns (`date`,
# `available`), and pandas >= 2.0 no longer drops non-numeric columns silently
# in groupby reductions, so a bare .sum() would try to reduce those as well.
cal_by_datetime = cal.groupby('datetime', as_index=False)[['is_available']].sum()

# To inspect every row of the result:
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):
#     display(cal_by_datetime)

In [None]:
# Line chart of the number of available listings on each calendar day.
# The datetime column is passed to matplotlib as-is; the former
# Series.dt.to_pydatetime() conversion is deprecated in pandas >= 2.1 and
# matplotlib plots datetime64 values directly.
fig_num_listings_by_date = plt.figure(figsize=(10, 10))
plt.plot(cal_by_datetime['datetime'], cal_by_datetime['is_available'])
plt.xlabel('Date')
plt.ylabel('Number of Available Listings')
plt.title('Number of Available Listings for Seattle in 2016')
plt.show()

In [None]:
# Per-neighbourhood count of non-null values in every listings column
neighbourhood_groups = lis.groupby(['neighbourhood_cleansed'], as_index=False)
lis_by_neighbourhood = neighbourhood_groups.count()

lis_by_neighbourhood

In [None]:
# Bar chart of the number of listings (non-null `id` values) per neighbourhood.
# The axis labels and title previously repeated the text of the availability-
# by-date chart ('Date', '... Available Listings ... 2016'), which does not
# describe this figure; they now describe what is actually plotted.
fig_num_listings_by_neighbourhood = plt.figure(figsize=(30, 30))
plt.bar(lis_by_neighbourhood['neighbourhood_cleansed'], lis_by_neighbourhood['id'])
plt.xlabel('Neighbourhood')
plt.ylabel('Number of Listings')
plt.title('Number of Listings per Neighbourhood in Seattle')
plt.show()

In [None]:
# Build one point per listing from its longitude/latitude and promote the
# listings table to a GeoDataFrame in WGS84 (EPSG:4326).
# The CRS is given as an authority string: the {'init': 'epsg:4326'} dict form
# is deprecated by pyproj/geopandas and emits a FutureWarning.
geometry = [Point(xy) for xy in zip(lis['longitude'], lis['latitude'])]
crs = 'EPSG:4326'
lis = gpd.GeoDataFrame(lis, crs=crs, geometry=geometry)

In [None]:
# Seattle 2010 US Census block shapes, drawn as the background layer of the maps.
# Source: https://data.seattle.gov/Land-Base/2010-US-Census-Blocks/46cb-j9zb
census_blocks_shapefile = 'data/raw/2010_US_Census_Blocks.shp'
shp = gpd.GeoDataFrame.from_file(census_blocks_shapefile)

In [None]:
# Map of Seattle Airbnb listings: orange points drawn over the black census blocks
listing_marker_style = dict(marker='o', color='orange', markersize=5, alpha=0.5)

fig_map_seattle_listings, ax = plt.subplots(figsize=(15, 15))
base = shp.plot(ax=ax, color='black')
lis.plot(ax=base, **listing_marker_style)
_ = ax.axis('off')
plt.title('Seattle Airbnb Listings')
plt.show()

In [None]:
# Six-step colour gradient running from violet to indigo
violet = Color('violet')
indigo_target = Color('indigo')
violet_to_indigo = [shade for shade in violet.range_to(indigo_target, 6)]

violet_to_indigo

In [None]:
def _price_to_int(price):
    """Convert a price string such as '$1,250.00' to whole dollars (1250).

    A leading '$' and any thousands separators are removed before parsing;
    the fractional part is truncated, not rounded.
    """
    return int(float(price.lstrip('$').replace(',', '')))


# Convert the `price` strings into integer dollar amounts.
# The former `lis['price_int'] = lis.price[1:]` line was dropped: it sliced
# rows (not characters) and its result was overwritten immediately.
lis['price_int'] = lis.price.apply(_price_to_int)

In [None]:
# Map of listings coloured by price band, drawn over grey census blocks.
#
# Each band is (inclusive lower bound, exclusive upper bound, marker colour,
# legend label). Boundaries are half-open so that every listing falls in
# exactly one band and the bands agree with their labels: a $50 listing
# belongs to '$50-99' (it was previously drawn in the '<$50' band), and the
# top band, which starts at $250 inclusive, is labelled '$250+'.
price_bands = [
    (float('-inf'), 50, 'violet', '<$50'),
    (50, 100, '#e054ed', '$50-99'),
    (100, 150, '#cc23ee', '$100-149'),
    (150, 200, '#a30bd6', '$150-199'),
    (200, 250, '#7404ad', '$200-249'),
    (250, float('inf'), 'indigo', '$250+'),
]

fig_map_seattle_listings_price, ax = plt.subplots(figsize=(15, 15))
base = shp.plot(ax=ax, color='grey')

for lower, upper, colour, label in price_bands:
    band = lis[(lis.price_int >= lower) & (lis.price_int < upper)]
    if band.empty:
        # Nothing to draw for this band; skip it so no empty layer is plotted.
        continue
    band.plot(ax=base,
              marker='o',
              color=colour,
              markersize=5,
              alpha=0.5,
              label=label)

_ = ax.axis('off')
plt.title('Seattle Listings by Price')
plt.legend()
plt.show()

In [None]:
# Print how many listings fall into each price band.
# Bands are half-open: lower bound inclusive, upper bound exclusive.
# Rows are counted by summing the boolean mask; the former `.count()[0]`
# counted non-null values of whichever column came first and relied on
# positional Series indexing, which pandas has deprecated.
price_band_labels = [
    ('Less than $50', float('-inf'), 50),
    ('$50 - $99', 50, 100),
    ('$100 - $149', 100, 150),
    ('$150 - $199', 150, 200),
    ('$200 - $249', 200, 250),
    ('$250 or more', 250, float('inf')),  # band includes exactly $250
]

for label, lower, upper in price_band_labels:
    in_band = (lis.price_int >= lower) & (lis.price_int < upper)
    print(label + ': ' + str(int(in_band.sum())))

In [None]:
# Scatter plot: review rating on the x-axis against price on the y-axis
plt.figure(figsize=(15, 15))
plt.scatter(x=lis['review_scores_rating'], y=lis['price_int'], alpha=0.5)
plt.title('Rating vs. Price')
plt.xlabel('Review Rating')
plt.ylabel('Price')
plt.show()

In [None]:
# Number of commas in the amenities string of the listing at index 0
lis.amenities[0].count(',')

In [None]:
# Show the raw amenities string of the listing at index 0
lis.amenities[0]

In [None]:
def _count_amenities(amenities):
    """Return how many entries a comma-separated amenities string lists.

    Surrounding braces are ignored and blank entries are skipped, so an
    empty value ('' or '{}') counts as 0 instead of 1.
    """
    entries = amenities.strip('{}').split(',')
    return sum(1 for entry in entries if entry.strip())


# Number of amenities listed for each listing.
# NOTE(review): assumes no single amenity name contains a comma — confirm
# against the data.
lis['amenities_count'] = lis.amenities.apply(_count_amenities)

In [None]:
# Scatter plot: amenity count on the x-axis against price on the y-axis
fig_num_amenities_vs_price = plt.figure(figsize=(15, 15))
plt.scatter(x=lis['amenities_count'], y=lis['price_int'], alpha=0.5)
plt.title('Number of Amenities vs. Price')
plt.xlabel('Number of Amenities')
plt.ylabel('Price')
plt.show()

In [None]:
# Non-null value counts of every listings column, grouped by zipcode
lis.groupby(lis.zipcode).count()