In [20]:
# Initial imports
import pandas as pd
import hvplot.pandas
from path import Path
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

In [21]:
# Import data
file_path = Path('resources/data/')

bostonRev = pd.read_csv(f'{file_path}/boston_reviews.csv')
bostonCal = pd.read_csv(f'{file_path}/boston_calendar.csv')
bostonList = pd.read_csv(f'{file_path}/boston_listings.csv')

seattleRev = pd.read_csv(f'{file_path}/seattle_reviews.csv')
seattleCal = pd.read_csv(f'{file_path}/seattle_calendar.csv')
seattleList = pd.read_csv(f'{file_path}/seattle_listings.csv')


In [22]:
# Combine similar dataframes
airbnbRev = pd.concat([bostonRev, seattleRev])
airbnbCal = pd.concat([bostonCal, seattleCal])
airbnbList = pd.concat([bostonList, seattleList])

In [23]:
airbnbRev.head()

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments
0,1178162,4724140,2013-05-21,4298113,Olivier,My stay at islam's place was really cool! Good...
1,1178162,4869189,2013-05-29,6452964,Charlotte,Great location for both airport and city - gre...
2,1178162,5003196,2013-06-06,6449554,Sebastian,We really enjoyed our stay at Islams house. Fr...
3,1178162,5150351,2013-06-15,2215611,Marine,The room was nice and clean and so were the co...
4,1178162,5171140,2013-06-16,6848427,Andrew,Great location. Just 5 mins walk from the Airp...


In [24]:
airbnbCal.head()

Unnamed: 0,listing_id,date,available,price
0,12147973,2017-09-05,f,
1,12147973,2017-09-04,f,
2,12147973,2017-09-03,f,
3,12147973,2017-09-02,f,
4,12147973,2017-09-01,f,


In [25]:
airbnbList.head()

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,summary,space,description,experiences_offered,neighborhood_overview,...,review_scores_value,requires_license,license,jurisdiction_names,instant_bookable,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,reviews_per_month
0,12147973,https://www.airbnb.com/rooms/12147973,20160906204935,2016-09-07,Sunny Bungalow in the City,"Cozy, sunny, family home. Master bedroom high...",The house has an open and cozy feel at the sam...,"Cozy, sunny, family home. Master bedroom high...",none,"Roslindale is quiet, convenient and friendly. ...",...,,f,,,f,moderate,f,f,1,
1,3075044,https://www.airbnb.com/rooms/3075044,20160906204935,2016-09-07,Charming room in pet friendly apt,Charming and quiet room in a second floor 1910...,Small but cozy and quite room with a full size...,Charming and quiet room in a second floor 1910...,none,"The room is in Roslindale, a diverse and prima...",...,9.0,f,,,t,moderate,f,f,1,1.3
2,6976,https://www.airbnb.com/rooms/6976,20160906204935,2016-09-07,Mexican Folk Art Haven in Boston,"Come stay with a friendly, middle-aged guy in ...","Come stay with a friendly, middle-aged guy in ...","Come stay with a friendly, middle-aged guy in ...",none,The LOCATION: Roslindale is a safe and diverse...,...,10.0,f,,,f,moderate,t,f,1,0.47
3,1436513,https://www.airbnb.com/rooms/1436513,20160906204935,2016-09-07,Spacious Sunny Bedroom Suite in Historic Home,Come experience the comforts of home away from...,Most places you find in Boston are small howev...,Come experience the comforts of home away from...,none,Roslindale is a lovely little neighborhood loc...,...,10.0,f,,,f,moderate,f,f,1,1.0
4,7651065,https://www.airbnb.com/rooms/7651065,20160906204935,2016-09-07,Come Home to Boston,"My comfy, clean and relaxing home is one block...","Clean, attractive, private room, one block fro...","My comfy, clean and relaxing home is one block...",none,"I love the proximity to downtown, the neighbor...",...,10.0,f,,,f,flexible,f,f,1,2.25


In [26]:
# Keep important data
airbnbListCleaned = airbnbList.loc[:, ['listing_url', 'id', 'name', 'market', 'summary', 'space', 
                                      'description', 'neighborhood_overview', 'notes', 'transit',
                                      'access', 'host_id', 'host_is_superhost', 'host_listings_count',
                                      'host_total_listings_count', 'street', 'neighbourhood_cleansed',
                                      'city', 'state', 'zipcode', 'latitude', 'longitude', 'property_type',
                                      'room_type', 'accommodates', 'bathrooms', 'bedrooms', 'beds', 'price',
                                      'weekly_price', 'monthly_price', 'security_deposit', 'cleaning_fee',
                                      'guests_included', 'extra_people', 'minimum_nights', 'maximum_nights',
                                      'number_of_reviews', 'review_scores_rating', 'review_scores_accuracy',
                                      'review_scores_cleanliness', 'review_scores_checkin', 'review_scores_communication',
                                      'review_scores_location', 'review_scores_value', 'reviews_per_month']]
airbnbListCleaned.head(10)

Unnamed: 0,listing_url,id,name,market,summary,space,description,neighborhood_overview,notes,transit,...,maximum_nights,number_of_reviews,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,reviews_per_month
0,https://www.airbnb.com/rooms/12147973,12147973,Sunny Bungalow in the City,Boston,"Cozy, sunny, family home. Master bedroom high...",The house has an open and cozy feel at the sam...,"Cozy, sunny, family home. Master bedroom high...","Roslindale is quiet, convenient and friendly. ...",,"The bus stop is 2 blocks away, and frequent. B...",...,1125,0,,,,,,,,
1,https://www.airbnb.com/rooms/3075044,3075044,Charming room in pet friendly apt,Boston,Charming and quiet room in a second floor 1910...,Small but cozy and quite room with a full size...,Charming and quiet room in a second floor 1910...,"The room is in Roslindale, a diverse and prima...","If you don't have a US cell phone, you can tex...",Plenty of safe street parking. Bus stops a few...,...,15,36,94.0,10.0,9.0,10.0,10.0,9.0,9.0,1.3
2,https://www.airbnb.com/rooms/6976,6976,Mexican Folk Art Haven in Boston,Boston,"Come stay with a friendly, middle-aged guy in ...","Come stay with a friendly, middle-aged guy in ...","Come stay with a friendly, middle-aged guy in ...",The LOCATION: Roslindale is a safe and diverse...,I am in a scenic part of Boston with a couple ...,"PUBLIC TRANSPORTATION: From the house, quick p...",...,45,41,98.0,10.0,9.0,10.0,10.0,9.0,10.0,0.47
3,https://www.airbnb.com/rooms/1436513,1436513,Spacious Sunny Bedroom Suite in Historic Home,Boston,Come experience the comforts of home away from...,Most places you find in Boston are small howev...,Come experience the comforts of home away from...,Roslindale is a lovely little neighborhood loc...,Please be mindful of the property as it is old...,There are buses that stop right in front of th...,...,1125,1,100.0,10.0,10.0,10.0,10.0,10.0,10.0,1.0
4,https://www.airbnb.com/rooms/7651065,7651065,Come Home to Boston,Boston,"My comfy, clean and relaxing home is one block...","Clean, attractive, private room, one block fro...","My comfy, clean and relaxing home is one block...","I love the proximity to downtown, the neighbor...",I have one roommate who lives on the lower lev...,From Logan Airport and South Station you have...,...,31,29,99.0,10.0,10.0,10.0,10.0,9.0,10.0,2.25
5,https://www.airbnb.com/rooms/12386020,12386020,Private Bedroom + Great Coffee,Boston,Super comfy bedroom plus your own bathroom in ...,Our sunny condo is located on the second and t...,Super comfy bedroom plus your own bathroom in ...,We love our corner of Roslindale! For quiet wa...,,To reach downtown Boston via public transporta...,...,1125,8,100.0,10.0,10.0,10.0,10.0,9.0,10.0,1.7
6,https://www.airbnb.com/rooms/5706985,5706985,New Lrg Studio apt 15 min to Boston,Boston,It's a 5 minute walk to Rosi Square to catch t...,The whole house was recently redone and it 's ...,It's a 5 minute walk to Rosi Square to catch t...,Roslindale is the new hip area of Boston. Clos...,"Information about the house, wifi pasword and ...",The commuter rail (Needham Line) is a 5 minute...,...,1125,57,90.0,10.0,10.0,10.0,10.0,9.0,9.0,4.0
7,https://www.airbnb.com/rooms/2843445,2843445,"""Tranquility"" on ""Top of the Hill""",Boston,"We can accommodate guests who are gluten-free,...",We provide a bedroom and full shared bath. Ra...,"We can accommodate guests who are gluten-free,...",Our neighborhood is residential with friendly ...,We love having company and meeting people from...,We are a few minutes walk to public bus servic...,...,1125,67,96.0,10.0,10.0,10.0,10.0,10.0,10.0,2.38
8,https://www.airbnb.com/rooms/753446,753446,6 miles away from downtown Boston!,Boston,Nice and cozy apartment about 6 miles away to ...,Nice and cozy apartment about 6 miles away to ...,Nice and cozy apartment about 6 miles away to ...,Roslindale is a primarily residential neighbor...,,,...,1125,65,96.0,10.0,10.0,10.0,10.0,9.0,10.0,5.36
9,https://www.airbnb.com/rooms/849408,849408,Perfect & Practical Boston Rental,Boston,This is a cozy and spacious two bedroom unit w...,Perfect apartment rental for those in town vis...,This is a cozy and spacious two bedroom unit w...,"This neighborhood truly has it all. Good, not...",Please note that this is a second floor apartm...,Plenty of on street parking with no restrictio...,...,10,33,94.0,10.0,9.0,10.0,10.0,9.0,9.0,1.01


In [33]:
# Create ratings DataFrame
ratings = airbnbList.loc[:, ['id', 'market','number_of_reviews', 'review_scores_rating']]

# Use get_dummies() to create variables for text features.
X = pd.get_dummies(ratings, columns=["review_scores_rating", "market"])
X.head(10)

Unnamed: 0,id,number_of_reviews,review_scores_rating_20.0,review_scores_rating_40.0,review_scores_rating_46.0,review_scores_rating_47.0,review_scores_rating_48.0,review_scores_rating_50.0,review_scores_rating_53.0,review_scores_rating_55.0,...,review_scores_rating_96.0,review_scores_rating_97.0,review_scores_rating_98.0,review_scores_rating_99.0,review_scores_rating_100.0,market_Boston,market_Other (Domestic),market_Other (International),market_San Francisco,market_Seattle
0,12147973,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,3075044,36,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,6976,41,0,0,0,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0
3,1436513,1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,0,0,0,0
4,7651065,29,0,0,0,0,0,0,0,0,...,0,0,0,1,0,1,0,0,0,0
5,12386020,8,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,0,0,0,0
6,5706985,57,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
7,2843445,67,0,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0
8,753446,65,0,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0
9,849408,33,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0


In [34]:
# Standardize the data with StandardScaler().
X_scaled = StandardScaler().fit_transform(X)
print(X_scaled[0:5])

[[ 1.28277731 -0.56312936 -0.03076455 -0.04505907 -0.01162319 -0.01643879
  -0.01162319 -0.02013468 -0.01643879 -0.01643879 -0.01162319 -0.01162319
  -0.07820353 -0.01162319 -0.02848051 -0.0232511  -0.0232511  -0.02848051
  -0.0232511  -0.01162319 -0.06050224 -0.03076455 -0.02599728 -0.05333627
  -0.03076455 -0.05072604 -0.04029387 -0.04194203 -0.04653995 -0.04029387
  -0.19493013 -0.03677813 -0.05936723 -0.07643554 -0.08808703 -0.09264506
  -0.0933838  -0.13984468 -0.12616957 -0.12224479 -0.20586993 -0.16009487
  -0.17086559 -0.23550055 -0.20799954 -0.24863367 -0.26184414 -0.24955118
  -0.25289357 -0.17128384 -0.48483853  1.03674097 -0.01162319 -0.01162319
  -0.01162319 -1.03198499]
 [-0.95627833  0.41698139 -0.03076455 -0.04505907 -0.01162319 -0.01643879
  -0.01162319 -0.02013468 -0.01643879 -0.01643879 -0.01162319 -0.01162319
  -0.07820353 -0.01162319 -0.02848051 -0.0232511  -0.0232511  -0.02848051
  -0.0232511  -0.01162319 -0.06050224 -0.03076455 -0.02599728 -0.05333627
  -0.030764

In [35]:
# Using PCA to reduce dimension to three principal components.
pca = PCA(n_components=3)
airbnb_pca = pca.fit_transform(X_scaled)
airbnb_pca

array([[-1.94985475, -0.05700965, -0.01696796],
       [-0.24728665,  1.56284982, -0.489156  ],
       [ 0.12718784,  1.86127868, -1.28722437],
       ...,
       [ 0.55513966, -1.20841532, -0.12073145],
       [ 0.62708789, -1.15780348, -0.12168121],
       [ 0.56151741, -1.2039289 , -0.12081564]])

In [36]:
# Create a DataFrame with the three principal components.
pcs_df = pd.DataFrame(
    data=airbnb_pca, columns=["PC 1", "PC 2", "PC 3"], index=ratings.index
)
pcs_df.head(10)

Unnamed: 0,PC 1,PC 2,PC 3
0,-1.949855,-0.05701,-0.016968
1,-0.247287,1.56285,-0.489156
2,0.127188,1.861279,-1.287224
3,-0.987557,-0.431068,0.591371
4,-0.731982,0.995721,-0.175549
5,-2.128697,-1.163401,0.622524
6,-1.007162,1.194845,-0.9018
7,-0.14581,2.157624,0.322794
8,0.067475,2.287545,0.315382
9,-0.026098,1.688273,-0.498971


In [37]:
# Create an elbow curve to find the best value for K.
inertia = []
k = list(range(1, 11))

# Calculate the ienrtia for the range of K values
for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(pcs_df)
    inertia.append(km.inertia_)
    
# Create the Elbow Curve using hvPlot
elbow_data = {"k": k, "inertia": inertia}
elbow_df = pd.DataFrame(elbow_data)
elbow_df.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")

In [40]:
# Initialize the K-Means model.
model = KMeans(n_clusters=4, random_state=0)

# Fit the model
model.fit(pcs_df)

# Predict clusters
predictions = model.predict(pcs_df)
predictions

array([3, 1, 1, ..., 0, 0, 0])

In [None]:
# Create a new DataFrame including predicted clusters and cryptocurrencies features.
# Concatentate the crypto_df and pcs_df DataFrames on the same columns.
clustered_df = pd.concat([ratings, pcs_df], axis=1, join='inner')

#  Add a new column, "CoinName" to the clustered_df DataFrame that holds the names of the cryptocurrencies. 
clustered_df["Name"] = crypto_name_df.CoinName

#  Add a new column, "Class" to the clustered_df DataFrame that holds the predictions.
clustered_df["Class"] = model.labels_

# Print the shape of the clustered_df
print(clustered_df.shape)
clustered_df.head(10)