In [26]:
import pandas as pd
import numpy as np

#Read all dataset of the project and cleaning data
def read_file(filename):
    file_name = 'New York/2019-09-12/{}.csv'.format(filename)
    data = pd.read_csv(file_name, header = 0)
    return pd.DataFrame(data)

def convert_str_to_date(df, column):
    return pd.to_datetime(df[column], format="%Y-%m-%d")

#Convert column type from string to numeric
def convert_str_to_numeric(df, column):
    df[column] = df[column].replace(r'[$,]', '', regex=True)
    return pd.to_numeric(df[column],errors='coerce')

In [88]:
# Read Detailed Calendar Data for listings 
calendar_data = read_file("calendar")
    
calendar_data['date'] = convert_str_to_date(calendar_data, 'date')

calendar_data['price'] = convert_str_to_numeric(calendar_data, 'price')

calendar_data['adjusted_price'] = convert_str_to_numeric(calendar_data, 'adjusted_price')

#Read Summary information and metrics for listings(good for visualisations).
summary_listing_data = read_file("listings")
summary_listing_data['price'] = convert_str_to_numeric(summary_listing_data, 'price')

#Read Detailed Listings data 
detailed_listing_data = read_file("detailed_listings")
detailed_listing_data['first_review'] = convert_str_to_date(detailed_listing_data, 'first_review')

detailed_listing_data['last_review'] = convert_str_to_date(detailed_listing_data, 'last_review')

detailed_listing_data.drop(['scrape_id', 'last_scraped', 'experiences_offered'], axis=1, inplace=True)

# Read Detailed Review Data for listings
#detailed_review_data = read_file("detailed_reviews")
#detailed_review_data['date'] = convert_str_to_date(detailed_review_data, 'date')

# Read Detailed Review Data languages
detailed_review_data = read_file("review_lang")
detailed_review_data['date'] = convert_str_to_date(detailed_review_data, 'date')
detailed_review_data.dtypes



  if (await self.run_code(code, result,  async_=asy)):


In [None]:
from langdetect import detect

def detect_lang(sentence):
    try:
        return detect(sentence)
    except:
        return 'unknown'

detailed_review_data['lang'] = detailed_review_data['comments'].apply(detect_lang)
#detailed_review_data[detailed_review_data['lang']!="en"]
export_csv = detailed_review_data.to_csv(r'review_lang.csv')

In [None]:
# Conduct sentiment analysis on English review only
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
vader = SentimentIntensityAnalyzer()  

def sentiment_analyzer_scores(sentence):  
    score = vader.polarity_scores(sentence)
    return score

eng_review = detailed_review_data[detailed_review_data['lang']=="en"]

eng_review['sentiment_score'] = eng_review['comments'].apply(sentiment_analyzer_scores)

export_csv = eng_review.to_csv(r'review_sentiment_score.csv')

In [56]:
from ast import literal_eval

# Retrieve only the overall review rating from the sentiment analyzer's results
def get_overall_review_rating(sentiment_string):
    comment_dict = literal_eval(sentiment_string)
    return (comment_dict['compound'])
    
    
review_with_sentiment = read_file("review_sentiment_score")
review_with_sentiment['overall_rating'] = review_with_sentiment['sentiment_score'].apply(get_overall_review_rating)

simplified_review_with_sentiment_score = review_with_sentiment.groupby('listing_id')['overall_rating'].mean().reset_index()

#review_with_sentiment[review_with_sentiment["overall_rating"]==0]

In [64]:
# Select particular columns from detailed_listing_data 
simplified_listing_data = detailed_listing_data[['id', 'listing_url', 'name', 'host_id','host_is_superhost','neighbourhood','neighbourhood_cleansed','neighbourhood_group_cleansed','property_type','room_type', 'accommodates','bedrooms','amenities','price','weekly_price', 'monthly_price', 'security_deposit','cleaning_fee','extra_people','minimum_nights','maximum_nights','review_scores_rating','review_scores_accuracy','review_scores_cleanliness','review_scores_checkin','review_scores_communication','review_scores_location','review_scores_value','first_review','last_review','reviews_per_month']]

simplified_listing_data['price'] = convert_str_to_numeric(simplified_listing_data, 'price')

simplified_listing_data['weekly_price'] = convert_str_to_numeric(simplified_listing_data, 'weekly_price')

simplified_listing_data['monthly_price'] = convert_str_to_numeric(simplified_listing_data, 'monthly_price')

simplified_listing_data['security_deposit'] = convert_str_to_numeric(simplified_listing_data, 'security_deposit')

simplified_listing_data['cleaning_fee'] = convert_str_to_numeric(simplified_listing_data, 'cleaning_fee')

simplified_listing_data['extra_people'] = convert_str_to_numeric(simplified_listing_data, 'extra_people')

# Add sentiment score from review data file into the simplified_listing_data 
import math
merged = simplified_listing_data.merge(simplified_review_with_sentiment_score, left_on = 'id', right_on = 'listing_id', how='left')

def get_final_rating(df):
    review_score = df[0]
    comment_score = df[1] 
    if ( (math.isnan(review_score) == False) & (math.isnan(comment_score))):
        return (review_score)
    else:
        return round((comment_score*100),1)

merged['final_rating'] = merged[['review_scores_rating','overall_rating']].apply(get_final_rating, axis =1)

#merged[merged['overall_rating'].isna()][['review_scores_rating','overall_rating', 'final_rating']][merged['final_rating'].notna()]

# drop Null in final_rating column
final_df = merged.dropna(subset=['final_rating'])
final_df['kid_friendly'] = final_df['amenities'].str.contains('Family/kid friendly')

export_csv = final_df.to_csv(r'New York/2019-09-12/airbnb_recommendation_final.csv')

final_df[['review_scores_rating','overall_rating', 'final_rating']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.py

Unnamed: 0,review_scores_rating,overall_rating,final_rating
0,90.0,0.833202,83.3
1,93.0,0.810262,81.0
2,89.0,0.777290,77.7
3,90.0,0.879643,88.0
4,83.0,0.767323,76.7
5,98.0,0.939276,93.9
6,95.0,0.887796,88.8
7,94.0,0.915350,91.5
8,93.0,0.854689,85.5
9,97.0,0.925781,92.6


In [65]:
detailed_listing_data = read_file("airbnb_recommendation_final")

# Further reduce unnecessary columns in airbnb_recommendation_final files
second_listing_data = detailed_listing_data[['id', 'listing_url', 'name','neighbourhood_group_cleansed','property_type','room_type', 'accommodates','bedrooms','amenities','price', 'review_scores_rating','review_scores_accuracy','review_scores_cleanliness','review_scores_checkin','review_scores_communication','review_scores_location','review_scores_value','final_rating', 'kid_friendly', 'host_is_superhost']]

# Remove records in which the price is 0
simplified_listing_data = second_listing_data[second_listing_data['price'] != 0]

simplified_listing_data['price'] = convert_str_to_numeric(simplified_listing_data, 'price')

"""
simplified_listing_data['weekly_price'] = convert_str_to_numeric(simplified_listing_data, 'weekly_price')

simplified_listing_data['monthly_price'] = convert_str_to_numeric(simplified_listing_data, 'monthly_price')

simplified_listing_data['security_deposit'] = convert_str_to_numeric(simplified_listing_data, 'security_deposit')

simplified_listing_data['cleaning_fee'] = convert_str_to_numeric(simplified_listing_data, 'cleaning_fee')

simplified_listing_data['extra_people'] = convert_str_to_numeric(simplified_listing_data, 'extra_people')

"""
simplified_listing_data.rename(columns={"final_rating": "comment_rating","neighbourhood_group_cleansed": "neighbourhood_group" }, inplace = True)
simplified_listing_data.sort_values(by='price', ascending=True, inplace = True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [66]:
# Airbnb Recommendation functions by Python widgets
import ipywidgets as widgets
from IPython.display import display

ALL = 'ALL'
def unique_sorted_values_plus_ALL(array):
    unique = array.unique().tolist()
    unique.sort()
    unique.insert(0, ALL)
    return unique

dropdown_neighbourhood_group = widgets.Dropdown(options = unique_sorted_values_plus_ALL(simplified_listing_data.neighbourhood_group), description ="Location: ")
dropdown_room_type = widgets.Dropdown(options = unique_sorted_values_plus_ALL(simplified_listing_data.room_type), description ="Room Type: ")
style = {'description_width': 'initial'}
slider_review_score = widgets.IntSlider(
    value=70,
    min=20,
    max=100,
    step=1,
    description='Review Score: ',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d',
    style=style
)
#slider_review_score.style.handle_color = 'lightblue'

checkbox_kid_friendly = widgets.Checkbox(
    value=False,
    description='Family/kid friendly ',
    disabled=False
)

checkbox_superhost = widgets.Checkbox(
    value=False,
    description='Superhost ',
    disabled=False
)

top = widgets.IntSlider(
    value=25,
    min=5,
    max=100,
    step=5,
    description='Top: ',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)
#top.style.handle_color = 'lightblue'

airbnb_recommendation_output = widgets.Output(layout=Layout(overflow= 'scroll'))


def common_filtering(neighbourhood_group, room_type, review_score, kid_friendly, superhost, top):
    airbnb_recommendation_output.clear_output()
    simplified_listing_data.loc[simplified_listing_data['review_scores_rating'] >= review_score]
    if (neighbourhood_group == ALL) & (room_type == ALL):
        common_filter = simplified_listing_data
    elif (neighbourhood_group == ALL):
        common_filter = simplified_listing_data[(simplified_listing_data.room_type == room_type)]
    elif (room_type == ALL):
        common_filter = simplified_listing_data[(simplified_listing_data.neighbourhood_group == neighbourhood_group) ]
    else:
        common_filter = simplified_listing_data[(simplified_listing_data.room_type == room_type) & 
                                  (simplified_listing_data.neighbourhood_group == neighbourhood_group)]
    if kid_friendly:
        common_filter = common_filter[(common_filter['kid_friendly']==True)]
        
    if superhost:
        common_filter = common_filter[(common_filter['host_is_superhost']=='t')]
        
    with airbnb_recommendation_output:
        display(common_filter.sort_values(by=['review_scores_rating'], ascending=False).head(top))          
                  
def dropdown_neighbourhood_group_eventhandler(change):
    common_filtering(change.new, dropdown_room_type.value, slider_review_score.value, checkbox_kid_friendly.value, checkbox_superhost.value, top.value)
    
def dropdown_room_type_eventhandler(change):
    common_filtering(dropdown_neighbourhood_group.value, change.new, slider_review_score.value, checkbox_kid_friendly.value, checkbox_superhost.value, top.value)

def slider_review_score_eventhandler(change):
    common_filtering(dropdown_neighbourhood_group.value, dropdown_room_type.value, change.new, checkbox_kid_friendly.value, checkbox_superhost.value, top.value)

def checkbox_kid_friendly_eventhandler(change):
    common_filtering(dropdown_neighbourhood_group.value, dropdown_room_type.value, slider_review_score.value, change.new, checkbox_superhost.value, top.value)

def checkbox_superhost_eventhandler(change):
    common_filtering(dropdown_neighbourhood_group.value, dropdown_room_type.value, slider_review_score.value, checkbox_kid_friendly.value, change.new, top.value)
                             
def top_eventhandler(change):
    common_filtering(dropdown_neighbourhood_group.value, dropdown_room_type.value, slider_review_score.value, checkbox_kid_friendly.value, checkbox_superhost.value, change.new)

    
dropdown_neighbourhood_group.observe(dropdown_neighbourhood_group_eventhandler, names='value')
dropdown_room_type.observe(dropdown_room_type_eventhandler, names='value')
slider_review_score.observe(slider_review_score_eventhandler, names='value')
checkbox_kid_friendly.observe(checkbox_kid_friendly_eventhandler, names='value')
checkbox_superhost.observe(checkbox_superhost_eventhandler, names='value')                            
top.observe(top_eventhandler, names='value')

input_widgets = widgets.HBox([dropdown_neighbourhood_group, dropdown_room_type, checkbox_kid_friendly])
second_input_widgets = widgets.HBox([slider_review_score, top, checkbox_superhost])

from IPython.display import display, Markdown
display(Markdown("## Airbnb Recommendation Services"))
display(input_widgets)
display(second_input_widgets)

display(airbnb_recommendation_output)
common_filtering(dropdown_neighbourhood_group.value, dropdown_room_type.value, slider_review_score.value, checkbox_kid_friendly.value, checkbox_superhost.value, top.value)


## Airbnb Recommendation Services

HBox(children=(Dropdown(description='Location: ', options=('ALL', 'Bronx', 'Brooklyn', 'Manhattan', 'Queens', …

HBox(children=(IntSlider(value=70, continuous_update=False, description='Review Score: ', min=20, style=Slider…

Output(layout=Layout(overflow='scroll'))