In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn import preprocessing
from sklearn import metrics


import numpy as np
import json



airbnb = pd.read_csv('datasets/airbnb.csv')

def clean_data(airbnb):
    colms = [col for col in airbnb.columns if col not in [
    'id','host_id','host_name','neighbourhood_group','last_review','name',
        'calculated_host_listings_count','reviews_per_month','availability_365','minimum_nights','number_of_reviews']]
    
    airbnb.loc[:,['price', 'latitude','longitude']] = airbnb[['price', 'latitude','longitude']].astype(float)
    data = airbnb[colms]
    
    return data

def prepare_for_test(airbnb):
    data = clean_data(airbnb)
    target = data['price']
    colms = [col for col in data.columns if col not in ['price']]
    data = data[colms]

    # create the Labelencoder object
    le = preprocessing.LabelEncoder()
    data['neighbourhood'] = le.fit_transform(data['neighbourhood'])
    neighbourhood_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
    
    data['room_type'] = le.fit_transform(data['room_type'])
    room_type_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
    
    return data,target,neighbourhood_mapping,room_type_mapping

def create_train_data(data,target):
    x_train,x_test,y_train,y_test = train_test_split(data,target,test_size=0.2, random_state=60)
    return x_train,x_test,y_train,y_test

def print_score(alg, score, params):
    print('%s score is %f with params %s' % (alg, score, json.dumps(params)))
    

#prepare data for test and return the mappings 
data,target,neighbourhood_mapping,room_type_mapping = prepare_for_test(airbnb)

#create the training data 
X_train,X_test,y_train,y_test = create_train_data(data,target)

regr = RandomForestRegressor(random_state=60, n_estimators=300)

# Fit the mode
regr.fit(data,target)

# Predict the prices
y_pre = regr.predict(X_test)

# Compare the result
print(r2_score(y_test, y_pre))

0.9060880800824367
