In [1007]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
import joblib
warnings.filterwarnings('ignore')
%matplotlib inline
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 500)


In [1008]:
from jupyterthemes import jtplot
jtplot.style(theme='monokai')

In [1009]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, MinMaxScaler, StandardScaler, RobustScaler
from sklearn.preprocessing import FunctionTransformer
from sklearn.ensemble import RandomForestRegressor

In [1010]:
from sklearn.model_selection import train_test_split,GridSearchCV,RandomizedSearchCV
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

In [1011]:
df=pd.read_csv('forpipeline.csv',index_col=0)

In [1012]:
df.head(1)

Unnamed: 0_level_0,neighbourhood_group,room_type,price,minimum_nights,number_of_reviews,calculated_host_listings_count,availability_365,host_is_superhost,property_type,bedrooms,beds,guests_included,review_scores_rating,instant_bookable,bathrooms,security_deposit,cleaning_fee,len_amenities,budget_dorm
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
49091,North Region,Private room,84,180,1,2,365,f,Apartment,1,1,1,94.0,f,1,0.0,0.0,8,0


In [520]:
# df.drop(columns=['id'],inplace=True)

In [1013]:
X_pipe = df.drop(columns=['price']) ### Features / Soal
y_pipe = df['price']
y_pipe = np.log1p(y_pipe)
X_train_pipe, X_test_pipe, y_train_pipe, y_test_pipe = train_test_split(X_pipe, y_pipe, test_size = .3, random_state = 42 )

In [1014]:
X_train_pipe.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4867 entries, 41766305 to 9959188
Data columns (total 18 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   neighbourhood_group             4867 non-null   object 
 1   room_type                       4867 non-null   object 
 2   minimum_nights                  4867 non-null   int64  
 3   number_of_reviews               4867 non-null   int64  
 4   calculated_host_listings_count  4867 non-null   int64  
 5   availability_365                4867 non-null   int64  
 6   host_is_superhost               4867 non-null   object 
 7   property_type                   4867 non-null   object 
 8   bedrooms                        4867 non-null   int64  
 9   beds                            4867 non-null   int64  
 10  guests_included                 4867 non-null   int64  
 11  review_scores_rating            4867 non-null   float64
 12  instant_bookable        

In [1015]:
cat_columns = ['neighbourhood_group', 'room_type','host_is_superhost', 'property_type', 'instant_bookable']

num_columns = ['minimum_nights', 'number_of_reviews','review_scores_rating', 'calculated_host_listings_count', 'availability_365', 'bedrooms','beds','bathrooms','guests_included','security_deposit','cleaning_fee','len_amenities','budget_dorm']

In [1016]:
len(cat_columns),len(num_columns)

(5, 13)

In [1017]:
numerical_pipeline = Pipeline([    
    ('transformer', FunctionTransformer(np.log1p))])

categorical_pipeline = Pipeline([
    ('encoder', OneHotEncoder(handle_unknown='ignore'))])

preprocessor = ColumnTransformer([
    ('numeric', numerical_pipeline, num_columns),
    ('categoric', categorical_pipeline, cat_columns)
])

RF_TUNED = Pipeline([
    ('prep', preprocessor),
    ('algo', RandomForestRegressor())
])

In [526]:
RF_TUNED.get_params()

{'memory': None,
 'steps': [('prep', ColumnTransformer(transformers=[('numeric',
                                    Pipeline(steps=[('transformer',
                                                     FunctionTransformer(func=<ufunc 'log1p'>))]),
                                    ['minimum_nights', 'number_of_reviews',
                                     'review_scores_rating',
                                     'calculated_host_listings_count',
                                     'availability_365', 'bedrooms', 'beds',
                                     'bathrooms', 'guests_included',
                                     'security_deposit', 'cleaning_fee',
                                     'len_amenities', 'budget_dorm']),
                                   ('categoric',
                                    Pipeline(steps=[('encoder',
                                                     OneHotEncoder(handle_unknown='ignore'))]),
                                    ['neighbo

In [476]:
# {'n_estimators': 500,
#  'min_samples_split': 30,
#  'min_samples_leaf': 30,
#  'max_features': 15,
#  'max_depth': 35,
#  'bootstrap': False}

In [None]:
# #1
# param_RF_TUNED = {'algo__max_depth': [12],
#                  'algo__max_features': [14],
#                  'algo__min_samples_split': [46],
#                  'algo__n_estimators': [240],
#                  'algo__bootstrap': [True],
#                  'algo__min_samples_leaf': [20]}

In [707]:
# #2
# param_RF_TUNED = {'algo__max_depth': [12],
#                  'algo__max_features': [14],
#                  'algo__min_samples_split': [30],
#                  'algo__n_estimators': [240],
#                  'algo__bootstrap': [True],
#                  'algo__min_samples_leaf': [15]}

In [1018]:
param_RF_TUNED = {'algo__max_depth': [12],
                 'algo__max_features': [14],
                 'algo__min_samples_split': [46],
                 'algo__n_estimators': [240],
                 'algo__bootstrap': [True],
                 'algo__min_samples_leaf': [23]}

In [1019]:
model_RF_TUNED = GridSearchCV(estimator=RF_TUNED, param_grid=param_RF_TUNED, cv=5, n_jobs=-1, verbose=1)

In [1020]:
model_RF_TUNED.fit(X_train_pipe, y_train_pipe)
model_RF_TUNED = model_RF_TUNED.best_estimator_
y_pred = model_RF_TUNED.predict(X_test_pipe)

Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    3.3s finished


In [1021]:
r2_tuned_RF_TUNED = r2_score(y_test_pipe, y_pred)
MAE_tune_RF_TUNED = mean_absolute_error(y_test_pipe, y_pred)
MSE_tune_RF_TUNED = mean_squared_error(y_test_pipe, y_pred)
RMSE_tune_RF_TUNED = np.sqrt(MSE_tune_RF_TUNED)
print(f"R2 Train: {model_RF_TUNED.score(X_train_pipe, y_train_pipe)}")
print(f"R2 Score: {r2_tuned_RF_TUNED}\nMAE: {MAE_tune_RF_TUNED}\nMSE: {MSE_tune_RF_TUNED}\nRMSE: {RMSE_tune_RF_TUNED}")

R2 Train: 0.7481696535199623
R2 Score: 0.7147974895550266
MAE: 0.26020504586584325
MSE: 0.12998198088249696
RMSE: 0.36053013866041345


In [147]:
predicts = pd.DataFrame(data = [['North Region', 'Private room', 20, 20, 30, 12, 't', 'Apartment', 4, 3, 5, 80, 't', 3, 10, 10, 12,1]], columns=['neighbourhood_group','room_type','minimum_nights','reviews_per_month','calculated_host_listings_count','availability_365','host_is_superhost','property_type','bedrooms','beds','guests_included','review_scores_rating','instant_bookable','bathrooms','security_deposit', 'cleaning_fee','len_amenities','budget_dorm'])


In [1027]:
model = joblib.load('ModelAirbnbFinal')

In [1028]:
predicts

Unnamed: 0,neighbourhood_group,room_type,minimum_nights,reviews_per_month,calculated_host_listings_count,availability_365,host_is_superhost,property_type,bedrooms,beds,guests_included,review_scores_rating,instant_bookable,bathrooms,security_deposit,cleaning_fee,len_amenities,budget_dorm
0,North Region,Private room,20,20,30,12,t,Apartment,4,3,5,80,t,3,10,10,12,1


In [1029]:
pred = model.predict(predicts)

ValueError: could not convert string to float: 'North Region'

In [171]:
print("Your Expected Price is S$",(np.expm1(pred)[0]).round(2))

Your Expected Price is S$ 88.42


# Deployment

In [1033]:
import joblib

In [1034]:
joblib.dump(model_RF_TUNED,'AIRBNBMODELFINAL')

['AIRBNBMODELFINAL']