In [225]:
import pandas as pd
import numpy as np
import sklearn
import pickle
import streamlit as st
import joblib

from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, KFold
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error, explained_variance_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

from geopy.distance import geodesic

In [226]:
asheville_modeling_data = pd.read_pickle('../Pickles/asheville_modeling_data.pickle')
streamlit_model3_results = joblib.load('../Joblib/streamlit_model3_results.joblib')
streamlit_model3_results

In [227]:
#Create a model 3 X and y variable 
model3_X = asheville_modeling_data.drop(['daily_price', 'listing_id'], axis = 1)
model3_y = asheville_modeling_data['daily_price']

#Train, test, split the X and y variables
model3_X_train, model3_X_test, model3_y_train, model3_y_test = train_test_split(model3_X, 
                                                                                model3_y, 
                                                                                test_size = 0.2)

#Create a list of numeric columns
model3_numeric_cols = ['host_is_superhost', 'accommodates', 'bedrooms', 'beds', 'bathrooms', 
                       'Air conditioning', 'Wifi', 'TV', 'Kitchen', 'Washer', 'Dryer', 'Heating',
                       'distance_to_biltmore', 'distance_to_downtown']

#Create a list of nominal columns
model3_nominal_cols = ['neighborhood', 'room_type', 'day_of_week', 'month', 'week']

#Scale the numeric columns
model3_numeric_pipeline = Pipeline([('scaler', StandardScaler())])

#One hot encode the nominal columns
model3_nominal_pipeline = Pipeline([('ohe', OneHotEncoder(sparse = False))])

#Column tranform the two pipelines
ct = ColumnTransformer([('nominalpipe', model3_nominal_pipeline, model3_nominal_cols ),
                        ('numpipe', model3_numeric_pipeline, model3_numeric_cols)])

#Create a final pipeline with the column transformer and random forest regressor model
model3_final_pipe = Pipeline([('preprocess', ct),
                              ('model', RandomForestRegressor())])

In [228]:
room_options = ['Entire home/apt', 'Private room']
room_options_len = list(range(len(room_options)))

neighborhood_options = ['Asheville', 'Candler', 'Fletcher', 'Woodfin']
neighborhood_options_len = list(range(len(neighborhood_options)))

superhost = 1
room_type = room_options[0]
accommodates = 5
bedrooms = 4
beds = 5
neighborhood = neighborhood_options[0]
bathrooms = 3
air_conditioning = 1
wifi = 1
tv = 1
kitchen = 1
washer = 1
dryer = 1
heating = 1
latitude = 50
longitude = -50
checkin_date = 1/9/2023
checkout_date = 1/16/2023

In [229]:
def user_input_features():
    def yes_no_conversion(input):
        if input == 'Yes':
            return 1
        else:
            return 0
    
    yes_no_conversion(superhost)
    yes_no_conversion(air_conditioning)
    yes_no_conversion(wifi)
    yes_no_conversion(tv)
    yes_no_conversion(kitchen)
    yes_no_conversion(washer)
    yes_no_conversion(dryer)
    yes_no_conversion(heating)
    
    conv_long = longitude
    conv_lat = latitude
    test_coords = (conv_long, conv_lat)
    
    biltmore = (35.54108101423884, -82.55210010496437) 
    downtown = (35.60405939066325, -82.54533225431588)
    
    checkin_data = {'host_is_superhost': superhost,
            'room_type': room_type,
            'accommodates': accommodates,
            'bedrooms': bedrooms,
            'beds': beds,
            'neighborhood': neighborhood,
            'bathrooms': bathrooms,
            'Air conditioning': air_conditioning,
            'Wifi': wifi,
            'TV': tv,
            'Kitchen': kitchen,
            'Washer': washer,
            'Dryer': dryer,
            'Heating': heating,
            'distance_to_biltmore': geodesic(test_coords, biltmore).miles,
            'distance_to_downtown': geodesic(test_coords, downtown).miles,
            'day_of_week': pd.to_datetime(checkin_date).dayofweek,
            'month': pd.to_datetime(checkin_date).month,
            'week': pd.to_datetime(checkin_date).week}

    checkin_features = pd.DataFrame(checkin_data, index = [0])
    
    
#     checkout_data = {'host_is_superhost': superhost,
#             'room_type': room_type,
#             'accommodates': accommodates,
#             'bedrooms': bedrooms,
#             'beds': beds,
#             'neighborhood': neighborhood,
#             'bathrooms': bathrooms,
#             'Air conditioning': air_conditioning,
#             'Wifi': wifi,
#             'TV': tv,
#             'Kitchen': kitchen,
#             'Washer': washer,
#             'Dryer': dryer,
#             'Heating': heating,
#             'coords': test_coords,
#             'distance_to_biltmore': geodesic(test_coords, biltmore).miles,
#             'distance_to_downtown': geodesic(test_coords, downtown).miles,
#             'day_of_week': pd.to_datetime(checkout_date).dayofweek,
#             'month': pd.to_datetime(checkout_date).month,
#             'week': pd.to_datetime(checkout_date).week}
    
#     checkout_features = pd.DataFrame(checkout_data)
  
    return checkin_features
# , checkout_features

In [230]:
user_input_features()


Unnamed: 0,host_is_superhost,room_type,accommodates,bedrooms,beds,neighborhood,bathrooms,Air conditioning,Wifi,TV,Kitchen,Washer,Dryer,Heating,distance_to_biltmore,distance_to_downtown,day_of_week,month,week
0,1,Entire home/apt,5,4,5,Asheville,3,1,1,1,1,1,1,1,9879.03978,9881.41147,3,1,1


In [231]:
checkin_features = user_input_features()
# checkin_features['coords'] = checkin_features['coords'].apply(lambda x: x.split(' '))
# checkin_features['coords'][0][0] = int(checkin_features['coords'][0][0]) 
# checkin_features['coords'][0][1] = int(checkin_features['coords'][0][1]) 


In [232]:
checkin_features.columns

Index(['host_is_superhost', 'room_type', 'accommodates', 'bedrooms', 'beds',
       'neighborhood', 'bathrooms', 'Air conditioning', 'Wifi', 'TV',
       'Kitchen', 'Washer', 'Dryer', 'Heating', 'distance_to_biltmore',
       'distance_to_downtown', 'day_of_week', 'month', 'week'],
      dtype='object')

In [233]:
checkin_price = float(streamlit_model3_results.predict(checkin_features))
# checkout_price = float(streamlit_model3_results.predict(checkout_features))
# average_price = (checkin_price + checkout_price) / 2
# total_stay_days = (pd.to_datetime(checkout_date) - pd.to_datetime(checkin_date)).days
checkin_price

213.77