# <center><b>Predicting Restaurant Success in Bangalore ML</b></center>

In [1]:
# import libraries
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_validate
from sklearn.preprocessing import OneHotEncoder,RobustScaler
from category_encoders import BinaryEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
import joblib

In [2]:
# load data
df = pd.read_pickle("Bangalore_Rest_After_EDA.pkl")
df

Unnamed: 0,online_order,book_table,votes,location,approx_cost(for two people),listed_in(type),listed_in(city),cuisines_counts,rest_type_counts,Target
0,Yes,Yes,775,Banashankari,800.0,Buffet,Banashankari,3,1,1
1,Yes,No,787,Banashankari,800.0,Buffet,Banashankari,3,1,1
2,Yes,No,918,Banashankari,800.0,Buffet,Banashankari,3,2,1
3,No,No,88,Banashankari,300.0,Buffet,Banashankari,2,1,0
4,No,No,166,Basavanagudi,600.0,Buffet,Banashankari,2,1,1
...,...,...,...,...,...,...,...,...,...,...
40913,No,No,34,Whitefield,800.0,Pubs and bars,Whitefield,2,2,0
40914,No,No,81,Whitefield,800.0,Pubs and bars,Whitefield,4,2,0
40915,No,No,27,Whitefield,1500.0,Pubs and bars,Whitefield,1,1,0
40916,No,Yes,236,other,2500.0,Pubs and bars,Whitefield,1,1,1


In [3]:
df.columns

Index(['online_order', 'book_table', 'votes', 'location',
       'approx_cost(for two people)', 'listed_in(type)', 'listed_in(city)',
       'cuisines_counts', 'rest_type_counts', 'Target'],
      dtype='object')

In [4]:
# encoding categorical features using column transformer
Encoder = ColumnTransformer(transformers = [("OHE" , OneHotEncoder(sparse_output = False , drop = "first" ) , ["online_order" , "book_table" , "listed_in(type)"]) , ("BE" , BinaryEncoder() , ["location" , "listed_in(city)"] )] , remainder = "passthrough")

In [5]:
# splitting dataframe into feature and target variables
x = df.drop("Target" , axis = 1)
y = df["Target"]

In [6]:
# Creating a list of classification models for comparison
models = list()
models.append(("LR" , LogisticRegression()))
models.append(("CART" , DecisionTreeClassifier()))
models.append(("RF" , RandomForestClassifier()))
models.append(("KNN" , KNeighborsClassifier()))
models.append(("XG" , XGBClassifier()))

In [7]:
# comparing the performance of different machine learning models using cross-validation and pipeline
for model in models:
    steps = []
    steps.append(("Encoder" , Encoder))
    steps.append(("Scaler" , RobustScaler()))
    steps.append(model)
    pipeline = Pipeline(steps = steps)
    scores = cross_validate(pipeline , x , y , cv = 5 , scoring = "accuracy" , return_train_score = True)
    print(model[0])
    print("Train_accuracy =" , scores["train_score"].mean())
    print("Test_accuracy =" , scores["test_score"].mean())
    print("*" * 50)   

LR
Train_accuracy = 0.7782822189243335
Test_accuracy = 0.7695401524439301
**************************************************
CART
Train_accuracy = 0.9989063504782228
Test_accuracy = 0.8882164033892834
**************************************************
RF
Train_accuracy = 0.9989002408096723
Test_accuracy = 0.8809090150538591
**************************************************
KNN
Train_accuracy = 0.8595911988931073
Test_accuracy = 0.7891159180233106
**************************************************
XG
Train_accuracy = 0.9188438306173439
Test_accuracy = 0.8619439557655293
**************************************************


In [19]:
# define the hyperparameters to tune
# params = {
#      'XG__learning_rate': [0.1, 0.12, 0.15], 
#      'XG__n_estimators': [80, 90, 100], 
#      'XG__reg_alpha': [1.6, 1.8, 2.0]
#  }
params = {
    'XG__learning_rate': [0.2, 0.25],
    'XG__n_estimators': [150, 200],
    'XG__reg_alpha': [1.0, 1.5],
}

In [20]:
# pipeline for classification using xgbclassifier, feature encoding using encoder created before, scaling using robust scaler
steps = []
steps.append(("Encoder" , Encoder))
steps.append(("Scaler" , RobustScaler()))
steps.append(("XG" , XGBClassifier()))
pipeline = Pipeline(steps = steps)

In [21]:
# perform grid search cross-validation
grid_search = GridSearchCV(param_grid = params , estimator = pipeline , cv = 5 , scoring = "accuracy" , return_train_score = True , n_jobs = -1)

In [22]:
grid_search.fit(x,y)

In [25]:
print("Best hyperparameters: ", grid_search.best_params_)
print("Mean train score: ", grid_search.cv_results_["mean_train_score"].mean())
print("Mean test score: ", grid_search.cv_results_["mean_test_score"].mean())

Best hyperparameters:  {'XG__learning_rate': 0.25, 'XG__n_estimators': 200, 'XG__reg_alpha': 1.0}
Mean train score:  0.9355326499341621
Mean test score:  0.8725719423413034


In [26]:
grid_search.best_estimator_

In [27]:
final_model = grid_search.best_estimator_

In [28]:
joblib.dump(final_model , "best_model.pkl")
joblib.dump(x.columns , "input_features.pkl")

['input_features.pkl']

In [29]:
%%writefile streamlit_app.py
import streamlit as st
import pandas as pd 
import joblib
import sklearn
import xgboost
import category_encoders

Inputs = joblib.load("input_features.pkl")
Model = joblib.load("best_model.pkl")

def prediction(online_order , book_table , votes , location , approx_cost , listed_in_type , listed_in_city , cuisines_counts , rest_type_counts):
    test_df = pd.DataFrame(columns = Inputs)
    test_df.at[0 , "online_order"] = online_order
    test_df.at[0 , "book_table"] = book_table
    test_df.at[0,"votes"] = votes
    test_df.at[0 , "location"] = location
    test_df.at[0 , "rest_type_counts"] = rest_type_counts
    test_df.at[0 , "approx_cost(for two people)"] = approx_cost
    test_df.at[0 , "cuisines_counts"] = cuisines_counts
    test_df.at[0 , "listed_in(type)"] = listed_in_type
    test_df.at[0 , "listed_in(city)"] = listed_in_city
    st.dataframe(test_df)
    result = Model.predict(test_df)[0]
    return result
    
def main():
    st.title("Bangalore Restaurant Success Predictor")
    online_order = st.selectbox("Online Ordering" , ['Yes', 'No'])
    book_table = st.selectbox("Table Booking" , ['Yes', 'No'])
    votes = st.slider("votes" , min_value= 0 , max_value=16832 , value=0,step=1)
    location = st.selectbox("Location" ,['Banashankari', 'Basavanagudi', 'other', 'Jayanagar', 'JP Nagar',
       'Bannerghatta Road', 'BTM', 'Electronic City', 'Shanti Nagar',
       'Koramangala 5th Block', 'Richmond Road', 'HSR',
       'Koramangala 7th Block', 'Bellandur', 'Sarjapur Road',
       'Marathahalli', 'Whitefield', 'Old Airport Road', 'Indiranagar',
       'Koramangala 1st Block', 'Frazer Town', 'MG Road', 'Brigade Road',
       'Lavelle Road', 'Church Street', 'Ulsoor', 'Residency Road',
       'Shivajinagar', 'St. Marks Road', 'Cunningham Road',
       'Commercial Street', 'Vasanth Nagar', 'Domlur',
       'Koramangala 8th Block', 'Ejipura', 'Jeevan Bhima Nagar',
       'Kammanahalli', 'Koramangala 6th Block', 'Brookefield',
       'Koramangala 4th Block', 'Banaswadi', 'Kalyan Nagar',
       'Malleshwaram', 'Rajajinagar', 'New BEL Road'] )
    rest_type_counts = st.selectbox("Number of Restaurant Type " , [1,2])
    approx_cost = st.slider( "Approximate Cost for Two Persons" , min_value = 40 , max_value = 6000 , value = 0 , step = 1)
    cuisines_counts = st.selectbox("Number of Cuisines" , [3 , 2 , 1 , 4 , 5 , 8 , 7 , 6])
    listed_in_type = st.selectbox("Type" , ['Buffet', 'Cafes', 'Delivery', 'Desserts', 'Dine-out',
       'Drinks & nightlife', 'Pubs and bars'])
    listed_in_city = st.selectbox("City" , ['Banashankari', 'Bannerghatta Road', 'Basavanagudi', 'Bellandur',
       'Brigade Road', 'Brookefield', 'BTM', 'Church Street',
       'Electronic City', 'Frazer Town', 'HSR', 'Indiranagar',
       'Jayanagar', 'JP Nagar', 'Kalyan Nagar', 'Kammanahalli',
       'Koramangala 4th Block', 'Koramangala 5th Block',
       'Koramangala 6th Block', 'Koramangala 7th Block', 'Lavelle Road',
       'Malleshwaram', 'Marathahalli', 'MG Road', 'New BEL Road',
       'Old Airport Road', 'Rajajinagar', 'Residency Road',
       'Sarjapur Road', 'Whitefield'])
    
    if st.button("Predict"):
        results = prediction(online_order , book_table , votes , location , approx_cost , listed_in_type , listed_in_city, cuisines_counts , rest_type_counts)
        label = ["Unsuccessful" , "Successful"]
        st.text(f"The restaurant will be {label[results]}.")
        
if __name__ == '__main__':
    main()    
    

Overwriting streamlit_app.py
