##  Lazy Predict

## Final Project Submission

Please fill out:
* __Student name:__ Kristine Petrosyan
* __Student pace:__ part time
* __Scheduled project review date/time:__ TBD
* __Instructor name:__ Victor



> In this notebook we will use the 'Lazy Predict' algorithm to build baseline models with basic feature engineering. This study will examine the hotel booking dataset from Kaggle. The main goal is to build a lot of basic models without much code which helps to understand which models work better without any parameter tuning.

# Import Libraries

In [1]:
import numpy as np 
import pandas as pd 
import sys
import os 

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier


from sklearn.model_selection import StratifiedKFold

from sklearn.metrics import auc, classification_report,  confusion_matrix
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler
#from sklearn.preprocessing import Imputer

from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
sns.set(style="whitegrid")

import warnings
warnings.filterwarnings('ignore')

In [2]:
import datetime
start = datetime.datetime.now()

In [3]:
pd.set_option('display.max_columns', None)

# Load Dataset

In [5]:
data = pd.read_csv('../input/hotel-booking-demand/hotel_bookings.csv')
data.head()

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,children,babies,meal,country,market_segment,distribution_channel,is_repeated_guest,previous_cancellations,previous_bookings_not_canceled,reserved_room_type,assigned_room_type,booking_changes,deposit_type,agent,company,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date
0,Resort Hotel,0,342,2015,July,27,1,0,0,2,0.0,0,BB,PRT,Direct,Direct,0,0,0,C,C,3,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01
1,Resort Hotel,0,737,2015,July,27,1,0,0,2,0.0,0,BB,PRT,Direct,Direct,0,0,0,C,C,4,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01
2,Resort Hotel,0,7,2015,July,27,1,0,1,1,0.0,0,BB,GBR,Direct,Direct,0,0,0,A,C,0,No Deposit,,,0,Transient,75.0,0,0,Check-Out,2015-07-02
3,Resort Hotel,0,13,2015,July,27,1,0,1,1,0.0,0,BB,GBR,Corporate,Corporate,0,0,0,A,A,0,No Deposit,304.0,,0,Transient,75.0,0,0,Check-Out,2015-07-02
4,Resort Hotel,0,14,2015,July,27,1,0,2,2,0.0,0,BB,GBR,Online TA,TA/TO,0,0,0,A,A,0,No Deposit,240.0,,0,Transient,98.0,0,1,Check-Out,2015-07-03


In [6]:
data.shape

(119390, 32)

# EDA

In [7]:
data.columns

Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
       'arrival_date_month', 'arrival_date_week_number',
       'arrival_date_day_of_month', 'stays_in_weekend_nights',
       'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
       'country', 'market_segment', 'distribution_channel',
       'is_repeated_guest', 'previous_cancellations',
       'previous_bookings_not_canceled', 'reserved_room_type',
       'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
       'company', 'days_in_waiting_list', 'customer_type', 'adr',
       'required_car_parking_spaces', 'total_of_special_requests',
       'reservation_status', 'reservation_status_date'],
      dtype='object')

# Lazy Predict documentation
> Lazy Predict help build a lot of basic models without much code and helps understand which models works better without any parameter tuning.
- Free software: MIT license | __Documentation:__ https://lazypredict.readthedocs.io.

#### Documentation

> URL: https://lazypredict.readthedocs.io/en/latest/installation.html

#### Installation

In [8]:
!pip install lazypredict
!pip install catboost
!pip install xgboost
!pip install lightgbm
!pip install --upgrade scikit-learn

Collecting lazypredict
  Downloading lazypredict-0.2.7-py2.py3-none-any.whl (11 kB)
Installing collected packages: lazypredict
Successfully installed lazypredict-0.2.7
Requirement already up-to-date: scikit-learn in /opt/conda/lib/python3.7/site-packages (0.23.2)


#### Import Library

In [9]:
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.svm import SVC
enable_hist_gradient_boosting
import scipy
from sklearn.utils import deprecated
from sklearn.utils._fast_dict import IntFloatDict
from sklearn.utils.fixes import _astype_copy_false

In [10]:
import lazypredict
from lazypredict.Supervised import LazyClassifier   #LazyRegressor
from sklearn.model_selection import train_test_split

import sys

### Classification Example - Hotel Booking Cancellations

#### Drop Columns

In [11]:
y= data['is_canceled']
X = data.drop(['is_canceled', 'company', 'booking_changes', 'reservation_status','days_in_waiting_list'], axis=1)
display(y.head())
display(X.head())

0    0
1    0
2    0
3    0
4    0
Name: is_canceled, dtype: int64

Unnamed: 0,hotel,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,children,babies,meal,country,market_segment,distribution_channel,is_repeated_guest,previous_cancellations,previous_bookings_not_canceled,reserved_room_type,assigned_room_type,deposit_type,agent,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status_date
0,Resort Hotel,342,2015,July,27,1,0,0,2,0.0,0,BB,PRT,Direct,Direct,0,0,0,C,C,No Deposit,,Transient,0.0,0,0,2015-07-01
1,Resort Hotel,737,2015,July,27,1,0,0,2,0.0,0,BB,PRT,Direct,Direct,0,0,0,C,C,No Deposit,,Transient,0.0,0,0,2015-07-01
2,Resort Hotel,7,2015,July,27,1,0,1,1,0.0,0,BB,GBR,Direct,Direct,0,0,0,A,C,No Deposit,,Transient,75.0,0,0,2015-07-02
3,Resort Hotel,13,2015,July,27,1,0,1,1,0.0,0,BB,GBR,Corporate,Corporate,0,0,0,A,A,No Deposit,304.0,Transient,75.0,0,0,2015-07-02
4,Resort Hotel,14,2015,July,27,1,0,2,2,0.0,0,BB,GBR,Online TA,TA/TO,0,0,0,A,A,No Deposit,240.0,Transient,98.0,0,1,2015-07-03


#### Split train_test data

In [12]:
# Create train_test split
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=.5,random_state =123)

#### Run Lazy Predict - LazyClassifier()

In [None]:
# Execute LazyClassifier()
clf = LazyClassifier(verbose=0,ignore_warnings=True, custom_metric=None, predictions=True) 
models,predictions = clf.fit(X_train, X_test, y_train, y_test)
models

 40%|████      | 12/30 [1:30:12<7:41:45, 1539.17s/it]

In [None]:
end = datetime.datetime.now()
elapsed_time = end - start
print(f'Elapsed Time: {elapsed_time}')