In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

warnings.filterwarnings("ignore")

sns.set()

In [4]:
final_data = pd.read_csv('model_data.csv')

pd.set_option('display.max_columns', None)

In [5]:
fd = final_data 

In [6]:
fd.head()

Unnamed: 0,Total_Stops,Price,Journey_date,Journey_month,Departure_hour,Departure_minute,Arrival_hour,Arrival_minute,duration_hour,duration_minute,Weekend,Night,Day_of_Week,Airline_Air India,Airline_GoAir,Airline_IndiGo,Airline_Jet Airways,Airline_Multiple carriers,Airline_SpiceJet,Airline_Trujet,Airline_Vistara,Source_Chennai,Source_Delhi,Source_Kolkata,Source_Mumbai,Destination_Cochin,Destination_Delhi,Destination_Hyderabad,Destination_Kolkata
0,0,3897,24,3,22,20,1,10,2,50,1,1,7,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0
1,2,7662,1,5,5,50,13,15,7,25,0,0,3,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
2,2,13882,9,6,9,25,4,25,19,0,1,0,7,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0
3,1,6218,12,5,18,5,23,30,5,25,1,0,7,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0
4,1,13302,1,3,16,50,21,35,4,45,0,0,5,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0


In [7]:
fd.shape

(10462, 29)

In [8]:
X= fd.drop(columns=['Price'],axis=1)

In [9]:
X.head()

Unnamed: 0,Total_Stops,Journey_date,Journey_month,Departure_hour,Departure_minute,Arrival_hour,Arrival_minute,duration_hour,duration_minute,Weekend,Night,Day_of_Week,Airline_Air India,Airline_GoAir,Airline_IndiGo,Airline_Jet Airways,Airline_Multiple carriers,Airline_SpiceJet,Airline_Trujet,Airline_Vistara,Source_Chennai,Source_Delhi,Source_Kolkata,Source_Mumbai,Destination_Cochin,Destination_Delhi,Destination_Hyderabad,Destination_Kolkata
0,0,24,3,22,20,1,10,2,50,1,1,7,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0
1,2,1,5,5,50,13,15,7,25,0,0,3,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
2,2,9,6,9,25,4,25,19,0,1,0,7,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0
3,1,12,5,18,5,23,30,5,25,1,0,7,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0
4,1,1,3,16,50,21,35,4,45,0,0,5,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0


In [10]:
y=fd['Price']

In [11]:
y.head()

0     3897
1     7662
2    13882
3     6218
4    13302
Name: Price, dtype: int64

In [12]:
y.shape


(10462,)

In [13]:
X.shape

(10462, 28)

## Train-Test Split

In [14]:
# now we do the train-test split 
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.2, random_state=42)


## Importing Model For Training 

In [15]:
# Now we import all require model

from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor, AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LogisticRegression,LinearRegression, Ridge, Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from sklearn.metrics import (r2_score,mean_absolute_error, mean_squared_error)

In [16]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((8369, 28), (2093, 28), (8369,), (2093,))

## Evaluation Metrics

In [17]:
# creating Evaluate Function
def evaluate_model(true,predicted):
    mae = mean_absolute_error(true,predicted)
    mse = mean_squared_error(true,predicted)
    rmse = np.sqrt(mean_squared_error(true,predicted))
    r2_square = r2_score(true, predicted)
    return mae,rmse, r2_square

## Model Training With Evaluation Metric

In [18]:
# Now we make the dict for model in key-value pair
models={
    "Linear Regression":LinearRegression(),
    "Ridge":Ridge(),
    "Lasso":Lasso(),
    "Logistic Regression":LogisticRegression(),
    "Decision Tree":DecisionTreeRegressor(),
    "Random Forest":RandomForestRegressor(),
    "Gradient Boost":GradientBoostingRegressor(),
    "Ada Boost":AdaBoostRegressor(),
    "K Neighbors":KNeighborsRegressor(),
    "CatBoost Regressor":CatBoostRegressor(),
    "XGBoost Regressor":XGBRegressor(),
    "Support Vector Regressor":SVR()
}
model_list = []
r2_list = []

for i in range(len(list(models))):
    model = list(models.values())[i]
    model.fit(X_train, y_train) # training the model
    
    # Making predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)
    
    
    # Evaluating Train and Test Dataset
    model_train_mae , model_train_rmse , model_train_r2 = evaluate_model(y_train, y_train_pred)
    model_test_mae , model_test_rmse , model_test_r2 = evaluate_model(y_test, y_test_pred)
    
    
    print(list(models.keys())[i])
    model_list.append(list(models.keys())[i])
    
    print('Model Performance for Training set')
    print('- Root Mean Squared Error: {: .4f}'.format(model_train_rmse))
    print('- Mean Absolute Error:{: .4f}'.format(model_train_mae)) 
    print('- R2 Score: {: .4f}'.format(model_train_r2))
   
    print('-------------------------------------------------------------')
    
    print('Model Performance for Test set')
    print('- Root Mean Squared Error: {: .4f}'.format(model_test_rmse))
    print('- Mean Absolute Error:{: .4f}'.format(model_test_mae)) 
    print('- R2 Score: {: .4f}'.format(model_test_r2))
    r2_list.append(model_test_r2)
    
    print('='*45)
    print('\n')

Linear Regression
Model Performance for Training set
- Root Mean Squared Error:  3060.6216
- Mean Absolute Error: 2004.2007
- R2 Score:  0.5647
-------------------------------------------------------------
Model Performance for Test set
- Root Mean Squared Error:  2975.0599
- Mean Absolute Error: 2028.8404
- R2 Score:  0.5755


Ridge
Model Performance for Training set
- Root Mean Squared Error:  3060.6911
- Mean Absolute Error: 2004.2456
- R2 Score:  0.5647
-------------------------------------------------------------
Model Performance for Test set
- Root Mean Squared Error:  2974.8627
- Mean Absolute Error: 2028.5756
- R2 Score:  0.5756


Lasso
Model Performance for Training set
- Root Mean Squared Error:  3060.9410
- Mean Absolute Error: 2003.6947
- R2 Score:  0.5646
-------------------------------------------------------------
Model Performance for Test set
- Root Mean Squared Error:  2975.0868
- Mean Absolute Error: 2028.1073
- R2 Score:  0.5755


Logistic Regression
Model Performa

In [19]:
# Now we see the score of prediction model wise 
pd.DataFrame(list(zip(model_list, r2_list)),columns=['Model Name', 'R2_Score']).sort_values(by=['R2_Score'],ascending=False)

Unnamed: 0,Model Name,R2_Score
9,CatBoost Regressor,0.831231
10,XGBoost Regressor,0.811277
5,Random Forest,0.795619
6,Gradient Boost,0.767266
4,Decision Tree,0.660332
1,Ridge,0.575554
0,Linear Regression,0.575498
2,Lasso,0.57549
8,K Neighbors,0.547318
3,Logistic Regression,0.220905


#### Observation:
1) We see that this three models giving good accuracy with
   * CatBoost Regressor = 83 %
   * XGBoost Regressor = 81 %
   * Random Forest = 79 % 
   .
2) So we can give more attention on this three models for better model performance.

## Cross Validation Before HyperParameter Tunning

In [20]:
from sklearn.model_selection import cross_val_score


In [21]:

models={
    "Linear Regression":LinearRegression(),
    "Ridge":Ridge(),
    "Lasso":Lasso(),
    "Logistic Regression":LogisticRegression(),
    "Decision Tree":DecisionTreeRegressor(),
    "Random Forest":RandomForestRegressor(),
    "Gradient Boost":GradientBoostingRegressor(),
    "Ada Boost":AdaBoostRegressor(),
    "K Neighbors":KNeighborsRegressor(),
    "CatBoost Regressor":CatBoostRegressor(),
    "XGBoost Regressor":XGBRegressor(),
    "Support Vector Regressor":SVR()
}            

model_list = [DecisionTreeRegressor(),RandomForestRegressor(),GradientBoostingRegressor(),
              AdaBoostRegressor(),CatBoostRegressor(),XGBRegressor()
             ]


In [22]:
def compare_models_cross_validation():
    for models in model_list:
        cv_score = cross_val_score(models,X,y, cv=15)
        
        mean_accuracy = sum(cv_score)/len(cv_score)
        mean_accuracy = mean_accuracy*100
        mean_accuracy = round(mean_accuracy,2)
        
        print('Cross validation for ', models,'= ', cv_score)
        print('Accuracy of the ',models,mean_accuracy)
        print('='*45)
    

In [23]:
compare_models_cross_validation()

Cross validation for  DecisionTreeRegressor() =  [0.58593762 0.73453783 0.52263394 0.59014502 0.62729655 0.6622948
 0.70515623 0.71988922 0.75524963 0.57633848 0.74800327 0.67435779
 0.71349074 0.70321021 0.64637433]
Accuracy of the  DecisionTreeRegressor() 66.43
Cross validation for  RandomForestRegressor() =  [0.80461395 0.84467007 0.79989878 0.74785303 0.79257811 0.79435367
 0.80659358 0.76778602 0.84035411 0.77272045 0.84270525 0.80189175
 0.79683938 0.78569783 0.69557253]
Accuracy of the  RandomForestRegressor() 79.29
Cross validation for  GradientBoostingRegressor() =  [0.73180066 0.804372   0.71402337 0.69375361 0.6820369  0.76750056
 0.79676768 0.71028341 0.80346763 0.74795254 0.76533789 0.76320744
 0.74737049 0.71081405 0.69471004]
Accuracy of the  GradientBoostingRegressor() 74.22
Cross validation for  AdaBoostRegressor() =  [0.27790957 0.23639407 0.33682144 0.37974485 0.60569205 0.24061786
 0.24740802 0.41277141 0.44218482 0.31590951 0.38689402 0.18074892
 0.29106616 0.21985

#### Observation:
1) After Cross validation we see that there is not much improvement in accuracy.
2) Although we have done this Cross validation without Hyperparameter tunning 
3) After Cross Validation XGBRegressor and CatBoostRegressor are giving good R2 score that means this 
   Algorithm work well on unseen data 

## HyperParameter Tunning

In [24]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

### Hypreparameter tunning for CatBoostRegressor 

In [25]:
# we making dictionary for parameter 
param_grid = {
    'depth': [6,8,10],
    'iterations':[100, 200, 300],
    'learning_rate': [0.01,0.05, 0.1],
    'l2_leaf_reg': [1,3,5]
    
}

In [26]:
catboost = CatBoostRegressor()

#### First we use RandomizedSearchCV and see what is the hyperparameter we get for model

In [27]:
random_grid_search = RandomizedSearchCV(catboost, param_grid, cv= 5 , scoring = 'neg_mean_squared_error',
                           random_state=42, n_jobs =-1)

In [28]:
random_grid_search.fit(X_train, y_train)

0:	learn: 4507.4773850	total: 5.86ms	remaining: 1.17s
1:	learn: 4371.6733826	total: 20.3ms	remaining: 2.01s
2:	learn: 4239.2622556	total: 34.2ms	remaining: 2.25s
3:	learn: 4111.7950043	total: 49.2ms	remaining: 2.41s
4:	learn: 3995.1920031	total: 62.4ms	remaining: 2.43s
5:	learn: 3895.0132426	total: 69.6ms	remaining: 2.25s
6:	learn: 3798.6589567	total: 83.4ms	remaining: 2.3s
7:	learn: 3701.2469365	total: 96.7ms	remaining: 2.32s
8:	learn: 3613.4747164	total: 108ms	remaining: 2.3s
9:	learn: 3528.9809457	total: 121ms	remaining: 2.3s
10:	learn: 3450.0048151	total: 134ms	remaining: 2.3s
11:	learn: 3374.6406918	total: 146ms	remaining: 2.29s
12:	learn: 3298.1262896	total: 159ms	remaining: 2.29s
13:	learn: 3237.6440190	total: 165ms	remaining: 2.19s
14:	learn: 3175.3787472	total: 176ms	remaining: 2.17s
15:	learn: 3121.8710708	total: 189ms	remaining: 2.17s
16:	learn: 3068.9918018	total: 203ms	remaining: 2.19s
17:	learn: 3017.3899100	total: 218ms	remaining: 2.2s
18:	learn: 2969.2385273	total: 232m

In [29]:
best_param = random_grid_search.best_params_

In [30]:
best_param

{'learning_rate': 0.05, 'l2_leaf_reg': 5, 'iterations': 200, 'depth': 10}

In [31]:
catboost = CatBoostRegressor(iterations=200,learning_rate= 0.05,depth= 10,l2_leaf_reg= 5)

In [32]:
catboost.fit(X_train, y_train)

0:	learn: 4507.4773850	total: 4.93ms	remaining: 982ms
1:	learn: 4371.6733826	total: 21.2ms	remaining: 2.1s
2:	learn: 4239.2622556	total: 34.1ms	remaining: 2.24s
3:	learn: 4111.7950043	total: 47.2ms	remaining: 2.31s
4:	learn: 3995.1920031	total: 60.3ms	remaining: 2.35s
5:	learn: 3895.0132426	total: 67.1ms	remaining: 2.17s
6:	learn: 3798.6589567	total: 80ms	remaining: 2.21s
7:	learn: 3701.2469365	total: 93.8ms	remaining: 2.25s
8:	learn: 3613.4747164	total: 109ms	remaining: 2.31s
9:	learn: 3528.9809457	total: 123ms	remaining: 2.33s
10:	learn: 3450.0048151	total: 136ms	remaining: 2.34s
11:	learn: 3374.6406918	total: 149ms	remaining: 2.33s
12:	learn: 3298.1262896	total: 161ms	remaining: 2.32s
13:	learn: 3237.6440190	total: 168ms	remaining: 2.23s
14:	learn: 3175.3787472	total: 180ms	remaining: 2.22s
15:	learn: 3121.8710708	total: 192ms	remaining: 2.21s
16:	learn: 3068.9918018	total: 204ms	remaining: 2.2s
17:	learn: 3017.3899100	total: 218ms	remaining: 2.21s
18:	learn: 2969.2385273	total: 232

<catboost.core.CatBoostRegressor at 0x11d905b54d0>

In [33]:
y_pred = catboost.predict(X_test)

In [34]:
from sklearn.metrics import accuracy_score

In [35]:
r2 = r2_score(y_test, y_pred)

In [36]:
print('R-squared score:',r2)

R-squared score: 0.8246072596676103


In [37]:
y_pred

array([15304.2777165 ,  3990.11533549,  4177.87586552, ...,
       11379.57092514,  9682.57520506, 11095.93102818])

In [38]:
param_grid = {
    'depth': [6,8,10],
    'iterations':[100, 200, 300],
    'learning_rate': [0.01,0.05, 0.1],
    'l2_leaf_reg': [1,3,5]
    
}

In [39]:
catboost = CatBoostRegressor()

#### Now we use GridSearchCV for best hyperparameter for model

In [40]:
grid_search = GridSearchCV(catboost, param_grid, cv= 5 , scoring = 'neg_mean_squared_error',
                           n_jobs =-1)

In [41]:
grid_search.fit(X_train, y_train)

0:	learn: 4504.2496848	total: 5.13ms	remaining: 1.53s
1:	learn: 4360.6372807	total: 17.6ms	remaining: 2.62s
2:	learn: 4218.7371993	total: 29.3ms	remaining: 2.9s
3:	learn: 4087.3958893	total: 41.2ms	remaining: 3.05s
4:	learn: 3960.6254675	total: 53.6ms	remaining: 3.16s
5:	learn: 3857.2549262	total: 60.2ms	remaining: 2.95s
6:	learn: 3743.1182206	total: 72.2ms	remaining: 3.02s
7:	learn: 3637.1755412	total: 84.3ms	remaining: 3.08s
8:	learn: 3535.8342115	total: 96.7ms	remaining: 3.13s
9:	learn: 3444.2335658	total: 109ms	remaining: 3.15s
10:	learn: 3369.5707809	total: 116ms	remaining: 3.04s
11:	learn: 3290.6088440	total: 128ms	remaining: 3.07s
12:	learn: 3214.8960246	total: 140ms	remaining: 3.1s
13:	learn: 3140.2284042	total: 152ms	remaining: 3.12s
14:	learn: 3067.2328295	total: 164ms	remaining: 3.12s
15:	learn: 3004.6753090	total: 176ms	remaining: 3.12s
16:	learn: 2954.9660933	total: 188ms	remaining: 3.13s
17:	learn: 2897.4988787	total: 203ms	remaining: 3.19s
18:	learn: 2841.5998997	total: 

In [42]:
best_param = random_grid_search.best_params_

In [43]:
best_param

{'learning_rate': 0.05, 'l2_leaf_reg': 5, 'iterations': 200, 'depth': 10}

#### Conclusion : 
1) After using both searching technique I find that both giving the same best hyperparameter
2) But the only difference is that GridSearchCV is more time consuming as compare to                      RandomizedSearchCV

### Hyperparameter Tunning for Gradient_boosting

In [44]:
gradient_boosting_params = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'subsample': [0.8, 0.9, 1.0],
    'loss': ['ls', 'lad', 'huber', 'quantile'],
    'max_features': ['auto', 'sqrt', 'log2']
}

In [45]:
gradient_boosting = GradientBoostingRegressor()

In [46]:
random_grid_search = RandomizedSearchCV(gradient_boosting, gradient_boosting_params, cv= 5 , scoring = 'neg_mean_squared_error',
                           random_state=42, n_jobs =-1)

In [47]:
random_grid_search.fit(X_train, y_train)

In [48]:
best_param = random_grid_search.best_params_

In [49]:
best_param

{'subsample': 0.9,
 'n_estimators': 200,
 'max_features': 'log2',
 'max_depth': 7,
 'loss': 'huber',
 'learning_rate': 0.2}

In [50]:
gradient_boosting = GradientBoostingRegressor(loss= 'huber', learning_rate = 0.2, n_estimators= 200, subsample= 0.9, 
                                              max_depth= 7, max_features = 'log2')

In [51]:
gradient_boosting.fit(X_train, y_train)

In [52]:
y_pred = gradient_boosting.predict(X_test)

In [53]:
r2 = r2_score(y_test, y_pred)

In [54]:
print('R-squared score:',r2)

R-squared score: 0.807111857025378


### Hyperparameter Tunning for XGBoostRegressor

In [55]:
xgboost_params = {
    'learning_rate': [0.01, 0.1, 0.2],
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 5, 7],
    'min_child_weight': [1, 5, 10],
    'gamma': [0, 0.1, 0.2],
    'subsample': [0.8, 0.9, 1.0],
    'colsample_bytree': [0.8, 0.9, 1.0],
}

In [56]:
xgboost = XGBRegressor()

In [57]:
random_grid_search = RandomizedSearchCV(xgboost, xgboost_params, cv= 10 , scoring = 'neg_mean_squared_error',
                           verbose=2)

In [58]:
random_grid_search.fit(X_train, y_train)

Fitting 10 folds for each of 10 candidates, totalling 100 fits
[CV] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.2, max_depth=3, min_child_weight=5, n_estimators=200, subsample=1.0; total time=   0.3s
[CV] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.2, max_depth=3, min_child_weight=5, n_estimators=200, subsample=1.0; total time=   0.3s
[CV] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.2, max_depth=3, min_child_weight=5, n_estimators=200, subsample=1.0; total time=   0.3s
[CV] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.2, max_depth=3, min_child_weight=5, n_estimators=200, subsample=1.0; total time=   0.3s
[CV] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.2, max_depth=3, min_child_weight=5, n_estimators=200, subsample=1.0; total time=   0.3s
[CV] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.2, max_depth=3, min_child_weight=5, n_estimators=200, subsample=1.0; total time=   0.3s
[CV] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.2, 

In [59]:
best_param = random_grid_search.best_params_

In [60]:
best_param

{'subsample': 0.8,
 'n_estimators': 200,
 'min_child_weight': 1,
 'max_depth': 5,
 'learning_rate': 0.2,
 'gamma': 0.1,
 'colsample_bytree': 0.8}

In [61]:
xgboost = XGBRegressor(n_estimators= 100,max_depth= 7,learning_rate = 0.2,gamma=0.2, min_child_weight= 5,subsample= 1.0,
                       colsample_bytree= 0.9)

In [62]:
xgboost.fit(X_train, y_train)

In [63]:
y_pred = gradient_boosting.predict(X_test)

In [64]:
r2 = r2_score(y_test, y_pred)

In [65]:
print('R-squared score:',r2)

R-squared score: 0.807111857025378


###  Hyperparameter Tunning for RandomForest

In [66]:

random_forest_params = {
    'criterion':['squared_error','friedman_mse','absolute_error','poisson'],
    'n_estimators': [10, 50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['auto', 'sqrt', 'log2'],
    'bootstrap': [True, False]
}

In [67]:
rf_regressor = RandomForestRegressor()

In [68]:
random_search = RandomizedSearchCV(estimator=rf_regressor, param_distributions=random_forest_params, cv= 10 , 
                                        scoring = 'neg_mean_squared_error', random_state=42, n_jobs =-1,verbose=2)

In [69]:
random_search.fit(X_train,y_train)

Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [70]:
best_param = random_search.best_params_

In [71]:
best_param

{'n_estimators': 10,
 'min_samples_split': 10,
 'min_samples_leaf': 4,
 'max_features': 'sqrt',
 'max_depth': None,
 'criterion': 'poisson',
 'bootstrap': False}

In [72]:
rf_regressor = RandomForestRegressor(n_estimators= 10,criterion= 'poisson', max_depth= None,
                                     min_samples_split=10,min_samples_leaf=4,max_features='sqrt',
                                     bootstrap=False)

In [73]:
rf_regressor.fit(X_train, y_train)

In [74]:
y_pred = gradient_boosting.predict(X_test)

In [75]:
r2 = r2_score(y_test, y_pred)

In [76]:
print('R-squared score:',r2)

R-squared score: 0.807111857025378


### Hyperparameter For Decision Tree

In [82]:
# Define the parameter grid for DecisionTreeRegressor
dtree_param_grid = {
    'criterion': ['mse', 'friedman_mse', 'mae', 'poisson'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['auto', 'sqrt','log2']
}

Decisiontree = DecisionTreeRegressor()

random_grid_search = GridSearchCV(Decisiontree, dtree_param_grid, cv= 5 ,
                                        scoring = 'neg_mean_squared_error')
                           


random_grid_search.fit(X_train, y_train)

best_param = random_grid_search.best_params_

best_param

{'criterion': 'friedman_mse',
 'max_depth': 30,
 'max_features': 'sqrt',
 'min_samples_leaf': 4,
 'min_samples_split': 10}

In [83]:
Decisiontree = DecisionTreeRegressor(criterion= 'friedman_mse', max_depth= 30,
                                     min_samples_split=10,min_samples_leaf=4,max_features='sqrt')
                                     

In [84]:
y_pred = gradient_boosting.predict(X_test)

In [85]:
r2 = r2_score(y_test, y_pred)

In [86]:
print('R-squared score:',r2)

R-squared score: 0.807111857025378
