In [70]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from mlxtend.plotting import plot_decision_regions

from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

In [71]:
# Read the level/salary dataset (path is relative to the working directory)
# and preview its first three rows.
salary_data = pd.read_csv(filepath_or_buffer='level_salary.csv')
salary_data.head(n=3)

Unnamed: 0,Level,Salary
0,1,14902.0979
1,2,78759.90676
2,3,94960.37296


In [72]:
# Selecting 'Level' with a list keeps the features as a 2-D DataFrame, while the
# 'Salary' target is taken as a 1-D Series.
x, y = salary_data[['Level']], salary_data['Salary']

In [73]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

### 1. Using Decision Tree

In [74]:
# Baseline: a decision tree with default hyperparameters (fit() returns the estimator).
dt = DecisionTreeRegressor().fit(x_train, y_train)

# (test score, train score), expressed as percentages.
100 * dt.score(x_test, y_test), 100 * dt.score(x_train, y_train)

(98.97886124600971, 100.0)

#### 1.1 Hyperparameter Tuning using GridSearchCV

In [75]:
# Search space for the tree: 4 criteria x 2 splitters x 18 depths (2 through 19).
df = dict(
    criterion=['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
    splitter=['best', 'random'],
    max_depth=list(range(2, 20)),
)

In [76]:
# Exhaustively evaluate every combination in the parameter grid `df` with
# cross-validation. The base estimator gets a fixed seed because the grid includes
# splitter='random'; without it those candidates split differently on every run,
# so `best_params_` / `best_score_` would not be reproducible.
gd = GridSearchCV(DecisionTreeRegressor(random_state=42), param_grid=df)
gd.fit(x_train, y_train)

In [77]:
# Parameter combination selected by the grid search.
gd.best_params_

{'criterion': 'squared_error', 'max_depth': 7, 'splitter': 'best'}

In [78]:
# Build the tree from the parameters the grid search actually selected rather than a
# hand-copied version of them, so this cell cannot go stale when the search is re-run.
# The fixed seed keeps the fit reproducible even if splitter='random' is selected.
dt1 = DecisionTreeRegressor(**gd.best_params_, random_state=42)
dt1.fit(x_train, y_train)

# (test score, train score), expressed as percentages.
dt1.score(x_test, y_test)*100, dt1.score(x_train, y_train)*100

(98.97886124600971, 99.9999973649491)

#### Even though the test score is unchanged here, hyperparameter tuning helps when the score is low or the model is under- or over-fitting.

#### 1.2 Hyperparameter Tuning using RandomizedSearchCV

In [79]:
# Candidate values for the randomized search (same space as the grid search above):
# split criterion, splitter strategy, and maximum tree depth from 2 through 19.
df = {
    'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
    'splitter': ['best', 'random'],
    'max_depth': [*range(2, 20)],
}

In [80]:
# Sample 20 parameter combinations from `df` and cross-validate each one.
# Two seeds are fixed so the result is reproducible across runs:
#   - the search's random_state controls WHICH 20 combinations are sampled;
#   - the estimator's random_state pins the splits of splitter='random' candidates.
rd = RandomizedSearchCV(
    DecisionTreeRegressor(random_state=42),
    param_distributions=df,
    n_iter=20,
    random_state=42,
)
rd.fit(x_train, y_train)


In [81]:
# Parameter combination selected by the randomized search.
rd.best_params_

{'splitter': 'best', 'max_depth': 14, 'criterion': 'friedman_mse'}

In [83]:
# Build the tree from the parameters the randomized search actually selected rather
# than a hand-copied version of them, so this cell stays in sync when the search is
# re-run. The fixed seed keeps the fit reproducible even if splitter='random' wins.
dt2 = DecisionTreeRegressor(**rd.best_params_, random_state=42)
dt2.fit(x_train, y_train)

# (test score, train score), expressed as percentages.
dt2.score(x_test, y_test)*100, dt2.score(x_train, y_train)*100

(98.97886124600971, 100.0)

**Note:** Grid search evaluates all 4 × 2 × 18 = 144 parameter combinations, while randomized search samples only 20 of them (`n_iter=20`) in this case.

In [84]:
# Side-by-side comparison of the best cross-validated score found by each search
# (grid search first, randomized search second). Per the scikit-learn docs,
# `best_score_` is the mean cross-validated score of the best parameter setting.
gd.best_score_, rd.best_score_

(0.9869200914567451, 0.9869200838109358)