# Decision Tree Regression

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Load the CSV and split it by position: the final column is the target (y),
# everything to its left forms the feature matrix (X).
dataset = pd.read_csv('insurance.csv')
feature_part = dataset.iloc[:, :-1]
target_part = dataset.iloc[:, -1]
X, y = feature_part.values, target_part.values

## Encoding categorical data

In [3]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder

# Turn the categorical feature columns of X into numbers before modelling.
# Column positions refer to the feature matrix as loaded (before one-hot
# expansion); presumably 1 and 4 are two-level fields and 5 is multi-level --
# TODO confirm against the insurance.csv header.
#
# scikit-learn documents LabelEncoder for target values (y) only, and refitting
# one shared instance per column throws away the mapping learned for the
# previous column. OrdinalEncoder is the feature-side equivalent: one fit covers
# both columns and each column's categories stay available in `categories_`.
# dtype=int keeps the codes as integers, as LabelEncoder produced.
ordinal_columns = [1, 4]
ordinal_encoder = OrdinalEncoder(dtype=int)
X[:, ordinal_columns] = ordinal_encoder.fit_transform(X[:, ordinal_columns])

# One-hot encode column 5; the resulting indicator columns are placed first and
# the remaining columns are passed through unchanged after them.
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [5])], remainder='passthrough')
X = np.array(ct.fit_transform(X))
print(X)

[[0.0 0.0 0.0 ... 27.9 0 1]
 [0.0 0.0 1.0 ... 33.77 1 0]
 [0.0 0.0 1.0 ... 33.0 3 0]
 ...
 [0.0 0.0 1.0 ... 36.85 0 0]
 [0.0 0.0 0.0 ... 25.8 0 0]
 [0.0 1.0 0.0 ... 29.07 0 1]]


## Splitting the dataset into the Training set and Test set

In [4]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Training the Decision Tree Regression model on the Training set

In [5]:
from sklearn.tree import DecisionTreeRegressor
# Build a decision tree regressor with default hyperparameters (seed fixed at 0
# for repeatable fits) and train it on the training split only.
regressor = DecisionTreeRegressor(random_state=0)
regressor.fit(X=X_train, y=y_train)

## Predicting the Test set results

In [6]:
# Predict on the held-out rows and print predictions next to the true values:
# left column is the prediction, right column is the ground truth.
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)  # display-only: show two decimals
comparison = np.column_stack((y_pred, y_test))
print(comparison)

[[10797.34  9724.53]
 [ 8871.15  8547.69]
 [42983.46 45702.02]
 [13429.04 12950.07]
 [ 9264.8   9644.25]
 [21984.47  4500.34]
 [ 2196.47  2198.19]
 [10848.13 11436.74]
 [ 7151.09  7537.16]
 [ 4433.92  5425.02]
 [ 7228.22  6753.04]
 [ 8932.08 10493.95]
 [ 8823.28  7337.75]
 [ 4415.16  4185.1 ]
 [35147.53 18310.74]
 [10560.49 10702.64]
 [12323.94 12523.6 ]
 [24671.66  3490.55]
 [ 6455.86  6457.84]
 [33750.29 33475.82]
 [24667.42 23967.38]
 [12269.69 12643.38]
 [10355.64 23045.57]
 [27533.91 23065.42]
 [ 1391.53  1674.63]
 [18903.49  4667.61]
 [ 2680.95  3732.63]
 [ 7151.09  7682.67]
 [ 3645.09  3756.62]
 [ 8116.27  8413.46]
 [ 7151.09  8059.68]
 [47896.79 48970.25]
 [13393.76 12979.36]
 [10085.85 20630.28]
 [14283.46 14571.89]
 [ 3866.86  4137.52]
 [ 8978.19  8347.16]
 [38511.63 51194.56]
 [39983.43 40003.33]
 [ 2207.7   1880.49]
 [21984.47  5458.05]
 [ 3866.86  2867.12]
 [21659.93 20149.32]
 [49577.66 47496.49]
 [36219.41 36149.48]
 [ 3579.83 26018.95]
 [10560.49 19749.38]
 [ 6389.38  6

## Evaluating the Model Performance

In [7]:
from sklearn.metrics import r2_score
# Coefficient of determination (R^2) of the predictions on the test set.
r2_score(y_true=y_test, y_pred=y_pred)

0.7246164969024855