# Random Forest Regression

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Read the CSV, then separate it positionally: every column except the last
# becomes the feature matrix X, and the last column becomes the target vector y.
dataset = pd.read_csv('insurance.csv')
X = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

## Encoding categorical data

In [3]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
# Rebuild the feature matrix from `dataset` instead of reusing the current X.
# The steps below overwrite X (in place, then by rebinding), so running this
# cell a second time on an already-encoded X would silently encode the wrong
# columns. Starting from the raw columns makes the cell safe to re-run.
X = dataset.iloc[:, :-1].to_numpy(copy=True)

# Integer-code the categorical columns at positions 1 and 4. Each column gets
# its own fitted encoder (kept in `label_encoders`) so neither mapping is lost;
# `le` still ends up bound to the encoder for column 4.
label_encoders = {}
for col in (1, 4):
    le = LabelEncoder()
    X[:, col] = le.fit_transform(X[:, col])
    label_encoders[col] = le

# One-hot encode column 5. The remaining columns are passed through unchanged
# and appear after the one-hot columns in the result.
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [5])], remainder='passthrough')
X = np.array(ct.fit_transform(X))
print(X)

[[0.0 0.0 0.0 ... 27.9 0 1]
 [0.0 0.0 1.0 ... 33.77 1 0]
 [0.0 0.0 1.0 ... 33.0 3 0]
 ...
 [0.0 0.0 1.0 ... 36.85 0 0]
 [0.0 0.0 0.0 ... 25.8 0 0]
 [0.0 1.0 0.0 ... 29.07 0 1]]


## Splitting the dataset into the Training set and Test set

In [4]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as the test set; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Training the Random Forest Regression model on the Training set

In [5]:
from sklearn.ensemble import RandomForestRegressor
# Build a forest of 10 trees with a fixed seed, then fit it on the training
# split only. The fit call stays as the last expression so the notebook still
# displays the fitted estimator.
regressor = RandomForestRegressor(
    n_estimators=10,
    random_state=0,
)
regressor.fit(X_train, y_train)

## Predicting the Test set results

In [6]:
# Predict the target for the test rows and print the predictions next to the
# true values: left column is the prediction, right column is y_test.
y_pred = regressor.predict(X_test)
# Show two decimal places when printing arrays (this setting is global to numpy).
np.set_printoptions(precision=2)
print(np.column_stack((y_pred, y_test)))

[[11812.3   9724.53]
 [ 8487.68  8547.69]
 [44973.46 45702.02]
 [13294.77 12950.07]
 [ 9306.01  9644.25]
 [ 9930.7   4500.34]
 [ 2031.53  2198.19]
 [10563.36 11436.74]
 [ 7210.22  7537.16]
 [ 8008.23  5425.02]
 [ 6598.43  6753.04]
 [15902.45 10493.95]
 [ 9720.64  7337.75]
 [ 6948.49  4185.1 ]
 [23600.31 18310.74]
 [13997.1  10702.64]
 [13509.14 12523.6 ]
 [ 8706.11  3490.55]
 [ 6593.29  6457.84]
 [34043.87 33475.82]
 [23612.29 23967.38]
 [12546.88 12643.38]
 [13588.75 23045.57]
 [27910.82 23065.42]
 [ 1558.35  1674.63]
 [11156.06  4667.61]
 [10384.21  3732.63]
 [ 7137.49  7682.67]
 [ 3779.35  3756.62]
 [10536.38  8413.46]
 [ 7442.53  8059.68]
 [48502.61 48970.25]
 [14026.01 12979.36]
 [10427.79 20630.28]
 [15464.59 14571.89]
 [ 3881.92  4137.52]
 [ 8362.    8347.16]
 [37406.27 51194.56]
 [40938.03 40003.33]
 [ 2814.59  1880.49]
 [ 9419.51  5458.05]
 [ 2941.16  2867.12]
 [20534.03 20149.32]
 [47337.51 47496.49]
 [37046.89 36149.48]
 [ 7664.52 26018.95]
 [13997.1  19749.38]
 [ 6738.33  6

## Evaluating the Model Performance

In [7]:
from sklearn.metrics import r2_score
# Coefficient of determination (R^2) of the predictions against the test targets.
# Left as a bare expression so the notebook displays the score.
r2_score(y_true=y_test, y_pred=y_pred)

0.8710634276101877