# Multiple Linear Regression

## Importing the libraries

In [17]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [18]:
# Read the insurance data. Every column except the last forms the feature
# matrix X; the last column is the target vector y.
dataset = pd.read_csv('insurance.csv')
X = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

## Encoding categorical data

In [19]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# Rebuild the raw feature matrix from `dataset` before encoding. The steps
# below overwrite columns of X and then rebind X to a wider, reordered array,
# so encoding "whatever X currently holds" would silently encode the wrong
# columns if this cell were ever executed a second time. Starting from the
# raw frame makes the cell safe to re-run; copy=True guarantees the in-place
# writes below never touch `dataset` itself.
X = dataset.iloc[:, :-1].to_numpy(copy=True)

# Integer-code the categorical columns at positions 1 and 4 in place. The same
# encoder object is refit for each column, so afterwards `le` only remembers
# the classes learned from column 4.
# NOTE(review): scikit-learn documents LabelEncoder as an encoder for target
# values (y); OrdinalEncoder is the feature-side equivalent -- consider
# switching if `le` is not needed elsewhere.
le = LabelEncoder()
X[:, 1] = le.fit_transform(X[:, 1])
X[:, 4] = le.fit_transform(X[:, 4])

# One-hot encode column 5. ColumnTransformer places the encoded columns first
# and appends the untouched ("passthrough") columns after them, which is why
# the printed rows start with the 0.0/1.0 indicator values.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [5])],
    remainder='passthrough',
)
X = np.array(ct.fit_transform(X))
print(X)

[[0.0 0.0 0.0 ... 27.9 0 1]
 [0.0 0.0 1.0 ... 33.77 1 0]
 [0.0 0.0 1.0 ... 33.0 3 0]
 ...
 [0.0 0.0 1.0 ... 36.85 0 0]
 [0.0 0.0 0.0 ... 25.8 0 0]
 [0.0 1.0 0.0 ... 29.07 0 1]]


## Splitting the dataset into the Training set and Test set

In [20]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set. The fixed random_state keeps the
# shuffle, and therefore the split, identical on every run.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=0, test_size=0.2)

## Training the Multiple Linear Regression model on the Training set

In [21]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression with default settings on the training split.
# The fit call stays as the last expression so the cell still displays
# the fitted estimator.
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)

## Predicting the Test set results

In [22]:
# Predict the target for every held-out row.
y_pred = regressor.predict(X_test)

# Show two decimals in NumPy array output. This is a global setting, so it
# also applies to arrays printed by any later cell.
np.set_printoptions(precision=2)

# Side-by-side comparison: left column is the prediction, right column is
# the actual target value.
print(np.column_stack((y_pred, y_test)))

[[1.12e+04 9.72e+03]
 [9.49e+03 8.55e+03]
 [3.82e+04 4.57e+04]
 [1.63e+04 1.30e+04]
 [6.91e+03 9.64e+03]
 [3.96e+03 4.50e+03]
 [1.58e+03 2.20e+03]
 [1.44e+04 1.14e+04]
 [9.01e+03 7.54e+03]
 [7.51e+03 5.43e+03]
 [4.49e+03 6.75e+03]
 [1.03e+04 1.05e+04]
 [8.80e+03 7.34e+03]
 [3.80e+03 4.19e+03]
 [2.79e+04 1.83e+04]
 [1.07e+04 1.07e+04]
 [1.13e+04 1.25e+04]
 [6.11e+03 3.49e+03]
 [8.24e+03 6.46e+03]
 [2.71e+04 3.35e+04]
 [3.36e+04 2.40e+04]
 [1.44e+04 1.26e+04]
 [1.17e+04 2.30e+04]
 [3.21e+04 2.31e+04]
 [4.17e+03 1.67e+03]
 [9.25e+03 4.67e+03]
 [1.08e+03 3.73e+03]
 [9.80e+03 7.68e+03]
 [3.77e+03 3.76e+03]
 [1.04e+04 8.41e+03]
 [9.01e+03 8.06e+03]
 [4.01e+04 4.90e+04]
 [1.57e+04 1.30e+04]
 [1.39e+04 2.06e+04]
 [2.48e+04 1.46e+04]
 [5.17e+03 4.14e+03]
 [1.26e+04 8.35e+03]
 [3.08e+04 5.12e+04]
 [3.35e+04 4.00e+04]
 [3.67e+03 1.88e+03]
 [3.98e+03 5.46e+03]
 [3.99e+03 2.87e+03]
 [3.05e+04 2.01e+04]
 [3.95e+04 4.75e+04]
 [2.78e+04 3.61e+04]
 [5.09e+03 2.60e+04]
 [1.06e+04 1.97e+04]
 [7.83e+03 6.

## Evaluating the Model Performance

In [23]:
from sklearn.metrics import r2_score

# Coefficient of determination (R^2) of the predictions on the test split;
# left as the cell's last expression so its value is displayed.
r2_score(y_true=y_test, y_pred=y_pred)

0.7999876970680434