# Multiple Linear Regression

## Importing the libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [None]:
# Load the insurance records, then separate predictors from the target:
# every column except the last goes into x, the last column becomes y.
dataset = pd.read_csv('insurance.csv')
target_position = dataset.shape[1] - 1
x = dataset.iloc[:, :target_position].values
y = dataset.iloc[:, target_position].values

In [None]:
# Inspect the raw feature matrix; numeric and string columns are still mixed here.
print(x)

[[19 'female' 27.9 0 'yes' 'southwest']
 [18 'male' 33.77 1 'no' 'southeast']
 [28 'male' 33.0 3 'no' 'southeast']
 ...
 [18 'female' 36.85 0 'no' 'southeast']
 [21 'female' 25.8 0 'no' 'southwest']
 [61 'female' 29.07 0 'yes' 'northwest']]


In [None]:
# Inspect the target vector (the last column of the dataset).
print(y)

[16884.924   1725.5523  4449.462  ...  1629.8335  2007.945  29141.3603]


## Encoding categorical data

#### Sex:    10 -----> Female | 01 -----> Male
#### Smoker: 10 -----> No     | 01 -----> Yes
#### Region: 1000 ----> Northeast | 0100 ----> Northwest
#### Region: 0010 ----> Southeast | 0001 ----> Southwest

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the string-valued columns (positions 1, 4 and 5 of the raw
# feature matrix); the remaining columns are passed through unchanged and end up
# after the encoded ones.
categorical_columns = [1, 4, 5]
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), categorical_columns)],
    remainder='passthrough',
)

# Encode from the untouched DataFrame rather than from `x` itself, so re-running
# this cell always yields the same matrix instead of re-encoding already-encoded data.
x = np.array(ct.fit_transform(dataset.iloc[:, :-1].values))

In [None]:
# Inspect the encoded feature matrix: one-hot columns first, passthrough columns last.
print(x)

[[1.0 0.0 0.0 ... 19 27.9 0]
 [0.0 1.0 1.0 ... 18 33.77 1]
 [0.0 1.0 1.0 ... 28 33.0 3]
 ...
 [1.0 0.0 1.0 ... 18 36.85 0]
 [1.0 0.0 1.0 ... 21 25.8 0]
 [1.0 0.0 0.0 ... 61 29.07 0]]


## Splitting the dataset into the Training set and Test set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1,
)

## Training the Multiple Linear Regression model on the Training set

In [None]:
# No one-hot column is dropped to avoid the dummy-variable trap, and no
# feature-selection step (backward elimination, forward selection, bidirectional)
# is run here; per the original author's note, neither is needed with this estimator.

from sklearn.linear_model import LinearRegression
mlr = LinearRegression()    # create the (still unfitted) estimator
mlr.fit(x_train, y_train)   # fit on the training split; the returned estimator's repr is the cell output

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

## Predicting the Test set results

In [None]:
# The model has many input features (the encoded matrix has 11 columns), so it
# cannot be drawn as a simple 2-D plot. Instead, print predictions and ground
# truth side by side:
#   column 0 ---> predicted target values for the test set
#   column 1 ---> actual target values of the test set

y_prediction = mlr.predict(x_test)
np.set_printoptions(precision=2)  # limit printed decimals (global NumPy print setting)
print(np.column_stack((y_prediction, y_test)))

[[ 4383.68  1646.43]
 [12885.04 11353.23]
 [12589.22  8798.59]
 [13286.23 10381.48]
 [  544.73  2103.08]
 [32117.58 38746.36]
 [12919.04  9304.7 ]
 [12318.62 11658.12]
 [ 3784.29  3070.81]
 [29468.46 19539.24]
 [11002.81 12629.9 ]
 [17539.69 11538.42]
 [ 8681.35  6338.08]
 [ 8349.04  7050.64]
 [ 3130.13  1137.47]
 [10445.84  8968.33]
 [ 3863.74 21984.47]
 [ 6944.63  6414.18]
 [15009.63 28287.9 ]
 [14441.6  13462.52]
 [12543.66  9722.77]
 [32958.73 40932.43]
 [ 9072.64  8026.67]
 [ 8986.86  8444.47]
 [ 3022.86  2203.47]
 [ 8164.97  6664.69]
 [ 9556.08  8606.22]
 [10743.2   8283.68]
 [ 7694.02  5375.04]
 [ 4373.44  3645.09]
 [14140.94 11674.13]
 [ 5811.79 11737.85]
 [34631.91 24873.38]
 [27009.11 33750.29]
 [33348.14 24180.93]
 [ 9532.97  9863.47]
 [30421.65 36837.47]
 [26648.91 17942.11]
 [15157.78 11856.41]
 [33895.76 39725.52]
 [ 6303.39  4349.46]
 [14059.15 11743.93]
 [10713.45 19749.38]
 [15089.36 12347.17]
 [ 4187.95  4931.65]
 [13106.43 30260.  ]
 [ 4336.2  27724.29]
 [28607.06 34

#### Predicting Single Values


In [11]:
# Show the first ten encoded rows. The column bound 12 exceeds the 11 columns
# present, so the slice simply returns every column.
print(x[:10, :12])

[[1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 19 27.9 0]
 [0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 18 33.77 1]
 [0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 28 33.0 3]
 [0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 33 22.705 0]
 [0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 32 28.88 0]
 [1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 31 25.74 0]
 [1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 46 33.44 1]
 [1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 37 27.74 3]
 [0.0 1.0 1.0 0.0 1.0 0.0 0.0 0.0 37 29.83 2]
 [1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 60 25.84 0]]


## Predicting the value for a 33-year-old male non-smoker with a BMI of 28.7 and no children, living in the northwest

In [15]:
# Encoded column order: [female, male, smoker=no, smoker=yes,
#                        northeast, northwest, southeast, southwest,
#                        age, bmi, children]
single_row = [[0, 1, 1, 0, 0, 1, 0, 0, 33, 28.7, 0]]
print(mlr.predict(single_row))

[4678.26]
