# Multiple Linear Regression

## Importing the libraries

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [0]:
## Read the startup records, then split them column-wise:
## every column except the last is a feature, the last column is the target.
dataset = pd.read_csv('50_Startups.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

## Encoding categorical data

In [0]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

## One-hot encode the categorical column at index 3 so the model does not read a
## numerical order into its labels; every other column is passed through unchanged.
ct = ColumnTransformer(
    transformers=[("encoder", OneHotEncoder(), [3])],
    remainder="passthrough",
)
## Encode from the raw feature columns of `dataset` rather than from the current X.
## X is overwritten on this line, so transforming X itself would make a second run
## of this cell one-hot encode an already-encoded matrix. On a fresh run the input
## is the same array that the loading cell assigned to X.
X = np.array(ct.fit_transform(dataset.iloc[:, :-1].values))

## Feature scaling is not needed for multiple linear regression,
## because each independent variable is multiplied by its own coefficient.
## Therefore it doesn't matter that some features take larger values than others:
## the fitted coefficients compensate, putting every term on the same scale.

## There is also no need to check the assumptions of linear regression here.

## Splitting the dataset into the Training set and Test set

In [0]:
from sklearn.model_selection import train_test_split

## Hold out 20% of the rows as the test set; the fixed random_state makes the
## split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Training the Multiple Linear Regression model on the Training set

In [11]:
## Fit an ordinary least-squares model on the training split.
## NOTE(review): the encoder above is built with default arguments, so presumably no
## dummy column is dropped and the dummies are collinear with the intercept. The fit
## and its predictions still work, but individual coefficients are then not uniquely
## determined; confirm before interpreting them.
## NOTE(review): LinearRegression fits every column it is given; it does not perform
## backward elimination or any other feature selection.

from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

## Predicting the Test set results

In [12]:
## Predict the target for the held-out rows.
y_pred = regressor.predict(X_test)

## Print floats with two decimal places.
np.set_printoptions(precision=2)

## Put the two 1-D vectors side by side as columns of one 2-D array:
## column 0 holds the predictions, column 1 the actual test values.
print(np.column_stack((y_pred, y_test)))

## Prediction VS Real Profit

[[103015.2  103282.38]
 [132582.28 144259.4 ]
 [132447.74 146121.95]
 [ 71976.1   77798.83]
 [178537.48 191050.39]
 [116161.24 105008.31]
 [ 67851.69  81229.06]
 [ 98791.73  97483.56]
 [113969.44 110352.25]
 [167921.07 166187.94]]
