# Multiple Linear Regression

## Import Libraries

In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

## Import Dataset

In [3]:
dataset = pd.read_csv("datasets/startup.csv")
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

## Encode Categorical Data

In [4]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

ct = ColumnTransformer(transformers=[("encoder", OneHotEncoder(), [3])], remainder="passthrough")
X = np.array(ct.fit_transform(X))

## Split Dataset

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/4, random_state=42)

## Feature Scaling
No need to use feature scaling because coefficients in front of "x" variables will compensate

## Train Model

In [6]:
from sklearn.linear_model import LinearRegression

regressor = LinearRegression()
regressor.fit(X_train, y_train)

LinearRegression()

## Make Predictions

In [9]:
y_predict = regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_predict.reshape(len(y_predict), 1), y_test.reshape(len(y_test), 1)), 1))

[[126464.76 134307.35]
 [ 85538.23  81005.76]
 [ 99367.08  99937.59]
 [ 45864.37  64926.08]
 [128321.88 125370.37]
 [ 51947.23  35673.41]
 [108901.08 105733.54]
 [100508.67 107404.34]
 [ 97668.72  97427.84]
 [112423.71 122776.86]
 [128907.88 141585.52]
 [174736.83 166187.94]
 [ 93516.9   89949.14]]


### Single Prediction
R&D Spend = 160000, Administration Spend = 130000, Marketing Spend = 300000 and State = 'California'

In [10]:
print(regressor.predict([[1, 0, 0, 160000, 130000, 300000]]))

[183515.56]
