# Multiple Linear Regression

## Importing the libraries

In [37]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [38]:
# Load the raw table from Data.csv (path is relative to the notebook's working directory).
dataset = pd.read_csv('Data.csv')
# Every column except the last becomes the feature matrix X;
# the last column becomes the target vector y.
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

## Splitting the dataset into the Training set and Test set

In [39]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=0,
)

## Training the Multiple Linear Regression model on the Training set

In [40]:
from sklearn.linear_model import LinearRegression
# Linear regression estimator with scikit-learn's default settings.
regressor = LinearRegression()
# Fit on the training split only; X_test / y_test are used afterwards for evaluation.
regressor.fit(X_train, y_train)

## Predicting the Test set results

In [41]:
# Predict the target for the held-out test rows.
y_pred = regressor.predict(X_test)
# Show two decimals when printing arrays (this is a kernel-wide NumPy setting).
np.set_printoptions(precision=2)
# Side-by-side view: column 0 = predicted value, column 1 = actual value.
print(np.column_stack((y_pred, y_test)))

[[431.43 431.23]
 [458.56 460.01]
 [462.75 461.14]
 ...
 [469.52 473.26]
 [442.42 438.  ]
 [461.88 463.28]]


## Evaluating the Model Performance

In [42]:
from sklearn.metrics import r2_score
# R2 on the test set, stored under a model-specific name so the final
# comparison cell can read it after later sections overwrite y_pred.
r2_multiple_linear_regression = r2_score(y_true=y_test, y_pred=y_pred)

# Polynomial Regression

## Importing the libraries

In [43]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [44]:
# Reload Data.csv so this section starts from the raw table.
dataset = pd.read_csv('Data.csv')
# Features: all columns but the last. Target: the last column.
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

## Splitting the dataset into the Training set and Test set

In [45]:
from sklearn.model_selection import train_test_split
# Same 80/20 split and seed as the other sections, so every model is
# scored on the same test rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=0,
)

## Training the Polynomial Regression model on the Training set

In [46]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
# Feature expansion into polynomial terms up to degree 4.
poly_reg = PolynomialFeatures(degree = 4)
# Fit the expansion on the training features and transform them in one step;
# the fitted poly_reg is reused to transform X_test at prediction time.
X_poly = poly_reg.fit_transform(X_train)
# A plain linear regression is then fitted on the expanded features.
regressor = LinearRegression()
regressor.fit(X_poly, y_train)

## Predicting the Test set results

In [47]:
# Apply the already-fitted polynomial expansion to the test features
# (transform only, no refit) before predicting.
y_pred = regressor.predict(poly_reg.transform(X_test))
# Show two decimals when printing arrays (this is a kernel-wide NumPy setting).
np.set_printoptions(precision=2)
# Side-by-side view: column 0 = predicted value, column 1 = actual value.
print(np.column_stack((y_pred, y_test)))

[[433.94 431.23]
 [457.9  460.01]
 [460.52 461.14]
 ...
 [469.53 473.26]
 [438.27 438.  ]
 [461.66 463.28]]


## Evaluating the Model Performance

In [48]:
from sklearn.metrics import r2_score
# R2 on the test set for the polynomial model, kept for the final comparison.
r2_polynomial_regression = r2_score(y_true=y_test, y_pred=y_pred)

# Support Vector Regression (SVR)

## Importing the libraries

In [49]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [50]:
# Reload Data.csv so this section starts from the raw table.
dataset = pd.read_csv('Data.csv')
# Features: all columns but the last. Target: the last column.
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

In [51]:
# Turn the target into a single-column 2-D array so it can be passed to
# StandardScaler in the feature-scaling cell.
y = y.reshape(-1, 1)

## Splitting the dataset into the Training set and Test set

In [52]:
from sklearn.model_selection import train_test_split
# Same 80/20 split and seed as the other sections, so every model is
# scored on the same test rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=0,
)

## Feature Scaling

In [53]:
from sklearn.preprocessing import StandardScaler
# One scaler per array: sc_X is reused to transform X_test, and sc_y is
# reused to map predictions back to the original target units.
sc_X, sc_y = StandardScaler(), StandardScaler()
# NOTE: X_train / y_train are overwritten with their scaled versions. Re-running
# this cell without first re-running the split cell would fit fresh scalers on
# already-scaled data and silently break the later inverse_transform.
X_train = sc_X.fit_transform(X_train)
y_train = sc_y.fit_transform(y_train)

## Training the SVR model on the Training set

In [54]:
from sklearn.svm import SVR
# Support vector regressor with an RBF kernel, fitted on the scaled training data.
regressor = SVR(kernel = 'rbf')
# y_train is an (n_samples, 1) column at this point (it was reshaped and then
# scaled as a 2-D array). SVR.fit expects a 1-D target, so flatten it here;
# passing the column vector makes scikit-learn emit a DataConversionWarning.
regressor.fit(X_train, y_train.ravel())

  y = column_or_1d(y, warn=True)


## Predicting the Test set results

In [55]:
# Scale the test features with the training-fitted sc_X, predict in scaled
# target space, then map the predictions back to original units with sc_y
# (inverse_transform needs a 2-D column, hence the reshape).
y_pred = sc_y.inverse_transform(
    regressor.predict(sc_X.transform(X_test)).reshape(-1, 1)
)
# Show two decimals when printing arrays (this is a kernel-wide NumPy setting).
np.set_printoptions(precision=2)
# Side-by-side view: column 0 = predicted value, column 1 = actual value.
print(np.column_stack((y_pred, y_test)))

[[434.05 431.23]
 [457.94 460.01]
 [461.03 461.14]
 ...
 [470.6  473.26]
 [439.42 438.  ]
 [460.92 463.28]]


## Evaluating the Model Performance

In [56]:
from sklearn.metrics import r2_score
# R2 on the test set, computed in original (unscaled) target units,
# kept for the final comparison.
r2_support_vector_regression = r2_score(y_true=y_test, y_pred=y_pred)

# Decision Tree Regression

## Importing the libraries

In [57]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [58]:
# Reload Data.csv: the SVR section reshaped y and scaled the training arrays,
# so this section starts again from the raw table.
dataset = pd.read_csv('Data.csv')
# Features: all columns but the last. Target: the last column.
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

## Splitting the dataset into the Training set and Test set

In [59]:
from sklearn.model_selection import train_test_split
# Same 80/20 split and seed as the other sections, so every model is
# scored on the same test rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=0,
)

## Training the Decision Tree Regression model on the Training set

In [60]:
from sklearn.tree import DecisionTreeRegressor
# Single regression tree; random_state is pinned so repeated runs build the same tree.
regressor = DecisionTreeRegressor(random_state = 0)
# Fit on the training split only; X_test / y_test are used afterwards for evaluation.
regressor.fit(X_train, y_train)

## Predicting the Test set results

In [61]:
# Predict the target for the held-out test rows.
y_pred = regressor.predict(X_test)
# Show two decimals when printing arrays (this is a kernel-wide NumPy setting).
np.set_printoptions(precision=2)
# Side-by-side view: column 0 = predicted value, column 1 = actual value.
print(np.column_stack((y_pred, y_test)))

[[431.28 431.23]
 [459.59 460.01]
 [460.06 461.14]
 ...
 [471.46 473.26]
 [437.76 438.  ]
 [462.74 463.28]]


## Evaluating the Model Performance

In [62]:
from sklearn.metrics import r2_score
# R2 on the test set for the decision tree, kept for the final comparison.
r2_decision_tree_regression = r2_score(y_true=y_test, y_pred=y_pred)

# Random Forest Regression

## Importing the libraries

In [63]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [64]:
# Reload Data.csv so this section starts from the raw table.
dataset = pd.read_csv('Data.csv')
# Features: all columns but the last. Target: the last column.
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

## Splitting the dataset into the Training set and Test set

In [65]:
from sklearn.model_selection import train_test_split
# Same 80/20 split and seed as the other sections, so every model is
# scored on the same test rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=0,
)

## Training the Random Forest Regression model on the Training set

In [66]:
from sklearn.ensemble import RandomForestRegressor
# Forest of 10 trees; random_state is pinned so repeated runs build the same forest.
regressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
# Fit on the training split only; X_test / y_test are used afterwards for evaluation.
regressor.fit(X_train, y_train)

## Predicting the Test set results

In [67]:
# Predict the target for the held-out test rows.
y_pred = regressor.predict(X_test)
# Show two decimals when printing arrays (this is a kernel-wide NumPy setting).
np.set_printoptions(precision=2)
# Side-by-side view: column 0 = predicted value, column 1 = actual value.
print(np.column_stack((y_pred, y_test)))

[[434.05 431.23]
 [458.79 460.01]
 [463.02 461.14]
 ...
 [469.48 473.26]
 [439.57 438.  ]
 [460.38 463.28]]


## Evaluating the Model Performance

In [68]:
from sklearn.metrics import r2_score
# R2 on the test set for the random forest, kept for the final comparison.
r2_random_forest_regression = r2_score(y_true=y_test, y_pred=y_pred)

# Which is the Best Regression Model? (for this dataset)

In [71]:
# Gather every model's test-set R2 score under its display name.
# Insertion order matters twice: it is the order the scores are printed in,
# and max() returns the first of several equal maxima, so a tie resolves to
# the model listed first.
r2_scores = {
    'Multiple Linear Regression': r2_multiple_linear_regression,
    'Polynomial Regression': r2_polynomial_regression,
    'Support Vector Regression': r2_support_vector_regression,
    'Decision Tree Regression': r2_decision_tree_regression,
    'Random Forest Regression': r2_random_forest_regression,
}

print("--------------- R2 Score ---------------")
for model_name, model_r2 in r2_scores.items():
    print(f"{model_name}: {model_r2}")

# Select the winner by key, so the reported name and score always come from
# the same dict entry instead of being matched up by float equality.
best_model = max(r2_scores, key=r2_scores.get)
max_r2_score = r2_scores[best_model]
print("----------------------------------------")
print(f"Best Regression Model: {best_model}\nMax R2 Score: {max_r2_score}")

--------------- R2 Score ---------------
Multiple Linear Regression: 0.9325315554761303
Polynomial Regression: 0.9458192809530098
Support Vector Regression: 0.9480784049986258
Decision Tree Regression: 0.922905874177941
Random Forest Regression: 0.9615908334363876
----------------------------------------
Best Regression Model: Random Forest Regression
Max R2 Score: 0.9615908334363876
