## 1. Import libraries and load the dataset

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Read the CSV, then separate it positionally: every column but the last
# becomes the feature matrix, and the last column becomes the target vector.
dataset = pd.read_csv('50_Startups.csv')
x = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

## 2. Encode the data and split into training/test data sets

In [2]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

# One-hot encode the column at index 3 and pass every other column through
# unchanged. In the transformed array the encoder's columns come first,
# followed by the passthrough columns.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [3])],
    remainder='passthrough',
)
# NOTE: this rebinds `x` to the encoded array, so re-running this cell without
# first re-running the loading cell would feed already-encoded data back in.
x = np.array(ct.fit_transform(x))

# Hold out 35% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.35, random_state=0
)

## 3. Train the Multiple Linear Regression model

In [3]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the columns from index 3 onward only. The first
# three columns of x_train are the one-hot encoder outputs (see the feature
# names printed in the report cell), so they are left out of the model.
model = LinearRegression()
model.fit(x_train[:,3:], y_train)

LinearRegression()

## 4. Predict on the test set

In [4]:
# Predict targets for the held-out rows, using the same column slice
# (index 3 onward) that the model was fitted on.
y_pred = model.predict(x_test[:, 3:])

## 5. Display details

In [5]:
from sklearn.metrics import mean_squared_error
from itertools import zip_longest

# Intercept (b0) followed by one slope per fitted feature (b1..bn).
print('Coefficient b0: ', model.intercept_, '\n')

for i, coef in enumerate(model.coef_, start=1):
    print(f'Coefficient b{i}: ', coef, '\n')

# Actual vs. predicted targets side by side. Building the frame from a dict
# makes pandas raise on a length mismatch instead of silently padding the
# shorter sequence with zeros.
df = pd.DataFrame({'y-value': y_test, 'y-predicted-value': y_pred})
df["residual_epsilon"] = (df['y-value'] - df['y-predicted-value']).abs()

# Mean squared error times the number of test rows is the sum of squared residuals.
print('Q(b0, b1, b2, b3): ', mean_squared_error(y_test, y_pred) * len(x_test), '\n')

# ColumnTransformer.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2; prefer get_feature_names_out() and fall back to the old
# method only on releases that predate it.
if hasattr(ct, 'get_feature_names_out'):
    feature_names = ct.get_feature_names_out()
else:
    feature_names = ct.get_feature_names()

# The first three names belong to the one-hot encoder's output columns.
print(pd.DataFrame(feature_names[:3]).transpose(), "\n")
print(pd.DataFrame(x_test), "\n")
print(df)

Coefficient b0:  38423.195890010946 

Coefficient b1:  0.8148000830222396 

Coefficient b2:  0.04623723651346395 

Coefficient b3:  0.03417789018903377 

Q(b0, b1, b2, b3):  1822304430.699811 

                        0                    1                     2
0  encoder__x0_California  encoder__x0_Florida  encoder__x0_New York 

      0    1    2          3          4          5
0   0.0  1.0  0.0   66051.52  182645.56   118148.2
1   1.0  0.0  0.0  100671.96   91790.61  249744.55
2   0.0  1.0  0.0  101913.08  110594.11  229160.95
3   0.0  1.0  0.0   27892.92   84710.77  164470.71
4   0.0  1.0  0.0  153441.51  101145.55  407934.54
5   0.0  0.0  1.0    72107.6  127864.55  353183.81
6   0.0  0.0  1.0   20229.59   65947.93   185265.1
7   0.0  0.0  1.0   61136.38  152701.92   88218.23
8   0.0  1.0  0.0   73994.56  122782.75  303319.26
9   0.0  1.0  0.0  142107.34   91391.77  366168.42
10  0.0  1.0  0.0   55493.95  103057.49  214634.81
11  0.0  0.0  1.0   46014.02   85047.44  205517.64
12 