# Multiple Linear Regression

## Importing the libraries

In [7]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Read the PC-build regression data and separate it, by column position,
# into a feature matrix and a target matrix.
dataset = pd.read_csv('pc_build_dataset_regression.csv')

# Features: the two leading columns (per the printed output below, a number
# followed by a category label such as 'Gaming').
X = dataset.iloc[:, 0:2].to_numpy()
# Targets: every column from index 2 onward; the slice keeps y two-dimensional.
y = dataset.iloc[:, 2:].to_numpy()

print(X)

[[29854 'Gaming']
 [17953 'Gaming']
 [9879 'Gaming']
 [12753 'Gaming']]


## Encoding the categorical feature and splitting the dataset into the Training set and Test set

In [3]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

# One-hot encode the categorical feature (column 1) and pass the other column
# through unchanged. The encoded columns come first in the result and the
# passed-through column is appended on the right.
# sparse_threshold=0 forces a dense result: with the default threshold the
# transformer may return a SciPy sparse matrix once there are enough
# categories, and np.array() on a sparse matrix does not yield a 2-D array.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [1])],
    remainder='passthrough',
    sparse_threshold=0,
)

# Encode from the raw feature columns of `dataset` rather than from `X` itself,
# so that re-running this cell does not try to encode an already-encoded array.
X = np.array(ct.fit_transform(dataset.iloc[:, :2].values))

print(X)

# Hold out half of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.5, random_state = 0)


[[1.0 29854]
 [1.0 17953]
 [1.0 9879]
 [1.0 12753]]


## Training the Multiple Linear Regression model on the Training set

In [4]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares fit on the training split. y_train carries one column
# per target, so this single estimator is fitted on all targets at once.
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)

## Predicting the Test set results

In [5]:
# Predict every target column for the held-out rows.
y_pred = regressor.predict(X_test)

# Print with two decimals so predictions and ground truth are easy to compare.
np.set_printoptions(precision=2)
print(y_pred)
print(y_test)

# Spot-check a single raw sample. It is routed through the fitted
# ColumnTransformer instead of a hand-written encoded row, so the feature
# layout always matches what the model was trained on.
sample_raw = np.array([[29854, 'Gaming']], dtype=object)
print(regressor.predict(ct.transform(sample_raw)))

[[ 1.04e+03  7.22e+00  1.19e+01  3.84e+00  4.63e+00 -7.67e+01  1.90e+03
   1.43e+03  5.22e+02  1.03e+01  2.00e+00  6.00e+03  4.27e+01  4.92e+03
   2.51e+03  1.06e+01  4.81e+02  3.29e+02  4.88e+02  1.08e+03]
 [ 1.53e+03  9.63e+00  1.48e+01  3.72e+00  4.79e+00  6.09e+01  1.93e+03
   1.52e+03  7.28e+02  1.80e+01  2.00e+00  6.00e+03  4.17e+01  6.58e+03
   2.51e+03  1.25e+01  5.80e+02  5.77e+02  7.06e+02  1.05e+03]]
[[1.60e+03 1.00e+01 1.60e+01 2.50e+00 4.60e+00 1.25e+02 1.55e+03 1.10e+03
  5.50e+02 3.20e+01 2.00e+00 3.60e+03 1.80e+01 4.80e+03 2.49e+03 1.20e+01
  7.59e+02 1.02e+03 4.99e+02 4.49e+02]
 [1.57e+03 1.00e+01 1.60e+01 2.50e+00 4.60e+00 3.10e+02 2.00e+03 1.30e+03
  8.78e+02 3.20e+01 2.00e+00 5.60e+03 4.60e+01 6.48e+03 2.62e+03 1.20e+01
  7.59e+02 1.02e+03 7.49e+02 4.49e+02]]
[[4.40e+03 2.40e+01 3.20e+01 3.00e+00 5.80e+00 8.80e+02 2.15e+03 2.10e+03
  1.95e+03 6.40e+01 2.00e+00 6.00e+03 3.60e+01 1.65e+04 2.54e+03 2.40e+01
  1.17e+03 2.05e+03 2.00e+03 8.60e+02]]


## Evaluating the Model Performance

In [6]:
from sklearn.metrics import r2_score

# Coefficient of determination on the held-out rows. With several target
# columns, r2_score's default setting averages the per-target scores uniformly.
# A negative value means the fit does worse than always predicting the mean.
r2_score(y_true=y_test, y_pred=y_pred)

-29.16190157302463

In [12]:
from joblib import dump, load

In [13]:
# Round-trip the fitted model through disk to confirm it can be reloaded.
# Only the regressor is saved; the ColumnTransformer `ct` is not, so any other
# consumer of 'regressor.joblib' has to reproduce the same encoding itself.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
dump(regressor, 'regressor.joblib')
model_in = load('regressor.joblib')

# Encode the raw sample with the fitted transformer rather than hand-writing
# the one-hot row, which would silently go stale if the category set changed.
new_sample_raw = np.array([[13000, 'Gaming']], dtype=object)
model_in.predict(ct.transform(new_sample_raw))


array([[1.57e+03, 9.84e+00, 1.50e+01, 3.71e+00, 4.81e+00, 7.28e+01,
        1.94e+03, 1.53e+03, 7.45e+02, 1.87e+01, 2.00e+00, 6.00e+03,
        4.17e+01, 6.73e+03, 2.51e+03, 1.27e+01, 5.88e+02, 5.98e+02,
        7.24e+02, 1.04e+03]])