In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
from sklearn.metrics import (
    mean_absolute_error, mean_squared_error, 
    median_absolute_error, explained_variance_score, r2_score
)
from sklearn.utils import shuffle
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures

In [2]:
# Path to the comma-separated multivariable regression dataset
input_file = '../aiwp-data/data_multivar_regr.txt'
# Parse the whole file into a numeric array (np.loadtxt defaults to float values)
data = np.loadtxt(fname=input_file, delimiter=',')

In [3]:
# Every column except the last is an input feature; the last column is the target
X = data[:, :-1]
y = data[:, -1]

In [4]:
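# Disabled: shuffle the rows before splitting. While this stays commented out,
# the train/test split keeps the rows in file order.
# X, y = shuffle(data[:, :-1], data[:, -1], random_state=7)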

In [10]:
# Sequential train/test split: the first 80% of the rows (truncated to a whole
# number by int()) are used for training, the remaining rows for testing.
num_training = int(0.8 * len(X))
num_test = len(X) - num_training

# Cut features and targets at the same row index so they stay aligned
X_train, X_test = X[:num_training], X[num_training:]
y_train, y_test = y[:num_training], y[num_training:]

# Build the linear regressor and fit it on the training portion in one step
# (fit() returns the fitted estimator itself)
linear_regressor = linear_model.LinearRegression().fit(X_train, y_train)

# Predictions for the held-out test rows
y_test_pred = linear_regressor.predict(X_test)

In [13]:
# Sanity check: test features and test targets should have the same number of rows
np.shape(X_test), np.shape(y_test)

((140, 3), (140,))

In [14]:
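# # Plot outputs
# # NOTE: disabled. X_test has 3 feature columns while y_test is 1-D, and
# # plt.scatter needs x and y of equal size; plot one feature column at a time.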
# plt.scatter(X_test, y_test, color='red')
# # plt.plot(X_test, y_test_pred, color='orange', linewidth=4)
# plt.xticks(())
# plt.yticks(())
# plt.show()

In [None]:
# Measure performance: score the test-set predictions with several regression
# metrics, each rounded to two decimals.
print("Linear Regressor performance:")
performance_report = (
    ("Mean absolute error =", mean_absolute_error),
    ("Mean squared error =", mean_squared_error),
    ("Median absolute error =", median_absolute_error),
    ("Explained variance score =", explained_variance_score),
    ("R2 score =", r2_score),
)
for metric_label, metric_fn in performance_report:
    print(metric_label, round(metric_fn(y_test, y_test_pred), 2))

Linear Regressor performance:
Mean absolute error = 3.58
Mean squared error = 20.31
Median absolute error = 2.99
Explained variance score = 0.86
R2 score = 0.86


In [None]:
# Polynomial regression: expand the training features into polynomial terms
# up to degree 10, fitting the transformer on the training data only.
polynomial = PolynomialFeatures(degree=10)
X_train_transformed = polynomial.fit(X_train).transform(X_train)

In [None]:
# Single query sample with three feature values, used to compare both models
datapoint = [[7.75, 6.35, 5.56]]
# Expand it with the transformer that was already fitted on X_train.
# Use transform(), not fit_transform(): a query sample must never re-fit a
# preprocessing step, it should only be mapped through the fitted one.
# NOTE(review): PolynomialFeatures appears to learn only the input width during
# fit(), so the numeric result should be unchanged; confirm against the docs.
poly_datapoint = polynomial.transform(datapoint)

In [None]:
# Fit a second linear regressor, this time on the polynomial-expanded features
poly_linear_model = linear_model.LinearRegression().fit(X_train_transformed, y_train)

# Show both models' predictions for the same query sample, one after the other
linear_prediction = linear_regressor.predict(datapoint)
print("\nLinear regression:\n", linear_prediction)
poly_prediction = poly_linear_model.predict(poly_datapoint)
print("\nPolynomial regression:\n", poly_prediction)


Linear regression:
 [36.05286276]

Polynomial regression:
 [41.46258549]
