In [16]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression 
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from joblib import dump  

### Prepare Data

In [2]:
# Read the pre-split feature matrices and target arrays back from
# data/processed (NumPy .npy files), in train / validation / test order.
X_train, X_val, X_test = (
    np.load(feature_file)
    for feature_file in (
        '../data/processed/X_train.npy',
        '../data/processed/X_val.npy',
        '../data/processed/X_test.npy',
    )
)
y_train, y_val, y_test = (
    np.load(target_file)
    for target_file in (
        '../data/processed/y_train.npy',
        '../data/processed/y_val.npy',
        '../data/processed/y_test.npy',
    )
)

In [3]:
# Display the dimensions (rows, columns) of X_train as loaded from disk,
# i.e. before any polynomial feature expansion is applied.
X_train.shape

(25372, 59)

In [5]:
# Build the feature expander: every term of total degree <= 2
# (all other PolynomialFeatures options are left at their defaults).
poly = PolynomialFeatures(degree=2)

In [6]:
# Fit the expander on the training features only, then expand them.
# NOTE: X_train is overwritten with its expanded form, so re-running this
# cell without first reloading the data would expand the already-expanded
# matrix a second time.
poly.fit(X_train)
X_train = poly.transform(X_train)

In [7]:
# Display the dimensions of X_train after the polynomial expansion.
# With 59 input columns a degree-2 expansion yields
# 1 constant + 59 linear + 1770 second-order terms (59 squares and
# 1711 pairwise products) = 1830 columns.
X_train.shape

(25372, 1830)

In [8]:
# Expand the validation and test features with the expander that was fitted
# on the training set (transform only - nothing is re-fitted here).
# NOTE: both names are overwritten, so this cell is not safe to run twice
# without reloading the data.
X_val, X_test = (poly.transform(features) for features in (X_val, X_test))

### Train Linear Regression model

In [11]:
# Instantiate an ordinary least-squares linear regression with default settings.
# NOTE(review): the expanded matrix has 1830 columns, which includes a
# constant column, and the estimator also fits its own intercept
# (fit_intercept=True in the repr below) - the bias is therefore represented
# twice; consider PolynomialFeatures(include_bias=False). TODO confirm.
reg = LinearRegression()

In [12]:
# Train the regression on the expanded training features and their targets.
# fit() returns the estimator itself, which is why its repr is displayed.
reg.fit(X=X_train, y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [14]:
# Persist the fitted regression to models/linear_poly_2.joblib.
# dump() returns the list of files it wrote, which is displayed below.
# NOTE(review): only the regression is saved; the PolynomialFeatures step is
# not, so a consumer must rebuild the same degree-2 expansion before
# calling predict - confirm this is intended.
dump(value=reg, filename='../models/linear_poly_2.joblib')

['../models/linear_poly_2.joblib']

In [15]:
# Score the training and validation feature matrices with the fitted model,
# keeping the results in y_train_preds and y_val_preds respectively.
y_train_preds, y_val_preds = (
    reg.predict(features) for features in (X_train, X_val)
)

In [17]:
# Display the RMSE and MAE scores of this model on the training set.
# RMSE is computed as sqrt(MSE) rather than via `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, so passing it raises
# TypeError on current releases. For a single target column the two forms
# give the same value on every scikit-learn version.
print(np.sqrt(mse(y_train, y_train_preds)))
print(mae(y_train, y_train_preds))

10653.42149650835
3797.0692730837022


In [18]:
# Display the RMSE and MAE scores of this model on the validation set.
# RMSE is computed as sqrt(MSE) rather than via `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, so passing it raises
# TypeError on current releases. For a single target column the two forms
# give the same value on every scikit-learn version.
print(np.sqrt(mse(y_val, y_val_preds)))
print(mae(y_val, y_val_preds))

10239.679613233702
4162.8676365216825
