In [2]:
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt  
from sklearn.preprocessing import StandardScaler, PolynomialFeatures 
from sklearn.model_selection import train_test_split, GridSearchCV 
from sklearn.pipeline import make_pipeline 
from sklearn.linear_model import Lasso
from sklearn import metrics

def outlier_removal(column, lower_quantile=0.05, upper_quantile=0.95):
    """Cap the extreme values of a numeric Series at two of its own quantiles.

    Every value above the ``upper_quantile`` quantile is replaced by that
    quantile, and every value below the ``lower_quantile`` quantile is
    replaced by that one. Values in between are returned unchanged.

    The input Series is NOT modified: a new Series is returned. (Writing
    through ``column.loc[...] = ...`` would silently alter the caller's data
    and can trigger pandas' SettingWithCopyWarning.)

    Parameters
    ----------
    column : pandas.Series
        Numeric data to cap.
    lower_quantile : float, default 0.05
        Quantile (between 0 and 1) used as the lower cap.
    upper_quantile : float, default 0.95
        Quantile (between 0 and 1) used as the upper cap.

    Returns
    -------
    pandas.Series
        Capped copy of ``column`` with the same index.

    Raises
    ------
    ValueError
        If ``lower_quantile`` is greater than ``upper_quantile``.
    """
    if lower_quantile > upper_quantile:
        raise ValueError(
            "lower_quantile must not exceed upper_quantile "
            f"(got {lower_quantile} > {upper_quantile})"
        )

    # Both limits are taken from the untouched input so that neither cap
    # influences the other.
    lower_limit = column.quantile(lower_quantile)
    upper_limit = column.quantile(upper_quantile)

    # clip() builds a new Series; NaN limits (e.g. from an empty column)
    # simply result in no clipping.
    return column.clip(lower=lower_limit, upper=upper_limit)

# Load the gold-price table from the project's GitHub repository.
dataset = pd.read_csv(
    "https://raw.githubusercontent.com/phattarin-kitbumrung/machinelearning-python/dataset/main/goldprice.csv"
)

# Pass each of the listed columns through outlier_removal (percentile capping)
# and write the result back into the frame.
# NOTE(review): the caps are derived from ALL rows before the train/test split
# below, so held-out rows influence them, and the target column is capped as
# well. Consider deriving the caps from the training split only.
capped_columns = ['SPX', 'GLD', 'USO', 'EUR/USD']
dataset[capped_columns] = dataset[capped_columns].apply(outlier_removal)

# Target is the EUR/USD column; features are every remaining column except
# the date.
y = dataset['EUR/USD']
x = dataset.drop(columns=['Date', 'EUR/USD'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)

# Standardise the features. The scaler learns its statistics from the
# training split only (fit() returns the fitted scaler itself) ...
scaler = StandardScaler().fit(x_train)

# ... and the same fitted transformation is then applied to both splits.
x_train_scaled, x_test_scaled = (
    scaler.transform(split) for split in (x_train, x_test)
)

# Candidate regularisation strengths for the Lasso step. The 'lasso__' prefix
# addresses the Lasso step of the pipeline built below.
param_grid = {
    'lasso__alpha': [1e-4, 1e-3, 1e-2, 1e-1, 1, 5, 10, 20, 30, 40],
}

# Pipeline stages: degree-2 polynomial feature expansion followed by a
# Lasso regression.
poly = PolynomialFeatures(degree=2)
lasso = Lasso()
pipeline = make_pipeline(poly, lasso)

# Search over the alpha grid with 3-fold cross-validation, ranking the
# candidates by R^2.
# NOTE(review): the inputs were standardised on the whole training split
# before this search, so each validation fold has already contributed to the
# scaling statistics. Putting the scaler inside the pipeline would avoid that.
lasso_grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='r2',
    cv=3,
)

# Run the search on the scaled training data.
lasso_grid_search.fit(x_train_scaled, y_train)

# Predict the held-out targets with the fitted grid search.
y_pred = lasso_grid_search.predict(x_test_scaled)

# Report the selected hyper-parameters and their cross-validated score.
print('Best parameter values: ', lasso_grid_search.best_params_)
print('Best score: ', lasso_grid_search.best_score_)

# Test-set error metrics; the MSE is computed once and reused for the RMSE.
mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)

print("MAE = ", mae)
print("MSE = ", mse)
print("RMSE = ", np.sqrt(mse))
print("Score = ", metrics.r2_score(y_test, y_pred))


Best parameter values:  {'lasso__alpha': 0.0001}
Best score:  0.8924378466432591
MAE =  0.031572240432425146
MSE =  0.001721875661406321
RMSE =  0.04149548965136236
Score =  0.8834354206334446
