In [None]:
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt  
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import Lasso
from sklearn import metrics

def outlier_removal(column, lower_quantile=0.05, upper_quantile=0.95):
    """Cap the extreme values of a numeric Series at two of its quantiles.

    Despite the name, no rows are removed: values above the upper quantile
    are replaced by that quantile and values below the lower quantile are
    replaced by that quantile (winsorizing). The input Series is left
    untouched; a new Series is returned.

    Args:
        column: pandas Series of numeric values to cap.
        lower_quantile: Quantile in [0, 1] used as the lower cap
            (default 0.05, i.e. the 5th percentile).
        upper_quantile: Quantile in [0, 1] used as the upper cap
            (default 0.95, i.e. the 95th percentile).

    Returns:
        A new Series with the same index as ``column`` and every value
        limited to the range [lower cap, upper cap].

    Raises:
        ValueError: If the quantiles are outside [0, 1] or the lower
            quantile is greater than the upper one.
    """
    if not 0 <= lower_quantile <= upper_quantile <= 1:
        raise ValueError(
            "quantiles must satisfy 0 <= lower_quantile <= upper_quantile <= 1"
        )

    lower_limit = column.quantile(lower_quantile)
    upper_limit = column.quantile(upper_quantile)

    # clip() returns a new object instead of writing through .loc, so the
    # caller's data is not modified and an integer column can take the
    # (generally fractional) quantile caps without an in-place dtype change.
    return column.clip(lower=lower_limit, upper=upper_limit)

# Load the gold-price dataset straight from the public GitHub repository.
dataset = pd.read_csv("https://raw.githubusercontent.com/phattarin-kitbumrung/machinelearning-python/dataset/main/goldprice.csv")

# Cap the extremes of the selected numeric columns with outlier_removal.
# NOTE(review): capping is done on the full dataset, before the split below,
# so the caps are derived from rows that later end up in the test set too.
dataset[['SPX', 'GLD', 'USO', 'EUR/USD']] = (
    dataset[['SPX', 'GLD', 'USO', 'EUR/USD']].apply(outlier_removal)
)

# Features are every column except the date and the target; the target is
# the EUR/USD column.
y = dataset['EUR/USD']
x = dataset.drop(columns=['Date', 'EUR/USD'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

# Fit the Lasso regression on the training portion.
model = Lasso(alpha=0.001)
model.fit(x_train, y_train)

# Predict the target for the held-out rows.
y_pred = model.predict(x_test)

# Show the fitted Lasso coefficient next to the feature it belongs to.
lasso_coeff = pd.DataFrame({
    "Columns": x_train.columns,
    'Coefficient Estimate': model.coef_,
})
print(lasso_coeff)

# Report the error metrics on the test set; RMSE is the square root of the
# MSE, so the MSE is computed once and reused.
mse = metrics.mean_squared_error(y_test, y_pred)
print("MAE = ", metrics.mean_absolute_error(y_test, y_pred))
print("MSE = ", mse)
print("RMSE = ", np.sqrt(mse))
print("Score = ", metrics.r2_score(y_test, y_pred))


  Columns  Coefficient Estimate
0     SPX             -0.000055
1     GLD             -0.000850
2     USO              0.004568
3     SLV              0.004911
MAE =  0.05106631075915923
MSE =  0.0037023191533025208
RMSE =  0.06084668563942099
Score =  0.7493667606446223
