In [14]:
import pandas as pd  
import numpy as np  
import pickle
import matplotlib.pyplot as plt  
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn import metrics

def outlier_removal(column, lower_quantile=.05, upper_quantile=.95):
    """Cap the extreme values of a numeric Series at two percentiles.

    Values above the ``upper_quantile`` percentile are replaced by that
    percentile's value, and values below the ``lower_quantile`` percentile
    are replaced by that percentile's value (winsorizing). No rows are
    dropped, so the result has the same length and index as the input.

    Args:
        column: Numeric pandas Series to cap.
        lower_quantile: Quantile in [0, 1] used as the lower cap
            (default: 5th percentile).
        upper_quantile: Quantile in [0, 1] used as the upper cap
            (default: 95th percentile).

    Returns:
        A new Series with the capped values. The input Series is left
        unmodified.
    """
    upper_limit = column.quantile(upper_quantile)
    lower_limit = column.quantile(lower_quantile)
    # ``clip`` builds a new Series instead of assigning through ``.loc`` on
    # the argument, so the caller's data is never mutated behind its back
    # (and no chained-assignment / copy-on-write warnings are triggered).
    return column.clip(lower=lower_limit, upper=upper_limit)

# Load the gold-price data set from the remote CSV file.
dataset = pd.read_csv(
    "https://raw.githubusercontent.com/phattarin-kitbumrung/machinelearning-python/dataset/main/goldprice.csv"
)

# Cap the extreme values of the listed columns (see outlier_removal).
# NOTE(review): the caps are derived from the full data set, before the
# train/test split below, and the target column is capped as well.
capped_columns = ['SPX', 'GLD', 'USO', 'EUR/USD']
dataset[capped_columns] = dataset[capped_columns].apply(outlier_removal)

# Target is EUR/USD; the features are every remaining column except Date.
y = dataset['EUR/USD']
x = dataset.drop(columns=['Date', 'EUR/USD'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

# Fit an ordinary least-squares model on the training portion.
model = LinearRegression()
model.fit(x_train, y_train)

# Predict the held-out rows.
y_pred = model.predict(x_test)

# Show the true and predicted values side by side.
df = pd.DataFrame({'Actually': y_test, 'Predicted': y_pred})
print(df)

# Error metrics on the held-out rows; MSE is computed once and reused for
# the RMSE.
mse = metrics.mean_squared_error(y_test, y_pred)
print("MAE = ", metrics.mean_absolute_error(y_test, y_pred))
print("MSE = ", mse)
print("RMSE = ", np.sqrt(mse))
print("Score = ", metrics.r2_score(y_test, y_pred))


      Actually  Predicted
1255  1.351698   1.305168
2100  1.147394   1.123986
711   1.410596   1.417558
1328  1.361804   1.282291
53    1.491295   1.532452
...        ...        ...
2085  1.119620   1.124834
1362  1.387867   1.287181
828   1.376500   1.334338
567   1.284406   1.321168
1693  1.129089   1.175643

[458 rows x 2 columns]
MAE =  0.05108715964785287
MSE =  0.003702872851620724
RMSE =  0.06085123541573108
Score =  0.7493292773274445
