In [1]:
import pandas as pd 
import numpy as np 
import seaborn as sns 
import matplotlib.pyplot as plt 
import plotly.graph_objs as go 

import warnings 

# Notebook-wide setup: silence every warning and use the ggplot style for
# matplotlib figures.
warnings.filterwarnings(action="ignore")
plt.style.use('ggplot')

# Load the raw history from the CSV; its columns are renamed in the next cell.
df = pd.read_csv(filepath_or_buffer="datasets/usd_brl_historical.csv")

In [2]:
# Replace the CSV headers with short lowercase names. Labels are assigned by
# position, so the frame must have exactly six columns at this point.
df.columns = pd.Index(['date', 'price', 'open', 'high', 'low', 'change'])

#Date transform  
def convert_month(date):
    """Turn split date tokens into a dash-separated numeric date string.

    Parameters
    ----------
    date : sequence of str
        Date tokens whose first element is an English three-letter month
        abbreviation, e.g. ``['Mar', '13,', '2006']``.

    Returns
    -------
    str
        The tokens joined with ``'-'``, with the month replaced by its
        two-digit number and every comma removed, e.g. ``'03-13-2006'``.

    Raises
    ------
    KeyError
        If the first token is not one of the twelve abbreviations below.
    """
    months = {
        'Jan': '01',
        'Feb': '02',
        'Mar': '03',
        'Apr': '04',
        'May': '05',
        'Jun': '06',
        'Jul': '07',
        'Aug': '08',
        'Sep': '09',
        'Oct': '10',
        'Nov': '11',
        'Dec': '12',
    }
    # Build a new token list instead of assigning into `date`, so the caller's
    # sequence is left untouched (and immutable sequences such as tuples work).
    parts = [months[date[0]], *date[1:]]

    return '-'.join(parts).replace(',', '')

# Rewrite each raw date string as a numeric month-day-year string by splitting
# it on spaces and passing the tokens to convert_month.
numeric_dates = df['date'].map(lambda raw: convert_month(raw.split(' ')))

# Parse the numeric strings into datetimes.
df['date'] = pd.to_datetime(numeric_dates, format='%m-%d-%Y')

# Use the dates as the index, then reverse the row order.
df = df.set_index('date')
df = df[::-1]

df.head()

Unnamed: 0_level_0,price,open,high,low,change
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2006-03-13,2.1339,2.1375,2.1445,2.1275,-0.16%
2006-03-14,2.1208,2.1315,2.1495,2.1155,-0.61%
2006-03-15,2.1157,2.124,2.1291,2.1135,-0.24%
2006-03-16,2.114,2.1157,2.127,2.097,-0.08%
2006-03-17,2.124,2.108,2.1286,2.107,0.47%


In [36]:
# Naive baseline on rows dated 2020-01-01 or later: the "forecasting" value of
# each row is the price found three rows earlier.
# Built as one chain on a price-only selection, so no column is assigned into a
# filtered slice of `df` (which raises SettingWithCopyWarning) and nothing is
# mutated in place.
ts = (
    df.loc[df.index >= pd.to_datetime('2020-01-01'), ['price']]
    .assign(forecasting=lambda frame: frame['price'].shift(3))
    .dropna()  # the first three rows have no shifted value
)

ts.head()

Unnamed: 0_level_0,price,forecasting
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-06,4.0618,4.0195
2020-01-07,4.0679,4.0263
2020-01-08,4.0649,4.0672
2020-01-09,4.0928,4.0618
2020-01-10,4.0962,4.0679


In [49]:
# Overlay the observed price and its shifted copy as two line traces.
naive_traces = [
    go.Scatter(x=ts.index, y=ts.price.values, mode='lines', name='Price'),
    go.Scatter(x=ts.index, y=ts.forecasting.values, mode='lines', name='Predicted'),
]
fig = go.Figure(data=naive_traces)

# Kept as the last expression so the figure is the cell's output.
fig.update_layout(title='Naive Forecasting', xaxis_title='Date', yaxis_title='Price')

In [93]:
import math 

#Validation 
def mean_squared_error(y, y_pred):
    """Return the mean of the squared differences between `y` and `y_pred`.

    Both inputs are converted to float NumPy arrays first, so lists and tuples
    are accepted and pandas objects are compared element by element in
    positional order instead of being aligned on their index labels.

    Parameters
    ----------
    y : array-like
        Observed values.
    y_pred : array-like
        Predicted values, with the same shape as `y`.

    Returns
    -------
    numpy.float64
        Sum of squared differences divided by ``len(y)``.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape. Without this check,
        shapes such as ``(n,)`` and ``(n, 1)`` would broadcast silently.
    """
    actual = np.asarray(y, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    if actual.shape != predicted.shape:
        raise ValueError(
            "y and y_pred must have the same shape, got "
            f"{actual.shape} and {predicted.shape}"
        )
    return ((actual - predicted) ** 2).sum() / len(actual)

def root_mean_squared_error(y, y_pred):
    """Return the square root of ``mean_squared_error(y, y_pred)``."""
    mse = mean_squared_error(y, y_pred)
    return math.sqrt(mse)

# Score the naive baseline on the NumPy values of both columns.
actual_prices = ts['price'].to_numpy()
shifted_prices = ts['forecasting'].to_numpy()

naive_mse = mean_squared_error(actual_prices, shifted_prices)
naive_rmse = root_mean_squared_error(actual_prices, shifted_prices)

print("MSE:", naive_mse)
print("RMSE:", naive_rmse)

MSE: 0.011126072084690554
RMSE: 0.10548019759504887


In [94]:
from sklearn.linear_model import LinearRegression 
from sklearn.model_selection import train_test_split

# Keep rows dated 2020-01-01 or later and move every price down one row, so each
# row pairs its own open/high/low with the price of the row before it.
# Written as a loc/assign/dropna chain so no column is assigned into a filtered
# frame (which raises SettingWithCopyWarning) and nothing is mutated in place.
# NOTE(review): shift(1) makes the target the PREVIOUS row's price rather than
# a later one - confirm this direction is intended.
# NOTE: this rebinds `df`, so running the cell a second time shifts the price
# again; re-run the notebook from the top instead.
df = (
    df.loc[df.index >= pd.to_datetime('2020-01-01')]
    .assign(price=lambda frame: frame['price'].shift(1))
    .dropna()
)

# Ordered split: the first 70% of rows train the model, the rest test it.
split = int(len(df) * 0.7)
feature_cols = ['open', 'high', 'low']

X_train, X_test = df[feature_cols].iloc[:split], df[feature_cols].iloc[split:]
y_train, y_test = df['price'].iloc[:split], df['price'].iloc[split:]

# Fit on the training rows and predict the held-out rows.
reg = LinearRegression().fit(X_train, y_train)
y_pred = reg.predict(X_test)

reg_mse = mean_squared_error(y_test, y_pred)
reg_rmse = root_mean_squared_error(y_test, y_pred)

print("MSE:", reg_mse)
print("RMSE:", reg_rmse)

MSE: 0.027252970666693386
RMSE: 0.1650847378369466


In [95]:
# Compare the held-out prices with the regression's predictions.
# Both traces are drawn against y_test.index: y_test and y_pred hold one value
# per test row, so the training index would pair them with the wrong dates.
fig = go.Figure()

fig.add_trace(go.Scatter(x=y_test.index, y=y_test.values, mode='lines', name='True'))
fig.add_trace(go.Scatter(x=y_test.index, y=y_pred, mode='lines', name='Predicted'))

fig.update_layout(title='Linear Regression Model', xaxis_title='Date', yaxis_title='Price')

In [150]:
# Rebuild the index as a DatetimeIndex that carries whatever frequency pandas
# infers from the existing dates.
inferred_freq = df.index.inferred_freq
df.index = pd.DatetimeIndex(df.index.values, freq=inferred_freq)

# Series for the autoregressive model: every price except the final row.
y = df['price'].iloc[:-1]

In [151]:
from statsmodels.tsa.ar_model import AutoReg

# Fit an autoregressive model with a single lag on `y`.
res = AutoReg(y, lags=1).fit()

# Print the fitted model's three information criteria to three decimals.
out = 'AIC: {0:0.3f}, HQIC: {1:0.3f}, BIC: {2:0.3f}'
criteria = (res.aic, res.hqic, res.bic)
print(out.format(*criteria))

AIC: -1687.063, HQIC: -1681.915, BIC: -1673.828


In [152]:
# Predict the single date that is compared against df.price.iloc[-1] below.
# The date is read from the index rather than hard-coded, so the prediction and
# the "True" value always refer to the same row even if the data's last date
# changes.
holdout_date = df.index[-1]
y_pred_AR = res.predict(start=holdout_date, end=holdout_date)

print("True:", df.price.iloc[-1])
print("Predicted:", y_pred_AR.values[0])

True: 4.9989
Predicted: 4.884521703476483


In [155]:
# Error of the single AR prediction against the last price in df. With only one
# observation, the "MSE" is just that error squared.
ar_error = df.price.iloc[-1] - y_pred_AR.values[0]
ar_mse = ar_error ** 2
ar_rmse = math.sqrt(ar_mse)

print("MSE:", ar_mse)
print("RMSE:", ar_rmse)

MSE: 0.013082394715621618
RMSE: 0.11437829652351716
