In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from tqdm import trange
import statsmodels.api as sm
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
pd.options.plotting.backend = "plotly"

In [None]:
def smape_kun(y_true, y_pred):
    """Return the symmetric mean absolute percentage error (sMAPE) in percent.

    Each element contributes ``200 * |y_pred - y_true| / (|y_pred| + |y_true|)``
    and the result is the mean of those contributions.

    Args:
        y_true: Observed values (array-like of numbers).
        y_pred: Predicted values (array-like, broadcastable against ``y_true``).

    Returns:
        The mean sMAPE as a float. Pairs where both values are zero count as
        zero error instead of producing ``nan`` from a 0/0 division.
    """
    # Coerce so that plain lists and object-dtype arrays are handled uniformly.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    scaled_err = np.abs(y_pred - y_true) * 200
    denom = np.abs(y_pred) + np.abs(y_true)

    # denom == 0 only when both values are 0, i.e. a perfect prediction.
    ratio = np.divide(scaled_err, denom, out=np.zeros_like(scaled_err), where=denom != 0)
    return np.mean(ratio)

## EDA

In [None]:
# Source workbook (UCI Machine Learning Repository).
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00247/data_akbilgic.xlsx'
df = pd.read_excel(url)

# The first data row holds the column names used below. Work on a copy so the
# renames never write through to `df` (and no warning has to be silenced).
headers = df.iloc[0].copy()

# Positions 1 and 2 are renamed explicitly by position via .iloc; a bare
# integer key on this string-labelled Series is deprecated positional access.
headers.iloc[1] = 'ISE(TL)'
headers.iloc[2] = 'ISE(USD)'
print(headers)

Unnamed: 0        date
TL BASED       ISE(TL)
USD BASED     ISE(USD)
imkb_x              SP
Unnamed: 4         DAX
Unnamed: 5        FTSE
Unnamed: 6      NIKKEI
Unnamed: 7     BOVESPA
Unnamed: 8          EU
Unnamed: 9          EM
Name: 0, dtype: object



Unknown extension is not supported and will be removed



In [None]:
# Rebuild the table with the names in `headers` as column labels.
# `df.values` is an object array, so every column would otherwise stay
# object-dtype and be ignored by numeric-only operations such as
# DataFrame.corr(numeric_only=True). Cast the value columns to float and the
# 'date' column to datetime right away.
new_df = (
    pd.DataFrame(df.values[1:], columns=headers)
    .astype({col: 'float64' for col in headers if col != 'date'})
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

# 'Price' is the running sum of the ISE(USD) column.
new_df['Price'] = new_df['ISE(USD)'].cumsum()

new_df.head()


Unnamed: 0,date,ISE(TL),ISE(USD),SP,DAX,FTSE,NIKKEI,BOVESPA,EU,EM,Price
0,2009-01-05,0.035754,0.038376,-0.004679,0.002193,0.003894,0.0,0.03119,0.012698,0.028524,0.038376
1,2009-01-06,0.025426,0.031813,0.007787,0.008455,0.012866,0.004162,0.01892,0.011341,0.008773,0.070189
2,2009-01-07,-0.028862,-0.026353,-0.030469,-0.017833,-0.028735,0.017293,-0.035899,-0.017073,-0.020015,0.043836
3,2009-01-08,-0.062208,-0.084716,0.003391,-0.011726,-0.000466,-0.040061,0.028283,-0.005561,-0.019424,-0.04088
4,2009-01-09,0.00986,0.009658,-0.021533,-0.019873,-0.01271,-0.004474,-0.009764,-0.010989,-0.007802,-0.031222


In [None]:
# Price changes of every index column, drawn against the date column
change_cols = ['ISE(USD)', 'SP', 'DAX', 'FTSE', 'NIKKEI', 'BOVESPA', 'EU', 'EM']
fig = new_df.plot(x='date', y=change_cols, title='Price Changes')
fig.show()

In [None]:
# The 'Price' column plotted against the date column
fig = new_df.plot(
    x='date',
    y=['Price'],
    title='Price',
)
fig.show()

In [None]:
# Pearson correlation between the index columns.
# new_df is assembled from raw object values, so the selection is cast to
# float first; object-dtype columns are dropped by numeric_only=True, which
# leaves an empty correlation matrix.
corr_df = new_df[['ISE(USD)', 'SP', 'DAX', 'FTSE', 'NIKKEI', 'BOVESPA', 'EU', 'EM']].astype(float)
corr = corr_df.corr(method='pearson', numeric_only=True)
print(corr)

Empty DataFrame
Columns: []
Index: []


In [None]:
# fig = px.imshow(corr, text_auto=True, title='Correlation Map')
# fig.show()

In [None]:
# Chronological split of new_df: the leading `split_rate` share of rows is
# the training set, the remaining rows are the test set.
split_rate = 0.8
split_idx = int(len(new_df) * split_rate)

train_df = new_df[:split_idx]
test_df = new_df[split_idx:]

# Plain arrays of the 'Price' column for the forecasting models.
train_ar = train_df['Price'].values
test_ar = test_df['Price'].values

In [None]:
# Train and test 'Price' series drawn as two lines on one figure.
fig = go.Figure(data=[
    go.Scatter(x=train_df['date'], y=train_df['Price'], mode='lines', name='train'),
    go.Scatter(x=test_df['date'], y=test_df['Price'], mode='lines', name='test'),
])

fig.update_layout(
    title='Dataset',
    xaxis_title='Date',
    yaxis_title='ISE(USD)',
)

fig.show()

## Dummy Variables

In [None]:
# Indicator (dummy) columns derived from the values of the ISE(USD) column.
ise_usd_dummy = pd.get_dummies(data=new_df.loc[:, 'ISE(USD)'])

ise_usd_dummy.head()


In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences (matching Series behavior)



Unnamed: 0,-0.084716,-0.076967,-0.073526,-0.057319,-0.053827,-0.050687,-0.049776,-0.046440,-0.044349,-0.043907,...,0.045764,0.046046,0.050339,0.051331,0.051558,0.052522,0.061285,0.061708,0.073005,0.100621
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Original columns and the dummy columns side by side (column-wise concat).
ise_usd_dummy_concat = pd.concat(
    [new_df, ise_usd_dummy],
    axis='columns',
)

ise_usd_dummy_concat

Unnamed: 0,date,ISE(TL),ISE(USD),SP,DAX,FTSE,NIKKEI,BOVESPA,EU,EM,...,0.045764354,0.046046123,0.050339385,0.051330657,0.05155751,0.052522395,0.06128487,0.061708176,0.073005406,0.100620694
0,2009-01-05,0.035754,0.038376,-0.004679,0.002193,0.003894,0,0.03119,0.012698,0.028524,...,0,0,0,0,0,0,0,0,0,0
1,2009-01-06,0.025426,0.031813,0.007787,0.008455,0.012866,0.004162,0.01892,0.011341,0.008773,...,0,0,0,0,0,0,0,0,0,0
2,2009-01-07,-0.028862,-0.026353,-0.030469,-0.017833,-0.028735,0.017293,-0.035899,-0.017073,-0.020015,...,0,0,0,0,0,0,0,0,0,0
3,2009-01-08,-0.062208,-0.084716,0.003391,-0.011726,-0.000466,-0.040061,0.028283,-0.005561,-0.019424,...,0,0,0,0,0,0,0,0,0,0
4,2009-01-09,0.00986,0.009658,-0.021533,-0.019873,-0.01271,-0.004474,-0.009764,-0.010989,-0.007802,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
531,2011-02-16,0.008599,0.0134,0.006238,0.001925,0.007952,0.005717,0.018371,0.006975,0.003039,...,0,0,0,0,0,0,0,0,0,0
532,2011-02-17,0.00931,0.015977,0.003071,-0.001186,0.000345,0.00262,0.001686,-0.000581,0.001039,...,0,0,0,0,0,0,0,0,0,0
533,2011-02-18,0.000191,-0.001653,0.001923,0.002872,-0.000723,0.000568,0.005628,0.000572,0.006938,...,0,0,0,0,0,0,0,0,0,0
534,2011-02-21,-0.013069,-0.013706,-0.020742,-0.014239,-0.011275,0.001358,-0.011942,-0.012615,-0.000958,...,0,0,0,0,0,0,0,0,0,0


## ARIMA

In [None]:
# Rolling ARIMA forecast over the test window: for each test point an
# ARIMA(5,1,0) is refitted on everything seen so far, the first forecast
# value is stored, and the true observation is then added to the history.
history = list(train_ar)
print(type(history))

predictions = []

for step in trange(len(test_ar)):
    model = sm.tsa.arima.ARIMA(history, order=(5, 1, 0))
    model_fit = model.fit()
    predictions.append(model_fit.forecast()[0])
    history.append(test_ar[step])

<class 'list'>



Maximum Likelihood optimization failed to converge. Check mle_retvals

100%|██████████| 108/108 [00:57<00:00,  1.87it/s]


In [None]:
# Fresh metric accumulators; this cell appends the ARIMA scores to them.
rsq_list = []
rmse_list = []
mape_list = []
mae_list = []

# Coefficient of determination (R^2)
Arima_r2 = r2_score(test_ar, predictions)

# Mean squared error -- note: no square root is taken, despite the variable name
Arima_RMSE = mean_squared_error(test_ar, predictions)

# Symmetric mean absolute percentage error, computed by smape_kun
Arima_MAPE = smape_kun(test_ar, predictions)

# Mean absolute percentage error in percent -- note: not a plain MAE, despite the variable name
Arima_MAE = 100 * np.mean(np.abs((test_ar - predictions) / test_ar))

rsq_list.append(Arima_r2)
rmse_list.append(Arima_RMSE)
mape_list.append(Arima_MAPE)
mae_list.append(Arima_MAE)

In [None]:
# Train series, test series and the current `predictions` on one figure.
fig = go.Figure()
fig.add_traces([
    go.Scatter(x=train_df['date'], y=train_df['Price'], mode='lines', name='train'),
    go.Scatter(x=test_df['date'], y=test_df['Price'], mode='lines', name='test'),
    go.Scatter(x=test_df['date'], y=predictions, mode='lines+markers', name='predictions'),
])

fig.update_layout(
    title='ARIMA_Training Result',
    xaxis_title='Date',
    yaxis_title='ISE(USD)',
)

fig.show()

In [None]:
# Holt-Winters exponential smoothing with an additive damped trend and an
# additive seasonal component of period 150.
# Earlier attempt, kept for reference: seasonal='mul' with seasonal_periods=120;
# per the original note it requires every data point to be positive.
model = ExponentialSmoothing(
    train_ar,
    trend='add',
    damped_trend=True,
    seasonal='add',
    seasonal_periods=150,
)
hw_model = model.fit(optimized=True, remove_bias=False)

# Forecast as many steps as there are test observations.
# NOTE: this rebinds `predictions`, replacing the ARIMA forecasts.
predictions = hw_model.forecast(len(test_ar))

In [None]:
# Coefficient of determination (R^2) of the forecast on the test window
holt_r2 = r2_score(test_ar, predictions)

# Mean squared error -- note: no square root is taken, despite the variable name
holt_RMSE = mean_squared_error(test_ar, predictions)
print('Testing Mean Squared Error: %.6f' % holt_RMSE)

# Symmetric mean absolute percentage error, computed by smape_kun
holt_MAPE = smape_kun(test_ar, predictions)
print('Symmetric mean absolute percentage error: %.3f' % holt_MAPE)

# Mean absolute percentage error in percent. The variable keeps its historical
# name, but the printed label now says what is actually computed (it used to
# read "MAE", which this value is not).
holt_MAE = np.mean(np.abs((test_ar - predictions) / test_ar)) * 100
print("MAPE: ", holt_MAE)

# Record this model's scores in the shared metric lists.
rsq_list.append(holt_r2)
rmse_list.append(holt_RMSE)
mape_list.append(holt_MAPE)
mae_list.append(holt_MAE)

Testing Mean Squared Error: 0.003450
Symmetric mean absolute percentage error: 5.309
MAE:  5.400223172653935


## Holt-Winters

In [None]:
# Train series, test series and the current `predictions` on one figure.
line_style = dict(mode='lines')
fig = go.Figure(data=[
    go.Scatter(x=train_df['date'], y=train_df['Price'], name='train', **line_style),
    go.Scatter(x=test_df['date'], y=test_df['Price'], name='test', **line_style),
    go.Scatter(x=test_df['date'], y=predictions, name='predictions', mode='lines+markers'),
])

fig.update_layout(
    title='holt-winters_Training Result',
    xaxis_title='Date',
    yaxis_title='ISE(USD)',
)

fig.show()

## Result

In [None]:
# Summary table: one row per model, one column per metric.
# The column labels name what each list actually contains:
#   rmse_list -> mean_squared_error results (no square root taken)    -> 'MSE'
#   mape_list -> smape_kun results (symmetric MAPE, percent)          -> 'sMAPE'
#   mae_list  -> mean absolute percentage error, percent              -> 'MAPE'
data = {'R_sq': rsq_list,
        'MSE': rmse_list,
        'sMAPE': mape_list,
        'MAPE': mae_list}

meterics_df = pd.DataFrame(data, index=['ARIMA', 'holt-winters'])

print(meterics_df)

                  R_sq      RMSE      MAPE       MAE
ARIMA         0.943831  0.000261  1.364354  1.367230
holt-winters  0.258683  0.003450  5.309373  5.400223
