# In [None]:
import concurrent.futures
import datetime as dt
import os

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import requests
from plotly.subplots import make_subplots
from pmdarima import auto_arima
from statsmodels.tsa.seasonal import seasonal_decompose

# Set globals
# The FRED API key is taken from the FRED_API_KEY environment variable when it
# is set; the literal is kept only as a backward-compatible fallback.
# NOTE(review): a key committed to source should be rotated and this fallback removed.
API_KEY = os.environ.get('FRED_API_KEY', 'fbf2a3cac76ec733ee2b8c01ab036950')
URL_BASE = 'https://api.stlouisfed.org/fred/series/observations'
# Requested observation window, as plain datetime.date objects
START = pd.Timestamp('1983-01-01').date()
END = pd.Timestamp('2022-12-31').date()
# np.busday_count counts weekdays from START up to, but not including, END
BUSDAYS_IN_RANGE = np.busday_count(START, END)
# Row order used for the per-series meta table
META_INDEX = ['observation_start', 'observation_end', 'busdays_in_range', 'actual_start', 'actual_end', 'actual_days', 'nan_count']
# SERIES_LIST and CCY_LIST are paired by position: CCY_LIST is later assigned
# as the column labels of the frame downloaded for SERIES_LIST.
SERIES_LIST = ['DEXUSUK', 'DEXCAUS', 'DEXCHUS', 'DEXJPUS', 'DEXINUS', 'DEXSFUS']
CCY_LIST = ['GBP', 'CAD', 'CNY', 'JPY', 'INR', 'ZAR']

# Create function to get raw json return from FRED database
def get_series_json(series_id, start, end, api_key=API_KEY, file_type='json', url_base=URL_BASE):
    """Request the observations of one FRED series and return the raw response.

    Parameters
    ----------
    series_id : FRED series identifier sent as ``series_id``.
    start, end : values sent as ``observation_start`` / ``observation_end``
        (converted with ``str``).
    api_key : FRED API key.
    file_type : response format requested from the API.
    url_base : endpoint URL.

    Returns
    -------
    The ``requests.Response`` on success, or ``None`` when the request fails
    (an error message is printed in that case).
    """
    # Let requests build and escape the query string instead of concatenating it by hand
    params = {
        'series_id': series_id,
        'observation_start': str(start),
        'observation_end': str(end),
        'api_key': api_key,
        'file_type': file_type,
    }
    resp = None  # stays None if the request never produced a response
    try:
        # A timeout keeps a stalled connection from blocking forever
        resp = requests.get(url_base, params=params, timeout=30)
        resp.raise_for_status()  # Raise exception if invalid response
        return resp
    except requests.RequestException as e:
        if resp is None:
            # Connection-level failure: there is no response body to inspect
            print(f'Error: request for series {series_id} failed\n{e}')
            return None
        try:
            errmsg = resp.json()['error_message'].replace('series', f'series {series_id}')
        except (ValueError, KeyError, TypeError, AttributeError):
            # Error body is not the expected JSON shape; fall back to the raw text
            errmsg = resp.text
        print(f'Error: {resp.status_code}\n{errmsg}')
        return None

# Create function to transform valid json response from FRED into a dataframe
def transform_series_json(resp, series_id):
    """Turn a FRED observations response into a value frame and a meta frame.

    Parameters
    ----------
    resp : object whose ``.json()`` returns a dict with ``observations``,
        ``observation_start``, ``observation_end`` and ``count`` keys.
    series_id : label used for the value column and the meta column.

    Returns
    -------
    (obs, meta) : ``obs`` is a float column named ``series_id`` indexed by
        date, with '.' placeholders converted to NaN; ``meta`` is a
        one-column frame ordered by ``META_INDEX``.
    """
    payload = resp.json()
    obs = pd.DataFrame(payload.pop('observations'))[['date', 'value']]
    obs = obs.assign(date=pd.to_datetime(obs['date'])).set_index('date')

    # '.' is the placeholder counted as a missing observation
    missing = obs['value'] == '.'
    summary = {
        'observation_start': payload['observation_start'],
        'observation_end': payload['observation_end'],
        'busdays_in_range': BUSDAYS_IN_RANGE,
        'actual_days': payload['count'],
        'actual_start': obs.index.min().date(),
        'actual_end': obs.index.max().date(),
        'nan_count': missing.sum(),
    }
    meta = pd.DataFrame({series_id: summary}).reindex(META_INDEX)

    # Blank out the placeholders, then label and convert the value column
    obs.loc[missing, 'value'] = np.nan
    obs = obs.rename(columns={'value': series_id})
    obs[series_id] = obs[series_id].astype(float, errors='raise')
    return obs, meta

# Create function to fill missing values in FRED series dataframe
def fill_series_na(df):
    """Fill missing values in *df* in place and return the same object.

    Gaps are filled with the last valid observation first; anything still
    missing at the start of the data is then filled with the next valid
    observation.
    """
    # DataFrame.ffill / bfill replace the deprecated fillna(method=...) form
    df.ffill(inplace=True)  # Fill missing values with last observation
    df.bfill(inplace=True)  # Then, fill with next observation
    return df

# Create a function to get a time series from FRED and return a clean dataframe
def get_series(series_id, start, end, api_key=API_KEY, file_type='json', fill_na=None):
    """Download one FRED series and return ``(df, meta)``.

    ``fill_na`` defaults to True when left as None; when truthy the value
    frame is passed through ``fill_series_na``. Any exception raised while
    downloading or transforming is printed and ``None`` is returned instead
    of the tuple.
    """
    if fill_na is None:
        fill_na = True  # Default
    try:
        raw = get_series_json(
            series_id=series_id,
            start=start,
            end=end,
            api_key=api_key,
            file_type=file_type,
        )
        frame, info = transform_series_json(raw, series_id=series_id)
        if fill_na:
            frame = fill_series_na(frame)
    except Exception as err:
        print(f'Error retrieving {series_id}.\n{err}')
        return None
    else:
        return frame, info

# Convenience function to get multiple series at once
def get_multiple_series(series_list, start, end, fill_na=None):
    """Download several FRED series and combine them column-wise.

    Parameters
    ----------
    series_list : iterable of FRED series ids.
    start, end : observation window forwarded to ``get_series``.
    fill_na : forwarded to ``get_series``; defaults to True when None.

    Returns
    -------
    (dfs, metas) : the value frames and the meta frames of every series that
        downloaded successfully, each concatenated along the columns.

    Raises
    ------
    ValueError : if no series could be downloaded.
    """
    fill_na = True if fill_na is None else fill_na  # Default
    df_list = []
    meta_list = []
    for series in series_list:
        result = get_series(series_id=series, start=start, end=end, fill_na=fill_na)
        if result is None:
            # get_series already printed the reason; skip instead of crashing on unpack
            continue
        df, meta = result
        df_list.append(df)
        meta_list.append(meta)
    if not df_list:
        raise ValueError('No series could be downloaded; nothing to combine.')
    dfs = pd.concat(df_list, axis=1)
    metas = pd.concat(meta_list, axis=1)

    print(f'\nDownloaded {len(df_list)} / {len(series_list)} series') 
    print(f'\nMeta Info on downloaded series: \n{metas.to_markdown()}')
    print(f'\nCombined series dataframe: \n{dfs.set_index(dfs.index.date).head().to_markdown()}')
    return dfs, metas



# Download every configured series into one combined frame plus its meta table
dfs, metas = get_multiple_series(series_list=SERIES_LIST, start=START, end=END, fill_na=True)


# NOTE: outside a notebook cell, bare expressions such as the two below are
# evaluated and their results discarded.
dfs.describe()
dfs.head()
# Working copy of the downloaded rates, relabelled with the currency codes
rates = dfs.copy()
rates.columns = CCY_LIST
rates.index.freq = 'B'  # declare a business-day frequency on the date index
# Second copy (still labelled with the FRED series ids) in which every column
# except the first is replaced by its reciprocal, 1 / rate.
aligned = dfs.copy()
aligned.iloc[:, 1:] = aligned.iloc[:, 1:].rdiv(1)
aligned.head()
# Layout options passed to the figures built below; the title is overwritten later
layout = {'title': '<b>Currency 40-Year Daily Rates</b><br><sup><i>(1983-2022)</i></sup>',
          'width': 1800,
          'height': 800,
          'template': 'seaborn',
          'hovermode': 'x unified'}

def plot_all_rates(df, layout, x_title, y_title, ht, yshared=False):
    """Draw one line chart per column of *df* on a 2-row by 3-column grid.

    Parameters
    ----------
    df : frame whose index supplies the x values and whose columns are plotted.
    layout : layout settings applied to the finished figure.
    x_title, y_title : shared axis titles for the subplot grid.
    ht : hover template given to every trace.
    yshared : passed to ``make_subplots`` as ``shared_yaxes``.

    Returns the assembled figure.
    """
    names = list(df)
    fig = make_subplots(
        rows=2,
        cols=3,
        shared_xaxes=True,
        shared_yaxes=yshared,
        vertical_spacing=0.05,
        horizontal_spacing=0.02,
        subplot_titles=names,
        x_title=x_title,
        y_title=y_title,
    )
    for pos, name in enumerate(names):
        # First three columns fill the top row, the rest go to the bottom row
        row, col = (1, pos + 1) if pos < 3 else (2, pos - 2)
        line = go.Scatter(x=df.index, y=df[name], mode='lines', name=name, hovertemplate=ht)
        fig.add_trace(line, row=row, col=col)
    fig.update_layout(layout)
    return fig

# Plot the daily rates of every currency on the shared 2x3 grid
x_title = 'Date 1983-2022'
y_title = 'Daily Rate Against US Dollar<br><sup><i>Except in the case of GBP which is reverse</i></sup>'
# NOTE(review): the hover template appears to apply a percent format ('.1%')
# to raw rate values - confirm this is the intended display.
rate_fig = plot_all_rates(df=rates, layout=layout, x_title=x_title, y_title=y_title, ht='%{y:,.1%}')
rate_fig.show()


def plot_all_hist(df, layout, x_title, y_title, ht, yshared=False):
    """Draw one 25-bin histogram per column of *df* on a 2-row by 3-column grid.

    Parameters
    ----------
    df : frame whose columns are histogrammed.
    layout : layout settings applied to the finished figure.
    x_title, y_title : shared axis titles for the subplot grid.
    ht : hover template given to every histogram trace.
    yshared : passed to ``make_subplots`` as ``shared_yaxes``.

    Returns the assembled figure.
    """
    fig = make_subplots(rows=2, cols=3, shared_xaxes=False, vertical_spacing=0.05, horizontal_spacing=0.02,
                        subplot_titles=([ccy for ccy in df]), shared_yaxes=yshared, x_title=x_title, y_title=y_title)
    for i, ccy in enumerate(df):
        # Read from the df argument (not a module-level frame) and honour ht,
        # matching how plot_all_rates treats its arguments.
        trace = go.Histogram(x=df[ccy], name=ccy, nbinsx=25, hovertemplate=ht)
        if i // 3 < 1:
            fig.add_trace(trace, row=1, col=i+1)
        else:
            fig.add_trace(trace, row=2, col=i-2)
    fig.update_layout(layout)
    return fig

# Plot a histogram of the daily rates for every currency
# NOTE(review): x_title still reads 'Date 1983-2022' although the histogram
# x-axis receives rate values - confirm the intended axis label.
x_title = 'Date 1983-2022'
y_title = 'Histogram of Daily Rates Against US Dollar'
hist_fig = plot_all_hist(df=rates, layout=layout, x_title=x_title, y_title=y_title, ht='%{y}')
hist_fig.show()


# Cumulative growth factor per column: period-over-period percentage change,
# plus one, multiplied cumulatively down the rows.
aligned_diff = aligned.pct_change().dropna().add(1).cumprod()
aligned_diff.tail()
# Reuse the shared layout dict with a new title (this mutates `layout` in place)
layout['title'] = '<b>Currency 40-Year Cumulative Percentage Change</b><br><sup><i>(1983-2022)</i></sup>'
x_title = 'Date 1983-2022'
y_title = 'Cumulative Foreign Currency Rate Percentage Change VS US Dollar'
# yshared=True puts all subplots on a shared y-axis
diff_fig = plot_all_rates(df=aligned_diff, layout=layout, x_title=x_title, y_title=y_title, ht='%{y:,.2f}', yshared=True)
diff_fig.show()


def plot_seasonal(series, resample=None):
    """Plot the seasonal decomposition of one series as four stacked panels.

    When *resample* is given, the series is first resampled with that rule
    and averaged. The panels show, top to bottom, the observed, trend,
    seasonal and residual components. Returns the figure.
    """
    if resample is not None:
        series = series.resample(resample).mean()
    result = seasonal_decompose(series)
    panels = (
        ('Observed', result.observed),
        ('Trend', result.trend),
        ('Seasonal', result.seasonal),
        ('Residuals', result.resid),
    )
    fig = make_subplots(rows=4, cols=1, shared_xaxes=True)
    for row, (label, component) in enumerate(panels, start=1):
        fig.add_trace(go.Scatter(x=component.index, y=component.values, name=label), row=row, col=1)
    fig.update_layout(width=1800, height=800, title=f'{series.name} Seasonal Decomposition Plot', template='seaborn')
    # Label each panel's y-axis with its component name
    for row, (label, _) in enumerate(panels, start=1):
        fig.update_yaxes(title_text=label, row=row)
    return fig

def multi_plot_seasonal(df, resample=None):
    """Plot seasonal decompositions for every column of *df* side by side.

    When *resample* is given, the frame is first resampled with that rule
    and averaged. The figure has one subplot column per frame column and
    four rows: observed, trend, seasonal and residual. Returns the figure.
    """
    if resample is not None:
        df = df.resample(resample).mean()
    labels = ['Observed', 'Trend', 'Seasonal', 'Residuals']
    fig = make_subplots(rows=4, cols=df.shape[1], subplot_titles=df.columns, shared_xaxes=True,
                        vertical_spacing=0.01, horizontal_spacing=0.03)
    for figcol, col in enumerate(df.columns, start=1):
        result = seasonal_decompose(df[col])
        components = (result.observed, result.trend, result.seasonal, result.resid)
        for row, (label, component) in enumerate(zip(labels, components), start=1):
            fig.add_trace(go.Scatter(x=component.index, y=component.values, name=f'{col}: {label}'),
                          row=row, col=figcol)
    fig.update_layout(width=2200, height=800, title='Seasonal Decomposition Plot', template='seaborn', showlegend=False)
    # Row titles go on the left-most column only
    for row, label in enumerate(labels, start=1):
        fig.update_yaxes(title_text=label, row=row, col=1)
    # The tick format applies to every y-axis, so setting it once is enough
    fig.update_yaxes(tickformat='.1f')
    return fig

# Decompose every currency after resampling the daily rates with rule 'M'
# NOTE(review): newer pandas releases appear to prefer 'ME' over 'M' for this
# rule - confirm against the installed pandas version.
multiplot = multi_plot_seasonal(rates, resample='M')
multiplot.show()


# Build train/test split 
def train_test_split(df, days=90):
    """Split *df* into a training part and a trailing test part.

    The cutoff is *days* calendar days before the last index value; the row
    whose index is nearest to that cutoff is the last training row, and all
    later rows form the test set.

    Parameters
    ----------
    df : frame or series with a sorted, unique datetime index.
    days : length of the trailing window in calendar days.

    Returns
    -------
    (train, test) : copies of the two consecutive, non-overlapping parts.
        ``test`` is empty when the cutoff falls on the last row.

    Raises
    ------
    IndexError : if *df* has no rows.
    """
    cutoff = df.index[-1] - dt.timedelta(days=days)
    # Position of the last training row: the index entry nearest to the cutoff
    split_pos = df.index.get_indexer([cutoff], method='nearest')[0]
    # Positional slicing avoids indexing past the end when the cutoff is the last row
    train = df.iloc[:split_pos + 1].copy()
    test = df.iloc[split_pos + 1:].copy()
    return train, test

# Hold out the trailing 90 calendar days of every currency as the test window
trains, tests = train_test_split(rates, days=90)

# Fit one auto_arima model per currency in parallel worker processes and
# collect the fitted models keyed by currency code.
# NOTE(review): the pool is created at module level without an
# `if __name__ == '__main__':` guard - confirm this runs on platforms that
# start worker processes by spawning.
arimafits = {}
with concurrent.futures.ProcessPoolExecutor(max_workers=8) as executor:
    # Map each submitted future back to its currency so results can be labelled
    future_to_arima = {executor.submit(auto_arima, trains[ccy]): ccy for ccy in trains}
    for future in concurrent.futures.as_completed(future_to_arima):
        ccy = future_to_arima[future]
        try:
            arimafits[ccy] = future.result()
        except Exception as e:
            # A failed fit is reported and that currency is left out of arimafits
            print(f'{ccy} generated an exception: {e}')
        else:
            print(f'{ccy} ARIMA Summary:\n{arimafits[ccy].summary().as_text()}\n\n{"-"*100}')
            
            
            
# Layout for the forecast figures (replaces the earlier `layout` dict)
layout = {'title': 'Currency Rate 90-Day Forecast',
          'width': 2000,
          'height': 1200,
          'template': 'seaborn',
          'hovermode': 'x unified'}
# Hover template read as a module-level name by plot_all_forecasts
hovertemp = '%{y:,.4f}'

def plot_all_forecasts(trains, arimafits, layout, x_title, y_title, historical, sma, sma_df):
    """Plot the forecast, confidence band and actual values for each currency.

    Parameters
    ----------
    trains : training frame; one subplot is drawn per column on a 2x3 grid.
    arimafits : mapping from column name to a fitted model exposing ``arima_res_``.
    layout : layout settings applied to the finished figure.
    x_title, y_title : shared axis titles for the subplot grid.
    historical : when truthy, also draw the training data of the last training year.
    sma : when truthy, also draw the matching column of *sma_df*.
    sma_df : frame of moving averages; only read when *sma* is truthy.

    Note: the held-out data and the hover template are read from the
    module-level names ``tests`` and ``hovertemp``, not from parameters.

    Returns the assembled figure.
    """
    names = list(trains)
    fcfig = make_subplots(
        rows=2,
        cols=3,
        shared_xaxes=True,
        shared_yaxes=False,
        vertical_spacing=0.05,
        horizontal_spacing=0.02,
        subplot_titles=names,
        x_title=x_title,
        y_title=y_title,
    )
    # Options common to every trace in the figure
    common = dict(mode='lines', showlegend=False, hovertemplate=hovertemp)
    for pos, ccy in enumerate(names):
        train = trains[ccy]
        test = tests[ccy]
        year = str(trains.index[-1].year)
        prediction = arimafits[ccy].arima_res_.get_prediction(start=train.index[-1], end=test.index[-1])
        fc = prediction.summary_frame()
        # First three currencies fill the top row, the rest go to the bottom row
        cell = dict(row=1, col=pos + 1) if pos < 3 else dict(row=2, col=pos - 2)

        fcfig.add_trace(go.Scatter(name='Forecast', x=fc.index, y=fc['mean'], line=dict(color='#e66830'), **common), **cell)
        # The lower bound is added right after the upper bound so that
        # fill='tonexty' shades the area between the two.
        fcfig.add_trace(go.Scatter(name='Upper CI', x=fc.index, y=fc['mean_ci_upper'], line=dict(width=0), **common), **cell)
        fcfig.add_trace(go.Scatter(name='Lower CI', x=fc.index, y=fc['mean_ci_lower'], marker=dict(color="#444"),
                                   line=dict(width=0), fillcolor='rgba(66, 107, 133, 0.3)', fill='tonexty', **common), **cell)
        fcfig.add_trace(go.Scatter(name='Actual', x=test.index, y=test, line=dict(color='#00b2c9'), **common), **cell)
        if historical:
            recent = train.loc[year]
            fcfig.add_trace(go.Scatter(name='Historical', x=recent.index, y=recent, line=dict(color='#200040'), **common), **cell)
        if sma:
            fcfig.add_trace(go.Scatter(name='SMA', x=sma_df.index, y=sma_df[ccy], line=dict(color='#d3de00'), **common), **cell)

    fcfig.update_layout(layout)
    return fcfig
    
# Forecast figure with the last training year drawn as history and no SMA overlay
fcfig = plot_all_forecasts(trains, arimafits, layout, x_title='Date range for 2022 year', 
                           y_title='Daily Rate Against US Dollar<br><sup><i>with forecast, actual, and upper/lower confidence bounds for last 90-days</i></sup>',
                           historical=True, sma=False, sma_df=None)
fcfig.show()


from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error


# Create function to evaluate forecasts using MSE, RMSE, and MAPE
def eval_forecasts(preds, tests, arima):
    """Score forecasts against the held-out data, one row per currency.

    Parameters
    ----------
    preds : when *arima* is truthy, a mapping from currency to a fitted model
        exposing ``arima_res_``; otherwise a mapping/frame of predicted
        values keyed by currency.
    tests : frame of actual values whose index spans the forecast window.
    arima : whether *preds* holds fitted models rather than predictions.

    Returns
    -------
    DataFrame indexed by currency with columns 'MSE', 'RMSE', 'Mean Act',
    'RMSE / Mean Act' and 'MAPE', sorted by 'MAPE' in descending order.
    """
    eval_results = {}
    metric = ['MSE', 'RMSE', 'Mean Act', 'RMSE / Mean Act', 'MAPE']
    for ccy in preds:
        if arima:
            # Use the public predicted_mean accessor rather than the private attribute
            pred = preds[ccy].arima_res_.get_prediction(start=tests.index[0], end=tests.index[-1]).predicted_mean
        else:
            pred = preds[ccy]
        act = tests[ccy]
        mse = mean_squared_error(act, pred)
        # RMSE is the square root of MSE; computing it directly avoids the
        # `squared=False` argument, which newer scikit-learn releases no longer accept
        rmse = float(np.sqrt(mse))
        mape = mean_absolute_percentage_error(act, pred)  # Mean Absolute Percentage Error
        mean_act = act.mean()
        eval_results[ccy] = [mse, rmse, mean_act, rmse / mean_act, mape]
    return pd.DataFrame(eval_results, index=metric).T.sort_values('MAPE', ascending=False)
    
# Per-column float formats for the markdown tables (first entry is for the index column)
md_formats = (',.4f', ',.4f', ',.4f', ',.4f', ',.2%', '.2%')
arima_eval = eval_forecasts(preds=arimafits, tests=tests, arima=True)
print(f'ARIMA Evaluation:\n{arima_eval.to_markdown(floatfmt=md_formats)}')
# 90-row simple moving average of the rates, used as a baseline
ma_window = 90
sma = rates.rolling(ma_window).mean()
# Keep only the rows of the held-out window so the SMA lines up row-for-row
# with `tests`. (The name `start` used here previously was never defined at
# module level and raised NameError.)
# NOTE(review): the rolling mean is computed from `rates`, which contains the
# test-period observations, so this baseline is not an out-of-sample forecast
# - confirm this comparison is intended.
sma = sma.loc[tests.index[0]:]

layout['title'] = 'Currency Rate 90-Day Forecast with 90-Day SMA'

fcfig = plot_all_forecasts(trains, arimafits, layout, x_title='Date range for 2022 year', 
                           y_title='Daily Rate Against US Dollar<br><sup><i>with forecast, Simple Moving Average actual, and upper/lower confidence bounds for last 90-days</i></sup>',
                           historical=False, sma=True, sma_df=sma)
fcfig.show()
sma_eval = eval_forecasts(sma, tests, arima=False)
print(f'ARIMA Evaluation: \n{arima_eval.to_markdown(floatfmt=md_formats)}')
print(f'SMA Evaluation: \n{sma_eval.to_markdown(floatfmt=md_formats)}')

# Side-by-side MAPE per currency, with the lower-error model named in 'Better Model'
combined_eval = sma_eval[['MAPE']].merge(arima_eval[['MAPE']], left_index=True, right_index=True, suffixes=('_SMA', '_ARIMA'))
combined_eval['Better Model'] = combined_eval.apply(lambda x: 'SMA' if x['MAPE_SMA'] < x['MAPE_ARIMA'] else 'ARIMA', axis=1)
print(f'Combined Evaluation: \n{combined_eval.to_markdown(floatfmt=".2%")}')