## Acquiring the data

In [1]:
import numpy as np
import pandas as pd
import glob

# Collect the CSV files in the relative data directory whose name starts with "data".
# glob returns matches in arbitrary order, so they are sorted to make data[0] deterministic.
data = sorted(glob.glob('../data/data*.csv'))
if not data:
    raise FileNotFoundError("No file matching '../data/data*.csv' was found.")

# File rows 1 and 483 are skipped to avoid additional cleaning, zeros are read as
# missing values, and the 'Payouts Payout Currency' column is parsed as dates and
# used as the (unnamed) index.
MM = pd.read_csv(data[0], skiprows=[1, 483], header=0,
                 na_values=0, parse_dates=['Payouts Payout Currency'],
                 index_col='Payouts Payout Currency').rename_axis(None)

In [None]:
# Remove pandas' display limits so frames are shown with every row and every column.
pd.options.display.max_rows = None
pd.options.display.max_columns = None

In [None]:
def series_begins(x):
    """Return the first missing-value label that opens a run of three weekly gaps.

    The index labels where ``x`` is null are scanned in index order. A label
    qualifies when the null label two positions after it is exactly 14 days
    earlier.
    NOTE(review): this assumes ``x`` has a weekly date index sorted newest
    first -- confirm against the loaded file.

    Parameters
    ----------
    x : pandas.Series
        Series indexed by dates.

    Returns
    -------
    The qualifying index label, or None when no such run exists.
    """
    nan_index = x.index[x.isnull()]
    # Stop two labels short of the end so nan_index[position + 2] always exists;
    # scanning to the very end raised IndexError whenever nothing matched.
    for position in range(len(nan_index) - 2):
        difference = nan_index[position] - nan_index[position + 2]
        if difference.days == 14:
            return nan_index[position]
    return None


In [None]:
def series_ends(x):
    """Return the first non-missing label that is followed by a value one week earlier.

    The index labels where ``x`` is not null are scanned in index order. A
    label qualifies when the next non-null label is exactly 7 days earlier.
    NOTE(review): this assumes ``x`` has a weekly date index sorted newest
    first -- confirm against the loaded file.

    Parameters
    ----------
    x : pandas.Series
        Series indexed by dates.

    Returns
    -------
    The qualifying index label, or None when no two such labels exist.
    """
    notnan_index = x.index[x.notna()]
    # Stop one label short of the end so notnan_index[position + 1] always exists;
    # scanning to the very end raised IndexError whenever nothing matched.
    for position in range(len(notnan_index) - 1):
        notnan_difference = notnan_index[position] - notnan_index[position + 1]
        if notnan_difference.days == 7:
            return notnan_index[position]
    return None

In [None]:
def series_ends_begins(x):
    """Return ``[end, begin]`` labels bounding the usable part of ``x``.

    ``end`` is the first non-null label whose next non-null label is exactly
    7 days earlier. ``begin`` is then searched from ``end`` onwards: the first
    null label whose null label two positions later is exactly 14 days earlier.
    NOTE(review): this assumes ``x`` has a weekly date index sorted newest
    first -- confirm against the loaded file.

    Parameters
    ----------
    x : pandas.Series
        Series indexed by dates.

    Returns
    -------
    list or None
        ``[end, begin]`` when both labels are found, otherwise None.
    """
    notnan_index = x.index[x.notna()]
    # Both scans stop short of the end so the look-ahead label always exists;
    # scanning to the very end raised IndexError whenever nothing matched.
    for position in range(len(notnan_index) - 1):
        if (notnan_index[position] - notnan_index[position + 1]).days != 7:
            continue
        series_end = notnan_index[position]
        remainder = x.loc[series_end:]
        nan_index = remainder.index[remainder.isnull()]
        for gap_position in range(len(nan_index) - 2):
            if (nan_index[gap_position] - nan_index[gap_position + 2]).days == 14:
                return [series_end, nan_index[gap_position]]
        # Only the first qualifying end is ever used as the search window, so
        # if no gap follows it there is nothing more to try.
        return None
    return None
                
                
# Check which type series_ends_begins returns for the GBP column of MM.
type(series_ends_begins(MM['GBP']))

In [None]:
def series_end_start(x):
    """List the end and the start of the time series held in ``x``.

    The end is the first non-null label whose next non-null label is exactly
    7 days earlier (two consecutive weeks with a value). The start is searched
    from the end onwards: the first null label whose null label two positions
    later is exactly 14 days earlier (three consecutive weeks without a value).
    NOTE(review): this assumes ``x`` has a weekly date index sorted newest
    first -- confirm against the loaded file.

    Parameters
    ----------
    x : pandas.Series
        Series indexed by dates.

    Returns
    -------
    list or None
        ``[end, start]`` when both labels are found, otherwise None.
    """
    notnan_index = x.index[x.notna()]
    # Both scans stop short of the end so the look-ahead label always exists;
    # scanning to the very end raised IndexError whenever nothing matched.
    for position in range(len(notnan_index) - 1):
        if (notnan_index[position] - notnan_index[position + 1]).days != 7:
            continue
        series_end = notnan_index[position]
        remainder = x.loc[series_end:]
        nan_index = remainder.index[remainder.isnull()]
        for gap_position in range(len(nan_index) - 2):
            if (nan_index[gap_position] - nan_index[gap_position + 2]).days == 14:
                return [series_end, nan_index[gap_position]]
        # A later end only narrows the window searched for the gap, so if the
        # first qualifying end has no gap after it, no later one will either.
        return None
    return None

In [None]:
# Spot-check the value series_end_start returns for the PEN column of MM.
series_end_start(MM['PEN'])

In [None]:
# Spot-check the value series_ends_begins returns for the KRW column of MM.
series_ends_begins(MM['KRW'])

In [None]:
# Scratch check: a two-item list can be filled in place by position, the same
# pattern series_end_start uses for its end_start list. Displays [0, 1].
series = [0,0]
series[1] = 1
series

In [None]:
# One entry per column of MM: the value series_begins returns for that column
dict_series_begings = dict((column, series_begins(MM[column])) for column in MM.columns)

In [None]:
# One entry per column of MM: the value series_end_start returns for that column
dict_series_end_start = dict((column, series_end_start(MM[column])) for column in MM.columns)

In [None]:
# Display the whole column -> series_end_start result mapping for inspection.
dict_series_end_start

In [None]:
# Preview the USD column between its stored end and start labels; .iloc[:-1]
# drops the last row of that slice. get_df performs the same selection per market.
MM.loc[dict_series_end_start['USD'][0]:dict_series_end_start['USD'][1],'USD'].iloc[:-1]

In [None]:
def get_df(x):
    """Create the data frame for market ``x`` bounded by the end and start of its series.

    Reads the notebook-level ``MM`` frame and ``dict_series_end_start`` mapping:
    column ``x`` of ``MM`` is sliced between the two stored labels, the last
    row of that slice is dropped and missing values are replaced with 0.

    Parameters
    ----------
    x : str
        Column of ``MM`` and key of ``dict_series_end_start``.

    Returns
    -------
    pandas.DataFrame or None
        The slice with its index moved into a column; the columns named
        'index' and ``x`` are renamed to 'ds' and 'y'. When ``x`` is unknown
        or has no stored bounds, a message is printed and None is returned.
    """
    try:
        bounds = dict_series_end_start[x]
        series = MM.loc[bounds[0]:bounds[1], x].iloc[:-1].fillna(0)
    except (KeyError, TypeError, IndexError):
        # KeyError: unknown market/column or label; TypeError/IndexError: no usable
        # bounds were stored. Anything else is a genuine bug and is allowed to surface
        # instead of being hidden behind the message below.
        print(f"There isn't enough historic data to predict {x} volumes or {x} is not a valid market.")
        return None
    return series.reset_index().rename({'index':'ds', x:'y'}, axis=1)

In [None]:
# NOTE(review): 'GUA' presumably is not a column of MM, so this exercises the
# message path of get_df -- confirm.
get_df('GUA')

In [None]:
from matplotlib import pyplot as plt
from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation

In [None]:
def plot_forecast(market, period, frequency):
    """Fit a model for ``market``, forecast it and plot the forecast with changepoints.

    The model is fitted here rather than through create_forecast because
    create_forecast returns only the forecast frame, while plotting needs the
    fitted model as well.

    Parameters
    ----------
    market : str
        Market passed to fit_model.
    period : int
        Passed as ``periods`` to ``make_future_dataframe``.
    frequency : str
        Passed as ``freq`` to ``make_future_dataframe``.

    Returns
    -------
    None
        The figure is produced as a side effect.
    """
    # Imported locally because the notebook-level import of this helper sits in a later cell.
    from fbprophet.plot import add_changepoints_to_plot

    model = fit_model(market)
    future = model.make_future_dataframe(periods=period, freq=frequency)
    forecast = model.predict(future)

    fig = model.plot(forecast)
    add_changepoints_to_plot(fig.gca(), model, forecast)

In [None]:
# NOTE(review): plot_forecast is defined with three required parameters
# (market, period, frequency), so this bare call raises TypeError as written.
plot_forecast()

In [None]:
def create_forecast(market, period, frequency):
    """Fit the model for ``market`` and return its forecast.

    ``period`` and ``frequency`` are handed to ``make_future_dataframe`` as
    ``periods`` and ``freq``; the frame it builds is what the model predicts on.
    """
    fitted = fit_model(market)
    timeline = fitted.make_future_dataframe(periods=period, freq=frequency)
    return fitted.predict(timeline)


In [None]:
def fit_model(market):
    """Fit a Prophet model on the frame get_df builds for ``market``.

    Parameters
    ----------
    market : str
        Market passed to get_df.

    Returns
    -------
    The value returned by ``Prophet.fit`` (callers use it as the fitted model).

    Raises
    ------
    ValueError
        When get_df returns None, i.e. it could not build a frame for ``market``.
    """
    X = get_df(market)
    if X is None:
        # get_df only prints a message on failure; stop here with a clear error
        # instead of handing None to Prophet.
        raise ValueError(f"No training data available for market {market!r}.")
    model = Prophet(seasonality_mode='multiplicative', changepoint_prior_scale=0.5)
    return model.fit(X)

In [None]:
def model_error(market, baseline, cutoff, fcst, units):
    """Fit the model for ``market``, cross-validate it and return the MAPE.

    ``baseline``, ``cutoff`` and ``fcst`` are converted to timedeltas in
    ``units`` and passed to ``cross_validation`` as ``initial``, ``period``
    and ``horizon`` respectively. The error is computed from the ``y`` and
    ``yhat`` columns of the cross-validation result.
    """
    fitted = fit_model(market)

    initial_window = pd.to_timedelta(baseline, unit=units)
    cutoff_spacing = pd.to_timedelta(cutoff, unit=units)
    forecast_horizon = pd.to_timedelta(fcst, unit=units)

    folds = cross_validation(model=fitted, initial=initial_window,
                             period=cutoff_spacing, horizon=forecast_horizon)

    return mean_absolute_percentage_error(folds.y, folds.yhat)

In [None]:
# Print the cross-validated error of each listed market, using a 208-week initial
# window, a 21-week spacing between cutoffs and a 21-week horizon.
for i in ['AUD', 'CAD', 'EUR', 'GBP', 'JPY', 'USD']:
    result = model_error(i, 208, 21, 21, 'W')
    print(f'{i} error is {result}')

In [None]:
# Cross-validated error for GBP alone, with the same 208/21/21-week settings.
model_error('GBP', 208, 21, 21, 'W')

In [None]:
# Leftover interactive help lookup (IPython `?` syntax); it computes nothing.
?Prophet.make_future_dataframe

In [None]:
def cross_val(baseline, cutoff, fcst, units, model=None):
    """Cross-validate a fitted model and return the cross-validation results.

    Parameters
    ----------
    baseline, cutoff, fcst : int
        Sizes, in ``units``, passed to ``cross_validation`` as ``initial``,
        ``period`` and ``horizon`` respectively.
    units : str
        Unit handed to ``pd.to_timedelta`` (e.g. 'W' or 'D').
    model : optional
        Fitted model to validate. When omitted, the notebook-level model ``m``
        is used, which is what this function always did before.

    Returns
    -------
    The value returned by ``cross_validation``.
    """
    if model is None:
        # Backward-compatible default: fall back to the notebook's global model.
        model = m
    cv_results = cross_validation(model=model, initial=pd.to_timedelta(baseline, unit=units),
                                  period=pd.to_timedelta(cutoff, unit=units),
                                  horizon=pd.to_timedelta(fcst, unit=units))
    return cv_results

In [None]:
#Cross Validation

from fbprophet.diagnostics import cross_validation

# Reuse cross_val so the window logic lives in one place: 208-week initial window,
# 21 weeks between cutoffs and a 21-week horizon, validated on the notebook model m.
cv_results = cross_val(208, 21, 21, "W")

cv_results.head()

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error of ``y_pred`` against ``y_true``.

    Observations whose actual value is 0 are left out, because the percentage
    error is undefined there and a single zero would otherwise turn the whole
    result into inf or nan.

    Parameters
    ----------
    y_true, y_pred : array-like
        Actual and predicted values of equal length.

    Returns
    -------
    float
        The error in percent, or nan when no non-zero actual value exists.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    nonzero = y_true != 0
    if not nonzero.any():
        return float('nan')
    return np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100

In [None]:
# Error of the predictions (yhat) against the actuals (y) held in cv_results.
mean_absolute_percentage_error(cv_results.y, cv_results.yhat)

In [None]:
# Leftover interactive help lookup (IPython `?` syntax); it computes nothing.
# NOTE(review): among the cells visible here, this name is only imported in a later cell.
?plot_cross_validation_metric

In [None]:
from fbprophet.diagnostics import performance_metrics
from fbprophet.plot import plot_cross_validation_metric

# Compute the performance metrics table from the cross-validation results.
df_p = performance_metrics(cv_results)

# Plot the 'mape' metric of the same results; the returned figure is kept in fig.
fig = plot_cross_validation_metric(cv_results, metric='mape')

In [None]:
# Leftover interactive help lookup (IPython `?` syntax); it computes nothing.
?performance_metrics

In [None]:
from fbprophet.diagnostics import performance_metrics


# Recompute the performance metrics table from cv_results and display all of it.
df_p = performance_metrics(cv_results)
df_p

In [None]:
# Plot the JPY forecast with period=5 and frequency='W'.
plot_forecast('JPY', 5, 'W')

In [None]:
# Run create_forecast for each listed market with period=10 and frequency='W'.
# NOTE(review): the returned forecasts are not stored, so nothing is kept from this loop.
for i in ['AUD', 'CAD', 'EUR', 'GBP', 'JPY', 'USD']:
    create_forecast(i, 10, 'W')

#create_forecast('CAD', 10, 'W')

In [None]:
from fbprophet.plot import add_changepoints_to_plot

# Build the AUD training frame and a Prophet model configured like the one in fit_model.
# m is the notebook-level model that cross_val and the cross-validation cell read.
baseline = get_df('AUD')
m = Prophet(seasonality_mode='multiplicative', changepoint_prior_scale=0.5)


# Fit on the AUD frame, build a timeline with periods=13 and freq='W', and predict on it.
m.fit(baseline)
futures = m.make_future_dataframe(periods=13, freq='W')
forecast = m.predict(futures)
#print(futures.tail(13))
# Plot the forecast, then pass the figure's current axes to add_changepoints_to_plot.
fig = m.plot(forecast)
a = add_changepoints_to_plot(fig.gca(), m, forecast)

In [None]:
# Print the changepoints attribute of the notebook-level model m.
print(m.changepoints)

In [None]:
from fbprophet.diagnostics import performance_metrics
# NOTE(review): df_cv is not defined in the cells visible here (earlier cells name the
# cross-validation frame cv_results) -- confirm which frame is intended.
df_p = performance_metrics(df_cv)
df_p.head()


## Cleaning Data