# This notebook tests the forecast accuracy of a Prophet model

## Imports

In [1]:
import pandas as pd
import numpy as np
import prophet

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.


## Defining helper functions

In [37]:
def predict(model: prophet.Prophet, days: int = 1) -> list:
    '''
        Forecasts the next `days` days at hourly resolution with a fitted model.

        Parameters:
            model: a fitted Prophet model.
            days: forecast horizon in days; 24 hourly points are produced per day.

        Returns:
            A list with the 24 * days forecast values ('yhat'), ordered in time.
            An empty list is returned when days is 0.

        Raises:
            ValueError: if days is negative.
    '''
    if days < 0:
        raise ValueError('days must be non-negative, got %r' % (days,))

    horizon_hours = 24 * days
    if horizon_hours == 0:
        # Nothing to forecast. Slicing with iloc[-0:] would have returned the
        # whole frame (history included) instead of an empty result.
        return []

    # include_history=False asks only for the rows after the training data,
    # so the training period is not re-predicted just to be sliced away.
    future = model.make_future_dataframe(periods=horizon_hours,
                                         freq='h',
                                         include_history=False)
    forecast = model.predict(future)

    return forecast['yhat'].to_list()


def error_1(predicted: np.ndarray, target: np.ndarray) -> np.float64:
    '''
        Error 1: root-mean-square error (RMSE) between prediction and target.

        Computed as sqrt(sum((predicted - target) ** 2) / len(predicted)).

        Parameters:
            predicted: forecast values (array or any array-like, e.g. a list).
            target: observed values, same length as predicted.

        Returns:
            The RMSE as a float; always non-negative.
    '''
    # Coerce so that plain lists work too (list - list would raise TypeError).
    predicted = np.asarray(predicted, dtype=float)
    target = np.asarray(target, dtype=float)

    residuals = predicted - target
    return np.sqrt(np.square(residuals).sum() / len(predicted))


def error_2(predicted: np.ndarray, target: np.ndarray) -> np.float64:
    '''
        Error 2: mean signed error (bias) between prediction and target.

        Computed as sum(predicted - target) / len(predicted). Positive values
        mean the prediction is above the target on average, negative values
        mean it is below. Errors of opposite sign cancel each other out.

        Parameters:
            predicted: forecast values (array or any array-like, e.g. a list).
            target: observed values, same length as predicted.

        Returns:
            The mean signed error as a float.
    '''
    # Coerce so that plain lists work too (list - list would raise TypeError).
    predicted = np.asarray(predicted, dtype=float)
    target = np.asarray(target, dtype=float)

    return (predicted - target).sum() / len(predicted)


def estimate(model: prophet.Prophet, validation_data: pd.DataFrame, days: int = 1) -> tuple[np.float64, np.float64]:
    '''
        Scores the model's forecast for the next `days` days against validation data.

        The forecast produced by `predict` is compared with the first 24 * days
        values of the 'y' column of validation_data. Rows are matched purely by
        position, not by timestamp.
        NOTE(review): this assumes validation_data is hourly and starts right
        after the data the model was fitted on - confirm.

        Parameters:
            model: a fitted Prophet model.
            validation_data: frame with a 'y' column holding the observed values.
            days: forecast horizon in days.

        Returns:
            A tuple (err_1, err_2) with the results of `error_1` and `error_2`.

        Raises:
            ValueError: if the number of validation values does not match the
                number of predicted values.
    '''

    prediction = np.asarray(predict(model=model, days=days))
    target = validation_data.iloc[:24 * days]['y'].to_numpy()

    # Fail loudly instead of letting numpy broadcast a one-row target against
    # the whole forecast, which would produce a misleading score.
    if len(target) != len(prediction):
        raise ValueError(
            'validation_data provides %d values but %d were predicted'
            % (len(target), len(prediction))
        )

    err_1 = error_1(target=target, predicted=prediction)
    err_2 = error_2(target=target, predicted=prediction)

    return err_1, err_2

## Testing

In [38]:
# Read the training and validation splits (same loader, two files).
training_data, validation_data = (
    pd.read_csv(csv_path)
    for csv_path in ('./data/training.csv', './data/validation.csv')
)

# Fit a Prophet model with default settings on the training split;
# fit() hands back the fitted model itself.
model = prophet.Prophet().fit(training_data)

# Not used in this cell.
# NOTE(review): presumably the last timestamp covered by the data - confirm.
last_prediction = '2016-04-06 22:00:00'

21:01:20 - cmdstanpy - INFO - Chain [1] start processing
21:01:21 - cmdstanpy - INFO - Chain [1] done processing


In [39]:
# Forecast horizons to score, in days: a day, a week, a month, half a year.
days = [1, 7, 30, 30 * 6]

for days_num in days:
    acc = estimate(model=model, validation_data=validation_data, days=days_num)
    first_error, second_error = acc

    # Build the two halves of the report line separately; print joins them
    # with a single space.
    headline = 'Average squared error for the %d days: ' % days_num
    details = 'Error_1 - %f, Error_2 - %f' % (first_error, second_error)
    print(headline, details)

Average squared error for the 1 days:  Error_1 - 6.523579, Error_2 - -5.962839
Average squared error for the 7 days:  Error_1 - 4.173950, Error_2 - -1.980839
Average squared error for the 30 days:  Error_1 - 4.403863, Error_2 - 0.481318
Average squared error for the 180 days:  Error_1 - 8.371225, Error_2 - 4.661354
