# Experiments on time series generation with ARIMA models



In [1]:
# automatically reload libraries (useful when working on libraries while playing with the notebook)
%load_ext autoreload
%autoreload 2

In [2]:
import datetime
from pathlib import Path

def save_new_csv(s,path='.',root_name='test_ts',freq='H',start_time=None,columns=['Timestamp','Value']):
    """Write `s` to a new, randomly named CSV file and return that file's path.

    The target path is obtained from `get_free_random_path(path, root_name, 'csv')`
    and the data from `with_ts_index(s, freq, start_time)`.

    `columns` must unpack into exactly two names: the first is assigned to the
    index name and the second to the series name before `to_csv` is called.
    The default list is only read, never mutated.
    """
    target = get_free_random_path(path,root_name,'csv')
    stamped = with_ts_index(s,freq,start_time)
    index_label, value_label = columns
    stamped.index.name = index_label
    stamped.name = value_label
    stamped.to_csv(target)
    return target
    
def with_ts_index(s,freq='H',start_time=None):
    """Return a copy of `s` whose index is a regular date range.

    The new index has `len(s)` entries, starts at `start_time` and advances by
    `freq`. When `start_time` is None, it defaults to midnight on the first day
    of the current month, one year before now. The input `s` is left untouched.
    """
    if start_time is None:
        today = datetime.datetime.now()
        start_time = datetime.datetime(today.year - 1, today.month, 1)
    stamped = s.copy()
    stamped.index = pd.date_range(start=start_time, periods=len(s), freq=freq)
    return stamped

def get_free_random_path(path,root_name,extension):
    """Return a path `<path>/<root_name>_NNNN.<extension>` that does not exist yet.

    NNNN is a zero-padded random integer drawn from 0..1000 (inclusive) using the
    `random` module; a new number is drawn until the candidate path is free.
    Nothing is created on disk. Note that the loop never ends if all 1001
    candidate names already exist.
    """
    folder = Path(path)
    while True:
        candidate = folder / f'{root_name}_{random.randint(0,1000):04d}.{extension}'
        if not candidate.exists():
            return candidate


## ARMA models

In [3]:
# allow import from parent directory
import sys,os
# Resolve the parent directory to an absolute, normalized path and register it
# only once, so re-running this cell does not pile up duplicate sys.path entries.
_parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

In [4]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt

In [5]:
# import helper functions for dataset generation
# from ts_gen import make_binary_code_dataset,make_flat_dataset,make_slopes_dataset
# import helper functions for dataset visualization
from ts_viz import plot_list_of_ts,plot_list_of_ts_over_subplots
# import helper functions to adjust list of series according to output of the clustering
# from ts_utils import get_clustered_list

In [41]:
import generator_old as gold
import generator as gnew



### [Autoregressive model](https://en.wikipedia.org/wiki/Autoregressive_model)

The notation AR(p) refers to the autoregressive model of order p. The AR(p) model is written:

![AR equation](img/ar.svg)

where $\varphi _{1},\ldots ,\varphi _{p}$ are parameters, c is a constant, and the random variable $\varepsilon _{t}$ is white noise. The value of p is called the order of the AR model.

Some constraints are necessary on the values of the parameters so that the model remains stationary. For example, processes in the AR(1) model with ${\displaystyle |\varphi _{1}|\geq 1}$ are not stationary.

In [42]:
# AR configurations under test: no AR terms, three AR terms, and four AR terms
# with a constant of 2.
param_list = [
    {'c': 0, 'pcoeff': []},
    {'c': 0, 'pcoeff': [1, -0.2, 0.1]},
    {'c': 2, 'pcoeff': [1, -0.5, 0.5, -0.1]},
]
samples_per_model = 1

# --- old implementation --------------------------------------------------
# Both global RNGs are reset before each batch so the two runs start from the
# same random state.
np.random.seed(0)
random.seed(0)

list_of_series_old = []
for i, params in enumerate(param_list):
    for _ in range(samples_per_model):
        ar = gold.AR(**params)
        ar_sample = pd.Series(ar.generate(n=5), name=i)
        list_of_series_old.append(ar_sample)
ar_frame_old = pd.DataFrame(list_of_series_old)
print(ar_frame_old)

# --- new implementation --------------------------------------------------
np.random.seed(0)
random.seed(0)

list_of_series_new = []
for i, params in enumerate(param_list):
    for _ in range(samples_per_model):
        ar = gnew.AR(**params)
        ar_sample = pd.Series(ar.generate(n=5), name=i)
        list_of_series_new.append(ar_sample)
ar_frame_new = pd.DataFrame(list_of_series_new)
print(ar_frame_new)

# True only if every generated value matches exactly between the two runs.
(ar_frame_old == ar_frame_new).values.all()

          0         1         2         3         4
0  0.941715 -1.396578 -0.679714  0.370504 -1.016349
1 -0.072120  0.107076 -0.709599 -2.047263 -1.700748
2  2.993250  4.346268  4.515975  7.485138  8.542069
          0         1         2         3         4
0  0.941715 -1.396578 -0.679714  0.370504 -1.016349
1 -0.072120  0.107076 -0.709599 -2.047263 -1.700748
2  2.993250  4.346268  4.515975  7.485138  8.542069


old ARMA
old ARMA
old ARMA
new BaseARMAGenerator
new BaseARMAGenerator
new BaseARMAGenerator


True

In [43]:
# `list_of_series` is not defined in any visible cell of this notebook (it only
# existed as leftover kernel state), so display the series generated by the new
# implementation in the AR comparison cell instead.
pd.DataFrame(list_of_series_new)

Unnamed: 0,0,1,2,3,4
0,0.941715,-1.396578,-0.679714,0.370504,-1.016349
1,-0.07212,0.107076,-0.709599,-2.047263,-1.700748
2,2.99325,4.346268,4.515975,7.485138,8.542069


In [44]:
# `list_of_series` is not defined in any visible cell of this notebook (it only
# existed as leftover kernel state), so display the series generated by the old
# implementation in the AR comparison cell instead.
pd.DataFrame(list_of_series_old)

Unnamed: 0,0,1,2,3,4
0,0.941715,-1.396578,-0.679714,0.370504,-1.016349
1,-0.07212,0.107076,-0.709599,-2.047263,-1.700748
2,2.99325,4.346268,4.515975,7.485138,8.542069




### [Moving-Average model](https://en.wikipedia.org/wiki/Moving-average_model)

The notation MA(q) refers to the moving average model of order q:

![MA equation](img/ma.svg)

where μ is the mean of the series, the $\theta _{1},\ldots ,\theta _{q}$ are the parameters of the model, and the $\varepsilon _{t}, \varepsilon _{t-1},\ldots ,\varepsilon _{t-q}$ are white noise error terms. The value of q is called the order of the MA model.

In [45]:

# MA configurations under test: no MA terms, one MA term, and eight MA terms.
param_list = [
    {'c': 0, 'qcoeff': []},
    {'c': 0, 'qcoeff': [1]},
    {'c': 0, 'qcoeff': [5, 4, 3, 3, 3, 3, 2, 1]},
]
samples_per_model = 1

# --- old implementation --------------------------------------------------
# Both global RNGs are reset before each batch so the two runs start from the
# same random state.
np.random.seed(0)
random.seed(0)

list_of_series_old = []
for i, params in enumerate(param_list):
    for _ in range(samples_per_model):
        ma = gold.MA(**params)
        ma_sample = pd.Series(ma.generate(n=5), name=i)
        list_of_series_old.append(ma_sample)
ma_frame_old = pd.DataFrame(list_of_series_old)
print(ma_frame_old)

# --- new implementation --------------------------------------------------
np.random.seed(0)
random.seed(0)

list_of_series_new = []
for i, params in enumerate(param_list):
    for _ in range(samples_per_model):
        ma = gnew.MA(**params)
        ma_sample = pd.Series(ma.generate(n=5), name=i)
        list_of_series_new.append(ma_sample)
ma_frame_new = pd.DataFrame(list_of_series_new)
print(ma_frame_new)

# True only if every generated value matches exactly between the two runs.
(ma_frame_old == ma_frame_new).values.all()

          0         1         2         3         4
0  0.941715 -1.396578 -0.679714  0.370504 -1.016349
1 -0.072120  0.107076 -0.651903 -2.140137 -1.115150
2  0.993250  4.319267  0.404423  0.369154  7.373601
          0         1         2         3         4
0  0.941715 -1.396578 -0.679714  0.370504 -1.016349
1 -0.072120  0.107076 -0.651903 -2.140137 -1.115150
2  0.993250  4.319267  0.404423  0.369154  7.373601


old ARMA
old ARMA
old ARMA
new BaseARMAGenerator
new BaseARMAGenerator
new BaseARMAGenerator


True

In [46]:
# Build one MA generator with a single coefficient from each implementation;
# the cells below sample from and inspect these two objects.
ma_old = gold.MA(qcoeff=[1])
ma_new = gnew.MA(qcoeff=[1])


old ARMA
new BaseARMAGenerator


In [47]:
# Reset both global RNGs (numpy and stdlib) before sampling from the old generator.
np.random.seed(0)
random.seed(0)

# Draw two values from the old MA generator; the result is this cell's output.
ma_old.generate(2)


[0.9417154046806644, -0.45486270002048546]

In [48]:
# Reset both global RNGs (numpy and stdlib) before sampling from the new generator.
np.random.seed(0)
random.seed(0)

# Draw two values from the new MA generator; the result is this cell's output.
ma_new.generate(2)


[0.9417154046806644, -0.45486270002048546]

In [49]:
# Display the repr of the object stored in the old generator's private `_model` attribute.
ma_old._model

ARMA(pcoeff=[], qcoeff=[1], c=0.0, sigma=1.0, e_buff=deque([-1.3965781047011498], maxlen=1), x_buff=deque([], maxlen=0))

In [50]:
# Display the repr of the object stored in the new generator's `wrapped_sarima` attribute.
ma_new.wrapped_sarima

SARIMA(c=0.0, pcoeff=[], d=0, qcoeff=[1], Pcoeff=[], D=0, Qcoeff=[], m=0, sigma=1.0, e_buff=[], x_buff=[])

In [51]:
# Display the repr of the `_generator` object held by the new generator's `wrapped_sarima`.
ma_new.wrapped_sarima._generator

BaseARMAGenerator(phi_coeff=[], theta_coeff=[1.0], c=0.0, sigma=1.0, e_buff=[], x_buff=[])