# Generating data for analysis

In [1]:
from pathlib import Path

import numpy as np
import scipy as sp

from hyppo.tools import ts_sim

# Names of the hyppo time-series simulations passed to `ts_sim` by the
# experiment cells of this notebook.
TS_SIMS = [
    "indep_ar",
    "cross_corr_ar",
    "nonlinear_process",
    "extinct_gaussian_process",
]

# Output directory. Kept as a string with a trailing slash because the save
# calls build their file names by direct concatenation: f"{p}{fname}.mat".
p = "./data/"

# Create the output directory up front; saving a .mat file into a missing
# directory would otherwise raise once the (slow) simulation has finished.
Path(p).mkdir(parents=True, exist_ok=True)

## Generate Experiment 1 - Independent AR(1) with increasing sample size.

In [2]:
fname = "1-independent_ar_n"

# Series length and number of simulated replicates.
n, reps = 200, 300

# AR(1) coefficient and the noise parameter derived from it.
phi = 0.5
sigma = 1 - phi**2

# Seed NumPy's global RNG before simulating.
np.random.seed(1)

datas = [ts_sim("indep_ar", n, phi=phi, sigma=sigma) for _ in range(reps)]

# Separate the first and second element of every simulated pair and stack
# each group along a new leading (replicate) axis.
X = np.stack([pair[0] for pair in datas])
Y = np.stack([pair[1] for pair in datas])

savedict = dict(X=X, Y=Y)

# Write both arrays to a compressed MATLAB file.
out_path = f"{p}{fname}.mat"
sp.io.savemat(out_path, savedict, do_compression=True)

## Generate Experiment 2 - Independent AR(1) with increasing phi.

In [8]:
fname = "2-independent_ar_phi"

# Series length and number of replicates per phi value.
n, reps = 200, 300

# Grid of AR(1) coefficients and the noise parameter derived from each.
phis = np.arange(0.2, 1, 0.05)
sigmas = 1 - (phis**2)

# Seed NumPy's global RNG before simulating.
np.random.seed(1)

Xs, Ys = [], []

for phi, sigma in zip(phis, sigmas):
    # Hand plain Python floats (not NumPy scalars) to the simulator.
    sim_kwargs = {"phi": float(phi), "sigma": float(sigma)}
    datas = [ts_sim("indep_ar", n, **sim_kwargs) for _ in range(reps)]

    # One stacked array of replicates per phi value.
    Xs.append(np.stack([pair[0] for pair in datas]))
    Ys.append(np.stack([pair[1] for pair in datas]))

# Stack the per-phi arrays along a new leading axis.
X = np.stack(Xs)
Y = np.stack(Ys)

savedict = dict(X=X, Y=Y, phi=phis)

# Write the arrays and the phi grid to a compressed MATLAB file.
out_path = f"{p}{fname}.mat"
sp.io.savemat(out_path, savedict, do_compression=True)

## Generate Experiment 3 - Linear cross correlated AR(1) with increasing sample size

In [4]:
fname = "3-linear_ar"

# Series length and number of simulated replicates.
n, reps = 200, 300

# Seed NumPy's global RNG before simulating.
np.random.seed(1)

datas = [ts_sim("cross_corr_ar", n) for _ in range(reps)]

# Separate the first and second element of every simulated pair and stack
# each group along a new leading (replicate) axis.
X = np.stack([pair[0] for pair in datas])
Y = np.stack([pair[1] for pair in datas])

savedict = dict(X=X, Y=Y)

# Write both arrays to a compressed MATLAB file.
out_path = f"{p}{fname}.mat"
sp.io.savemat(out_path, savedict, do_compression=True)

## Generate Experiment 4 - Non-linearly cross correlated AR(1) with increasing sample size

In [5]:
fname = "4-nonlinear_ar"

# Series length and number of simulated replicates.
n, reps = 200, 300

# Seed NumPy's global RNG before simulating.
np.random.seed(1)

datas = [ts_sim("nonlinear_process", n) for _ in range(reps)]

# Separate the first and second element of every simulated pair and stack
# each group along a new leading (replicate) axis.
X = np.stack([pair[0] for pair in datas])
Y = np.stack([pair[1] for pair in datas])

savedict = dict(X=X, Y=Y)

# Write both arrays to a compressed MATLAB file.
out_path = f"{p}{fname}.mat"
sp.io.savemat(out_path, savedict, do_compression=True)

## Generate Experiment 5 - Extinct Gaussian process with increasing sample size

In [6]:
fname = "5-extinct_gaussian"

# Series length and number of simulated replicates.
n, reps = 200, 300

# Seed NumPy's global RNG before simulating.
np.random.seed(1)

datas = [ts_sim("extinct_gaussian_process", n) for _ in range(reps)]

# Separate the first and second element of every simulated pair and stack
# each group along a new leading (replicate) axis.
X = np.stack([pair[0] for pair in datas])
Y = np.stack([pair[1] for pair in datas])

savedict = dict(X=X, Y=Y)

# Write both arrays to a compressed MATLAB file.
out_path = f"{p}{fname}.mat"
sp.io.savemat(out_path, savedict, do_compression=True)

## Generate Experiment 6 - Independent Vector AR(1) with increasing sample size


In [9]:
def indep_var(n, d, phi=0.5, seed=None):
    """
    Simulate two independent ``d``-dimensional vector AR(1) time series.

    A single ``2 * d``-dimensional process ``Y[t] = phi * Y[t - 1] + e[t]`` is
    generated with a diagonal coefficient matrix and diagonal noise covariance
    ``(1 - phi**2) * I``, then split column-wise into two halves.

    Parameters
    ----------
    n : int
        Number of time steps.
    d : int
        Dimension of each returned time series.
    phi : float, default: 0.5
        AR(1) coefficient applied to every component.
    seed : optional
        Anything accepted by ``np.random.default_rng``. All randomness is
        drawn from the resulting generator, so a given seed always produces
        the same output and NumPy's global RNG state is left untouched.
        Repeated calls with the same seed return identical series; pass a
        distinct seed per call (or ``None``) for independent replicates.

    Returns
    -------
    series1, series2 : ndarray of shape (n, d)
        The first and last ``d`` columns of the simulated process. Row 0 of
        both is all zeros (the initial state).
    """
    rng = np.random.default_rng(seed)
    coeff = np.eye(d * 2) * phi
    covar = np.eye(d * 2) * (1 - (phi**2))
    # Draw from the local generator (not the global np.random state) so that
    # `seed` actually controls the result.
    errors = rng.multivariate_normal(np.zeros(d * 2), covar, n)

    # The process starts at zero; errors[0] is intentionally not applied.
    Y = np.zeros((n, d * 2))

    for t in range(1, n):
        Y[t] = np.dot(coeff, Y[t - 1]) + errors[t]

    series1 = Y[:, :d]
    series2 = Y[:, d:]

    return series1, series2

In [None]:
fname = "6-independent_var_n"

# Series length, dimension of each series, and number of replicates.
n = 200
d = 100
reps = 300

phi = 0.5

# Give every replicate its own seed, all derived from one root seed. Passing
# the same seed to every call would make all replicates identical whenever
# `indep_var` honours its `seed` argument.
seeds = np.random.SeedSequence(1).spawn(reps)

datas = [indep_var(n, d, phi, seed=seed) for seed in seeds]

# Stack the first / second series of every replicate along a new leading axis.
X = np.stack([data[0] for data in datas])
Y = np.stack([data[1] for data in datas])

savedict = {
    "X": X,
    "Y": Y,
}

# save to disk
sp.io.savemat(f"{p}{fname}.mat", savedict, do_compression=True)

In [18]:
# Sanity check of the stacked array from the cell above: (reps, n, d).
X.shape

(300, 200, 100)