# Market generator Bühler - Synthetic Data

In [1]:
import os
# Set the working directory to the MarketGenerators folder; the relative paths
# used later in this notebook are resolved against it.
# On the LRZ servers, create the folder "MarketGenerators" first and then point
# `path` at it, for example:
path = "/dss/dsshome1/02/YOUR_LRZ_USER_NAME/MarketGenerators"
os.chdir(path)

In [None]:
!pip install esig
!pip install tqdm 

In [None]:
import BuehlerVAE.src.logsig_inversion as logsig_inversion
import numpy as np

## Invert log-signature

### Choose Model Specifications

In [4]:
# --- Run configuration -------------------------------------------------------
freq = "M"
order = 4
method = "Kou_Jump_Diffusion"
n_years_in_list = [100]
n_years_out = 1000
seed = 42

# Parameters of the chosen model.
params = {
    "mu": 0.12,
    "sigma": 0.2,
    "lambda": 2.0,
    "p": 0.3,
    "eta1": 50.,
    "eta2": 25.,
}

# Folder name of the form "key=value_key=value_...". It is built before "S0"
# is added to params, so "S0" never appears in it.
spec_folder_name = "_".join([f"{name}={value}" for name, value in params.items()])

# Frequency-dependent settings: how often back-up saves are made and how many
# output samples are requested.
if freq == "M":
    save_every_n = 120
    n_out = n_years_out * 12
else:
    save_every_n = 10
    n_out = n_years_out

params["S0"] = 1.

# Compact tag for file names: the parameter values (without 'S0', 'n',
# 'n_points', 'T'), with '.' replaced by ',' and joined by '_'.
values = {
    name: params[name]
    for name in params
    if name not in ('S0', 'n', 'n_points', 'T')
}.values()
values_str = [text.replace('.', ',') for text in map(str, values)]
model_spec = '_'.join(values_str)

In [None]:
# Settings for the Breed and Mutate algorithm. The first branch applies to
# monthly data (freq == "M"), the second to every other frequency.
if freq == "M":
    pip, n_pips = 0.001, 50
    n_points = 22
    n_iterations, n_organisms = 100, 400
else:
    pip, n_pips = 0.0001, 100
    n_points = 253
    n_iterations, n_organisms = 100, 150

In [None]:
# For every input horizon: load the generated log-signatures, invert each one
# back into a path with logsig_inversion.train, turn the path into log-returns
# and append them to a result file. If the result file already exists, the run
# resumes after the log-signatures that were already converted.
for n_years_in in n_years_in_list:
    n_in = n_years_in*12 if freq == "M" else n_years_in
    params["n"] = n_in
    name_string = f"_{method}_{n_in}_{n_out}_{freq}_{model_spec}_seed{seed}"

    generated_log_sigs = np.load(f"buehler_output/sigs_generated{name_string}.npy")
    save_dir = f"numerical_results/{method}/{spec_folder_name}/n-in={n_years_in}Y/seed={seed}/CVAE"
    # exist_ok=True replaces the separate "exists?" check before creating.
    os.makedirs(save_dir, exist_ok=True)
    file_name = f"{save_dir}/generated_returns_rescaled.npy"
    if os.path.exists(file_name):
        # Resume: start from the returns written by an earlier (partial) run.
        recovered_returns_np = np.load(file_name)
    else:
        recovered_returns_np = np.array([], ndmin=1)

    # Shorten the generated log-sigs so that only uncalculated ones are converted.
    # NOTE(review): assumes every inverted path contributes exactly
    # n_points-1 returns to the saved array - confirm against
    # logsig_inversion.train.
    start_index = recovered_returns_np.shape[0] // (n_points-1)
    untransformed_generated_log_sigs = generated_log_sigs[start_index:, :]
    n_iter = untransformed_generated_log_sigs.shape[0]
    print(f"Total number of iterations left: {n_iter}")
    for i, logsig in enumerate(untransformed_generated_log_sigs):
        recovered_path, loss = logsig_inversion.train(logsig, order, n_iterations, n_organisms, n_points,
                                                      pip, n_pips)
        # Shift the recovered path by 1 before taking logs.
        recovered_path += 1
        recovered_return_np = np.diff(np.log(recovered_path))
        # np.append without an axis flattens, so the result stays one-dimensional.
        recovered_returns_np = np.append(recovered_returns_np, recovered_return_np)
        if i % save_every_n == (save_every_n-1):
            # back-up saves
            np.save(file_name, recovered_returns_np)
            print(f"Iteration {i+1}/{n_iter} done and {recovered_returns_np.shape[0]} returns saved.")
    # Final save, only if at least one log-signature was converted. Testing
    # n_iter instead of the loop variable avoids a NameError (or a stale value
    # from the previous horizon) when there was nothing left to do.
    if n_iter > 0:
        np.save(file_name, recovered_returns_np)
        print(f"Last iteration done and {recovered_returns_np.shape[0]} returns saved.")