# Market generator Bühler - Synthetic Data

In [1]:
import os
# Set the working directory to the MarketGenerators folder so that the relative
# paths used below (requirements file, plot and output folders) resolve correctly.
# On LRZ servers: create the folder "MarketGenerators" and set `path` to something
# like the value below, replacing YOUR_LRZ_USER_NAME with your own user name.
# For cloud computing, use the "nvcr.io#nvidia/tensorflow:20.03-tf1-py3" container.
path = "/dss/dsshome1/02/YOUR_LRZ_USER_NAME/MarketGenerators"
os.chdir(path)

In [None]:
!pip install -r requirements_buehler.txt

In [3]:
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np

import BuehlerVAE.src.market_generator as market_generator
from BuehlerVAE.src.utils import tosig
import BuehlerVAE.src.process_discriminator as process_discriminator


## Server computing part

In [None]:
# Turn interactive plotting off: the figures created below are written to disk
# with savefig and closed again rather than being displayed.
plt.ioff()

def run_buehler_pipeline(n_years_in, n_years_out):
    """Train the Bühler market generator on synthetic data and evaluate it.

    Steps performed:
      1. Build a ``MarketGenerator`` from synthetic data with ``n_in + 1``
         points and save a plot of its input windows to ``plots_buehler/``.
      2. Train the generator.
      3. Build a second ``MarketGenerator`` with ``n_out + 1`` points and a
         different seed, and generate one sample per condition of it.
      4. Reload synthetic data with yet another seed, compare its signatures
         with the generated ones via ``process_discriminator.T`` and write the
         resulting confidence levels to ``buehler_output/sigSummary*.txt``.
      5. Save the (non-normalised) generated samples to
         ``buehler_output/sigs_generated*.npy``.

    Both output folders are created if they do not exist yet, so that a
    missing folder cannot abort the run after the (long) training step.

    Parameters
    ----------
    n_years_in : int
        Number of years of training data (converted to months for ``freq == "M"``).
    n_years_out : int
        Number of years to generate (converted to months for ``freq == "M"``).

    Returns
    -------
    None
        All results are written to disk.
    """
    # Fixed experiment configuration
    ticker = "^GSPC"
    seed = 42
    order = 4  # signature order
    method = "GBM"
    freq = "M"
    mu = 0.05
    sigma = 0.2
    plot_dir = "plots_buehler"
    output_dir = "buehler_output"

    periods_per_year = 12 if freq == "M" else 1
    n_in = n_years_in * periods_per_year
    n_out = n_years_out * periods_per_year
    params = {
        "S0": 1.,
        "mu": mu,
        "sigma": sigma,
        "n": n_in + 1,
    }
    print(f"Currently at {n_in}{freq} training and {n_out}{freq} generation")

    # Only the model parameters (not start value / sample size keys) go into
    # the file names; dots are replaced by commas to keep the names extension-safe.
    excluded_keys = ('S0', 'n', 'n_points', 'T')
    values_str = [
        str(value).replace('.', ',')
        for key, value in params.items()
        if key not in excluded_keys
    ]
    model_spec = '_'.join(values_str)
    name_string = f"_{method}_{n_in}_{n_out}_{freq}_{model_spec}_seed{seed}"

    # Create the output folders up front: failing on a missing folder only
    # after training would throw away the whole run.
    os.makedirs(plot_dir, exist_ok=True)
    os.makedirs(output_dir, exist_ok=True)

    MG = market_generator.MarketGenerator(
        ticker, freq=freq, sig_order=order, own_params=params, method=method, seed=seed
    )

    # plot input paths and save to plots_buehler folder
    fig = plt.figure(figsize=(18, 10))
    plt.rcParams.update({'font.size': 22})
    for window in MG.windows:
        # every second row of column 1 is plotted for each window
        returns = window[::2, 1]
        plt.plot(returns, "b", alpha=0.1)

    plt.title("Paths")
    plt.xlabel("Days")
    plt.savefig(f"{plot_dir}/input_paths{name_string}")
    plt.close(fig)

    # train model
    MG.train(n_epochs=10000)

    # generate new paths with same format
    # NOTE(review): params_out is the very same dict object as params, so the
    # assignment below also changes params["n"]; the later
    # MG._load_own_data(params, method) call therefore sees n = n_out + 1.
    # Behaviour kept as-is -- confirm that this is intended.
    params_out = params
    params_out['n'] = n_out + 1
    MG_out = market_generator.MarketGenerator(
        ticker, freq=freq, sig_order=order, own_params=params_out, method=method, seed=seed + 1
    )

    # generated (normalised log-signatures for the test, raw output for saving)
    normalised_generated = np.array([MG.generate(cond, normalised=True) for cond in MG_out.conditions])
    generated = np.array([MG.generate(cond) for cond in MG_out.conditions])
    sigs1 = np.array([tosig.logsig2sig(logsig, 2, order) for logsig in tqdm(normalised_generated)])

    # new seed here, i.e. "out of sample" test
    MG.seed = 43
    MG._load_own_data(params, method)
    MG.orig_logsig = np.array([MG._logsig(window) for window in MG.windows])
    scaled_logsigs = MG.scaler.transform(MG.orig_logsig[1:])
    sigs2 = np.array([tosig.logsig2sig(logsig, 2, order) for logsig in tqdm(scaled_logsigs)])

    # compute test statistic
    test_stat = process_discriminator.T(set1=sigs1, set2=sigs2, order=order, compute_sigs=False, verbose=False)

    K = 1.
    confidence = np.exp(-test_stat**2/16*len(sigs1))*100
    confidence_other = np.exp(-(np.sqrt(len(sigs1)/(2*K)*test_stat)-1)**2/2)*100
    with open(f"{output_dir}/sigSummary{name_string}.txt", "w") as text_file:
        # write test statistic to file
        text_file.write(f'confidence level = {confidence:.4f}%\n' +
            f'confidence level (other metric) = {confidence_other:.4f}%'
        )

    # save generated signatures
    np.save(f"{output_dir}/sigs_generated{name_string}.npy", generated)

In [None]:
# Grid of experiments: every training length (in years) is combined with
# every generation length (in years), in the order listed here.
training_years = [10, 100, 1000, 10000, 3]
generation_years = [3, 10, 100, 1000, 10000]

for n_years_train in training_years:
    for n_generate in generation_years:
        run_buehler_pipeline(n_years_train, n_generate)