# Option Pricing Logic for all Retrained Synthetic Models

Make sure the required files have been created in the other notebook(s) before evaluating them here.

In [1]:
# Install the third-party packages fbm, yfinance and matplotlib via pip (shell escape).
# NOTE(review): `!pip` uses whichever pip is first on PATH; `%pip install ...` would target
# the running kernel's environment - consider switching and pinning versions.
!pip install fbm
!pip install yfinance
!pip install matplotlib

In [2]:
import os
# Set the working directory to the MarketGenerators project folder; the relative
# result paths used in this notebook are resolved against it (presumably the `src`
# package imports rely on it as well - confirm).
# If you are working on LRZ servers, create the folder "MarketGenerators" and then
# point `path` to it, for example:
path = "/dss/dsshome1/02/YOUR_LRZ_USER_NAME/MarketGenerators"
os.chdir(path)

In [3]:
import numpy as np
from src.evaluate import metrics as m
from src.data.make_dataset import DataLoader
import src.helper.utils as utils

In [None]:
# Root directory that contains all numerical results
base_dir = "numerical_results"
# Input-model folders (directly below base_dir) that are evaluated
model_folders = ["GBM", "Kou_Jump_Diffusion"]
# Year folders ("n-in=<years>Y") that are taken into account
relevant_folders_single_model = [f"n-in={n_years}Y" for n_years in (2, 9, 20, 50, 99, 999)]
# Generator subfolders whose results are read
target_subfolders = "CWGAN GMMN RCGAN SigCWGAN TimeGAN".split()
# Option types that are priced over a grid of strike prices
option_types_K = ["European", "Asian"]
# Maturities in days
n_days_list = [5, 10, 21, 252]
# Pricing parameters: initial price and evaluation time
S0, t = 1, 0
# The lookback option is only evaluated at time T ==> use a large grid size
lookBackGridSize = 1_000_000

In [None]:
def _new_results_table(settings):
    """Build an empty result container for one (input spec, year folder) group.

    Returns a dict that maps every entry of ``settings`` to a dict holding one
    empty list per generator in ``target_subfolders`` plus one for "Input".
    """
    return {setting: {gan: [] for gan in target_subfolders + ["Input"]} for setting in settings}


# Number of seed folders per (input spec, year folder) group; the summary files are
# written once this many seeds have been processed.
n_seeds_expected = 50

for nDays in n_days_list:
    # Strike grid depends on the maturity: 4 strikes in [0.95, 1.1] from 21 days on,
    # otherwise 3 strikes in [1, 1.08]
    K_grid = np.linspace(0.95, 1.1, 4) if nDays >= 21 else np.linspace(1, 1.08, 3)
    settings = [f"{metric}_K={K:.2f}" for metric in option_types_K for K in K_grid]
    settings.append("Lookback")
    # maturity expressed as a fraction of 252 days
    T = nDays/252
    # initialize empty trackers for the group that was processed last
    last_spec = ""
    last_seed_spec = ""
    last_year_folder = ""
    # start every maturity with a clean counter so that a value left over from the
    # previous maturity (or an undefined name on a fresh kernel) can never trigger
    # the summary step below
    seed_count = 0

    # Loop through each model folder
    for model_folder in model_folders:
        print(f"Start evaluating all {model_folder}-based models for {nDays} days maturity...")
        model_path = os.path.join(base_dir, model_folder)

        # Traverse the directory tree
        for root, dirs, files in os.walk(model_path):
            gen_model = os.path.basename(root)
            if gen_model not in target_subfolders:
                continue

            # Path components below base_dir:
            #   <input model>/<model spec>/<year folder>/<seed folder>/<generator>
            # Taking them relative to base_dir keeps the indices valid for any base_dir
            # and any path separator.
            rel_parts = os.path.relpath(root, base_dir).split(os.sep)
            if len(rel_parts) < 4:
                # generator-named folder that is not nested deeply enough to be a result folder
                continue
            input_model, model_spec, year_folder, seed_spec = rel_parts[:4]

            # Only the year folders listed in relevant_folders_single_model are analyzed
            if year_folder not in relevant_folders_single_model:
                continue

            # Read the relevant npy files
            generated_file = os.path.join(root, "generated_returns_rescaled.npy")
            input_file = os.path.join(root, "input_returns_unscaled.npy")
            if os.path.exists(generated_file) and os.path.exists(input_file):
                input_prices_df, generated_prices_df, generated_returns = utils.load_input_and_generated_returns(
                    input_file, generated_file, nDays, T
                )

                # New evaluation only if new input spec comes
                if last_spec != model_spec:
                    results_call = _new_results_table(settings)
                    results_put = _new_results_table(settings)
                    seed_count = 0
                    # forget the previous group's seed so that the first seed of this group
                    # is always treated as new, even if its folder name is identical
                    last_seed_spec = ""
                    last_year_folder = year_folder
                    model_desc = "/".join(rel_parts[:3])
                    print(f"   Evaluating model {model_desc}...")
                    last_spec = model_spec

                    # the spec folder name encodes the parameters as "key=value" pairs joined by "_"
                    pairs = model_spec.split("_")
                    params = {key: float(value) for key, value in (pair.split("=") for pair in pairs)}

                    if input_model != "GBM":
                        # create data for approximate exact price (1,000,000 paths) in the first run
                        # fix lambda naming:
                        if "lambda" in params:
                            params["lambda_"] = params.pop("lambda")
                        n_approx = 1000000
                        params["n"] = n_approx
                        params["T"] = T
                        params["n_points"] = nDays+1
                        params["S0"] = S0
                        dataloader = DataLoader(method=input_model, params=params, seed=314)
                        print(f"      ...Generating {n_approx} paths of {input_model} model...")
                        approx_df = dataloader.create_dataset(output_type="DataFrame")
                        # reduced set (first 10,000 rows) for the more expensive Asian/lookback runs
                        approx_df_lookback = approx_df.iloc[:10000,:]

                    european_engine, asian_engine, lookback_engine = utils.initialize_all_option_engines(
                        input_prices_df, generated_prices_df, T, t=t, S0=S0, approx_exact=(input_model!="GBM")
                    )

                    if input_model == "GBM":
                        # no simulated reference paths for GBM; the engines get sigma and r
                        # directly from the parameters encoded in the folder name
                        approx_df = None
                        approx_df_lookback = None
                        european_engine.sigma = params["sigma"]
                        european_engine.r = params["mu"]
                        asian_engine.sigma = params["sigma"]
                        asian_engine.r = params["mu"]
                        lookback_engine.sigma = params["sigma"]
                        lookback_engine.r = params["mu"]
                    else:
                        # The return value of the metrics call is used as rate r for all engines.
                        # NOTE(review): utils.Capturing presumably collects the printed summary;
                        # summary_output is not used any further in this cell.
                        with utils.Capturing([]) as summary_output:
                            european_engine.r = m.print_basic_non_gbm_metrics( 
                                n_periods=T,
                                annualization_factor = 252, 
                                ground_paths_df = input_prices_df, 
                                recovered_paths_df = generated_prices_df, 
                                approx_df=approx_df,
                                return_threshold = 0.03
                            )
                        asian_engine.r = european_engine.r
                        lookback_engine.r = european_engine.r

                elif last_year_folder != year_folder:
                    # same input spec, new year folder ==> start new result tables
                    results_call = _new_results_table(settings)
                    results_put = _new_results_table(settings)
                    seed_count = 0
                    # see above: the first seed of a new group must always count as new
                    last_seed_spec = ""
                    model_desc = "/".join(rel_parts[:3])
                    print(f"   Evaluating model {model_desc}...") 
                    last_year_folder = year_folder

                if seed_spec != last_seed_spec:
                    seed_count += 1
                    last_seed_spec = seed_spec
                    # new input seed ==> new calculation of input
                    recalculate_input = True
                    european_engine.ground_paths_df = input_prices_df
                    asian_engine.ground_paths_df = input_prices_df
                    lookback_engine.ground_paths_df = input_prices_df
                    print(f"         Currently at {seed_spec} ({seed_count}/{n_seeds_expected})")

                # calculate all values (option prices & deviations) for different strike prices (K or T values)
                european_engine.gen_paths_df = generated_prices_df
                european_engine.calc_all_K(K_grid, approx_df=approx_df, recalculate_input=recalculate_input)
                # Asian and lookback options use the reduced approx_df for runtime reasons
                asian_engine.gen_paths_df = generated_prices_df
                asian_engine.calc_all_K(K_grid, approx_df=approx_df_lookback, recalculate_input=recalculate_input)
                lookback_engine.gen_paths_df = generated_prices_df
                lookback_engine.calc_all_T(grid_size=lookBackGridSize, approx_df=approx_df_lookback, recalculate_input=recalculate_input)

                # format the results; generators whose returns contain inf/NaN are skipped
                n_infs = np.sum(np.isinf(generated_returns))
                n_na = np.sum(np.isnan(generated_returns))
                if n_infs == 0 and n_na == 0:
                    results_call, results_put = utils.fill_results(
                        european_engine, asian_engine, lookback_engine, results_call, results_put, 
                        gen_model, recalculate_input=recalculate_input, has_input_dev=True
                    )
                    # the flag is only cleared after a successful fill; after a skipped
                    # generator it stays set for the next generator of this seed
                    recalculate_input = False
                else:
                    print(f"Missing values detected. Model {gen_model} skipped for this seed.")

            else:
                print("No file found yet.")

            if seed_count == n_seeds_expected and gen_model == "GMMN":
                # GMMN is the last model ==> summarize the results 
                # THIS NEEDS TO BE ADJUSTED IF OTHER MODELS ARE CHOSEN
                print("      Writing summary File (assuming GMMN is last model in tree)...")

                # the year folder, i.e. two levels above the generator folder
                relevant_dir = os.path.dirname(os.path.dirname(root))
                utils.save_stat_analysis_to_csv(
                    settings, results_put, lookback_engine, european_engine, asian_engine,
                    relevant_dir, target_subfolders, nDays, "put"
                )
                utils.save_stat_analysis_to_csv(
                    settings, results_call, lookback_engine, european_engine, asian_engine,
                    relevant_dir, target_subfolders, nDays, "call"
                )
print("Done.")