# Option Pricing Logic for all Retrained YFinance Models

Make sure to create the required files in the other notebook(s) first before evaluating them here.

In [1]:
!pip install fbm
!pip install yfinance
!pip install matplotlib

In [2]:
import os
# Set the working directory to the MarketGenerators folder; the cells below
# refer to relative locations such as "numerical_results".
# If you are working on LRZ servers, create the folder "MarketGenerators" and
# then specify something like the path below (replace YOUR_LRZ_USER_NAME).
path = "/dss/dsshome1/02/YOUR_LRZ_USER_NAME/MarketGenerators"
# Fails with an OSError if the folder does not exist, so create it first.
os.chdir(path)

In [2]:
import numpy as np
from src.evaluate import metrics as m
import src.helper.utils as utils

In [None]:
# Root folder that holds all numerical results
base_dir = "numerical_results"
# Input-model folders (directly below the root folder) that are evaluated
model_folders = ["YFinance"]
# Generator subfolders whose outputs are evaluated
target_subfolders = [
    "CWGAN",
    "GMMN",
    "RCGAN",
    "SigCWGAN",
    "TimeGAN",
]
# The YFinance case contains only this single year folder
relevant_folders_single_model = ["n-in=0Y"]
# Option types that are priced on a grid of strike prices
# (the Lookback option has no strike price)
option_types_K = ["European", "Asian"]
# Numbers of days to evaluate, one run per entry
n_days_list = [5, 10, 21, 252]
# Pricing parameters
S0, t = 1, 0
# The lookback option is only evaluated at time T, hence the large grid size
lookBackGridSize = 1_000_000

In [None]:
# Completion criterion for one model specification: the summary is written as
# soon as `expected_seed_count` seeds have been seen and the folder of
# `last_gen_model` is reached (it is assumed to be the last model in the tree).
# THIS NEEDS TO BE ADJUSTED IF OTHER MODELS ARE CHOSEN
expected_seed_count = 50
last_gen_model = "GMMN"
# Expected layout: base_dir/input_model/model_spec/year_folder/seed/gen_model
expected_path_depth = 6

for nDays in n_days_list:
    # Specify grid points to be evaluated (single values also work)
    K_grid = np.linspace(0.95, 1.1, 4) if nDays >= 21 else np.linspace(1, 1.08, 3)
    settings = [f"{metric}_K={K:.2f}" for metric in option_types_K for K in K_grid]
    settings.append("Lookback")
    T = nDays/252
    # initialize empty last specification
    last_spec = ""
    last_seed_spec = ""
    last_year_folder = ""
    # Reset the per-maturity state so that nothing leaks over from a previous
    # maturity (or a previous run of this cell) if the first folder has no files
    seed_count = 0
    recalculate_input = True

    # Loop through each model folder
    for model_folder in model_folders:
        print(f"Start evaluating all {model_folder}-based models for {nDays} days maturity...")
        model_path = os.path.join(base_dir, model_folder)

        # Traverse the directory tree
        for root, dirs, files in os.walk(model_path):
            gen_model = os.path.basename(root)

            if gen_model in target_subfolders:
                # Split with the platform separator instead of a hard-coded "/"
                path_parts = os.path.normpath(root).split(os.sep)
                if len(path_parts) < expected_path_depth:
                    # Folder does not follow the expected layout ==> skip it
                    # instead of failing with an IndexError below
                    continue

                # Only the year folders listed in relevant_folders_single_model are analyzed
                nYearsFolderName = path_parts[3]
                if nYearsFolderName not in relevant_folders_single_model:
                    continue
                if path_parts[4] == "seed=42":
                    # Only consider retrained models (seed!=42)
                    continue

                # Read the relevant npy files
                generated_file = os.path.join(root, "generated_returns_rescaled.npy")
                input_file = os.path.join(root, "input_returns_unscaled.npy")
                if os.path.exists(generated_file) and os.path.exists(input_file):
                    # load data frames and returns
                    input_prices_df, generated_prices_df, generated_returns = utils.load_input_and_generated_returns(
                        input_file, generated_file, nDays, T
                    )
                    # get distinct names of current directory
                    input_model, model_spec, year_folder, seed_spec = path_parts[1:5]

                    # New evaluation only if new input spec comes
                    if last_spec != model_spec:
                        # Reset results for new model
                        results_call = {setting: {gan: [] for gan in target_subfolders + ["Input"]} for setting in settings}
                        results_put = {setting: {gan: [] for gan in target_subfolders + ["Input"]} for setting in settings}
                        seed_count = 0
                        model_desc = "/".join(path_parts[1:4])
                        print(f"   Evaluating model {model_desc}...")
                        last_spec = model_spec
                        # Forget the previous seed: the engines created below are fresh,
                        # so the first seed of the new spec must always be treated as
                        # new (counted, and its input values recalculated), even if it
                        # carries the same name as the last seed of the previous spec
                        last_seed_spec = ""

                        european_engine, asian_engine, lookback_engine = utils.initialize_all_option_engines(
                            input_prices_df, generated_prices_df, T=T, input_is_real_data = False, t=t, S0=S0
                        )

                        with utils.Capturing([]) as summary_output:
                            # The summary metrics are computed inside the capturing context;
                            # the returned value is used as `r` of all three engines
                            european_engine.r = m.print_basic_non_gbm_metrics(
                                n_periods=T,
                                annualization_factor = 252,
                                ground_paths_df = input_prices_df,
                                recovered_paths_df = generated_prices_df,
                                approx_df = input_prices_df,
                                return_threshold = 0.03
                            )
                            asian_engine.r = european_engine.r
                            lookback_engine.r = european_engine.r

                    if seed_spec != last_seed_spec:
                        # New seed => increase seed count and reset input paths of engines
                        seed_count += 1
                        last_seed_spec = seed_spec
                        # new input seed ==> new calculation of input
                        recalculate_input = True
                        european_engine.ground_paths_df = input_prices_df
                        asian_engine.ground_paths_df = input_prices_df
                        lookback_engine.ground_paths_df = input_prices_df
                        print(f"         Currently at {seed_spec} ({seed_count}/{expected_seed_count})")

                    # calculate all values (option prices & deviations) for different strike prices (K or T values)
                    european_engine.gen_paths_df = generated_prices_df
                    european_engine.calc_all_K(K_grid, recalculate_input=recalculate_input)
                    asian_engine.gen_paths_df = generated_prices_df
                    asian_engine.calc_all_K(K_grid, recalculate_input=recalculate_input)
                    # use less data for runtime reasons for approx_df for lookback options logic
                    lookback_engine.gen_paths_df = generated_prices_df
                    lookback_engine.calc_all_T(grid_size=lookBackGridSize, recalculate_input=recalculate_input)

                    # format the results and make nan exception
                    # NOTE(review): the engines above have already been run at this point,
                    # so paths containing inf/NaN are still priced before being discarded
                    n_infs = np.sum(np.isinf(generated_returns))
                    n_na = np.sum(np.isnan(generated_returns))
                    if n_infs == 0 and n_na == 0:
                        results_call, results_put = utils.fill_results(
                            european_engine, asian_engine, lookback_engine, results_call, results_put,
                            gen_model, recalculate_input=recalculate_input, has_input_dev=False
                        )
                        # input values are stored now ==> reuse them for the other models of this seed
                        recalculate_input = False
                    else:
                        print(f"Missing values detected. Model {gen_model} skipped for this seed.")
                else:
                    print("No file found yet.")

                if seed_count == expected_seed_count and gen_model == last_gen_model:
                    # last_gen_model is assumed to be the last model ==> summarize the results
                    # THIS NEEDS TO BE ADJUSTED IF OTHER MODELS ARE CHOSEN
                    print(f"      Writing summary File (assuming {last_gen_model} is last model in tree)...")
                    # Save the results to csv (two levels above the generator folder)
                    relevant_dir = os.sep.join(path_parts[:-2])
                    utils.save_stat_analysis_to_csv(
                        settings, results_put, lookback_engine, european_engine, asian_engine,
                        relevant_dir, target_subfolders, nDays, "put"
                    )
                    utils.save_stat_analysis_to_csv(
                        settings, results_call, lookback_engine, european_engine, asian_engine,
                        relevant_dir, target_subfolders, nDays, "call"
                    )
print("Done.")