# Plot all histograms (log-scale and normal scale)

In [2]:
import os
# Set the working directory to the MarketGenerators folder; the relative paths
# used further down (e.g. "numerical_results") are resolved against it.
# If you are working on the LRZ servers, create the folder "MarketGenerators"
# first and then replace YOUR_LRZ_USER_NAME in the path below.
path = "/dss/dsshome1/02/YOUR_LRZ_USER_NAME/MarketGenerators"
os.chdir(path)  # raises FileNotFoundError if the folder does not exist

In [1]:
!pip install matplotlib

In [3]:
import numpy as np
import matplotlib.pyplot as plt

In [5]:
# Font sizes for the histogram figures, set one rc group at a time
plt.rc('font', size=18)
plt.rc('xtick', labelsize=20)   # x-axis tick labels
plt.rc('ytick', labelsize=20)   # y-axis tick labels
plt.rc('axes', titlesize=30, labelsize=28)  # title / x and y labels
plt.rc('legend', fontsize=22)   # legend entries

In [6]:
def set_style(ax):
    """Hide the right, top and bottom spines of ``ax`` (the left one is untouched)."""
    for side in ('right', 'top', 'bottom'):
        ax.spines[side].set_visible(False)

In [None]:
# Define the base directory
base_dir = "numerical_results"

# Define the folders of interest (first folder level below base_dir)
model_folders = [
    "YFinance",
    "GBM",
    "Kou_Jump_Diffusion",
]

# Define the target subfolders (folders whose results are plotted)
target_subfolders = [
    "CWGAN", "GMMN", "RCGAN", "SigCWGAN", "TimeGAN",
    "CVAE",  # only add CVAE if Buehler logic was run
]
# Ignore folders of retrained models
exception_folders = ["n-in=2Y", "n-in=9Y", "n-in=20Y", "n-in=50Y", "n-in=99Y", "n-in=999Y", "n-in=150Y"]
# Initialize empty last specification (not read in this cell; kept in case
# another cell relies on it)
last_spec = ""

# Flat folder that receives a copy of every figure
summary_dir = "histograms"
# Axis limits applied to every figure
fixed_xlim = [-0.215, 0.165]
pdf_ylim = (0, 37)
log_pdf_ylim = (5e-6, 37)


def split_run_path(root, base=base_dir):
    """Split a result folder path into the parts that describe the run.

    The expected layout below ``base`` is
    ``<input_model>/<model_spec>/<n-years folder>/<seed folder>/.../<gen_model>``.

    Returns the tuple ``(input_model, model_spec, n_years_folder, seed_config)``,
    or ``None`` when ``root`` is not nested deeply enough to contain all four
    parts plus the generative-model folder itself.
    """
    # Work on the path relative to `base` and split on the OS separator, so the
    # result neither depends on "/" nor on how many components `base` has.
    parts = os.path.normpath(os.path.relpath(root, base)).split(os.sep)
    if len(parts) < 5:
        return None
    return tuple(parts[:4])


def resolve_data_files(root, gen_model, model_folder):
    """Return ``(input_file, generated_file)``: the two .npy files to load for ``root``."""
    input_file = os.path.join(root, "input_returns_unscaled.npy")
    generated_file = os.path.join(root, "generated_returns_rescaled.npy")
    if gen_model == "CVAE":
        # no input data for CVAE model currently -> use the SigCWGAN sibling folder
        input_file = os.path.join(os.path.dirname(root), "SigCWGAN", "input_returns_unscaled.npy")
        if model_folder != "YFinance":
            generated_file = os.path.join(root, "generated_returns_rescaled_fixed.npy")
    return input_file, generated_file


def compute_bin_edges(input_returns, generated_returns, bin_width=0.005, min_bins=34):
    """Return histogram bin edges shared by the input and the generated returns.

    The edges span the min/max of both samples combined; the number of bins is
    one per ``bin_width`` units of that range, but at least ``min_bins``.
    """
    combined_returns = np.concatenate([input_returns, generated_returns])
    data_range = combined_returns.max() - combined_returns.min()
    bin_count = max(min_bins, int(data_range // bin_width))
    return np.histogram_bin_edges(combined_returns, bins=bin_count)


def draw_histogram_panel(ax, input_returns, generated_returns, bin_edges, title,
                         log_scale=False, xlim=None):
    """Draw the overlaid input/generated density histograms on ``ax``.

    ``log_scale`` switches the y-axis to a logarithmic scale; ``xlim`` (if
    given) fixes the x-axis limits.
    """
    ax.hist(input_returns, bins=bin_edges, alpha=0.6, density=True, label="Input")
    ax.hist(generated_returns, bins=bin_edges, alpha=0.6, density=True, label="Generated")
    ax.grid()
    set_style(ax)
    ax.legend()
    if xlim is not None:
        ax.set_xlim(xlim)
    if log_scale:
        ax.set_ylabel('log-pdf')
        ax.set_yscale('log')
        ax.set_ylim(log_pdf_ylim)
    else:
        ax.set_ylabel('pdf')
        ax.set_ylim(pdf_ylim)
    ax.set_title(title)


def save_histogram_figure(input_returns, generated_returns, title, output_files):
    """Create the 2x2 histogram figure and save it to every path in ``output_files``.

    Top row: data-driven x-axis; bottom row: fixed x-axis.
    Left column: original scale; right column: logarithmic scale.
    """
    bin_edges = compute_bin_edges(input_returns, generated_returns)
    fig, axs = plt.subplots(2, 2, figsize=(18.21, 10.41))
    try:
        for row, xlim in enumerate((None, fixed_xlim)):
            for col, log_scale in enumerate((False, True)):
                draw_histogram_panel(axs[row, col], input_returns, generated_returns,
                                     bin_edges, title, log_scale=log_scale, xlim=xlim)
        # Adjust layout for better spacing
        fig.tight_layout()
        for output_file in output_files:
            fig.savefig(output_file)
    finally:
        # Always release the figure, even if plotting or saving failed
        plt.close(fig)


# Loop through each model folder
for model_folder in model_folders:
    print(f"Start evaluating all {model_folder}-based models.")
    model_path = os.path.join(base_dir, model_folder)

    # Traverse the directory tree
    for root, _dirs, _files in os.walk(model_path):
        gen_model = os.path.basename(root)
        if gen_model not in target_subfolders:
            continue

        run_info = split_run_path(root, base_dir)
        if run_info is None:
            # Folder is not nested deeply enough to be a result folder
            continue
        input_model, model_spec, nYearsFolderName, seed_config = run_info

        # Skip folders only used for statistical analysis
        if nYearsFolderName in exception_folders:
            continue
        if seed_config != "seed=42":
            # Skip folders with different seeds than 42
            continue

        # Check the files that are actually loaded below
        input_file, generated_file = resolve_data_files(root, gen_model, model_folder)
        if not (os.path.exists(generated_file) and os.path.exists(input_file)):
            print("No file found yet.")
            continue
        if gen_model == "CVAE":
            print(f"   Currently at {root}...")

        all_input_returns = np.load(input_file).flatten()
        all_generated_returns = np.load(generated_file).flatten()
        if all_input_returns.size == 0 or all_generated_returns.size == 0:
            print(f"   Skipping {root}: empty return array.")
            continue

        # Save the figure next to the data and in the flat summary folder
        # (created on demand so the second save cannot fail on a missing folder)
        os.makedirs(summary_dir, exist_ok=True)
        save_histogram_figure(
            all_input_returns,
            all_generated_returns,
            gen_model,
            [
                os.path.join(root, "histograms"),
                os.path.join(
                    summary_dir,
                    f"{input_model}_{model_spec}_{nYearsFolderName}_{seed_config}_{gen_model}.png",
                ),
            ],
        )

print("Done.")