In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

In [2]:
def get_exp_path(expid, logs_folder=None):
    """Locate the unique experiment directory named ``expid`` below the logs folder.

    Parameters
    ----------
    expid : int or str
        Experiment id. It is converted to ``str`` and used as the (glob)
        pattern for a recursive search, so it is expected to be a plain
        name without wildcard characters.
    logs_folder : str or pathlib.Path, optional
        Root directory that is searched recursively. When omitted, the
        original hard-coded thesis ``logs`` directory is used, so existing
        callers keep working unchanged.

    Returns
    -------
    pathlib.Path
        The single directory whose name matches ``expid``.

    Raises
    ------
    ValueError
        If no directory, or more than one directory, matches ``expid``.
    """
    expid = str(expid)
    if logs_folder is None:
        logs_folder = Path(r"C:\Users\Mathiass\Documents\Projects\master-thesis")/"logs"
    logs_folder = Path(logs_folder)
    # rglob yields files as well as directories; only directories qualify
    # as experiment folders, so a stray file with the same name is ignored.
    matches_list = [match for match in logs_folder.rglob(expid) if match.is_dir()]
    if len(matches_list) != 1:
        raise ValueError(f"There exists none or more than 1 folder with "
                            f"experiment id {expid} in the {logs_folder.name} "
                            "directory!")
    return matches_list[0]

In [3]:
def load_df(expid_path, metric):
    """Read the sorted feature-importance CSV of one experiment for ``metric``.

    Parameters
    ----------
    expid_path : pathlib.Path
        Experiment directory; the CSV is read from its
        ``results/importance_full`` sub-folder.
    metric : str
        ``"balacc"`` reads the ``MeanDiffOrigPerm`` column of
        ``balaccmeandiff_sorted.csv``; ``"return"`` reads the ``MomDiff``
        column of ``meanofmeandiffpf_sorted.csv``.

    Returns
    -------
    pandas.DataFrame
        One-column frame indexed by the file's first column.

    Raises
    ------
    NotImplementedError
        If ``metric`` is neither ``"balacc"`` nor ``"return"``.
    """
    # (metric name, CSV path relative to the experiment folder, value column)
    known_metrics = (
        ("balacc", "results/importance_full/balaccmeandiff_sorted.csv", "MeanDiffOrigPerm"),
        ("return", "results/importance_full/meanofmeandiffpf_sorted.csv", "MomDiff"),
    )
    for name, rel_path, value_col in known_metrics:
        if metric == name:
            csv_file = expid_path/rel_path
            print("Reading", csv_file.name, "...")
            # header=1: the column names are taken from the second line of the file.
            return pd.read_csv(csv_file, index_col=0, header=1,
                               usecols=["Unnamed: 0", value_col])
    raise NotImplementedError("Please specify a valid metric.")
    

In [4]:
# def sum_ranks(expid_ls):
#     count_dic = {}
#     for expid in expid_ls:
#         p = get_exp_path(expid)
#         print("Model:", p.parent.parent.name,",", p.parent.name)
#         df = load_df(p, metric="balacc")
#         df_dic = {k: v for v, k in list(enumerate(df.index))}
# #         print(df_dic)
#         count_dic = {key: count_dic.get(key, 0) + df_dic.get(key, 0) for key in df_dic}
#     return count_dic
        

In [5]:
def gradient_table(expid_ls, metric, verbose=True):
    """Combine per-model feature importances into one rank-ordered table.

    For every experiment id the importance file is loaded via ``load_df``,
    its single column is renamed to the name of the experiment folder's
    parent directory, and each feature's row position (0 = first row) is
    added to a running rank sum. All frames are then reindexed by ascending
    rank sum and concatenated column-wise.

    Parameters
    ----------
    expid_ls : iterable
        Experiment ids, each resolved with ``get_exp_path``.
    metric : str
        Metric name forwarded to ``load_df``.
    verbose : bool, default True
        When True, print the same progress/debug output as before (model
        names, rank dictionaries, summed ranks, and each frame). Output
        printed by ``load_df`` itself is not affected by this flag.

    Returns
    -------
    pandas.DataFrame
        One column per experiment, rows ordered by ascending summed rank.
        A feature missing from a model's file shows up as NaN in that
        model's column.

    Notes
    -----
    A feature that is absent from one model contributes nothing for that
    model, so its rank sum is lower than it would otherwise be.
    """
    rank_sums = {}
    frames = []
    for expid in expid_ls:
        p = get_exp_path(expid)
        if verbose:
            print("Model:", p.parent.parent.name,",", p.parent.name)
        df = load_df(p, metric=metric)
        df.columns = [p.parent.name]
        frames.append(df)
        # Row position in the (already sorted) file is the feature's rank.
        ranks = {feature: rank for rank, feature in enumerate(df.index)}
        if verbose:
            print(ranks)
        updated = {feature: rank_sums.get(feature, 0) + rank
                   for feature, rank in ranks.items()}
        # Keep features that earlier models reported but this one lacks;
        # previously they were silently dropped from the totals (and thus
        # from the final table).
        for feature, total in rank_sums.items():
            updated.setdefault(feature, total)
        rank_sums = updated
    # sorted() is stable, so ties keep the order of the rank_sums dict.
    counts_sorted = dict(sorted(rank_sums.items(), key=lambda item: item[1]))
    if verbose:
        print("---")
        print("Counts added and sorted:", counts_sorted)
    row_order = list(counts_sorted)
    aligned = []
    for df in frames:
        if verbose:
            print(df)
        aligned.append(df.reindex(row_order))
    return pd.concat(aligned, axis=1)

In [6]:
def return_grad_tables(features, metric_ls, feature_expid_map=None):
    """Build one gradient-styled importance table per feature set and metric.

    Parameters
    ----------
    features : iterable of str
        Feature-set names; each must be a key of ``feature_expid_map``.
    metric_ls : iterable of str
        Metric names forwarded to ``gradient_table``.
    feature_expid_map : dict, optional
        Maps a feature-set name to the list of experiment ids to combine.
        Defaults to the built-in mapping for ``"option"`` and
        ``"option+stock"``.

    Returns
    -------
    dict
        Keys are ``"<feature>_<metric>"``; values are pandas ``Styler``
        objects with a column-wise background gradient applied.

    Raises
    ------
    KeyError
        If a requested feature set is not present in ``feature_expid_map``.
    """
    if feature_expid_map is None:
        feature_expid_map = {"option": [20220919194922, 20220919213409, 20220919200811, 20220908133630, 20220915074003],
                            "option+stock": [20220922192602, 20220923123700, 20220924104656, 20220920100736, 20220923074741]}
    # Materialise once so a one-shot iterable is not exhausted after the
    # first feature set.
    metric_ls = list(metric_ls)
    grad_tables = {}
    for f in features:
        expid_ls = feature_expid_map[f]
        for m in metric_ls:
            grad_tables[f+"_"+m] = gradient_table(expid_ls, m).style.background_gradient(axis=0)
    return grad_tables

### 1. Specify feature set

In [7]:
# Feature sets to tabulate; each entry is used as a key into the
# experiment-id mapping inside return_grad_tables.
features = ["option", "option+stock"]

### 2. Specify metric

In [8]:
# Metrics to tabulate; load_df accepts "balacc" and "return".
metrics = ["balacc", "return"]

### 3. Produce tables.

In [9]:
# ls_df = gradient_table(expid_ls, metric=metric)
# Build one styled table per feature-set/metric combination; the dict keys
# have the form "<feature>_<metric>".
grad_tables = return_grad_tables(features, metrics)


Model: small , lin
Reading balaccmeandiff_sorted.csv ...
{'delta': 0, 'gamma': 1, 'impl_volatility': 2, 'days_to_exp': 3, 'moneyness': 4, 'theta': 5, 'vega': 6, 'ba_spread_option': 7, 'volume': 8, 'mid_price': 9, 'open_interest': 10}
Model: small , rf
Reading balaccmeandiff_sorted.csv ...
{'moneyness': 0, 'impl_volatility': 1, 'theta': 2, 'delta': 3, 'gamma': 4, 'days_to_exp': 5, 'vega': 6, 'volume': 7, 'open_interest': 8, 'ba_spread_option': 9, 'mid_price': 10}
Model: small , xgb
Reading balaccmeandiff_sorted.csv ...
{'impl_volatility': 0, 'moneyness': 1, 'delta': 2, 'theta': 3, 'gamma': 4, 'days_to_exp': 5, 'mid_price': 6, 'vega': 7, 'ba_spread_option': 8, 'open_interest': 9, 'volume': 10}
Model: small , nn
Reading balaccmeandiff_sorted.csv ...
{'impl_volatility': 0, 'delta': 1, 'days_to_exp': 2, 'theta': 3, 'gamma': 4, 'moneyness': 5, 'vega': 6, 'ba_spread_option': 7, 'volume': 8, 'open_interest': 9, 'mid_price': 10}
Model: small , transformer
Reading balaccmeandiff_sorted.csv ...
{

Model: medium , xgb
Reading balaccmeandiff_sorted.csv ...
{'impl_volatility': 0, 'moneyness': 1, 'turn': 2, 'theta': 3, 'delta': 4, 'gamma': 5, 'vega': 6, 'zerotrade': 7, 'days_to_exp': 8, 'baspread': 9, 'mom1m': 10, 'mid_price': 11, 'ill': 12, 'realestate': 13, 'cfp': 14, 'rd_sale': 15, 'betasq': 16, 'mve_ia': 17, 'mom36m': 18, 'stdacc': 19, 'beta': 20, 'ba_spread_option': 21, 'mom12m': 22, 'rd_mve': 23, 'std_dolvol': 24, 'bm_ia': 25, 'rsup': 26, 'herf': 27, 'mom6m': 28, 'egr': 29, 'stdcf': 30, 'roaq': 31, 'std_turn': 32, 'invest': 33, 'chcsho': 34, 'cashdebt': 35, 'sp': 36, 'dy': 37, 'salecash': 38, 'roeq': 39, 'pchsale_pchinvt': 40, 'divi': 41, 'gma': 42, 'sgr': 43, 'pchgm_pchsale': 44, 'roic': 45, 'ear': 46, 'currat': 47, 'rd': 48, 'tang': 49, 'grcapx': 50, 'pchsale_pchxsga': 51, 'convind': 52, 'pricedelay': 53, 'quick': 54, 'ms': 55, 'securedind': 56, 'pchcurrat': 57, 'sin': 58, 'cash': 59, 'lev': 60, 'pchcapx_ia': 61, 'orgcap': 62, 'ep': 63, 'nincr': 64, 'depr': 65, 'tb': 66, 'di

Model: medium , rf
Reading meanofmeandiffpf_sorted.csv ...
{'baspread': 0, 'gamma': 1, 'impl_volatility': 2, 'retvol': 3, 'delta': 4, 'theta': 5, 'turn': 6, 'maxret': 7, 'days_to_exp': 8, 'chmom': 9, 'ba_spread_option': 10, 'mom12m': 11, 'mve_ia': 12, 'mvel1': 13, 'mom1m': 14, 'volume': 15, 'open_interest': 16, 'beta': 17, 'dolvol': 18, 'mom36m': 19, 'pricedelay': 20, 'absacc': 21, 'acc': 22, 'age': 23, 'agr': 24, 'bm': 25, 'bm_ia': 26, 'cashdebt': 27, 'cashpr': 28, 'cfp': 29, 'cfp_ia': 30, 'chatoia': 31, 'chcsho': 32, 'chempia': 33, 'chinv': 34, 'chpmia': 35, 'convind': 36, 'currat': 37, 'depr': 38, 'divi': 39, 'divo': 40, 'egr': 41, 'ep': 42, 'gma': 43, 'grcapx': 44, 'grltnoa': 45, 'herf': 46, 'hire': 47, 'invest': 48, 'lev': 49, 'lgr': 50, 'operprof': 51, 'orgcap': 52, 'pchcapx_ia': 53, 'pchcurrat': 54, 'pchdepr': 55, 'pchgm_pchsale': 56, 'pchquick': 57, 'pchsale_pchinvt': 58, 'pchsale_pchrect': 59, 'pchsale_pchxsga': 60, 'pchsaleinv': 61, 'pctacc': 62, 'ps': 63, 'quick': 64, 'rd': 

In [10]:
# Show which tables were produced (keys mapped to their Styler objects).
grad_tables

{'option_balacc': <pandas.io.formats.style.Styler at 0x1c0e7e20940>,
 'option_return': <pandas.io.formats.style.Styler at 0x1c0e8d5e1f0>,
 'option+stock_balacc': <pandas.io.formats.style.Styler at 0x1c0e7e20b50>,
 'option+stock_return': <pandas.io.formats.style.Styler at 0x1c0e8664130>}

In [11]:
# with open("Thesis_gradienttable.txt", "w") as text_file:
#     text_file.write(grad_tables["option+stock_balacc"].to_latex(convert_css=True))

In [12]:
# Render the styled table for the "option+stock" feature set under the
# "balacc" metric.
grad_tables["option+stock_balacc"]

Unnamed: 0,lin,rf,xgb,nn,transformer
impl_volatility,0.023419,0.018053,0.082753,0.051406,0.09806
delta,0.05969,0.01041,0.011465,0.054998,0.083234
moneyness,0.003622,0.035664,0.065827,0.019148,0.015342
theta,0.01727,0.002853,0.01507,0.028829,0.014689
gamma,0.022393,0.001027,0.004539,0.010885,0.020148
days_to_exp,0.005923,0.000127,0.003141,0.005327,0.014217
mid_price,0.001409,0.000482,0.000798,0.001544,0.001038
ba_spread_option,0.000587,0.000238,0.00036,0.00093,0.001703
betasq,0.001028,2.9e-05,0.000514,0.000414,0.001294
mom36m,0.000333,6e-06,0.000411,0.000815,0.000798


In [14]:
# grad_tables["option+stock_return"]

## INFO MASTER THESIS
#### ORANGE / GREEN COLORS WERE MADE MANUALLY IN LATEX