In [21]:
import pandas as pd
import os
import re
import numpy as np
import sys 
import config_options as cfg
# Day to process, read from the project config module; the cells below use it
# to pick the matching CSV files.
chosen_day = cfg.chosen_day

In [22]:
def read_csv(dir, chosen_day):
    """Load the CSV for ``chosen_day`` found in directory ``dir``.

    A file matches when its name ends in ``.csv`` and contains the day either
    as ``YYYYMMDD`` or as ``YYYY-MM-DD``; ``chosen_day`` may be given in either
    form.  When several files match, the alphabetically first one is read, so
    the choice does not depend on the arbitrary order ``os.listdir`` returns.

    Parameters
    ----------
    dir : str or path-like
        Directory to search (not recursive).
    chosen_day : str
        Day to look for, ``"YYYYMMDD"`` or ``"YYYY-MM-DD"``.

    Returns
    -------
    pandas.DataFrame or None
        The file contents without its first column, or ``None`` (after
        printing a message) when no file matches.
    """
    compact_day = chosen_day.replace("-", "")   # YYYYMMDD
    if "-" in chosen_day:
        dashed_day = chosen_day                 # already YYYY-MM-DD
    else:
        dashed_day = f"{chosen_day[:4]}-{chosen_day[4:6]}-{chosen_day[6:]}"

    # Sorted so that the file picked is the same on every run and platform.
    matches = sorted(
        name for name in os.listdir(dir)
        if name.endswith(".csv") and (compact_day in name or dashed_day in name)
    )
    if not matches:
        print(f"Dia {chosen_day} no encontrado en {dir}. Revisa en la carpeta")
        return None

    df = pd.read_csv(os.path.join(dir, matches[0]))
    # The first column is dropped unconditionally — presumably a saved index
    # column; TODO confirm against the files' layout.
    return df.drop(df.columns[0], axis=1)

# Load the option and the underlying tables for the chosen day.
data_base_option = read_csv(cfg.dir_md_opciones, chosen_day)
data_base_underlying = read_csv(cfg.dir_md_subyacente, chosen_day)

# read_csv returns None when no file matches; stop here with a clear error
# instead of failing later with "'NoneType' object is not subscriptable".
if data_base_option is None or data_base_underlying is None:
    raise FileNotFoundError(
        f"No CSV found for {chosen_day} in "
        f"{cfg.dir_md_opciones!r} and/or {cfg.dir_md_subyacente!r}"
    )

In [23]:
# Parse the "biof_fecha" column of both tables as datetimes; it is the time
# column handed to flujo_eventos further down.
for _frame in (data_base_option, data_base_underlying):
    _frame["biof_fecha"] = pd.to_datetime(_frame["biof_fecha"])

def _changed_vs_previous(values):
    """Mark entries that differ from the row above, treating NaN as equal to NaN.

    ``values`` may be a Series or a DataFrame; the result has the same shape.
    A plain ``values.ne(values.shift())`` reports a change on every row where
    a value is still missing, because NaN never compares equal to itself.
    """
    previous = values.shift()
    both_missing = values.isna() & previous.isna()
    return values.ne(previous) & ~both_missing


def flujo_eventos(data_base_option: pd.DataFrame,
                  data_base_underlying: pd.DataFrame,
                  opt_cols,
                  und_cols,
                  time_col,
                  id_col):
    """Build, per option, a merged option/underlying timeline and label each row.

    For every distinct ``id_col`` value in ``data_base_option`` the option rows
    and all underlying rows are placed on the union of their timestamps, both
    sides are forward-filled, and each timestamp is classified by what changed
    with respect to the previous timestamp.

    Parameters
    ----------
    data_base_option : pandas.DataFrame
        Option rows; must contain ``time_col``, ``id_col``, every column in
        ``opt_cols`` and ``"ultimo_fecha"``.
    data_base_underlying : pandas.DataFrame
        Underlying rows; must contain ``time_col``, every column in
        ``und_cols`` and ``"ultimo_fecha"``.
    opt_cols, und_cols : iterable of str
        Columns whose change counts as an order-book change of the option /
        of the underlying.  Underlying columns (and its ``"ultimo_fecha"``)
        carry an ``_under`` suffix in the output.
    time_col : str
        Name of the timestamp column shared by both tables.
    id_col : str
        Name of the option identifier column.

    Returns
    -------
    pandas.DataFrame
        One row per (option, timestamp), sorted by ``id_col`` then
        ``time_col``, with the forward-filled option and underlying columns,
        the boolean flags ``changed_opt``, ``changed_und``, ``trade_opt``,
        ``trade_und`` and an ``event`` label:

        * ``INIT``        first row of each option;
        * ``OB_BOTH`` / ``OB_OPT`` / ``OB_UND``   an order-book column changed
          on both sides / only the option / only the underlying;
        * ``TRADE_BOTH`` / ``TRADE_OPT`` / ``TRADE_UND``   no order-book column
          changed but ``ultimo_fecha`` did;
        * ``NONE``        nothing changed.

        An empty frame with the same columns is returned when
        ``data_base_option`` has no groups.

    Notes
    -----
    * Rows sharing a timestamp (within the underlying, or within one option)
      are collapsed to the last one in input order; ``reindex`` cannot work on
      duplicated labels.
    * Missing values are not treated as changes: a column that is still NaN on
      two consecutive rows does not raise a flag.
    """
    opt_cols = list(opt_cols)
    und_cols = list(und_cols)
    trade_col_opt = "ultimo_fecha"
    trade_col_und = "ultimo_fecha_under"
    und_suff = [f"{c}_under" for c in und_cols]

    # Underlying: stable sort so "last row per timestamp" means last in input order.
    und = (data_base_underlying
           .sort_values(time_col, kind="mergesort")
           [[time_col] + und_cols + ["ultimo_fecha"]]
           .drop_duplicates(subset=time_col, keep="last")
           .rename(columns={c: f"{c}_under" for c in und_cols + ["ultimo_fecha"]})
           .set_index(time_col))

    out = []
    for opt_id, g in data_base_option.groupby(id_col, sort=False):
        g = (g.sort_values(time_col, kind="mergesort")
               [[time_col] + opt_cols + [trade_col_opt]]
               .drop_duplicates(subset=time_col, keep="last")
               .set_index(time_col))

        # Common timeline; each side carries its last known values forward.
        t = und.index.union(g.index).unique().sort_values()
        df = pd.concat([g.reindex(t).ffill(), und.reindex(t).ffill()], axis=1)

        # Order-book changes: some tracked column differs from the previous
        # row and the side already has at least one known value.
        changed_opt = (_changed_vs_previous(df[opt_cols]).any(axis=1)
                       & df[opt_cols].notna().any(axis=1))
        changed_und = (_changed_vs_previous(df[und_suff]).any(axis=1)
                       & df[und_suff].notna().any(axis=1))

        # Trade changes: ultimo_fecha moved.
        trade_opt = _changed_vs_previous(df[trade_col_opt])
        trade_und = _changed_vs_previous(df[trade_col_und])

        # The first row has nothing to be compared with.
        if not df.empty:
            for flags in (changed_opt, changed_und, trade_opt, trade_und):
                flags.iloc[0] = False

        block = df.reset_index().rename(columns={'index': time_col})
        block[id_col] = opt_id
        block['changed_opt'] = changed_opt.values
        block['changed_und'] = changed_und.values
        block['trade_opt'] = trade_opt.values
        block['trade_und'] = trade_und.values

        ob_opt = changed_opt.to_numpy()
        ob_und = changed_und.to_numpy()
        tr_opt = trade_opt.to_numpy()
        tr_und = trade_und.to_numpy()

        # Event label. Order-book changes take priority; trade labels are only
        # used on rows without any order-book change.
        event = np.full(len(block), "NONE", dtype=object)
        event[ob_opt & ob_und] = "OB_BOTH"
        event[ob_opt & ~ob_und] = "OB_OPT"
        event[~ob_opt & ob_und] = "OB_UND"
        no_ob = ~(ob_opt | ob_und)
        event[no_ob & tr_opt & tr_und] = "TRADE_BOTH"
        event[no_ob & tr_opt & ~tr_und] = "TRADE_OPT"
        event[no_ob & ~tr_opt & tr_und] = "TRADE_UND"
        if len(event):
            event[0] = "INIT"
        block["event"] = event

        out.append(block)

    if not out:
        # pd.concat([]) raises; hand back an empty frame with the usual columns.
        columns = ([time_col] + opt_cols + [trade_col_opt] + und_suff + [trade_col_und]
                   + [id_col, 'changed_opt', 'changed_und', 'trade_opt', 'trade_und', 'event'])
        return pd.DataFrame(columns=columns)

    result = (pd.concat(out, ignore_index=True)
                .sort_values([id_col, time_col])
                .reset_index(drop=True))
    return result

# The same four columns (bi_1_* / of_1_*, presumably best bid/offer price and
# size — confirm against the data dictionary) are watched for changes on both
# the option and the underlying.
_book_cols = ('bi_1_precio', 'bi_1_size', 'of_1_precio', 'of_1_size')

data_base_merged = flujo_eventos(
    data_base_option,
    data_base_underlying,
    opt_cols=_book_cols,
    und_cols=_book_cols,
    time_col='biof_fecha',
    id_col='id_simbolo',
)

In [24]:
# Keep only rows where an order-book column changed (option, underlying or both).
# .copy() makes the result an independent frame, so the column assignments in
# the following cells do not act on a slice of the unfiltered table
# (SettingWithCopyWarning).
data_base_merged = data_base_merged[
    data_base_merged["event"].isin(["OB_UND", "OB_OPT", "OB_BOTH"])
].copy()

In [25]:
# The letter right after "GFG" in the symbol gives the option type:
# C -> "call", V -> "put"; symbols without that pattern end up as NaN.
data_base_merged["instrument"] = (
    data_base_merged["id_simbolo"]
    .str.extract(r"GFG([CV])", expand=False)
    .map({"C": "call", "V": "put"})
)

In [26]:
def extraer_strike(id_simbolo, precio_under):
    """Derive the strike from the numeric part of an option symbol.

    The third `` - ``-separated field of ``id_simbolo`` (e.g. ``GFGC4500AB``)
    is searched for its first run of digits.  Two scaling heuristics follow:

    1. a number whose last digit is 3 is divided by 10
       (``49783`` -> ``4978.3``);
    2. if the result is at least three times ``precio_under`` it is divided
       by 10 (again).

    Returns the strike (``int`` when no division happened, ``float``
    otherwise), or ``None`` when the field contains no digits.
    """
    ticker = id_simbolo.split(" - ")[2]
    digits = re.search(r'(\d+)', ticker)
    if digits is None:
        return None

    numero = int(digits.group(1))

    # Rule 1: trailing 3 -> one implied decimal place.
    strike = numero / 10 if numero % 10 == 3 else numero

    # Rule 2: still far above the underlying price -> scale down once more.
    if strike >= 3 * precio_under:
        strike /= 10

    return strike

# Strike per row, scaled against the underlying's of_1_precio.
# Iterating only the two needed columns avoids building a Series for every row
# (DataFrame.apply with axis=1) and also works on an empty frame, where apply
# would return a DataFrame that cannot be assigned to a single column.
data_base_merged["strike"] = [
    extraer_strike(simbolo, precio)
    for simbolo, precio in zip(data_base_merged["id_simbolo"],
                               data_base_merged["of_1_precio_under"])
]

In [27]:
# Intrinsic value, floored at zero:
#   instrument == "call" -> bi_1_precio_under - strike
#   anything else        -> strike - of_1_precio_under
# Rows with a missing instrument therefore take the second branch.
_is_call = data_base_merged["instrument"].str.lower().eq("call")
_call_payoff = data_base_merged["bi_1_precio_under"] - data_base_merged["strike"]
_put_payoff = data_base_merged["strike"] - data_base_merged["of_1_precio_under"]
data_base_merged["intrinsic_value"] = pd.Series(
    np.where(_is_call, _call_payoff, _put_payoff),
    index=data_base_merged.index,
).clip(lower=0)

In [28]:
# Time value = option of_1_precio minus intrinsic value, defined only where
# of_1_precio is present and non-zero; every other row is dropped.
_offer = data_base_merged["of_1_precio"]
_has_offer = _offer.notna() & _offer.ne(0)
data_base_merged["time_value"] = (
    _offer - data_base_merged["intrinsic_value"]
).where(_has_offer)
data_base_merged = data_base_merged.dropna(subset=["time_value"])

In [29]:
# Rows with negative time value, i.e. of_1_precio is below intrinsic_value.
strategy = data_base_merged.loc[data_base_merged["time_value"].lt(0)]

In [30]:
# Preview the first four rows of `strategy`.
strategy.head(4)

Unnamed: 0,biof_fecha,bi_1_precio,bi_1_size,of_1_precio,of_1_size,ultimo_fecha,bi_1_precio_under,bi_1_size_under,of_1_precio_under,of_1_size_under,...,id_simbolo,changed_opt,changed_und,trade_opt,trade_und,event,instrument,strike,intrinsic_value,time_value
158914,2025-04-11 14:13:44.607,0.0,0.0,2190.0,1.0,2025-04-11 14:13:43,6990.0,14372.0,7000.0,995.0,...,MERV - XMEV - GFGC4500AB - 24hs,True,False,True,False,OB_OPT,call,4500.0,2490.0,-300.0
205651,2025-04-11 14:43:07.910,0.0,0.0,2100.0,1.0,2025-04-11 14:07:24,7080.0,375.0,7090.0,6851.0,...,MERV - XMEV - GFGC49783A - 24hs,False,True,False,False,OB_UND,call,4978.3,2101.7,-1.7
205662,2025-04-11 14:43:17.216,0.0,0.0,2100.0,1.0,2025-04-11 14:07:24,7080.0,53.0,7090.0,6851.0,...,MERV - XMEV - GFGC49783A - 24hs,False,True,False,False,OB_UND,call,4978.3,2101.7,-1.7
205666,2025-04-11 14:43:19.893,0.0,0.0,2100.0,1.0,2025-04-11 14:07:24,7080.0,41.0,7090.0,6851.0,...,MERV - XMEV - GFGC49783A - 24hs,False,True,False,False,OB_UND,call,4978.3,2101.7,-1.7
