In [1]:
import pandas as pd
import os
import re
import numpy as np
import sys 
import config_options as cfg
# Day taken from config_options; read_csv() below keeps only CSV files whose name contains this value.
chosen_day = cfg.chosen_day

In [None]:
def read_csv(dir, chosen_day):
    """Load the CSV for ``chosen_day`` found in the folder ``dir``.

    A file qualifies when its name contains ``chosen_day`` and ends in
    ".csv". ``os.listdir`` returns entries in arbitrary order, so the
    candidates are sorted and the lexicographically first one is read; this
    keeps the choice reproducible when several files match.

    Parameters
    ----------
    dir : str or path-like
        Folder to search. The name shadows the ``dir`` builtin; it is kept so
        that existing positional and keyword calls keep working.
    chosen_day : str
        Text that must appear in the file name.

    Returns
    -------
    pandas.DataFrame or None
        The file contents without its first column, or ``None`` (after
        printing a notice) when no file matches.
    """
    candidates = sorted(
        name
        for name in os.listdir(dir)
        if chosen_day in name and name.endswith(".csv")
    )
    if not candidates:
        print(f"Dia {chosen_day} no encontrado en {dir}. Revisa en la carpeta")
        return None

    df = pd.read_csv(os.path.join(dir, candidates[0]))
    # The first column is discarded (presumably an index written by an
    # earlier to_csv call — TODO confirm against the files).
    return df.drop(columns=df.columns[0])

# Load the option and underlying market-data files for the selected day.
# read_csv returns None when no file matches, so either name may be None here.
data_base_option = read_csv(dir=cfg.dir_md_opciones, chosen_day=chosen_day)
data_base_underlying = read_csv(dir=cfg.dir_md_subyacente, chosen_day=chosen_day)

Unnamed: 0,id,id_simbolo,ultimo_precio,ultimo_fecha,ultimo_size,fecha_insercion,bi_1_precio,bi_1_size,bi_2_precio,bi_2_size,...,of_3_precio,of_3_size,of_4_precio,of_4_size,of_5_precio,of_5_size,biof_fecha,insercion_dia,ev,nv
0,1596828214,MERV - XMEV - GFGV6400JU - 24hs,105.0,2025-05-05 14:00:09,50,2025-05-05 14:00:11.550,98.001,2,98.0,23,...,0.0,0,0.0,0,0.0,0,2025-05-05 14:00:10.556,,0,0


In [3]:
# Attach to every option row the most recent underlying price known at the
# option's own "ultimo_fecha" (backward as-of join).
data_base_option["ultimo_fecha"] = pd.to_datetime(data_base_option["ultimo_fecha"])
data_base_underlying["ultimo_fecha"] = pd.to_datetime(data_base_underlying["ultimo_fecha"])

# Build the right-hand side on a temporary frame so data_base_underlying keeps
# its original column names (no rename / rename-back round trip).
underlying_quotes = (
    data_base_underlying[["ultimo_fecha", "ultimo_precio"]]
    .rename(columns={"ultimo_precio": "last_underlying"})
    .sort_values("ultimo_fecha")
)

# Dropping a stale "last_underlying" first makes the cell safe to re-run:
# otherwise a second execution would yield last_underlying_x / last_underlying_y.
data_base_option = pd.merge_asof(
    data_base_option.drop(columns="last_underlying", errors="ignore").sort_values("ultimo_fecha"),
    underlying_quotes,
    on="ultimo_fecha",
    direction="backward",
)

# Optional check: data_base_option["last_underlying"].isna().sum() counts the
# option rows that have no earlier underlying quote.

In [4]:
# The letter right after "GFG" in the symbol encodes the option type:
# C -> "call", V -> "put". Symbols without that pattern end up as NaN.
data_base_option["instrument"] = (
    data_base_option["id_simbolo"]
    .str.extract(r"GFG([CV])", expand=False)
    .map({"C": "call", "V": "put"})
)

In [None]:
def _valor_intrinseco(inst, S, K):
    """Intrinsic value: max(S - K, 0) for a "call", max(K - S, 0) otherwise."""
    # The payoff must stay the FIRST argument of max(): with a NaN underlying
    # max(nan, 0) returns nan, whereas max(0, nan) would mask it as 0.
    if inst == "call":
        return max(S - K, 0)
    return max(K - S, 0)


def extraer_strike(row, tol_pct=0.1, max_strike_ratio=3):
    """Derive strike, intrinsic value and time value for one option row.

    The digits following "GFGC"/"GFGV" in ``id_simbolo`` are read as the
    strike. The same digits are also tried divided by 10 (strike with one
    decimal place), and one of the two readings is chosen:

    1. If the undivided strike exceeds ``max_strike_ratio`` times the
       underlying, the divided strike is used.
    2. Otherwise, if the last price is at least ``(1 - tol_pct)`` times the
       intrinsic value implied by the undivided strike, that strike is kept.
    3. Otherwise the divided strike is used.

    Parameters
    ----------
    row : mapping / pandas.Series
        Must provide "id_simbolo", "ultimo_precio", "instrument"
        ("call" or anything else, treated as a put) and "last_underlying".
    tol_pct : float, default 0.1
        Allowed shortfall of the premium below intrinsic value (price slippage).
    max_strike_ratio : float, default 3
        Upper bound for strike / underlying before the undivided reading is
        rejected outright.

    Returns
    -------
    pandas.Series
        ``[strike, intrinsic value, time value]``; three NaN when the symbol
        does not match. With a NaN underlying both comparisons are False, so
        the divided strike is returned with NaN intrinsic and time value.
    """
    match = re.search(r"GFG[CV](\d+)", row["id_simbolo"])
    if not match:
        return pd.Series([np.nan, np.nan, np.nan])

    K_raw = int(match.group(1))
    ultimo = float(row["ultimo_precio"])
    inst = row["instrument"]
    S = float(row["last_underlying"])

    # Intrinsic value under the "no decimal point" reading of the strike.
    VI_raw = _valor_intrinseco(inst, S, K_raw)

    too_far = K_raw > max_strike_ratio * S
    if not too_far and ultimo >= (1 - tol_pct) * VI_raw:
        strike = float(K_raw)
        VI = VI_raw
    else:
        # Alternative reading: the last digit is a decimal.
        strike = K_raw / 10.0
        VI = _valor_intrinseco(inst, S, strike)

    VT = ultimo - VI
    return pd.Series([strike, VI, VT])
# Row-wise evaluation: extraer_strike returns three values per row, which are
# stored positionally in the three new columns.
strike_columns = ["strike", "valor_intrinseco", "valor_tiempo"]
strike_values = data_base_option.apply(extraer_strike, axis=1)
data_base_option[strike_columns] = strike_values


In [8]:
# Summary statistics of the time value (last price minus intrinsic value);
# a negative minimum means some premiums sit below their computed intrinsic value.
data_base_option["valor_tiempo"].describe()

count    97586.000000
mean       231.118884
std        180.161017
min       -118.300000
25%         76.550000
50%        201.770000
75%        335.000000
max        800.000000
Name: valor_tiempo, dtype: float64

In [9]:
# Reorder the frame from lowest to highest time value so the most negative
# rows appear first, then show the first 40 of them.
data_base_option = data_base_option.sort_values("valor_tiempo")
data_base_option.head(40)

Unnamed: 0.1,Unnamed: 0,id,id_simbolo,ultimo_precio,ultimo_fecha,ultimo_size,fecha_insercion,bi_1_precio,bi_1_size,bi_2_precio,...,of_5_size,biof_fecha,insercion_dia,ev,nv,last_underlying,instrument,strike,valor_intrinseco,valor_tiempo
73469,71548,1599216804,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 18:16:41.730,1641.0,10,0.0,...,0,2025-05-05 18:16:41.639,,875000,5,7010.0,put,8878.3,1868.3,-118.3
73479,82920,1599503942,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 18:57:15.348,1671.0,10,1650.0,...,0,2025-05-05 18:57:15.159,,1050000,6,7010.0,put,8878.3,1868.3,-118.3
73478,76369,1599318851,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 18:30:21.688,1661.0,10,1650.0,...,0,2025-05-05 18:30:21.562,,1050000,6,7010.0,put,8878.3,1868.3,-118.3
73477,74787,1599292521,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 18:26:57.328,1661.0,10,1650.0,...,0,2025-05-05 18:26:57.247,,1050000,6,7010.0,put,8878.3,1868.3,-118.3
73476,82155,1599484370,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 18:53:37.272,1651.0,10,1650.0,...,0,2025-05-05 18:53:37.193,,1050000,6,7010.0,put,8878.3,1868.3,-118.3
73475,74825,1599292679,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 18:26:58.264,1661.0,10,1650.0,...,0,2025-05-05 18:26:58.177,,1050000,6,7010.0,put,8878.3,1868.3,-118.3
73474,74870,1599293256,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 18:27:03.201,1661.0,10,1650.0,...,0,2025-05-05 18:27:03.122,,1050000,6,7010.0,put,8878.3,1868.3,-118.3
73473,71807,1599221442,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 18:17:18.653,1641.0,10,0.0,...,0,2025-05-05 18:17:18.536,,1050000,6,7010.0,put,8878.3,1868.3,-118.3
73480,74534,1599290754,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 18:26:43.150,1651.0,10,1650.0,...,0,2025-05-05 18:26:43.046,,1050000,6,7010.0,put,8878.3,1868.3,-118.3
73472,75439,1599300619,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 18:28:04.411,1661.0,10,1650.0,...,0,2025-05-05 18:28:04.334,,1050000,6,7010.0,put,8878.3,1868.3,-118.3


In [None]:


def extraer_vencimiento(texto):
    """Extract the trailing letter code of the ticker that precedes "- 24hs".

    The last two characters of the ticker (upper-case letters and digits) are
    inspected and only the alphabetic ones are returned: two letters when
    both are letters, one letter when the other character is a digit, and an
    empty string when both are digits.

    Returns ``None`` when ``texto`` contains no "<ticker> - 24hs" pattern.
    """
    found = re.search(r'([A-Z0-9]+)\s*-\s*24hs', texto)
    if found is None:
        return None

    tail = found.group(1)[-2:]  # last two characters of the ticker
    return ''.join(filter(str.isalpha, tail))

# The notebook never defines `data_base`; the option frame is the one whose
# symbols carry the trailing letter code, so the column is added there.
data_base_option["vencimiento"] = data_base_option["id_simbolo"].apply(extraer_vencimiento)
# Optional check: data_base_option["vencimiento"].unique()