In [1]:
import pandas as pd
import os
import re
import numpy as np
import sys 
import config_options as cfg
# Day selector read from the local config module; read_csv uses it as a
# substring filter on the CSV file names.
chosen_day = cfg.chosen_day

In [2]:
def read_csv(dir, chosen_day):
    """Load the CSV in ``dir`` whose file name contains ``chosen_day``.

    Parameters
    ----------
    dir : str
        Directory to search (not recursive).
    chosen_day : str
        Substring that must appear in the file name.

    Returns
    -------
    pandas.DataFrame or None
        The file contents with the first column dropped, or ``None`` (after
        printing a message) when no ``.csv`` file in ``dir`` matches.
    """
    # os.listdir returns entries in arbitrary order; sort so that the file
    # picked is the same on every run and every machine.
    files = sorted(
        f for f in os.listdir(dir) if chosen_day in f and f.endswith(".csv")
    )
    if not files:
        print(f"Dia {chosen_day} no encontrado en {dir}. Revisa en la carpeta")
        return None

    if len(files) > 1:
        # Make the ambiguity visible instead of silently choosing one file.
        print(f"Hay {len(files)} archivos para el dia {chosen_day} en {dir}; se usa {files[0]}")

    full_path = os.path.join(dir, files[0])
    df = pd.read_csv(full_path)
    # The first column is discarded (presumably the saved index - confirm).
    return df.drop(df.columns[0], axis=1)

# Load the option and underlying market-data snapshots for the chosen day.
data_base_option = read_csv(cfg.dir_md_opciones, chosen_day)
data_base_underlying = read_csv(cfg.dir_md_subyacente, chosen_day)

# read_csv returns None when no file matches. Stop here with a clear message
# instead of failing later with "'NoneType' object is not subscriptable".
if data_base_option is None or data_base_underlying is None:
    raise FileNotFoundError(
        f"Faltan datos de mercado para el dia {chosen_day}; revisa las carpetas configuradas"
    )

In [None]:
# Parse the last-trade timestamps so both frames can be aligned in time.
data_base_option["ultimo_fecha"] = pd.to_datetime(data_base_option["ultimo_fecha"])

# For the underlying: parse the timestamp and suffix its top-of-book columns
# so they stay distinguishable from the option's own bid/offer columns.
data_base_underlying = data_base_underlying.assign(
    ultimo_fecha=pd.to_datetime(data_base_underlying["ultimo_fecha"])
).rename(
    columns={
        'bi_1_precio': 'bi_1_precio_underlying',
        'of_1_precio': 'of_1_precio_underlying',
        'bi_1_size': 'bi_1_size_underlying',
        'of_1_size': 'of_1_size_underlying',
    }
)


In [None]:

# Attach to every option row the most recent underlying price at or before
# the option's last-trade timestamp (backward as-of join).
# NOTE(review): no visible cell creates `last_underlying` on
# data_base_underlying; presumably it already exists in the CSV - confirm.
data_base_option = pd.merge_asof(
    # Drop any `last_underlying` left by a previous run of this cell. Without
    # this, re-running produces `last_underlying_x`/`last_underlying_y` and the
    # cells below can no longer find `last_underlying`.
    data_base_option
    .drop(columns="last_underlying", errors="ignore")
    .sort_values("ultimo_fecha"),
    data_base_underlying[["ultimo_fecha", "last_underlying"]].sort_values("ultimo_fecha"),
    on="ultimo_fecha",
    direction="backward",
)











#n_nans = data_base_option["last_underlying"].isna().sum()
#print("Cantidad de NaN:",data_base_option["last_underlying"].isna().sum())

In [9]:
# The letter right after "GFG" in the symbol encodes the option type:
# C -> call, V -> put. Symbols that do not match get NaN.
data_base_option["instrument"] = (
    data_base_option["id_simbolo"]
    .str.extract(r"GFG([CV])", expand=False)
    .map({"C": "call", "V": "put"})
)

In [None]:
def _valor_intrinseco(inst, S, strike):
    """Intrinsic value: ``max(S - K, 0)`` for a call, ``max(K - S, 0)`` otherwise."""
    if inst == "call":
        return max(S - strike, 0)
    return max(strike - S, 0)


def extraer_strike(row, tol_pct=0.1, max_ratio=3.0):
    """Resolve the strike encoded in an option symbol and split its premium.

    The digits after ``GFGC``/``GFGV`` may carry an implicit decimal, so the
    raw number ``K`` can mean either ``K`` or ``K / 10``. The rule is:

    1. If ``K`` is more than ``max_ratio`` times the underlying, use ``K / 10``.
    2. Otherwise, if the last price is at least ``(1 - tol_pct)`` times the
       intrinsic value computed with ``K``, keep ``K``.
    3. Otherwise use ``K / 10``.

    Parameters
    ----------
    row : mapping / pandas.Series
        Must provide ``id_simbolo``, ``ultimo_precio``, ``instrument``
        (``"call"``; any other value is treated as a put) and
        ``last_underlying``.
    tol_pct : float, default 0.1
        Tolerated shortfall of the premium against the intrinsic value
        (allows for price slippage).
    max_ratio : float, default 3.0
        Largest plausible strike/underlying ratio for the raw strike.

    Returns
    -------
    pandas.Series
        ``[strike, intrinsic value, time value]``. All three are NaN when the
        symbol does not match, or when a value needed to choose between the
        two strike readings is missing.
    """
    sin_dato = pd.Series([np.nan, np.nan, np.nan])

    simbolo = row["id_simbolo"]
    # A missing symbol arrives as NaN (a float); re.search would raise on it.
    match = re.search(r"GFG[CV](\d+)", simbolo) if isinstance(simbolo, str) else None
    if not match:
        return sin_dato

    K_raw = int(match.group(1))
    ultimo = float(row["ultimo_precio"])
    inst = row["instrument"]
    S = float(row["last_underlying"])

    # Without an underlying price every comparison below is False, which would
    # silently select K / 10. Report "unknown" instead.
    if np.isnan(S):
        return sin_dato

    K_b = K_raw / 10.0    # alternative strike with the implicit decimal

    if K_raw > max_ratio * S:
        # Filter: the strike is never more than max_ratio times the underlying.
        strike = K_b
    elif np.isnan(ultimo):
        # The tolerance test needs the premium; without it the strike is undecidable.
        return sin_dato
    elif ultimo >= (1 - tol_pct) * _valor_intrinseco(inst, S, K_raw):
        # The premium is consistent with the raw strike.
        strike = float(K_raw)
    else:
        # Fallback: use the strike with the implicit decimal.
        strike = K_b

    VI = _valor_intrinseco(inst, S, strike)
    VT = ultimo - VI
    return pd.Series([strike, VI, VT])
# Row-wise: resolve the strike and split the premium into intrinsic and time value.
data_base_option[["strike", "valor_intrinseco", "valor_tiempo"]] = (
    data_base_option.apply(extraer_strike, axis="columns")
)

In [None]:
# Summary statistics of the computed time value (sanity check of its range).
data_base_option["valor_tiempo"].describe()

In [17]:
# Display the enriched option frame (pandas truncates the rendering).
data_base_option

Unnamed: 0,id,id_simbolo,ultimo_precio,ultimo_fecha,ultimo_size,fecha_insercion,bi_1_precio,bi_1_size,bi_2_precio,bi_2_size,...,of_5_size,biof_fecha,insercion_dia,ev,nv,last_underlying,instrument,strike,valor_intrinseco,valor_tiempo
73469,1599216804,MERV - XMEV - GFGV88783J - 24hs,1750.00,2025-05-05 18:16:40,1,2025-05-05 18:16:41.730,1641.00,10,0.00,0,...,0,2025-05-05 18:16:41.639,,875000,5,7010.0,put,8878.3,1868.3,-118.30
73433,1599320859,MERV - XMEV - GFGV88783J - 24hs,1750.00,2025-05-05 18:16:40,1,2025-05-05 18:30:36.744,1671.00,10,1650.00,5,...,0,2025-05-05 18:30:36.638,,1050000,6,7010.0,put,8878.3,1868.3,-118.30
73432,1599242019,MERV - XMEV - GFGV88783J - 24hs,1750.00,2025-05-05 18:16:40,1,2025-05-05 18:19:37.855,1641.00,10,0.00,0,...,0,2025-05-05 18:19:37.775,,1050000,6,7010.0,put,8878.3,1868.3,-118.30
73511,1599689206,MERV - XMEV - GFGV88783J - 24hs,1750.00,2025-05-05 18:16:40,1,2025-05-05 19:22:37.608,1750.00,50,1671.00,10,...,0,2025-05-05 19:22:37.177,,1050000,6,7010.0,put,8878.3,1868.3,-118.30
73503,1599625729,MERV - XMEV - GFGV88783J - 24hs,1750.00,2025-05-05 18:16:40,1,2025-05-05 19:13:42.099,1661.00,10,1650.00,5,...,0,2025-05-05 19:13:41.901,,1050000,6,7010.0,put,8878.3,1868.3,-118.30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73297,1599351362,MERV - XMEV - GFGV7800JU - 24hs,799.99,2025-05-05 18:16:34,9,2025-05-05 18:34:13.746,780.00,1,771.01,6,...,2,2025-05-05 18:34:13.667,,29451401,372,7000.0,put,7800.0,800.0,-0.01
73289,1599216017,MERV - XMEV - GFGV7800JU - 24hs,799.99,2025-05-05 18:16:34,9,2025-05-05 18:16:36.174,771.00,5,770.80,5,...,20,2025-05-05 18:16:35.993,,29451401,372,7000.0,put,7800.0,800.0,-0.01
73337,1599245767,MERV - XMEV - GFGV7800JU - 24hs,799.99,2025-05-05 18:16:34,9,2025-05-05 18:20:03.557,771.00,5,770.90,2,...,2,2025-05-05 18:20:03.477,,29451401,372,7000.0,put,7800.0,800.0,-0.01
73270,1599325300,MERV - XMEV - GFGV7800JU - 24hs,799.99,2025-05-05 18:16:34,9,2025-05-05 18:31:08.154,771.01,6,771.00,5,...,2,2025-05-05 18:31:08.077,,29451401,372,7000.0,put,7800.0,800.0,-0.01


In [None]:
# Keep only the rows whose time value is negative, most negative first.
# NOTE(review): this rebinds data_base_option to the filtered subset, so every
# later cell sees only these rows - confirm that is intended.
data_base_option = (
    data_base_option
    .sort_values("valor_tiempo")
    .loc[lambda frame: frame["valor_tiempo"] < 0]
)
print(data_base_option.shape)
data_base_option.head(100)


(1347, 35)


Unnamed: 0,id,id_simbolo,ultimo_precio,ultimo_fecha,ultimo_size,fecha_insercion,bi_1_precio,bi_1_size,bi_2_precio,bi_2_size,...,of_5_size,biof_fecha,insercion_dia,ev,nv,last_underlying,instrument,strike,valor_intrinseco,valor_tiempo
73469,1599216804,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 18:16:41.730,1641.0,10,0.0,0,...,0,2025-05-05 18:16:41.639,,875000,5,7010.0,put,8878.3,1868.3,-118.3
73433,1599320859,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 18:30:36.744,1671.0,10,1650.0,5,...,0,2025-05-05 18:30:36.638,,1050000,6,7010.0,put,8878.3,1868.3,-118.3
73432,1599242019,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 18:19:37.855,1641.0,10,0.0,0,...,0,2025-05-05 18:19:37.775,,1050000,6,7010.0,put,8878.3,1868.3,-118.3
73511,1599689206,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 19:22:37.608,1750.0,50,1671.0,10,...,0,2025-05-05 19:22:37.177,,1050000,6,7010.0,put,8878.3,1868.3,-118.3
73503,1599625729,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 19:13:42.099,1661.0,10,1650.0,5,...,0,2025-05-05 19:13:41.901,,1050000,6,7010.0,put,8878.3,1868.3,-118.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73435,1599288115,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 18:26:19.807,1651.0,10,1650.0,5,...,0,2025-05-05 18:26:19.718,,1050000,6,7010.0,put,8878.3,1868.3,-118.3
73437,1599571914,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 19:06:09.048,1651.0,10,1650.0,5,...,0,2025-05-05 19:06:08.951,,1050000,6,7010.0,put,8878.3,1868.3,-118.3
73420,1599230422,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 18:18:20.830,1641.0,10,0.0,0,...,0,2025-05-05 18:18:20.722,,1050000,6,7010.0,put,8878.3,1868.3,-118.3
73500,1599266075,MERV - XMEV - GFGV88783J - 24hs,1750.0,2025-05-05 18:16:40,1,2025-05-05 18:22:21.655,1641.0,10,0.0,0,...,0,2025-05-05 18:22:21.572,,1050000,6,7010.0,put,8878.3,1868.3,-118.3


In [None]:


def extraer_vencimiento(texto):
    """Return the expiry letters at the end of the ticker in ``texto``.

    The ticker is the run of uppercase letters/digits immediately before
    ``- 24hs``. Only its last two characters are inspected, and the letters
    among them are returned: two letters, one letter, or an empty string when
    both are digits. Returns ``None`` when ``texto`` has no such ticker.
    """
    hallado = re.search(r'([A-Z0-9]+)\s*-\s*24hs', texto)
    if hallado is None:
        return None

    cola = hallado.group(1)[-2:]  # last two characters of the ticker
    # Keeping only the letters covers both cases: an all-letter tail is
    # returned unchanged, and a mixed tail loses its digit.
    return ''.join(filter(str.isalpha, cola))

# Tag each option row with the expiry code parsed from its symbol.
# The original referenced `data_base`, which is never defined in this notebook
# and raises NameError on a fresh kernel; the option frame is `data_base_option`.
data_base_option["vencimiento"] = data_base_option["id_simbolo"].apply(extraer_vencimiento)
# data_base_option["vencimiento"].unique()