# Distance

In [7]:
import pandas as pd
import numpy as np

# Carrega os dados
df_returns = pd.read_csv("../distance_results/Rp_ew_cc.csv")  # 'Return' e 'Semester'
df_rf = pd.read_csv("../distance_results/risk_free.csv")      # 'Return' e 'Semester'

# Renomeia a coluna da taxa livre de risco
df_rf = df_rf.rename(columns={"Return": "RiskFree"})

# Faz o merge
df = pd.merge(df_returns, df_rf, on="Semester")

# Função para calcular métricas
def calcular_metricas_completas(returns, risk_free, target=0.0):
    if len(returns) == 0 or returns.std() == 0:
        return pd.Series({m: np.nan for m in [
            "Sharpe", "Sortino", "Kappa_3", "Omega", "VaR_95", "CVaR_95",
            "Max_Drawdown", "Calmar", "Sterling", "Burke"
        ]})
    
    rf = risk_free.mean()
    excess_return = returns.mean() - rf
    cumulative = (1 + returns).cumprod()
    peak = cumulative.cummax()
    drawdowns = cumulative / peak - 1

    downside = returns[returns < target]
    downside_std = np.sqrt((downside**2).mean())
    lpm3 = ((np.maximum(target - returns, 0))**3).mean()
    var_95 = np.percentile(returns, 5)
    cvar_95 = returns[returns <= var_95].mean()
    max_dd = drawdowns.min()

    threshold = -0.10
    worst_dds = drawdowns[drawdowns < threshold]
    mean_dd = abs(worst_dds.mean()) if not worst_dds.empty else np.nan
    squared_dd_sum = (drawdowns[drawdowns < 0] ** 2).sum()
    burke_denom = np.sqrt(squared_dd_sum)

    return pd.Series({
        "Sharpe": excess_return / returns.std(),
        "Sortino": excess_return / downside_std,
        "Kappa_3": excess_return / (lpm3**(1/3)),
        "Omega": ((returns[returns > target] - target).sum()) / ((target - returns[returns < target]).sum()),
        "VaR_95": var_95,
        "CVaR_95": cvar_95,
        "Max_Drawdown": max_dd,
        "Calmar": (returns.mean() / abs(max_dd)) if max_dd != 0 else np.nan,
        "Sterling": (returns.mean() / mean_dd) if mean_dd != 0 else np.nan,
        "Burke": (returns.mean() / burke_denom) if burke_denom != 0 else np.nan
    })

# Loop por semestre
metricas_list = []
for semestre in df["Semester"].unique():
    grupo = df[df["Semester"] == semestre]
    metricas = calcular_metricas_completas(grupo["Return"], grupo["RiskFree"])
    metricas["Semester"] = semestre
    metricas_list.append(metricas)

# Junta tudo
distance_metricas_por_semestre = pd.DataFrame(metricas_list)

# Visualiza
distance_metricas_por_semestre = distance_metricas_por_semestre[["Semester"] + [col for col in distance_metricas_por_semestre.columns if col != "Semester"]]

distance_metricas_por_semestre.to_csv(f"../distance_results/distance_riskfree_measures.csv")
distance_metricas_por_semestre

Unnamed: 0,Semester,Sharpe,Sortino,Kappa_3,Omega,VaR_95,CVaR_95,Max_Drawdown,Calmar,Sterling,Burke
0,0.0,0.002107,0.001709,0.002233,1.401661,-0.002793,-0.003983,-0.835068,0.000246,0.000543,3.949147e-06
1,1.0,0.065031,0.06902,0.080126,1.50687,-0.003474,-0.003848,-0.629577,0.000582,0.0011,1.018247e-05
2,2.0,0.097127,0.098643,0.111297,1.620543,-0.003346,-0.004117,-0.640956,0.000613,0.001346,1.285842e-05
3,3.0,0.059322,0.056391,0.063566,1.422801,-0.003226,-0.004159,-0.786263,0.000337,0.000619,5.845243e-06
4,4.0,-0.164571,-0.151013,-0.158269,0.756228,-0.003436,-0.005273,-0.989481,-0.000263,-0.000334,-2.54292e-06
5,5.0,-0.000123,-0.00013,-0.000141,1.100831,-0.0043,-0.00576,-0.994133,0.000116,0.000166,1.343996e-06
6,6.0,0.075768,0.075561,0.078633,1.391735,-0.003642,-0.005347,-0.951031,0.000322,0.000567,4.860276e-06
7,7.0,0.249732,0.284753,0.318088,2.279066,-0.002633,-0.004123,-0.578889,0.001193,0.002521,3.058401e-05
8,8.0,0.047885,0.051164,0.053872,1.342476,-0.004316,-0.005917,-0.966929,0.000336,0.000532,4.435771e-06
9,9.0,0.026234,0.028166,0.032195,1.370027,-0.003741,-0.004149,-0.873417,0.00032,0.000651,5.554741e-06


# Cointegration

In [12]:
# Carregar os dados
df = pd.read_csv("../cointegration_results/operations.csv")  

# Garantir tipos corretos
df["Retorno total"] = pd.to_numeric(df["Retorno total"], errors="coerce")
df["Dias"] = pd.to_numeric(df["Dias"], errors="coerce")
df["Semestre"] = pd.to_numeric(df["Semestre"], errors="coerce")

# Retorno diário
df["Retorno diário"] = df["Retorno total"] / df["Dias"]

# Parâmetros
risk_free_rate = 0.0
target_return = 0.0
confidence_level = 0.95

# Função para calcular métricas de risco
def calcular_metricas(daily_returns):
    if len(daily_returns) < 2:
        return [np.nan]*10

    sharpe = (daily_returns.mean() - risk_free_rate) / daily_returns.std()

    downside = daily_returns[daily_returns < target_return]
    sortino = (daily_returns.mean() - target_return) / downside.std() if downside.std() > 0 else np.nan

    downside_m3 = ((target_return - downside) ** 3).mean()
    kappa_3 = (daily_returns.mean() - target_return) / (downside_m3 ** (1/3)) if downside_m3 > 0 else np.nan

    pos = daily_returns[daily_returns > target_return] - target_return
    neg = target_return - daily_returns[daily_returns < target_return]
    omega = pos.sum() / neg.sum() if neg.sum() > 0 else np.nan

    var = np.percentile(daily_returns, 100 * (1 - confidence_level))
    cvar = daily_returns[daily_returns <= var].mean()

    cumulative = (1 + daily_returns).cumprod()
    max_run = cumulative.cummax()
    drawdown = (cumulative - max_run) / max_run
    max_dd = drawdown.min()

    ann_return = daily_returns.mean() * 252
    calmar = ann_return / abs(max_dd) if max_dd < 0 else np.nan

    dd_threshold = drawdown[drawdown < -0.10]
    avg_dd = dd_threshold.abs().mean()
    sterling = ann_return / avg_dd if avg_dd > 0 else np.nan

    burke_den = np.sqrt(np.sum(drawdown[drawdown < 0] ** 2))
    burke = ann_return / burke_den if burke_den > 0 else np.nan

    return [sharpe, sortino, kappa_3, omega, var, cvar, max_dd, calmar, sterling, burke]

# Inicializa DataFrame de resultados
resultados = []

# Agrupar por semestre e calcular métricas
for semestre, grupo in df.groupby("Semestre"):
    retornos = grupo["Retorno diário"].dropna()
    metrics = calcular_metricas(retornos)
    resultados.append([semestre] + metrics)

# Converter para DataFrame
colunas = ["Semestre", "Sharpe Ratio", "Sortino Ratio", "Kappa 3", "Omega Ratio", 
           "VaR (95%)", "CVaR (95%)", "Maximum Drawdown", "Calmar Ratio", 
           "Sterling Ratio", "Burke Ratio"]

cointegration_df_metricas = pd.DataFrame(resultados, columns=colunas)

# Ordenar por semestre
cointegration_df_metricas = cointegration_df_metricas.sort_values(by="Semestre").reset_index(drop=True)

cointegration_df_metricas.to_csv(f"../cointegration_results/cointegration_riskfree_measures.csv")
cointegration_df_metricas

Unnamed: 0,Semestre,Sharpe Ratio,Sortino Ratio,Kappa 3,Omega Ratio,VaR (95%),CVaR (95%),Maximum Drawdown,Calmar Ratio,Sterling Ratio,Burke Ratio
0,0,0.84283,1.525586,0.95376,17.686955,-0.001203,-0.005334,-0.011737,116.041328,,68.309715
1,1,0.616479,1.606243,0.977209,13.549178,-0.002141,-0.004424,-0.01161,91.5858,,67.815249
2,2,0.354446,0.20591,0.126769,3.646518,-0.00392,-0.019926,-0.067305,12.576653,,4.029266
3,3,0.684024,1.804936,0.968705,10.360671,-0.002787,-0.00401,-0.005091,153.525201,,62.199855
4,4,0.608663,0.882979,0.554749,10.564387,-0.001639,-0.005908,-0.014437,65.781433,,40.786471
5,11,0.773297,3.347027,1.81454,32.422642,-0.000832,-0.002189,-0.004577,245.731135,,187.278213
6,12,0.674902,0.69986,0.425069,10.793255,-0.001609,-0.005207,-0.021011,37.985497,,27.084542
7,14,0.463643,0.478527,0.283372,4.102042,-0.005242,-0.01091,-0.024845,24.752194,,7.698903
8,18,0.599058,0.810248,0.479459,8.292675,-0.002817,-0.006959,-0.017618,49.078647,,21.375763
9,19,0.761219,2.228343,1.217984,15.652488,-0.001824,-0.003166,-0.006128,144.061582,,64.753045
