In [2]:
import pandas as pd
import numpy as np
import logging
from pathlib import Path
from typing import Literal, Optional

import pandas as pd
from entities import EM_CDS_TRACKER_DICT, FX_TRACKER_DICT
from utils import Backtest, load_trackers, clean_stale_prices
from pathlib import Path
from aamalha.utils.performance import Performance

# Trackers Statistics

In [None]:
# Load both tracker families; the suffix keeps FX and CDS columns distinguishable
# once they share a single frame.
fx = load_trackers(FX_TRACKER_DICT).rename(columns=lambda col: col + "_fx")
cds = load_trackers(EM_CDS_TRACKER_DICT).rename(columns=lambda col: col + "_cds")

# Join on the shared index and carry the last observation forward.
# `.ffill()` replaces `fillna(method="ffill")`, which pandas 2.1+ deprecates.
trackers_filled = pd.concat([fx, cds], axis=1).ffill()

# Apply the project's stale-price cleaning one column at a time.
trackers = pd.concat(
    [clean_stale_prices(trackers_filled[col].copy()) for col in trackers_filled],
    axis=1,
)
trackers

In [26]:
# Build the project's Performance object for the trackers; the following cells
# read its `table` and `rolling_sharpe` attributes.
perf_trackers = Performance(trackers)

In [33]:
# Portuguese labels for the columns of the transposed performance table.
DICT_COLS = {
    "Return": "Retorno",
    "Vol": "Volatilidade",
    "Skew": "Assimetria",
    "Kurt": "Curtose",
    "Sharpe": "Sharpe",
    "Sortino": "Sortino",
    "DD 1%q": "DD (1%)",
    "Max DD": "Max DD",
    "Start Date": "Início",
}
df_perf = perf_trackers.table.transpose().copy()
# Row labels such as "XXX_fx" are rewritten as "XXX (FX)": the first three
# characters, then the upper-cased text after the fourth character.
df_perf.rename(
    index=lambda label: "{} ({})".format(label[:3], label[4:].upper()),
    columns=DICT_COLS,
).to_clipboard()

In [48]:
# Portuguese labels for the `describe()` statistics that get renamed.
DICT_COLS = {
    "count": "N obs",
    "mean": "Média",
    "std": "DP",
    "min": "Min",
    "max": "Max",
}
# One row per tracker, one column per descriptive statistic.
trackers_rolling_sharpe = perf_trackers.rolling_sharpe.describe().transpose()
# First index label at which each tracker has a valid rolling Sharpe value.
trackers_rolling_sharpe["Início"] = perf_trackers.rolling_sharpe.apply(
    lambda series: series.first_valid_index()
)
# Row labels such as "XXX_fx" are rewritten as "XXX (FX)" before copying.
trackers_rolling_sharpe.rename(
    index=lambda label: "{} ({})".format(label[:3], label[4:].upper()),
    columns=DICT_COLS,
).to_clipboard()

# Backtests

In [None]:
# Sanity check: does one backtest workbook exist at the expected location?
# NOTE(review): absolute, user-specific path that mixes "\" and "/" separators;
# it will not resolve on another machine.
Path(
    r"C:\Users\pcampos\OneDrive - Insper - Instituto de Ensino e Pesquisa/Dissertação Mestrado/Analysis/backtests/VALUE-FX-PAIRED-1.xlsx"
).is_file()

In [None]:
# Folder holding one Excel workbook per backtest.
# NOTE(review): absolute, user-specific path; consider a configurable base directory.
FOLDER = Path(
    "C:/Users/pcampos/OneDrive - Insper - Instituto de Ensino e Pesquisa/Dissertação Mestrado/Analysis/backtests"
)

# Sorted so the column order does not depend on the file system's listing order.
# Only regular files are kept, and Excel's "~$" lock files (created while a
# workbook is open) are skipped because they match the pattern but are unreadable.
backtest_files = sorted(
    file_path
    for file_path in FOLDER.glob("*PAIRED-*.xlsx")
    if file_path.is_file() and not file_path.name.startswith("~$")
)
if not backtest_files:
    # pd.concat([]) would otherwise fail with "No objects to concatenate".
    raise FileNotFoundError(f"No '*PAIRED-*.xlsx' workbooks found in {FOLDER}")

# One series per workbook: its "backtest" column, named after the file stem.
list_series = [
    pd.read_excel(file_path, index_col=0)["backtest"].rename(file_path.stem)
    for file_path in backtest_files
]
df_backtest = pd.concat(list_series, axis=1).sort_index(ascending=True)
df_backtest

# Estatísticas Descritivas dos Portfólios - Retornos Diários

In [36]:
# Build the project's Performance object for the backtest series; the following
# cells read its `table` and `rolling_sharpe` attributes.
performance = Performance(df_backtest)

In [38]:
# Portuguese labels for the columns of the transposed performance table.
DICT_COLS = {
    "Return": "Retorno",
    "Vol": "Volatilidade",
    "Skew": "Assimetria",
    "Kurt": "Curtose",
    "Sharpe": "Sharpe",
    "Sortino": "Sortino",
    "DD 1%q": "DD (1%)",
    "Max DD": "Max DD",
    "Start Date": "Início",
}
# One row per backtest; copy the relabelled table to the clipboard.
df_perf = performance.table.transpose().copy()
df_perf.rename(columns=DICT_COLS).to_clipboard()

# Estatísticas Descritivas dos Portfólios - Sharpe Média Móvel 1 Ano

In [None]:
# Inspect the statistic names returned by describe(); these are the keys that
# the DICT_COLS mapping in the next cell renames.
performance.rolling_sharpe.describe().index

In [36]:
# Portuguese labels for the `describe()` statistics that get renamed.
DICT_COLS = {
    "count": "N obs",
    "mean": "Média",
    "std": "DP",
    "min": "Min",
    "max": "Max",
}
# One row per backtest, one column per statistic; copy the result to the clipboard.
(
    performance.rolling_sharpe.describe()
    .transpose()
    .rename(columns=DICT_COLS)
    .to_clipboard()
)

NameError: name 'performance' is not defined

# old

In [None]:
# Work on a copy so the experiments below cannot modify df_backtest.
data = df_backtest.copy()
def export_latex_annual_returns(data: pd.DataFrame): 
    INDEX_NAME = "Ano"
    COLUMN_NAME = "Backtests"
    CAPTION = "Retornos Anuais dos Backtests"
    SOURCE = "Pedro Monzú, 2024"
    LABEL = "annual_returns"

    output = (
        "\\begin{table}[htbp]\n"
        "\t\centering\n"
        "\t\caption{%s}\n"
        "\n\t%s\n"
        "\t\\vspace{0.3cm}\n"
        "\t\label{tab:%s}\n"
        "\t\source{%s}\n"
        "\end{table}\n"
    )

    df_year_returns = (
        np.exp(np.log(data).diff(1).groupby(data.index.to_period("Y")).sum()) - 1
    )
    df_year_returns.index.name = INDEX_NAME
    df_year_returns.columns.name = COLUMN_NAME

    formatted_df = df_year_returns.applymap(
        lambda x: f"({abs(x):.2%})" if x < 0 else f"{x:.2%}"
    )
    formatted_df = formatted_df.replace(np.nan, "-")
    latex_output = formatted_df.to_latex(
        escape=False, column_format="c" * (len(formatted_df.columns) + 1)
    ).replace("%", "\%").replace("\n", "\n\t")
    return output % (CAPTION, latex_output, LABEL, SOURCE)

# print() is deliberate: it shows the LaTeX source with real line breaks and
# tabs (ready to paste) instead of the escaped string repr.
print(export_latex_annual_returns(data))

In [61]:
# Rebuild the Performance object from the `data` copy of the backtests.
# NOTE(review): this rebinds `performance`, which an earlier cell also assigns.
performance = Performance(data)


In [None]:
# Scratch experiment: build a number format from flags instead of a fixed spec.
x = -1.2345
precision = 7
percentage = True
parentheses = False
# Prefix ("(" or "-"), magnitude with the requested precision, optional ")".
formatted_string = (
    ("(" if parentheses else "-")
    + format(abs(x), ".{}{}".format(precision, "%" if percentage else "f"))
    + (")" if parentheses else "")
)
formatted_string

In [None]:
# Identity check against np.nan: true here only because `x` is that very object.
# NaNs created elsewhere (e.g. float("nan")) are different objects, so
# `is np.nan` is not a reliable missing-value test.
x = np.nan
x is np.nan

In [293]:
def float_to_string(value: float, pattern: str) -> str:
    """Format ``value`` according to a simplified spreadsheet-style number pattern.

    Recognised pattern features:

    * surrounding ``(`` ``)``: wrap the result in parentheses and format the
      magnitude (no minus sign inside the parentheses);
    * digits after ``.``: number of decimal places;
    * trailing ``%``: percentage formatting;
    * trailing commas (before the ``%``, if any): each divides the value by 1000;
    * ``0`` characters before the decimal point: minimum number of integer
      digits (zero padded);
    * a comma in the integer part: use ``,`` as thousands separator.

    Args:
        value: Number to format. Missing values are not handled here.
        pattern: A single pattern section such as ``"#,##0.00"`` or ``"(0.0%)"``.

    Returns:
        The formatted string.
    """
    # Parentheses: detected first so the remaining checks look at the inner pattern
    # (otherwise a closing ")" hides the trailing "%" and commas).
    parentheses = len(pattern) >= 2 and pattern[0] == "(" and pattern[-1] == ")"
    core = pattern[1:-1] if parentheses else pattern

    # Decimals
    match = re.search(r"\.(\d+)", core)
    decimals = len(match.group(1)) if match else 0

    # Percentage
    percentage = core.endswith("%")

    # Thousands Divisor
    pattern_aux = core[:-1] if percentage else core
    match = re.search(r"(,+)$", pattern_aux)
    divisor_thousands = len(match.group(1)) if match else 0

    # Integer part
    pattern_aux = core.split(".")[0].rstrip("%,")
    integer_part = max(1, pattern_aux.count("0"))

    # Thousands Separator
    comma_separator = "," in pattern_aux  # TODO: improve the separators part

    # Adjusted Value
    adjusted_value = value / (1000**divisor_thousands)
    if parentheses:
        # The parentheses already carry the sign.
        adjusted_value = abs(adjusted_value)

    # String Length: the zero-padded width follows from the pattern alone. The
    # separators are those needed by the minimum integer digits, so no logarithm
    # of the value is required (it is undefined for values <= 0).
    sign_length = 1 if adjusted_value < 0 else 0
    decimal_length = decimals + 1 if decimals else 0
    percentage_length = 1 if percentage else 0
    commas_length = (integer_part - 1) // 3 if comma_separator else 0
    string_length = (
        sign_length + integer_part + decimal_length + percentage_length + commas_length
    )

    grouping = "," if comma_separator else ""
    number_type = "%" if percentage else "f"
    formatted_value = format(
        adjusted_value, f"0{string_length}{grouping}.{decimals}{number_type}"
    )
    return f"({formatted_value})" if parentheses else formatted_value

In [None]:
import re

# Sample input for the formatter defined below.
x = 123456.78911111111
# Sections separated by ";" — presumably positive; negative; missing, in the
# style of spreadsheet number formats.
format_template = ",000,000,000.000%;(#,##0.00,%);-"


def formatter(value: float, format_template) -> str:
    """Format ``value`` with a ``;``-separated, spreadsheet-style template.

    The template holds up to three sections: ``positive;negative;missing``.
    When the negative section is absent the positive one is used for negative
    values as well; when the missing section is absent an empty string is
    returned for missing values.

    Args:
        value: Number to format; NaN/None is treated as missing.
        format_template: Template such as ``"0.00%;(0.00%);-"``.

    Returns:
        The missing-value text for NaN/None, otherwise the result of
        ``float_to_string`` with the section matching the sign of ``value``.
    """
    formats = format_template.split(";")
    format_pos = formats[0]
    format_neg = formats[1] if len(formats) >= 2 else format_pos
    # The third section is the missing-value text itself.
    format_missing = formats[2] if len(formats) >= 3 else ""

    # pd.isna catches every NaN object (and None); an identity test against
    # np.nan only matches that one singleton.
    if pd.isna(value):
        return format_missing
    return float_to_string(value, format_pos if value >= 0 else format_neg)

# Try the formatter on the sample value; the cell displays the resulting string.
formatter(x, format_template)

In [None]:
from typing import Optional, Union


def format_float(
    data: Union[pd.Series, pd.DataFrame],
    precision: Optional[int] = 0,
    percentage: bool = False,
    parentheses: bool = True,
    missing: str = "-",
):
    """Convert numeric values to display strings, element by element.

    Args:
        data: Series or DataFrame of numbers.
        precision: Number of decimal places; ``None`` is treated as 0.
        percentage: Format with ``%`` (value multiplied by 100) instead of
            fixed-point.
        parentheses: Show negative values as ``(1.23)``; otherwise as ``-1.23``.
        missing: Text used for NaN/None entries.

    Returns:
        An object with the same shape and labels as ``data`` holding strings.
    """
    decimals = 0 if precision is None else precision
    number_type = "%" if percentage else "f"

    def formatting(x):
        # Missing values are resolved here: once formatted they would be the
        # string "nan" and no longer replaceable as NaN.
        if pd.isna(x):
            return missing
        magnitude = f"{abs(x):.{decimals}{number_type}}"
        if x >= 0:
            # Non-negative values get no sign decoration.
            return magnitude
        return f"({magnitude})" if parentheses else f"-{magnitude}"

    # Series has no applymap, and DataFrame.applymap is deprecated; Series.map
    # covers both input types.
    if isinstance(data, pd.DataFrame):
        return data.apply(lambda col: col.map(formatting))
    return data.map(formatting)


df_perf = performance.table.T.copy()
# Portuguese labels for the performance-table columns.
# NOTE(review): assumes performance.table.T exposes these keys as columns.
DICT_COLS = {
    "Return": "Retorno",
    "Vol": "Volatilidade",
    "Skew": "Assimetria",
    "Kurt": "Curtose",
    "Sharpe": "Sharpe",
    "Sortino": "Sortino",
    "DD 1%q": "DD (1%)",
    "Max DD": "Max DD",
    "Start Date": "Início",
}
# The column groups use exactly the labels DICT_COLS produces; the previous
# long-form labels matched no column and raised a KeyError.
COLS_FLOAT_PCT = [
    "Retorno",
    "Volatilidade",
    "DD (1%)",
    "Max DD",
]
COLS_FLOAT_DEFAULT = [
    "Sharpe",
    "Assimetria",
    "Curtose",
    "Sortino",
]
DATE_COLS = [
    "Início",
]
# Rename first so the label-based selections below find their columns.
df_perf = df_perf.rename(columns=DICT_COLS)
df_perf[COLS_FLOAT_PCT] = format_float(
    df_perf[COLS_FLOAT_PCT], precision=2, percentage=True, parentheses=True, missing="-"
)
df_perf[COLS_FLOAT_DEFAULT] = format_float(
    df_perf[COLS_FLOAT_DEFAULT],
    precision=3,
    percentage=False,
    parentheses=True,
    missing="-",
)
df_perf