In [5]:
import pandas as pd
import numpy as np

from pathlib import Path
from aamalha.utils.performance import Performance

In [None]:
paths = [
    r"C:\Users\pcampos\OneDrive - Insper - Institudo de Ensino e Pesquisa\Dissertação Mestrado\Analysis\backtests\L-EW.xlsx",
    r"C:\Users\pcampos\OneDrive - Insper - Institudo de Ensino e Pesquisa\Dissertação Mestrado\Analysis\backtests\TSMOM-1.xlsx",
]
list_series = []
for file_path in map(Path, paths):
    s_backtest = pd.read_excel(file_path, index_col=0)["backtest"]
    s_backtest.name = file_path.stem
    list_series.append(s_backtest)
df_backtest = pd.concat(list_series, axis=1).sort_index(ascending=True)
df_backtest

In [70]:
data = df_backtest.copy()
def export_latex_annual_returns(data: pd.DataFrame): 
    INDEX_NAME = "Ano"
    COLUMN_NAME = "Backtests"
    CAPTION = "Retornos Anuais dos Backtests"
    SOURCE = "Pedro Monzú, 2024"
    LABEL = "annual_returns"

    output = (
        "\\begin{table}[htbp]\n"
        "\t\centering\n"
        "\t\caption{%s}\n"
        "\n\t%s\n"
        "\t\\vspace{0.3cm}\n"
        "\t\label{tab:%s}\n"
        "\t\source{%s}\n"
        "\end{table}\n"
    )

    df_year_returns = (
        np.exp(np.log(data).diff(1).groupby(data.index.to_period("Y")).sum()) - 1
    )
    df_year_returns.index.name = INDEX_NAME
    df_year_returns.columns.name = COLUMN_NAME

    formatted_df = df_year_returns.applymap(
        lambda x: f"({abs(x):.2%})" if x < 0 else f"{x:.2%}"
    )
    formatted_df = formatted_df.replace(np.nan, "-")
    latex_output = formatted_df.to_latex(
        escape=False, column_format="c" * (len(formatted_df.columns) + 1)
    ).replace("%", "\%").replace("\n", "\n\t")
    return output % (CAPTION, latex_output, LABEL, SOURCE)

print(export_latex_annual_returns(data))

\begin{table}[htbp]
	\centering
	\caption{Retornos Anuais dos Backtests}

	\begin{tabular}{ccc}
	\toprule
	Backtests &      L-EW &   TSMOM-1 \\
	Ano  &           &           \\
	\midrule
	1999 &     4.19\% &   (6.03\%) \\
	2000 &  (10.51\%) &    18.69\% \\
	2001 &   (7.28\%) &    50.60\% \\
	2002 &    16.54\% &    39.32\% \\
	2003 &    36.96\% &    41.89\% \\
	2004 &    26.35\% &    32.26\% \\
	2005 &     2.40\% &     1.68\% \\
	2006 &    11.57\% &   (7.84\%) \\
	2007 &    20.46\% &    11.22\% \\
	2008 &  (19.48\%) &    36.45\% \\
	2009 &    12.20\% &     5.25\% \\
	2010 &     6.60\% &   (3.93\%) \\
	2011 &  (10.65\%) &   (0.81\%) \\
	2012 &     9.26\% &     4.63\% \\
	2013 &   (5.45\%) &   (4.84\%) \\
	2014 &  (12.53\%) &     2.07\% \\
	2015 &  (10.53\%) &    12.12\% \\
	2016 &    10.50\% &     1.35\% \\
	2017 &    24.13\% &    22.61\% \\
	2018 &  (10.64\%) &  (18.45\%) \\
	2019 &     5.26\% &   (7.39\%) \\
	2020 &  (13.31\%) &    36.41\% \\
	2021 &   (8.25\%) &  (10.63\%) \\
	2022 & 

  latex_output = formatted_df.to_latex(


In [61]:
performance = Performance(data)


In [87]:
x = -1.2345
precision = 7
percentage = True
parentheses = False
formatted_string = f"{'('if parentheses else '-'}{abs(x):.{precision}{'%' if percentage else 'f'}}{')'if parentheses else ''}"
formatted_string

'-123.4500000%'

In [285]:
import re

x = 123456.78911111111
format_template = ",000,000,000.000%;(#,##0.00,%);-"


formats = format_template.split(";")
format_pos = formats[0]
format_neg = format_pos if len(formats) < 2 else formats[1]
format_missing = format_pos if len(formats) == 3 else ""


pattern = format_pos

# Parentheses
parentheses == pattern[0] == "(" and pattern[-1] == ")"

# Decimals
match = re.search(r"\.(\d+)", pattern)
decimals = len(match.group(1)) if match else 0

# Percentage
percentage = format_pos[-1] == "%"

# Thousands Divisor
pattern_aux = pattern[: -1 if percentage else None]
match = re.search(r"(\,+)$", pattern_aux)
divisor_thousands = len(match.group(1)) if match else 0

# Integer part
pattern_aux = pattern.split(".")[0].rstrip("%,")
pos_last_zero = pattern_aux.rfind("0")
integer_part = max(1, pattern_aux.count("0"))

# Thousands Separator
comma_separator = "," in pattern_aux  # TODO: improve the separators part

# Adjusted Value
adjusted_value = x / (1000 ** (divisor_thousands))

# String Length
decimal_length = decimals + 1 if decimals else 0
percentage_length = 1 if percentage else 0
value_aux = adjusted_value * (100 if percentage else 1)
commas_length = int(np.log10(value_aux) // 3) if comma_separator else 0
string_length = integer_part + decimal_length + percentage_length + commas_length

formatted_value = f"{adjusted_value:0{string_length}{',' if comma_separator else ''}.{decimals}{'%' if percentage else 'f'}}"
formatted_value = f"({formatted_value})" if parentheses else formatted_value
formatted_value

'012,345,678.911%'

In [286]:
parentheses == pattern[0] == "(" and pattern[-1] == ")"
parentheses

False

In [89]:
from typing import Optional, Union


def format_float(
    data: Union[pd.Series, pd.DataFrame],
    precision: Optional[int] = 0,
    percentage: bool = False,
    parentheses: bool = True,
    missing: [str] = "-",
):
    formatting = lambda x: (
        f"{'('if parentheses else '-'}"  # only if x <0 else ''
        f"{abs(x):.{precision}{'%' if percentage else 'f'}}"
        f"{')'if parentheses else ''}"  # only if x <0 else ''
    )
    return data.applymap(formatting).replace(np.nan, missing)


df_perf = performance.table.T.copy()
DICT_COLS = {
    "Return": "Retorno",
    "Vol": "Volatilidade",
    "Sharpe": "Índice de Sharpe",
    "Skew": "Assimetria",
    "Kurt": "Curtose",
    "Sortino": "Índice de Sortino",
    "DD 1%q": "Drawdown (1º Percentil)",
    "Max DD": "Máximo Drawdown",
    "Start Date": "Início da Série",
}
COLS_FLOAT_PCT = [
    "Retorno",
    "Volatilidade",
    "Drawdown (1º Percentil)",
    "Máximo Drawdown",
]
COLS_FLOAT_DEFAULT = [
    "Índice de Sharpe",
    "Assimetria",
    "Curtose",
    "Índice de Sortino",
]
DATE_COLS = [
    "Início da Série",
]
df_perf = df_perf.rename(columns=DICT_COLS)
df_perf[COLS_FLOAT_PCT] = format_float(
    df_perf[COLS_FLOAT_PCT], precision=2, percentage=True, parentheses=True, missing="-"
)
df_perf[COLS_FLOAT_DEFAULT] = format_float(
    df_perf[COLS_FLOAT_DEFAULT],
    precision=3,
    percentage=False,
    parentheses=True,
    missing="-",
)
df_perf

Unnamed: 0_level_0,Retorno,Volatilidade,Índice de Sharpe,Assimetria,Curtose,Índice de Sortino,Drawdown (1º Percentil),Máximo Drawdown,Início da Série
Backtests,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
L-EW,(2.72%),(10.66%),(0.255),(0.738),(9.034),(0.315),(28.79%),(45.15%),1999-06-30
TSMOM-1,(8.73%),(11.27%),(0.775),(1.378),(25.352),(1.125),(22.49%),(31.71%),1999-06-30


In [90]:
performance.table.T.copy()

Unnamed: 0_level_0,Return,Vol,Sharpe,Skew,Kurt,Sortino,DD 1%q,Max DD,Start Date
Backtests,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
L-EW,0.027201,0.106556,0.255273,-0.737965,9.033851,0.314624,-0.287885,-0.451458,1999-06-30
TSMOM-1,0.087329,0.112738,0.774618,1.377698,25.351796,1.12528,-0.224892,-0.317134,1999-06-30
