In [None]:
import pandas as pd
import numpy as np

In [None]:
# Small hand-written sample table: one column per ticker, five values each,
# indexed by consecutive calendar days starting 2024-06-01.
data = dict(
    AAPL=[130.41, 131.40, 130.85, 133.56, 133.94],
    GOOGL=[2750.00, 2745.50, 2760.40, 2770.30, 2780.10],
    MSFT=[310.50, 311.60, 309.90, 312.45, 313.70],
)
dates = pd.date_range("2024-06-01", periods=5, freq="D")
df = pd.DataFrame(data, index=dates)
df

In [None]:
# Read the intermediate price table and reshape it to wide form:
# one row per `date`, one column per `stock_ticker`, cells taken from `adj_close`.
features = pd.read_csv("../data/02_intermediate/price_data.csv")
prices = features.pivot(index="date", columns="stock_ticker", values="adj_close")
prices

In [None]:
# Load the concatenated model signals and key them by their `timestamp` column.
signals = pd.read_csv(
    "../data/07_model_output/ml_technique_modeling/signals_concatenated"
)
signals_time_index = signals.set_index("timestamp")
# Cast the index labels to strings.
# NOTE(review): presumably so they align with the string dates in `prices` — confirm.
signals_time_index = signals_time_index.set_axis(
    signals_time_index.index.astype("str"), axis="index"
)
signals_time_index

In [None]:
# Equal weighting: every cell holds 1 / (number of columns in signals_time_index),
# laid out on the same index and columns as the signal frame.
weights = pd.DataFrame(
    1 / signals_time_index.shape[1],
    index=signals_time_index.index,
    columns=signals_time_index.columns,
)
weights

In [None]:
def calc_portfolio_returns(
    prices: pd.DataFrame, weights: pd.DataFrame
) -> pd.DataFrame:
    """Combine per-asset period returns into a single weighted portfolio return.

    Parameters
    ----------
    prices : pd.DataFrame
        Price levels, one column per asset.
    weights : pd.DataFrame
        Weights applied to each asset return; aligned to the returns on both
        index and columns by pandas.

    Returns
    -------
    pd.DataFrame
        Single-column frame named "Portfolio Returns".
    """
    # fill_method=None: missing prices are not forward-filled before differencing.
    asset_returns = prices.pct_change(fill_method=None)
    weighted_returns = asset_returns.mul(weights)
    # Row-wise sum skips NaN, so the first row (no prior price) comes out as 0.
    combined = weighted_returns.sum(axis=1)
    return combined.rename("Portfolio Returns").to_frame()

In [None]:
def adj_portfolio_returns(
    portfolio_returns: pd.DataFrame,
    signals: pd.DataFrame,
    trading_cost_params: dict[str, float],
) -> pd.DataFrame:
    """Deduct proportional trading costs from a portfolio return series.

    Parameters
    ----------
    portfolio_returns : pd.DataFrame
        Frame containing a "Portfolio Returns" column. It is left unmodified;
        the result is built on a copy.
    signals : pd.DataFrame
        Per-asset signal values; rows are aligned to ``portfolio_returns`` by index.
    trading_cost_params : dict[str, float]
        Must contain "bp_trading_cost", the cost rate expressed in basis points.

    Returns
    -------
    pd.DataFrame
        Copy of the input with the added columns "Cumulative Returns",
        "Total Trading Costs", "Normalized Trading Costs" and
        "Adjusted Portfolio Returns".

    Raises
    ------
    KeyError
        If "bp_trading_cost" or the "Portfolio Returns" column is missing.
    """
    # Work on a copy so the caller's frame is not silently extended; this keeps
    # the calling notebook cell idempotent when it is re-run.
    result = portfolio_returns.copy()

    # Basis points -> fraction (1 bp = 0.0001).
    cost_rate = trading_cost_params["bp_trading_cost"] / 10000

    cumulative = (1 + result["Portfolio Returns"]).cumprod()
    result["Cumulative Returns"] = cumulative

    # Change in each signal between consecutive rows; the first row has no
    # predecessor, so it is filled with the signal itself.
    # TODO: unclear whether filling the first row with the original signal is
    # correct, since the position would usually be bought at the previous day's
    # close, before the signal is given.
    trade_signals = signals.diff().fillna(signals)

    # Position size going into each row is the previous row's cumulative value
    # (1 for the first row).
    position_sizes = cumulative.shift(1).fillna(1)
    investment_changes = trade_signals.multiply(position_sizes, axis=0).abs()

    result["Total Trading Costs"] = (investment_changes * cost_rate).sum(axis=1)
    # Express the cost relative to the current cumulative value so it can be
    # subtracted from a period return.
    result["Normalized Trading Costs"] = (
        result["Total Trading Costs"] / result["Cumulative Returns"]
    )
    result["Adjusted Portfolio Returns"] = (
        result["Portfolio Returns"] - result["Normalized Trading Costs"]
    )
    return result

In [None]:
def mean_return(returns: pd.Series) -> float:
    """Return the arithmetic average of the given return series."""
    average = returns.mean()
    return average


def std_deviation(returns: pd.Series) -> float:
    """Return the standard deviation of the given return series (pandas ``Series.std``)."""
    dispersion = returns.std()
    return dispersion


def cagr(returns: pd.Series) -> float:
    """Calculate the Compound Annual Growth Rate (CAGR) of returns.

    Parameters
    ----------
    returns : pd.Series
        Period returns whose index holds dates (or values parseable by
        ``pd.to_datetime``). The series is not modified.

    Returns
    -------
    float
        Annualised growth rate over the span between the first and last index
        entries, using a 365.25-day year. NaN when the series is empty or the
        first and last dates coincide, because the rate is undefined there.
    """
    if returns.empty:
        return float("nan")

    # Parse into a local index; assigning back to ``returns.index`` would
    # rewrite the index of the caller's Series as a side effect.
    timeline = pd.to_datetime(returns.index)
    total_period = (timeline[-1] - timeline[0]).days / 365.25
    if total_period == 0:
        # A zero-length span would otherwise divide by zero in the exponent.
        return float("nan")

    growth_factor = (1 + returns).prod()
    return growth_factor ** (1 / total_period) - 1


def max_drawdown(returns: pd.Series) -> float:
    """Return the largest peak-to-trough decline of the compounded return path.

    The result is zero or negative: it is the minimum, over all rows, of the
    relative shortfall of the compounded value from its running maximum.
    """
    wealth = returns.add(1).cumprod()
    running_high = wealth.cummax()
    shortfall = wealth.sub(running_high).div(running_high)
    return shortfall.min()


def calmar_ratio(returns: pd.Series) -> float:
    """Return the Calmar ratio: CAGR divided by the magnitude of the maximum drawdown."""
    annual_growth = cagr(returns)
    worst_decline = abs(max_drawdown(returns))
    return annual_growth / worst_decline


def sharpe_ratio(
    returns: pd.Series,
    risk_free_rate: float = 0.0,
    periods_per_year: int = 252,
) -> float:
    """Calculate the annualised Sharpe ratio of returns.

    Parameters
    ----------
    returns : pd.Series
        Per-period returns.
    risk_free_rate : float, default 0.0
        Annual risk-free rate; it is divided by ``periods_per_year`` to obtain
        the per-period rate subtracted from each return.
    periods_per_year : int, default 252
        Number of return periods in a year, used both to de-annualise the
        risk-free rate and to annualise the ratio. The default of 252 keeps the
        previous behaviour; pass e.g. 12 for monthly data.

    Returns
    -------
    float
        Mean excess return divided by the standard deviation of ``returns``,
        scaled by ``sqrt(periods_per_year)``.
    """
    excess_returns = returns - risk_free_rate / periods_per_year
    # Subtracting a constant does not change dispersion, so the standard
    # deviation of the raw returns is used as the denominator.
    return excess_returns.mean() / returns.std() * np.sqrt(periods_per_year)


def calculate_performance_metrics(portfolio_returns: pd.DataFrame) -> dict[str, float]:
    """Summarise the cost-adjusted return series as a dictionary of metrics.

    Parameters
    ----------
    portfolio_returns : pd.DataFrame
        Frame containing an "Adjusted Portfolio Returns" column.

    Returns
    -------
    dict[str, float]
        Keys, in order: "mean_return", "variance_return", "sharpe_ratio",
        "cagr", "max_drawdown", "calmar_ratio".
    """
    adjusted = portfolio_returns.loc[:, "Adjusted Portfolio Returns"]
    return {
        "mean_return": mean_return(adjusted),
        # NOTE: despite its name this entry holds the standard deviation, not
        # the variance; the key is kept so existing consumers keep working.
        "variance_return": std_deviation(adjusted),
        # Each metric appears exactly once; a repeated key in a dict literal
        # only recomputes the value and overwrites the earlier entry.
        "sharpe_ratio": sharpe_ratio(adjusted),
        "cagr": cagr(adjusted),
        "max_drawdown": max_drawdown(adjusted),
        "calmar_ratio": calmar_ratio(adjusted),
    }

In [None]:
from fpdf import FPDF


def dict_to_pdf(dictionary, pdf_filename):
    """Write a mapping to a PDF file as a bordered two-column table.

    Parameters
    ----------
    dictionary : mapping
        Each key is rendered with ``str``; each value is passed through
        ``round(value, 2)`` first, so values must support rounding.
    pdf_filename : str
        Destination handed to ``FPDF.output``.
    """
    document = FPDF()
    document.add_page()
    document.set_auto_page_break(auto=True, margin=15)
    document.set_font("Arial", size=12)

    # Cell geometry is taken while the 12pt body font is active: each column is
    # the page width divided by 2.5, each row 1.5x the current font size.
    column_width = document.w / 2.5
    line_height = document.font_size * 1.5

    # Centred bold heading on its own line, then switch back to the body font.
    document.set_font("Arial", "B", 14)
    document.cell(0, 10, "Strategy Performance", 0, 1, "C")
    document.set_font("Arial", size=12)

    # One table row per entry: label cell, then value cell that ends the line.
    for label, number in dictionary.items():
        document.cell(column_width, line_height, str(label), border=1)
        document.cell(
            column_width, line_height, str(round(number, 2)), border=1, ln=1
        )

    document.output(pdf_filename)
# NOTE(review): `returns` is not assigned at notebook level in any visible cell, so
# this call raises NameError on a fresh kernel. Presumably it should receive the
# dict produced by calculate_performance_metrics — confirm, and run it after that
# dict has been computed.
dict_to_pdf(returns, "performance_metrics.pdf")

In [None]:
def plot_columns(
    plotting_dataframe: pd.DataFrame, plotting_params: dict[str, list[str]]
):
    """
    Draw the requested DataFrame columns as lines on one new 10x6 figure.

    Parameters:
    plotting_dataframe (pd.DataFrame): Frame holding the series to draw.
    plotting_params (dict[str, list[str]]): Mapping whose 'columns' entry lists
                                            the column names to draw; a missing
                                            entry means nothing is drawn.

    Returns:
    The `plt` object itself, after the figure has been populated.

    NOTE(review): relies on a notebook-level `plt` (presumably
    matplotlib.pyplot); no import of it is visible here — confirm it exists.
    """
    requested = plotting_params.get("columns", [])

    plt.figure(figsize=(10, 6))

    available = plotting_dataframe.columns
    for column in requested:
        # Unknown names are reported and skipped rather than raising.
        if column not in available:
            print(f"Warning: Column '{column}' not found in DataFrame.")
            continue
        plt.plot(plotting_dataframe[column], label=column)

    plt.legend()
    plt.xlabel("Index")
    plt.ylabel("Value")
    plt.title("Plot of Specified Columns")
    plt.grid(True)
    return plt

In [None]:
# Gross portfolio returns from the price table and the weight table.
return_df = calc_portfolio_returns(prices, weights)
# Feed the frame computed on the line above into the cost adjustment; there is
# no notebook-level `portfolio_returns` variable to pass here.
adj_return_df = adj_portfolio_returns(
    return_df, signals_time_index, {"bp_trading_cost": 10}
)
# Compare gross and cost-adjusted returns on one chart.
plotting_params = {"columns": ["Portfolio Returns", "Adjusted Portfolio Returns"]}
plot = plot_columns(adj_return_df, plotting_params)