In [44]:
import numpy as np
import pandas as pd
from plotly import express as px
import seaborn as sns

In [43]:
# =========================
# 2. LOAD RAW DATA
# =========================
# Read the raw CSV, convert the "date" column to datetimes, then order the
# rows by date and renumber the index from zero. Built as one chain so the
# cell produces `df` in a single step without mutating it afterwards.
df = (
    pd.read_csv("../data/raw/XAUT_USD_history.csv")
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .sort_values("date")
    .reset_index(drop=True)
)

In [None]:
# =========================
# 3. FEATURE ENGINEERING
# =========================

def log_returns(series: pd.Series) -> pd.Series:
    """Return the one-step log returns ``log(p_t / p_{t-1})`` of a series.

    Parameters
    ----------
    series : pd.Series
        Price-like values, in the order the returns should be taken.

    Returns
    -------
    pd.Series
        Same index as ``series``. The first element is NaN because it has
        no predecessor. Any return that involves a missing or non-positive
        value is NaN as well.
    """
    # A log return is undefined for zero or negative prices. Left alone they
    # produce -inf/+inf (and RuntimeWarnings), which then poison every
    # rolling statistic computed on the result, so mask them to NaN first.
    positive = series.where(series > 0)
    return np.log(positive / positive.shift(1))


def reconstruct_price_from_log_returns(
    anchor_price: float, log_ret: pd.Series
) -> pd.Series:
    """Rebuild a price path by compounding log returns from an anchor price.

    Each output value is ``anchor_price * exp(sum of log_ret up to that row)``.

    Note: ``cumsum`` skips NaN by default, so a NaN entry in ``log_ret``
    yields NaN at that position only; the running sum carries on past it.
    """
    cumulative = log_ret.cumsum()
    growth_factor = np.exp(cumulative)
    return anchor_price * growth_factor


# -----------------
# Rolling stats
# -----------------


def rolling_mean(x: pd.Series, window: int):
    """Mean of ``x`` over a trailing window of ``window`` observations.

    Rows that do not yet have a full window are NaN (pandas' default
    ``min_periods`` for an integer window).
    """
    windowed = x.rolling(window=window)
    return windowed.mean()


def rolling_std(x: pd.Series, window: int):
    """Population standard deviation (``ddof=0``) of ``x`` over a trailing window.

    Rows that do not yet have a full window are NaN.
    """
    windowed = x.rolling(window=window)
    return windowed.std(ddof=0)


def ema(x: pd.Series, span: int):
    """Exponentially weighted moving average of ``x`` for the given ``span``.

    ``adjust=False`` selects the recursive form
    ``y_t = (1 - a) * y_{t-1} + a * x_t`` with ``a = 2 / (span + 1)``,
    seeded with the first observation.
    """
    weighted = x.ewm(span=span, adjust=False)
    return weighted.mean()


# -----------------
# Z-score
# -----------------


def rolling_zscore(x: pd.Series, window: int):
    """Z-score of each value against the trailing window ending at that value.

    Computed as ``(x - rolling mean) / rolling std`` using ``rolling_mean``
    and ``rolling_std``. A rolling std of exactly 0 is replaced with NaN so
    that flat windows give NaN rather than a division by zero.
    """
    center = rolling_mean(x, window)
    spread = rolling_std(x, window).replace(0, np.nan)
    deviation = x - center
    return deviation / spread


def zscore_of_log_returns(close: pd.Series, window: int):
    """Rolling z-score of the log returns of ``close``.

    Log returns come from ``log_returns``; the z-score is then taken over a
    trailing window of ``window`` returns by ``rolling_zscore``.
    """
    return rolling_zscore(log_returns(close), window)


# -----------------
# Volatility
# -----------------


def rolling_volatility(close: pd.Series, window: int, annualize=False, periods=252):
    """Rolling standard deviation of the log returns of ``close``.

    Parameters
    ----------
    close : pd.Series
        Series passed to ``log_returns``.
    window : int
        Number of returns in each trailing window.
    annualize : bool, default False
        When true, scale the result by ``sqrt(periods)``.
    periods : int, default 252
        Scaling factor used under the square root when ``annualize`` is set.

    Returns
    -------
    pd.Series
        The output of ``rolling_std`` on the log returns, optionally scaled.
    """
    returns = log_returns(close)
    volatility = rolling_std(returns, window)

    if not annualize:
        return volatility

    return volatility * np.sqrt(periods)


def rolling_corr(x: pd.Series, y: pd.Series, window: int):
    """Correlation between ``x`` and ``y`` over a trailing window of ``window`` rows.

    Uses pandas' rolling ``corr`` with its default settings; rows without a
    full window are NaN.
    """
    windowed = x.rolling(window=window)
    return windowed.corr(other=y)


def rolling_cov(x: pd.Series, y: pd.Series, window: int):
    """Covariance between ``x`` and ``y`` over a trailing window of ``window`` rows.

    Uses pandas' rolling ``cov`` with its default settings; rows without a
    full window are NaN.
    """
    windowed = x.rolling(window=window)
    return windowed.cov(other=y)