In [None]:
import sys
import os
sys.path.append(os.path.abspath('../../'))

from rcgp.morcgp import MOGPRegressor, MORCGPRegressor, MOGPRegressor_NC, MORCGPRegressor_NC, MORCGPRegressor_NC_fixed_weights, MORCGPRegressor_fixed_weights, MORCGPRegressor_PM
from rcgp.kernels import ConstantMean, RBFKernel, SineMean
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import datetime
import matplotlib.dates as mdates

# Publication-style Matplotlib defaults, grouped by what they control.

# Render all text through LaTeX (with amsmath loaded) in a serif face.
_TEXT_RENDERING = {
    "text.usetex": True,
    "font.family": "serif",
    "text.latex.preamble": r"\usepackage{amsmath}",
}

# Enlarged fonts for the base text, axis labels, tick labels and legends.
_FONT_SIZES = {
    'font.size': 24,
    'axes.labelsize': 24,
    'xtick.labelsize': 24,
    'ytick.labelsize': 24,
    'legend.fontsize': 20,
}

# Thicker lines and slightly larger markers.
_LINE_DEFAULTS = {
    'lines.linewidth': 3,
    'lines.markersize': 6,
}

plt.rcParams.update({**_TEXT_RENDERING, **_FONT_SIZES, **_LINE_DEFAULTS})

In [None]:
def generate_A(d, r=1, base_strength=1.0, noise_level=0.1, seed=None):
    """
    Build a d x r matrix whose entries are `base_strength` plus Gaussian noise.

    Parameters:
    d (int): Number of rows.
    r (int): Number of columns.
    base_strength (float): Constant value shared by every entry.
    noise_level (float): Scale applied to the standard-normal perturbation.
    seed (int or None): If given, the noise is drawn from a private
        `np.random.RandomState(seed)`, so NumPy's global random state is left
        untouched. The values are the same as those obtained from
        `np.random.seed(seed)` followed by `np.random.randn(d, r)`.
        If None, the noise is drawn from NumPy's global random state.

    Returns:
    np.ndarray: Array of shape (d, r).
    """
    if seed is None:
        draw_normal = np.random.randn
    else:
        # A private generator avoids reseeding the global RNG as a side effect.
        draw_normal = np.random.RandomState(seed).randn

    shared_component = base_strength * np.ones((d, r))
    noise = noise_level * draw_normal(d, r)
    return shared_component + noise

# Fetching data and preprocessing

In [None]:
import yfinance as yf
import pandas as pd

# Index symbols and the date window to download.
tickers = ["^DJI", "^GSPC", "^IXIC"]
start_date = "2024-01-01"
end_date = "2025-01-01"

# Daily bars, grouped per ticker, with auto-adjusted prices.
download_options = dict(
    start=start_date,
    end=end_date,
    interval="1d",
    group_by="ticker",
    auto_adjust=True,
)
data = yf.download(tickers, **download_options)

# Gather the closing price of every index into a single frame: one column per
# symbol, with the '^' removed from the column name.
close_df = pd.DataFrame()
for symbol in tickers:
    close_df[symbol.replace('^', '')] = data[symbol]['Close']

close_df.index = pd.to_datetime(close_df.index)

print(close_df.head())


In [None]:
# Inspect the closing prices at integer row position 183 of the raw frame.
# NOTE(review): rows 180-186 of the NaN-dropped data are perturbed in a later cell;
# positions in close_df match those rows only if dropna() removes nothing
# before row 183 -- confirm.
close_df.iloc[183]

In [None]:
def add_focused_outliers(Y, begin_idx, num_outliers, y_value):
    """
    Return a copy of Y in which a run of first-column entries is overwritten with a constant.

    The run starts at row `begin_idx` and covers `num_outliers` rows; it is cut
    short at the last row of Y. The input array is not modified.

    Parameters:
    Y (np.ndarray): Original N x D array
    begin_idx (int): Row at which the run of outliers starts
    num_outliers (int): Length of the run
    y_value (float): Value written into every affected entry

    Returns:
    np.ndarray: Copy of Y with the outliers inserted in column 0
    """
    n_rows = Y.shape[0]
    affected_rows = slice(begin_idx, min(begin_idx + num_outliers, n_rows))

    corrupted = Y.copy()
    corrupted[affected_rows, 0] = y_value
    return corrupted

def vshaped_outliers(Y, begin_idx, y_values):
    """
    Scale a run of first-column values of Y by the factors in `y_values`.

    Row `begin_idx + k` of column 0 is multiplied by `y_values[k]`. Factors that
    would fall past the last row of Y are ignored. If the run starts at or
    beyond the end of Y, nothing is modified and a plain copy is returned.
    The input array is never modified.

    Parameters:
    Y (np.ndarray): Original N x D array (floating-point, since the scaling is done in place on the copy)
    begin_idx (int): Starting row for modifying values; expected to be non-negative
    y_values (list of float): List of fractions to multiply with the original values

    Returns:
    np.ndarray: Modified array with V-shaped outliers
    """
    Y_modified = Y.copy()
    end_idx = min(begin_idx + len(y_values), Y.shape[0])
    num_replace = end_idx - begin_idx

    # A non-positive count means there is no row to scale. Slicing `y_values`
    # with a negative count would keep a non-empty prefix that cannot be
    # broadcast into the empty target slice, so return early instead.
    if num_replace <= 0:
        return Y_modified

    factors = np.asarray(y_values[:num_replace])
    Y_modified[begin_idx:end_idx, 0] *= factors

    return Y_modified

In [None]:
# Keep only the rows for which every index has a closing price.
clean_df = close_df.dropna()
N = len(clean_df)

# Training inputs are the 1-based row numbers; the prediction grid is 1000
# evenly spaced points on [1, N + 1].
X = np.arange(1, N + 1)[:, np.newaxis]
X_test = np.linspace(1, N + 1, 1000)[:, np.newaxis]

Y = clean_df.to_numpy()

print("X shape:", X.shape)  # (N, 1)
print("Y shape:", Y.shape)  # (N, 3): one column per ticker

# Contaminate the first series with a V-shaped dip over rows 180-186.
# Alternative contamination with a constant level:
# Y_with_outliers = add_focused_outliers(Y, 120, 10, 35_000)
dip_factors = [0.99, 0.98, 0.96, 0.9, 0.92, 0.94, 0.96]
Y_with_outliers = vshaped_outliers(Y=Y, begin_idx=180, y_values=dip_factors)


# PLOTTING: one panel per series, sharing the time axis.
fig, axes = plt.subplots(3, 1, figsize=(10, 9), sharex=True)

for series_idx, ax in enumerate(axes):
    ax.plot(X, Y_with_outliers[:, series_idx], 'o', color='black', alpha=0.6)
    ax.set_ylabel(f"Series {series_idx+1}")

axes[-1].set_xlabel("Time Index")

plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()


In [None]:
# Standardize each series column-wise. Two independent scalers are kept:
# one fitted on the contaminated data and one fitted on the clean data.
scaler_Y_outliers = StandardScaler().fit(Y_with_outliers)
Y_with_outliers_standardized = scaler_Y_outliers.transform(Y_with_outliers)

scaler_Y = StandardScaler().fit(Y)
Y_standardized = scaler_Y.transform(Y)

In [None]:
# Pairwise correlation between the series (columns are the variables).
corr_matrix = np.corrcoef(Y_with_outliers, rowvar=False)
series_names = [f'Series {k+1}' for k in range(3)]

plt.figure(figsize=(6, 5))
sns.heatmap(
    corr_matrix,
    annot=True,
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
    xticklabels=series_names,
    yticklabels=series_names,
)
plt.title("Correlation Matrix of Time Series with Outliers")
plt.tight_layout()
plt.show()

## MOGP

In [None]:
# Fixed seed so that the random initial mixing matrix -- and therefore the
# starting point of the hyperparameter optimisation -- is the same on every
# Restart & Run All.
initial_A = generate_A(d = 3, r = 2, seed = 0)
mogp = MOGPRegressor(mean=0, length_scale=3.4, noise=np.array([0.02]*3), A=initial_A)
mogp.fit(X, Y_with_outliers_standardized)
print('fitted data')
# Optimize hyperparameters
mogp.optimize_hyperparameters(print_opt_param=True, print_iter_param=True)

# Keep the optimised hyperparameters; the robust models in the following
# cells are initialised from them.
optim_lengthscale = mogp.length_scale
optim_noise = mogp.noise
optim_A = mogp.A
optim_B = optim_A @ optim_A.T

mu_mogp_standardized, var_mogp_standardized = mogp.predict(X_test)
# std_mogp = np.sqrt(var_mogp + mogp.noise)

# PLOTTING: standardized observations with the MOGP predictive mean.
fig, axes = plt.subplots(3, 1, figsize=(10, 9), sharex=True)

for i in range(3):
    axes[i].plot(X, Y_with_outliers_standardized[:, i], 'o', color='black', alpha=0.6)
    axes[i].plot(X_test, mu_mogp_standardized[:, i], '-', color='green')
    axes[i].set_ylabel(f"Series {i+1}")

axes[-1].set_xlabel("Time Index")

plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

In [None]:
# Robust multi-output model, initialised with the hyperparameters optimised
# for the standard MOGP above (length scale, noise and mixing matrix A).
morcgp = MORCGPRegressor_fixed_weights(mean = 0, length_scale=optim_lengthscale, noise = optim_noise, A=optim_A)
# The weighting inputs are also taken from the MOGP fit: optim_B = optim_A @ optim_A.T.
predictive_mean, predictive_variances = morcgp.fit(X, Y_with_outliers_standardized, B_weighted=optim_B, noise_weighted=optim_noise)

# The values returned by fit() are overwritten by the outputs of the
# leave-one-out cross-validation optimisation.
predictive_mean, predictive_variances = morcgp.optimize_loo_cv(weighted=True, print_opt_param = True, print_iter_param=True, update_weights=True)

# Predictions on the dense test grid, still in standardized units.
mu_morcgp_standardized, var_morcgp_standardized = morcgp.predict(X_test)
# std_morcgp = np.sqrt(var_morcgp + morcgp.noise)

In [None]:
# "PM" variant of the robust model, started from the MOGP noise and mixing
# matrix but with its own initial length scale (3.5).
# NOTE(review): epsilons is 7/252 for the first output and 0 for the others --
# presumably the assumed outlier fraction (7 perturbed rows out of roughly 252
# trading days); confirm against MORCGPRegressor_PM.
morcgp_pm = MORCGPRegressor_PM(mean = 0, length_scale=3.5, noise = optim_noise, A=optim_A, epsilons=np.array([7/252, 0, 0]))
morcgp_pm.fit(X, Y_with_outliers_standardized)

# print(morcgp.loo_cv(length_scale=0.1, noise=0.04, A=A, weighted=True, B_weighted=B))

# Optimize hyperparameters
# (B_weighted / noise_weighted are intentionally left commented out here, so
# they are not passed to this optimisation call.)
morcgp_pm.optimize_loo_cv(weighted=True, print_opt_param = True, print_iter_param=True, 
                    #    B_weighted=optim_B, noise_weighted=optim_noise
                       )


# Only the predictive mean is kept; the second return value is discarded.
mu_pm_standardized, _ = morcgp_pm.predict(X_test)

In [None]:
# Map the standardized predictive means back to the original units, using the
# scaler fitted on the contaminated data (the data the models were fitted to).
mu_mogp = scaler_Y_outliers.inverse_transform(mu_mogp_standardized)
mu_morcgp = scaler_Y_outliers.inverse_transform(mu_morcgp_standardized)
mu_pm = scaler_Y_outliers.inverse_transform(mu_pm_standardized)

# Reshape the weight vectors into arrays with three columns.
# NOTE(review): assumes w01 is stacked output by output (all entries of
# output 1, then output 2, ...), so that each column holds one output -- confirm.
morcgp_w01 = morcgp.w01.reshape(3,-1).T
pm_w01 = morcgp_pm.w01.reshape(3,-1).T

In [None]:
# PLOTTING: observations and the three predictive means, in original units.
fig, axes = plt.subplots(3, 1, figsize=(10, 9), sharex=True)

# (predictions, colour) in drawing order: PM, MOGP, MORCGP.
model_curves = [
    (mu_pm, 'Orange'),
    (mu_mogp, 'Green'),
    (mu_morcgp, 'RoyalBlue'),
]

# One subplot per series.
for series_idx, ax in enumerate(axes):
    ax.plot(X, Y_with_outliers[:, series_idx], 'o', color='black', alpha=0.6)
    for mu, colour in model_curves:
        ax.plot(X_test, mu[:, series_idx], '-', color=colour)
    ax.set_ylabel(f"Series {series_idx+1}")

# Shared x-axis label on the bottom panel only.
axes[-1].set_xlabel("Time Index")

# Tidy the spacing, then render.
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

In [None]:
def plot_predictions(X, Y_with_outliers, mu_mogp, mu_morcgp, mu_pm, w_morcgp, w_pm, X_test):
    """
    Plot the three series with the MOGP / PM / MORCGP predictive means and the first-output weights.

    The figure has four stacked panels sharing the x-axis:
    DJIA (column 0, with rows 180-186 drawn as red crosses), the weights of
    the two robust models for the first output, S&P500 (column 1) and
    NASDAQ (column 2). Each panel with observations also shows the mean of
    the plotted series as a dashed grey line.

    Every observation is drawn at its own `X` coordinate, so the markers line
    up with the prediction curves (drawn against `X_test`) and with the other
    panels.

    Parameters:
    X (np.ndarray): N x 1 training inputs
    Y_with_outliers (np.ndarray): N x 3 observations; column 0 holds the outliers
    mu_mogp (np.ndarray): MOGP predictive mean at X_test, one column per series
    mu_morcgp (np.ndarray): MORCGP predictive mean at X_test, one column per series
    mu_pm (np.ndarray): PM predictive mean at X_test, one column per series
    w_morcgp (np.ndarray): MORCGP weights with N rows; only column 0 is plotted
    w_pm (np.ndarray): PM weights with N rows; only column 0 is plotted
    X_test (np.ndarray): Inputs at which the predictive means were evaluated

    Returns:
    None. The figure is displayed with plt.show().
    """
    N = X.shape[0]
    x_obs = X.flatten()
    x_grid = X_test.flatten()

    # Approximate calendar: N consecutive weekdays starting on 2024-01-02.
    # NOTE(review): exchange holidays are not skipped, so tick positions may
    # drift from the true trading dates -- confirm this is acceptable.
    dates = []
    current_date = datetime.date(2024, 1, 2)
    while len(dates) < N:
        if current_date.weekday() < 5:
            dates.append(current_date)
        current_date += datetime.timedelta(days=1)

    # One tick at the first calendar entry of every quarter-starting month.
    quarters = [1, 4, 7, 10]
    seen = set()
    tick_dates = []
    tick_positions = []
    for i, date in enumerate(dates):
        key = (date.year, date.month)
        if date.month in quarters and key not in seen:
            tick_dates.append(date)
            tick_positions.append(i)
            seen.add(key)

    tick_labels = [d.strftime('%b') for d in tick_dates]

    # Rows of Y_with_outliers that are highlighted as outliers.
    exclude_slice = slice(180, 187)
    is_outlier = np.zeros(N, dtype=bool)
    is_outlier[exclude_slice] = True

    fig, axs = plt.subplots(4, 1, figsize=(12, 10), sharex=True,
                            gridspec_kw={'height_ratios': [2, 0.4, 0.75, 0.75]})

    y_lims = [(36_000, 48_000), (0, 1), (4500, 6200), (14_000, 21_000)]
    y_ticks = [[37500, 42500], None, [4700, 5500], [15000, 20000]]
    labels = [r'DJIA', r'$w_{\scriptscriptstyle 1}(\mathbf{x}, \mathbf{y})$', r'S\&P500', r'NASDAQ']

    def _format_panel(panel):
        # Axis label, shared date ticks, and the per-panel limits/ticks.
        ax = axs[panel]
        ax.set_ylabel(labels[panel], labelpad=15)
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(tick_labels)
        ax.set_xlim(0, N - 1)
        ax.set_ylim(y_lims[panel])
        if y_ticks[panel]:
            ax.set_yticks(y_ticks[panel])

    # Panel 0: DJIA, with clean observations and outliers drawn separately.
    ax = axs[0]
    ax.axhline(np.mean(Y_with_outliers[:, 0]), color='grey', linestyle='--', linewidth=3)
    ax.plot(
        x_obs[~is_outlier],
        Y_with_outliers[~is_outlier, 0],
        'o', color='black', alpha=0.5
    )
    ax.plot(
        x_obs[is_outlier],
        Y_with_outliers[is_outlier, 0],
        'x', color='red', markersize=8,
    )
    ax.plot(x_grid, mu_mogp[:, 0], '-', color='DarkOrange', label='MOGP')
    ax.plot(x_grid, mu_pm[:, 0], '-', color='Green', label='PM')
    ax.plot(x_grid, mu_morcgp[:, 0], '-', color='RoyalBlue', label='MORCGP')
    _format_panel(0)

    # Panel 1: weights of the first output for the two robust models.
    ax = axs[1]
    ax.plot(x_obs, w_pm[:, 0], '--', color='Green', linewidth=3, label='PM')
    ax.plot(x_obs, w_morcgp[:, 0], '--', color='RoyalBlue', linewidth=3, label='MORCGP')
    _format_panel(1)

    # Panels 2 and 3: the remaining two series (columns 1 and 2).
    for panel, column in ((2, 1), (3, 2)):
        ax = axs[panel]
        ax.axhline(np.mean(Y_with_outliers[:, column]), color='grey', linestyle='--')
        ax.plot(x_obs, Y_with_outliers[:, column], 'o', color='black', alpha=0.5)
        ax.plot(x_grid, mu_mogp[:, column], '-', color='DarkOrange')
        ax.plot(x_grid, mu_pm[:, column], '-', color='Green')
        ax.plot(x_grid, mu_morcgp[:, column], '-', color='RoyalBlue')
        _format_panel(panel)

    axs[-1].set_xlabel("Date", labelpad=20)
    fig.align_ylabels(axs)

    # Manual layout in figure coordinates: panel heights and the gaps between them.
    top = 0.95
    gap_large = 0.05
    gap_small = 0.01

    h1 = 0.25
    h2 = 0.07
    h3 = 0.125
    h4 = 0.125

    b1 = top - h1
    b2 = b1 - gap_small - h2
    b3 = b2 - gap_large - h3
    b4 = b3 - gap_large - h4

    axs[0].set_position([0.1, b1, 0.85, h1])
    axs[1].set_position([0.1, b2, 0.85, h2])
    axs[2].set_position([0.1, b3, 0.85, h3])
    axs[3].set_position([0.1, b4, 0.85, h4])

    plt.show()
    # fig.savefig("./financial_MOGP_weights.pdf", format='pdf', bbox_inches='tight')

# Draw the four-panel summary figure from the fitted models.
plot_predictions(
    X=X,
    X_test=X_test,
    Y_with_outliers=Y_with_outliers,
    mu_mogp=mu_mogp,
    mu_pm=mu_pm,
    mu_morcgp=mu_morcgp,
    w_pm=pm_w01,
    w_morcgp=morcgp_w01,
)

In [None]:
import matplotlib.pyplot as plt
import datetime
import numpy as np
from mpl_toolkits.axes_grid1.inset_locator import inset_axes, mark_inset

def plot_predictions(X, Y_with_outliers, mu_mogp, mu_morcgp, mu_pm, w_morcgp, w_pm, X_test):
    """
    Plot the three series, the model predictions, the first-output weights, and a zoomed inset.

    NOTE: this redefines the `plot_predictions` declared in the previous cell;
    once this cell has run, the name refers to this version.

    The figure has four stacked panels sharing the x-axis:
    DJIA (column 0, with rows 180-186 drawn as red crosses, a legend, and an
    inset zooming on x in [177, 191]), the weights of the two robust models
    for the first output, S&P500 (column 1) and NASDAQ (column 2).

    Every observation -- in the main panel and in the inset -- is drawn at its
    own `X` coordinate, so the markers line up with the prediction curves
    (drawn against `X_test`) and with the other panels.

    Parameters:
    X (np.ndarray): N x 1 training inputs
    Y_with_outliers (np.ndarray): N x 3 observations; column 0 holds the outliers
    mu_mogp (np.ndarray): MOGP predictive mean at X_test, one column per series
    mu_morcgp (np.ndarray): MORCGP predictive mean at X_test, one column per series
    mu_pm (np.ndarray): PM predictive mean at X_test, one column per series
    w_morcgp (np.ndarray): MORCGP weights with N rows; only column 0 is plotted
    w_pm (np.ndarray): PM weights with N rows; only column 0 is plotted
    X_test (np.ndarray): Inputs at which the predictive means were evaluated

    Returns:
    None. The figure is displayed with plt.show().
    """
    N = X.shape[0]
    x_obs = X.flatten()
    x_grid = X_test.flatten()

    # Approximate calendar: N consecutive weekdays starting on 2024-01-02.
    # NOTE(review): exchange holidays are not skipped, so tick positions may
    # drift from the true trading dates -- confirm this is acceptable.
    dates = []
    current_date = datetime.date(2024, 1, 2)
    while len(dates) < N:
        if current_date.weekday() < 5:
            dates.append(current_date)
        current_date += datetime.timedelta(days=1)

    # One tick at the first calendar entry of every quarter-starting month.
    quarters = [1, 4, 7, 10]
    seen = set()
    tick_dates = []
    tick_positions = []
    for i, date in enumerate(dates):
        key = (date.year, date.month)
        if date.month in quarters and key not in seen:
            tick_dates.append(date)
            tick_positions.append(i)
            seen.add(key)

    tick_labels = [d.strftime('%b') for d in tick_dates]

    # Rows of Y_with_outliers that are highlighted as outliers.
    exclude_slice = slice(180, 187)
    is_outlier = np.zeros(N, dtype=bool)
    is_outlier[exclude_slice] = True

    fig, axs = plt.subplots(4, 1, figsize=(10, 12), sharex=True,
                            gridspec_kw={'height_ratios': [2.5, 0.4, 0.3, 0.3]})

    y_lims = [(37_000, 48_000), (0, 1), (4500, 6200), (14_000, 21_000)]
    y_ticks = [[38000, 42500, 47000], [0, 1], [4800, 5800], [15000, 20000]]
    labels = [r'DJIA', r'$\breve{w}_{\scriptscriptstyle 1}(\mathbf{x}, \mathbf{y})$', r'S\&P500', r'NASDAQ']

    def _apply_ticks_and_limits(panel):
        # Shared date ticks plus the per-panel y-limits and y-ticks.
        ax = axs[panel]
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(tick_labels)
        ax.set_xlim(0, N - 1)
        ax.set_ylim(y_lims[panel])
        if y_ticks[panel]:
            ax.set_yticks(y_ticks[panel])

    def _draw_first_series(ax, mean_linewidth, with_labels):
        # Mean line, clean points, outliers and the three prediction curves for column 0.
        ax.axhline(mean_val, color='grey', linestyle='--', linewidth=mean_linewidth)
        ax.plot(
            x_obs[~is_outlier],
            Y_with_outliers[~is_outlier, 0],
            'o', color='black', alpha=0.5
        )
        ax.plot(
            x_obs[is_outlier],
            Y_with_outliers[is_outlier, 0],
            'x', color='red', markersize=8,
        )
        curve_labels = (
            'MOGP',
            r'MORCGP ($w_{\scriptscriptstyle \textup{RCGP}}$)',
            r'MORCGP ($w_{\scriptscriptstyle \textup{MORCGP}}$)',
        ) if with_labels else (None, None, None)
        ax.plot(x_grid, mu_mogp[:, 0], '-', color='DarkOrange', label=curve_labels[0])
        ax.plot(x_grid, mu_pm[:, 0], '-', color='Green', label=curve_labels[1])
        ax.plot(x_grid, mu_morcgp[:, 0], '-', color='RoyalBlue', label=curve_labels[2])

    # Panel 0: DJIA with the zoomed inset around the outliers.
    mean_val = np.mean(Y_with_outliers[:, 0])
    _draw_first_series(axs[0], mean_linewidth=3, with_labels=True)

    axins = inset_axes(
        axs[0],
        width=0.8,
        height=1.8,
        loc='upper center',
        bbox_to_anchor=(0.45, 1),
        bbox_transform=axs[0].transAxes
    )
    _draw_first_series(axins, mean_linewidth=2, with_labels=False)
    axins.set_xlim(177, 191)
    axins.set_ylim(37500, 41000)
    axins.set_xticks([])
    axins.set_yticks([])
    mark_inset(axs[0], axins, loc1=3, loc2=1, fc="none", ec="0.5")

    axs[0].set_ylabel(labels[0], labelpad=15)
    _apply_ticks_and_limits(0)
    axs[0].legend(loc='upper left', frameon=False)

    # Panel 1: weights of the first output for the two robust models.
    axs[1].plot(x_obs, w_pm[:, 0], '-', color='Green', linewidth=3, label='PM')
    axs[1].plot(x_obs, w_morcgp[:, 0], '-', color='RoyalBlue', linewidth=3, label='MORCGP')
    # Horizontal label in a smaller font so it fits next to the thin panel.
    axs[1].set_ylabel(labels[1], labelpad=1, rotation=0, ha='right', va='center', fontsize=18)
    axs[1].tick_params(axis='y', labelsize=18)
    _apply_ticks_and_limits(1)

    # Panels 2 and 3: the remaining two series (columns 1 and 2).
    for panel, column in ((2, 1), (3, 2)):
        ax = axs[panel]
        ax.axhline(np.mean(Y_with_outliers[:, column]), color='grey', linestyle='--')
        ax.plot(x_obs, Y_with_outliers[:, column], 'o', color='black', alpha=0.5)
        ax.plot(x_grid, mu_mogp[:, column], '-', color='DarkOrange')
        ax.plot(x_grid, mu_pm[:, column], '-', color='Green')
        ax.plot(x_grid, mu_morcgp[:, column], '-', color='RoyalBlue')
        ax.set_ylabel(labels[panel], labelpad=15)
        _apply_ticks_and_limits(panel)

    axs[-1].set_xlabel("Date (2024)", labelpad=20)
    # The weight panel uses a horizontal label, so it is left out of the alignment.
    labels_to_align = [0,2,3]
    fig.align_ylabels(axs[labels_to_align])

    # Manual layout in figure coordinates: panel heights and the gaps between them.
    top = 0.95
    gap_large = 0.04
    gap_small = 0.015

    h1 = 0.25
    h2 = 0.06
    h3 = 0.1
    h4 = 0.1

    b1 = top - h1
    b2 = b1 - gap_small - h2
    b3 = b2 - gap_large - h3
    b4 = b3 - gap_large - h4

    axs[0].set_position([0.1, b1, 0.85, h1])
    axs[1].set_position([0.1, b2, 0.85, h2])
    axs[2].set_position([0.1, b3, 0.85, h3])
    axs[3].set_position([0.1, b4, 0.85, h4])

    # Uniform frame thickness on every panel and on the inset.
    for ax in axs:
        for spine in ax.spines.values():
            spine.set_linewidth(1.5)

    for spine in axins.spines.values():
        spine.set_linewidth(1.5)

    plt.show()
    # fig.savefig("./financial_MOGP_weights.pdf", format='pdf', bbox_inches='tight')


# Draw the final figure (with inset and legend) from the fitted models.
plot_predictions(
    X=X,
    X_test=X_test,
    Y_with_outliers=Y_with_outliers,
    mu_mogp=mu_mogp,
    mu_pm=mu_pm,
    mu_morcgp=mu_morcgp,
    w_pm=pm_w01,
    w_morcgp=morcgp_w01,
)