In [None]:
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.stattools import acf
import numpy as np

from pathlib import Path

# Plot the autocorrelation function (ACF) of every log-return series on a grid
# of subplots and save the figure as a PDF for the progress report.

# Drop every row that contains a NaN in any column (the first row is expected
# to be one of them), so each series handed to plot_acf is gap-free.
log_returns_clean = log_returns.dropna()

# Select only the log return columns (names containing '_LogReturn').
log_return_cols = [col for col in log_returns_clean.columns if '_LogReturn' in col]
if not log_return_cols:
    # Fail early with a clear message; plt.subplots(0, n_cols) would otherwise
    # raise an unrelated-looking matplotlib error.
    raise ValueError("No '_LogReturn' columns found in log_returns; nothing to plot")

# All columns share the same rows after dropna(), so one length check suffices.
n_obs = len(log_returns_clean)
if n_obs < 2:
    raise ValueError("log_returns has fewer than two complete rows; cannot compute an ACF")

# Correlation is inspected up to 500 lags, clamped to n_obs - 1 because
# statsmodels rejects a lag count that is not smaller than the series length.
acf_max_lags = 500
acf_lags = min(acf_max_lags, n_obs - 1)

# Set up the subplot grid for all assets.
n_cols = 3
n_rows = int(np.ceil(len(log_return_cols) / n_cols))

# squeeze=False always yields a 2-D array of axes, so flatten() is safe for
# any grid shape.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(18, 4*n_rows), squeeze=False)
axes = axes.flatten()

# Plot ACF for each log return series; alpha=0.05 draws the 95% confidence band.
for idx, col in enumerate(log_return_cols):
    plot_acf(log_returns_clean[col], lags=acf_lags, ax=axes[idx], alpha=0.05)

    # Clean up the column name for the title.
    asset_name = col.replace('_LogReturn', '')
    axes[idx].set_title(f'ACF: {asset_name}', fontsize=10, fontweight='bold')
    axes[idx].set_xlabel('Lag')
    axes[idx].set_ylabel('Autocorrelation')
    axes[idx].grid(True, alpha=0.3)

# Hide any unused subplots in the last grid row.
for idx in range(len(log_return_cols), len(axes)):
    axes[idx].set_visible(False)

fig.suptitle('Autocorrelation Functions for All Asset Log Returns\n(Blue shaded area = 95% confidence bounds)', 
             fontsize=14, fontweight='bold', y=1.0)
fig.tight_layout()

# Make sure the output directory exists before writing the PDF.
acf_out_path = Path("progress_report/media/acf_all_assets.pdf")
acf_out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(acf_out_path, bbox_inches='tight')
plt.show()

print(f"Plotted ACF for {len(log_return_cols)} assets")
print("\nInterpretation:")
print("- Bars outside the blue confidence bounds indicate statistically significant autocorrelation")
print("- Use the lag where autocorrelation becomes insignificant to determine optimal window size")

In [None]:

from pathlib import Path

# ACF on raw price data (not log returns): one subplot per column listed in
# price_vol_columns, saved as a PDF for the progress report.
# NOTE(review): the name price_vol_columns suggests volume columns may be
# included, yet every title is labelled "(Raw Price)" — confirm against the
# cell that defines it.

if len(price_vol_columns) == 0:
    # Fail early with a clear message; plt.subplots(0, n_cols) would otherwise
    # raise an unrelated-looking matplotlib error.
    raise ValueError("price_vol_columns is empty; nothing to plot")

# Largest lag requested; clamped per series below because statsmodels rejects
# a lag count that is not smaller than the series length.
acf_max_lags = 400

# Set up the subplot grid for all series.
n_cols = 3
n_rows = int(np.ceil(len(price_vol_columns) / n_cols))

# squeeze=False always yields a 2-D array of axes, so flatten() is safe for
# any grid shape.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(18, 4*n_rows), squeeze=False)
axes = axes.flatten()

# Plot ACF for each raw series; alpha=0.05 draws the 95% confidence band.
plotted_count = 0
for idx, col in enumerate(price_vol_columns):
    # NaNs are dropped per column, so series lengths may differ.
    raw_series = all_data[col].dropna()
    n_lags = min(acf_max_lags, len(raw_series) - 1)
    if n_lags < 1:
        # Too few observations to compute any autocorrelation; skip the panel.
        print(f"Skipping {col}: fewer than two non-NaN observations")
        axes[idx].set_visible(False)
        continue

    plot_acf(raw_series, lags=n_lags, ax=axes[idx], alpha=0.05)

    axes[idx].set_title(f'ACF: {col} (Raw Price)', fontsize=10, fontweight='bold')
    axes[idx].set_xlabel('Lag')
    axes[idx].set_ylabel('Autocorrelation')
    axes[idx].grid(True, alpha=0.3)
    plotted_count += 1

# Hide any unused subplots in the last grid row.
for idx in range(len(price_vol_columns), len(axes)):
    axes[idx].set_visible(False)

fig.suptitle('Autocorrelation Functions for Raw Asset Prices\n(Blue shaded area = 95% confidence bounds)', 
             fontsize=14, fontweight='bold', y=1.0)
fig.tight_layout()

# Make sure the output directory exists before writing the PDF.
acf_out_path = Path("progress_report/media/acf_raw_prices.pdf")
acf_out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(acf_out_path, bbox_inches='tight')
plt.show()

print(f"Plotted ACF for {plotted_count} raw price series")
print("\nNote: Raw prices typically show very high autocorrelation (non-stationary)")
print("Log returns are preferred for time series modeling as they tend to be more stationary")

In [None]:
from pyts.decomposition import SingularSpectrumAnalysis
import numpy as np
import matplotlib.pyplot as plt

# Demonstrate Singular Spectrum Analysis (SSA) on a synthetic noisy sine wave
# and plot the original signal together with each extracted component.

# Example signal: four sine periods over 200 samples plus Gaussian noise.
# A locally seeded generator keeps the figure reproducible without touching
# NumPy's global random state.
ssa_demo_rng = np.random.default_rng(42)
x = np.sin(np.linspace(0, 8 * np.pi, 200)) + 0.3 * ssa_demo_rng.standard_normal(200)

# Apply SSA. pyts expects input of shape (n_samples, n_timestamps); with
# groups=None it returns (n_samples, window_size, n_timestamps).
ssa = SingularSpectrumAnalysis(window_size=40, groups=None)
X_ssa = ssa.fit_transform(x.reshape(1, -1))

# Only one sample was passed in, so take it to get the per-component series
# with shape (n_components, n_timestamps). Iterating X_ssa directly would
# yield a single 2-D matrix instead of individual components.
ssa_components = X_ssa[0]

# Plot reconstructed components on top of the original signal.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(x, label="Original", color='black', linewidth=1.2)
for i, comp in enumerate(ssa_components):
    ax.plot(comp, label=f'Component {i+1}')
# One legend entry per component; use several columns to keep it readable.
ax.legend(ncol=4, fontsize='small')
ax.set_title("Singular Spectrum Analysis (SSA) Decomposition")
plt.show()
