In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from darts import TimeSeries
from darts.models import ExponentialSmoothing, ARIMA, AutoARIMA
from darts.utils.utils import ModelMode, SeasonalityMode
from darts.metrics import mape
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from statsmodels.tsa.stattools import acf, pacf
from plotly.subplots import make_subplots
import plotly.graph_objs as go
import numpy as np
from scipy.stats import spearmanr, pearsonr
from IPython.display import clear_output, display
import holidays
from darts.dataprocessing.transformers import Scaler
from darts.models import BlockRNNModel
import matplotlib.pyplot as plt
import warnings

# Silence every warning category for the rest of the session
warnings.filterwarnings(action='ignore')


# Notebook-wide parameters
ticker = 'SPY'  # symbol handed to the data loader
period = '10y'  # look-back period handed to the data loader
time_frame = 50  # number of rows used for returns, lags and rolling windows
use_returns = True  # work on percentage returns rather than closing prices
train_percentage = 0.9  # fraction of the rows that goes into the training set
forecast_horizon = 30  # Number of days to forecast into the future



# TODO: fix load_and_prepare_data (data loading and preparation)

In [None]:
#Functions /Classes

def time_series_to_series(ts: TimeSeries) -> pd.Series:
    """
    Turn a Darts TimeSeries into a pandas Series.

    Args:
        ts (TimeSeries): The Darts TimeSeries object to convert.

    Returns:
        pd.Series: The flattened values of `ts`, indexed by `ts.time_index`.
    """
    data = ts.values()

    # Collapse any extra dimensions so pandas receives a 1-D array
    flat = data.flatten() if data.ndim > 1 else data

    return pd.Series(flat, index=ts.time_index)

import yfinance as yf
import pandas as pd

def load_and_prepare_data(ticker='SPY', period='5y', time_frame=21, use_returns=False, train_percentage=0.8):
    """
    Download closing prices, put them on a business-day index and split them.

    This function used to be defined twice in a row; only the second
    definition was ever in effect. This single definition keeps that
    behaviour: every returned frame has exactly one column named 'Close',
    which holds `time_frame`-period percentage changes when `use_returns`
    is True and the forward-filled closing prices otherwise.

    Args:
        ticker (str): Symbol passed to `yf.download`.
        period (str): Look-back period passed to `yf.download`.
        time_frame (int): Number of periods used by `pct_change` when
            `use_returns` is True.
        use_returns (bool): Return percentage changes instead of prices.
        train_percentage (float): Fraction of the rows placed in the training frame.

    Returns:
        tuple: `(df, train_df, test_df)` where `df` is the full prepared frame,
        `train_df` its first `int(len(df) * train_percentage)` rows and
        `test_df` the remaining rows.

    Raises:
        ValueError: If the download contains no rows.
    """
    # Retrieve data from Yahoo Finance
    data = yf.download(ticker, period=period)
    if data is None or len(data) == 0:
        raise ValueError(f"No price data returned for ticker {ticker!r} and period {period!r}")

    close = data['Close']
    if isinstance(close, pd.DataFrame):
        # With MultiIndex columns (one 'Close' column per ticker) the selection
        # is a DataFrame; use its first column so that the 'Close' lookups
        # below do not fail with a KeyError.
        close = close.iloc[:, 0]
    df = close.to_frame(name='Close')

    # Put the prices on a gap-free business-day index and carry the last known
    # price forward over the inserted days. The rebuilt index is unnamed,
    # exactly as it was after the previous asfreq + reindex sequence.
    business_days = pd.date_range(start=df.index.min(), end=df.index.max(), freq='B')
    df = df.reindex(business_days).ffill()

    if use_returns:
        # The returns are deliberately stored under the name 'Close' so that
        # downstream code can use the same column name in both modes.
        returns = df['Close'].pct_change(periods=time_frame)
        df = returns.dropna().to_frame(name='Close')

    # Chronological split: the first part trains, the remainder tests
    split_index = int(len(df) * train_percentage)
    train_df = df.iloc[:split_index].copy()
    test_df = df.iloc[split_index:].copy()

    return df, train_df, test_df




def fit_and_forecast_model(model, series, forecast_horizon):
    """
    Train `model` on `series` and predict `forecast_horizon` steps ahead.

    Any exception raised while fitting or predicting is reported with
    `print` and swallowed.

    Parameters:
        model: A Darts forecasting model (anything exposing `fit` and `predict`).
        series (TimeSeries): The time series data to fit the model on.
        forecast_horizon (int): The number of days to forecast into the future.

    Returns:
        forecast (TimeSeries): The forecasted time series data, or None when
        fitting or predicting failed.
    """
    try:
        model.fit(series)
        return model.predict(forecast_horizon)
    except Exception as err:
        # Best-effort helper: report the failure and let the caller carry on
        print(f"An error occurred: {err}")
    return None


import plotly.graph_objects as go

def plot_forecast(train, test, forecast_test, forecast_future, window=None):
    """
    Plot the training series, testing series, and forecasted values with shaded
    bands of 1 and 2 rolling standard deviations around the training data.

    Parameters:
        train (TimeSeries): The training time series data.
        test (TimeSeries): The testing time series data.
        forecast_test (TimeSeries): The forecasted values for the test period.
        forecast_future (TimeSeries): The forecasted values for future periods.
        window (int, optional): Rolling window used for the mean and standard
            deviation of the training data. Defaults to the notebook-level
            `time_frame` when omitted.

    All four series are read through their 'Close' column.
    """
    if window is None:
        # Keep the previous behaviour of reading the notebook-level setting
        window = time_frame

    # Convert TimeSeries objects to pandas DataFrames
    train_df = train.pd_dataframe()
    test_df = test.pd_dataframe()
    forecast_test_df = forecast_test.pd_dataframe()
    forecast_future_df = forecast_future.pd_dataframe()

    # Rolling statistics of the training data (NaN until the window is full)
    rolling = train_df['Close'].rolling(window=window)
    rolling_mean = rolling.mean()
    rolling_std = rolling.std()

    fig = go.Figure()

    # Line traces for the observed and forecasted series
    line_traces = (
        ('Training Series', train_df),
        ('Testing Series', test_df),
        ('Forecast Test', forecast_test_df),
        ('Forecast Future', forecast_future_df),
    )
    for trace_name, frame in line_traces:
        fig.add_trace(go.Scatter(x=frame.index, y=frame['Close'], mode='lines', name=trace_name))

    # Each band is an unfilled upper edge followed by a lower edge that fills
    # up to it. 'tonexty' fills towards the trace added immediately before, so
    # the upper edge must not carry a fill: it would otherwise be filled
    # towards the unrelated 'Forecast Future' line.
    bands = (
        (1, 'rgba(255,0,0,0.2)', 'rgba(255,0,0,0.1)'),
        (2, 'rgba(255,0,0,0.4)', 'rgba(255,0,0,0.2)'),
    )
    for multiple, line_color, fill_color in bands:
        spread = multiple * rolling_std
        fig.add_trace(go.Scatter(
            x=train_df.index,
            y=rolling_mean + spread,
            mode='lines',
            name=f'{multiple} Std Dev (Upper)',
            line=dict(color=line_color),
        ))
        fig.add_trace(go.Scatter(
            x=train_df.index,
            y=rolling_mean - spread,
            mode='lines',
            name=f'{multiple} Std Dev (Lower)',
            line=dict(color=line_color),
            fill='tonexty',
            fillcolor=fill_color,
        ))

    # Update layout for better visualization
    fig.update_layout(
        title='Time Series Forecast with Confidence Intervals',
        xaxis_title='Date',
        yaxis_title='Value',
        legend_title='Series',
        template='plotly_dark'
    )

    # Show the plot
    fig.show()

def compute_lags(series, num_lags):
    """
    Build a frame of lagged copies of `series`.

    Args:
        series: The input data; it must expose `index` and `shift`.
        num_lags (int): Highest lag to compute; lags 1..num_lags are produced.

    Returns:
        pd.DataFrame: Columns 'lag_1' ... 'lag_<num_lags>' followed by
        'original', all on the index of `series`.
    """
    # Prepare every column first, in output order
    columns = {f'lag_{lag}': series.shift(lag) for lag in range(1, num_lags + 1)}
    columns['original'] = series

    lagged_series = pd.DataFrame(index=series.index)
    for label, values in columns.items():
        lagged_series[label] = values
    return lagged_series
def compute_seasonal_features(series):
    """
    Encode calendar position as sine/cosine pairs.

    Day of year (period 365), day of week (period 7) and month (period 12)
    are taken from the datetime index and each mapped to a sine and a
    cosine column.

    Args:
        series (pd.Series): The input time series with a datetime index.

    Returns:
        pd.DataFrame: Six columns named 'sin_<cycle>' / 'cos_<cycle>' on the
        index of `series`.

    Raises:
        ValueError: If the index is not a DatetimeIndex.
    """
    index = series.index
    if not isinstance(index, pd.DatetimeIndex):
        raise ValueError("Series index must be a DatetimeIndex")

    # (column suffix, position within the cycle, cycle length)
    cycles = (
        ('day_of_year', index.dayofyear, 365),
        ('day_of_week', index.dayofweek, 7),
        ('month_of_year', index.month, 12),
    )

    features = {}
    for suffix, position, cycle_length in cycles:
        angle = 2 * np.pi * position / cycle_length
        features[f'sin_{suffix}'] = np.sin(angle)
        features[f'cos_{suffix}'] = np.cos(angle)

    return pd.DataFrame(features, index=index)

def generate_holidays_for_series(series, country='US'):
    """
    Collect the holiday dates of every calendar year covered by the index.

    Args:
        series (pd.Series): The input time series with a datetime index.
        country (str): The country code for generating holidays.

    Returns:
        pd.DatetimeIndex: The sorted, de-duplicated holiday dates.

    Raises:
        ValueError: If the index is not a DatetimeIndex.
    """
    index = series.index
    if not isinstance(index, pd.DatetimeIndex):
        raise ValueError("Series index must be a DatetimeIndex")

    first_year = index.year.min()
    last_year = index.year.max()

    # One lookup per calendar year; the set drops duplicate dates
    holiday_dates = {
        day
        for year in range(first_year, last_year + 1)
        for day in holidays.CountryHoliday(country, years=[year]).keys()
    }

    return pd.DatetimeIndex(sorted(holiday_dates))

def compute_moving_averages(series, windows):
    """
    Build a frame of rolling means of `series`.

    Args:
        series (pd.Series): The input time series with a datetime index.
        windows (list of int): A list of window sizes for which to compute moving averages.

    Returns:
        pd.DataFrame: One 'ma_<window>' column per window followed by an
        'original' column holding `series`.
    """
    frame = pd.DataFrame(index=series.index)

    # Lazily pair each column label with its rolling mean
    labelled_means = (
        (f'ma_{size}', series.rolling(window=size).mean()) for size in windows
    )
    for label, rolling_mean in labelled_means:
        frame[label] = rolling_mean

    frame['original'] = series
    return frame

def compute_holiday_features(series, country='US'):
    """
    Flag which index entries of `series` fall on a holiday.

    Args:
        series (pd.Series): The input time series with a datetime index.
        country (str): The country code for generating holidays.

    Returns:
        pd.DataFrame: A single 'is_holiday' column (1 on holidays, 0
        otherwise) on the index of `series`.
    """
    holiday_dates = generate_holidays_for_series(series, country)

    index = series.index
    # Boolean membership test converted to 0/1 integers
    flags = index.isin(holiday_dates).astype(int)

    return pd.DataFrame({'is_holiday': flags}, index=index)

def plot_seasonal_features(seasonal_features_df):
    """
    Draw every column of the seasonal-feature frame as a line with Plotly.

    Args:
        seasonal_features_df (pd.DataFrame): DataFrame with seasonal features.
    """
    dates = seasonal_features_df.index

    # One line trace per feature column
    traces = [
        go.Scatter(x=dates, y=seasonal_features_df[feature], mode='lines', name=feature)
        for feature in seasonal_features_df.columns
    ]

    fig = go.Figure(data=traces)
    fig.update_layout(
        title='Seasonal Features',
        xaxis_title='Date',
        yaxis_title='Value',
        legend_title='Feature',
        template='plotly_dark',
    )
    fig.show()

def plot_lags(lagged_df):
    """
    Draw the 'original' column and every lag column as lines with Plotly.

    Args:
        lagged_df (pd.DataFrame): DataFrame with lagged features and original series.
    """
    dates = lagged_df.index
    fig = go.Figure()

    # The original series goes first, drawn thicker and in blue
    fig.add_trace(go.Scatter(
        x=dates,
        y=lagged_df['original'],
        mode='lines',
        name='Original Series',
        line={'width': 2, 'color': 'blue'},
    ))

    # Every remaining column is a lagged copy
    lag_columns = [name for name in lagged_df.columns if name != 'original']
    for name in lag_columns:
        fig.add_trace(go.Scatter(
            x=dates,
            y=lagged_df[name],
            mode='lines',
            name=name,
            line={'width': 1},
        ))

    fig.update_layout(
        title='Time Series with Lagged Features',
        xaxis_title='Date',
        yaxis_title='Value',
        legend_title='Features',
        template='plotly_dark',
    )
    fig.show()
def plot_holiday_features(holiday_features):
    """
    Draw the 'is_holiday' indicator as red markers with Plotly.

    Args:
        holiday_features (pd.DataFrame): DataFrame with holiday features.
    """
    indicator = go.Scatter(
        x=holiday_features.index,
        y=holiday_features['is_holiday'],
        mode='markers',
        name='Holiday Indicator',
        marker={'color': 'red', 'size': 8},
    )

    fig = go.Figure(data=[indicator])
    fig.update_layout(
        title='Holiday Indicator Feature',
        xaxis_title='Date',
        yaxis_title='Holiday Indicator',
        legend_title='Feature',
        template='plotly_dark',
    )
    fig.show()

def plot_moving_averages(moving_averages_df):
    """
    Draw the 'original' column and every moving-average column with Plotly.

    Args:
        moving_averages_df (pd.DataFrame): DataFrame with moving averages and original series.
    """
    dates = moving_averages_df.index

    def line_trace(column, label, **line_style):
        # Small factory so both kinds of trace share the same settings
        return go.Scatter(
            x=dates,
            y=moving_averages_df[column],
            mode='lines',
            name=label,
            line=dict(**line_style),
        )

    fig = go.Figure()
    fig.add_trace(line_trace('original', 'Original Series', width=2, color='blue'))

    for column in moving_averages_df.columns:
        if column == 'original':
            continue
        fig.add_trace(line_trace(column, column, width=1))

    fig.update_layout(
        title='Time Series with Moving Averages',
        xaxis_title='Date',
        yaxis_title='Value',
        legend_title='Features',
        template='plotly_dark',
    )
    fig.show()


def plot_standard_deviations(standard_deviations_df, original_col=None):
    """
    Draw the rolling standard-deviation columns, and optionally the original
    series, as lines with Plotly.

    Args:
        standard_deviations_df (pd.DataFrame): DataFrame with standard deviations and possibly an original series.
        original_col (str): Column name of the original series in the DataFrame (if available).
            When given and present it is drawn first, thicker and in blue;
            every other column is drawn as a thin line.
    """
    dates = standard_deviations_df.index
    all_columns = standard_deviations_df.columns
    fig = go.Figure()

    show_original = bool(original_col) and original_col in all_columns
    if show_original:
        fig.add_trace(go.Scatter(
            x=dates,
            y=standard_deviations_df[original_col],
            mode='lines',
            name='Original Series',
            line={'width': 2, 'color': 'blue'},
        ))

    # Everything except the original column is a standard-deviation feature
    for name in (col for col in all_columns if col != original_col):
        fig.add_trace(go.Scatter(
            x=dates,
            y=standard_deviations_df[name],
            mode='lines',
            name=name,
            line={'width': 1},
        ))

    fig.update_layout(
        title='Time Series with Standard Deviations',
        xaxis_title='Date',
        yaxis_title='Value',
        legend_title='Features',
        template='plotly_dark',
    )
    fig.show()

def plot_garman_klass_volatility(garman_klass_df, original_col=None):
    """
    Draw the Garman-Klass volatility columns, and optionally the original
    series, as lines with Plotly.

    Args:
        garman_klass_df (pd.DataFrame): DataFrame with Garman-Klass volatility features and possibly an original series.
        original_col (str): Column name of the original series in the DataFrame (if available).
            When given and present it is drawn first, thicker and in blue;
            every other column is drawn as a thin line.
    """
    dates = garman_klass_df.index
    traces = []

    if original_col and original_col in garman_klass_df.columns:
        traces.append(go.Scatter(
            x=dates,
            y=garman_klass_df[original_col],
            mode='lines',
            name='Original Series',
            line={'width': 2, 'color': 'blue'},
        ))

    # Thin line for each volatility feature
    traces.extend(
        go.Scatter(
            x=dates,
            y=garman_klass_df[name],
            mode='lines',
            name=name,
            line={'width': 1},
        )
        for name in garman_klass_df.columns
        if name != original_col
    )

    fig = go.Figure(data=traces)
    fig.update_layout(
        title='Time Series with Garman-Klass Volatility',
        xaxis_title='Date',
        yaxis_title='Volatility',
        legend_title='Features',
        template='plotly_dark',
    )
    fig.show()
    
def generate_lagged_features(df, num_lags=5):
    """
    Compute lagged features for every column of `df` and join them side by side.

    Args:
        df (pd.DataFrame): One column per ticker.
        num_lags (int): Highest lag passed to `compute_lags`.

    Returns:
        pd.DataFrame: The per-ticker lag frames concatenated column-wise
        (inner join on the index), each column prefixed with '<ticker>_'.
    """
    def lags_for(ticker):
        frame = compute_lags(df[ticker], num_lags)
        # Prefix with the ticker so columns stay unique after concatenation
        frame.columns = [f'{ticker}_{col}' for col in frame.columns]
        return frame

    per_ticker = [lags_for(ticker) for ticker in df.columns]
    return pd.concat(per_ticker, axis=1, join='inner')

def generate_seasonal_features(df):
    """
    Compute seasonal features for every column of `df` and join them side by side.

    Args:
        df (pd.DataFrame): One column per ticker, with a datetime index.

    Returns:
        pd.DataFrame: The per-ticker seasonal frames concatenated column-wise
        (inner join on the index), each column prefixed with '<ticker>_'.
    """
    collected = []

    for ticker in df.columns:
        features = compute_seasonal_features(df[ticker])
        # Prefix with the ticker so columns stay unique after concatenation
        prefixed_names = [f'{ticker}_{col}' for col in features.columns]
        features.columns = prefixed_names
        collected.append(features)

    return pd.concat(collected, axis=1, join='inner')
    



def compute_standard_deviations(series, windows):
    """
    Build a frame of rolling standard deviations of `series`.

    Parameters:
    series (pd.Series): The time series data.
    windows (iterable): Range or list of window sizes for rolling standard deviation.

    Returns:
    pd.DataFrame: One 'std_<window>' column per window followed by an
    'original' column holding `series`.
    """
    # Prepare every column first, in output order
    columns = {f'std_{size}': series.rolling(window=size).std() for size in windows}
    columns['original'] = series

    std_devs = pd.DataFrame(index=series.index)
    for label, values in columns.items():
        std_devs[label] = values
    return std_devs

def compute_garman_klass_volatility(df, window=252):
    """
    Compute the rolling, annualized Garman-Klass volatility from OHLC prices.

    The per-row variance estimate is

        0.5 * ln(High / Low) ** 2 - (2 * ln(2) - 1) * ln(Close / Open) ** 2

    It is averaged over `window` rows, square-rooted and scaled by sqrt(252).
    The input DataFrame is not modified.

    Args:
        df (pd.DataFrame): Price data including 'Open', 'High', 'Low', and 'Close' columns.
        window (int): Rolling window size over which the variance is averaged.

    Returns:
        pd.Series: Annualized Garman-Klass volatility for each row; NaN until
        `window` rows are available.

    Raises:
        ValueError: If any of the required columns is missing.
    """
    # Ensure the DataFrame contains the required columns
    required_columns = ['Open', 'High', 'Low', 'Close']
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"DataFrame must contain columns: {', '.join(required_columns)}")

    def price(name):
        column = df[name]
        # With MultiIndex columns the selection is a DataFrame; use its first
        # column so that the result stays a Series.
        return column.iloc[:, 0] if isinstance(column, pd.DataFrame) else column

    # Local Series only: writing these into `df` would alter the caller's frame
    log_hl = np.log(price('High') / price('Low'))
    log_co = np.log(price('Close') / price('Open'))

    # The close/open term carries the (2 ln 2 - 1) weight of the estimator
    garman_klass_variance = 0.5 * log_hl ** 2 - (2 * np.log(2) - 1) * log_co ** 2

    # Average the variance over the window, then annualize the volatility
    rolling_variance = garman_klass_variance.rolling(window=window).mean()
    annualized_volatility = np.sqrt(rolling_variance) * np.sqrt(252)

    return annualized_volatility

def generate_standard_deviations_features(df, windows=range(1, 11)):
    """
    Compute rolling standard deviations for every column of `df` and join
    them side by side.

    Parameters:
    df (pd.DataFrame): DataFrame with tickers as columns and time series data.
    windows (iterable): Range or list of window sizes for rolling standard deviation.

    Returns:
    pd.DataFrame: The per-ticker frames concatenated column-wise (inner join
    on the index), each column prefixed with '<ticker>_'.
    """
    def std_devs_for(ticker):
        frame = compute_standard_deviations(df[ticker], windows)
        # Prefix with the ticker so columns stay unique after concatenation
        frame.columns = [f'{ticker}_{col}' for col in frame.columns]
        return frame

    return pd.concat(
        [std_devs_for(ticker) for ticker in df.columns],
        axis=1,
        join='inner',
    )

def generate_moving_averages_features(df, windows=range(1, 11)):
    """
    Compute moving averages for every column of `df` and join them side by side.

    Parameters:
    df (pd.DataFrame): DataFrame with tickers as columns and time series data.
    windows (iterable): Range or list of window sizes for the moving averages.

    Returns:
    pd.DataFrame: The per-ticker frames concatenated column-wise (inner join
    on the index), each column prefixed with '<ticker>_'.
    """
    frames = []

    for ticker in df.columns:
        averages = compute_moving_averages(df[ticker], windows)
        # Prefix with the ticker so columns stay unique after concatenation
        renamed = [f'{ticker}_{col}' for col in averages.columns]
        averages.columns = renamed
        frames.append(averages)

    combined = pd.concat(frames, axis=1, join='inner')
    return combined

def generate_garman_klass_features(df, windows=range(1, 11)):
    """
    Generate rolling Garman-Klass volatility features for a DataFrame with 'Open', 'High', 'Low', 'Close' columns.

    One 'gk_std_<window>' column is produced per entry of `windows`, followed
    by an 'original' column computed with a window spanning every row of `df`.
    Rows that are NaN because a window is not yet full are back-filled with
    the next available value.

    NOTE(review): back-filling copies later values into earlier rows; confirm
    this is acceptable wherever these features feed a forecasting model.

    Parameters:
    df (pd.DataFrame): DataFrame with 'Open', 'High', 'Low', 'Close' columns.
    windows (iterable): Range or list of window sizes for rolling Garman-Klass volatility.

    Returns:
    pd.DataFrame: DataFrame with rolling Garman-Klass volatility features and original volatility.

    Raises:
    ValueError: If a required column is missing or a window exceeds the number of rows.
    """
    # Ensure the DataFrame contains the required columns
    required_columns = ['Open', 'High', 'Low', 'Close']
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"DataFrame must contain columns: {', '.join(required_columns)}")

    n_rows = len(df)
    columns = []

    # Compute Garman-Klass volatility for different rolling windows
    for window in windows:
        # Avoid windows larger than the DataFrame length
        if window > n_rows:
            raise ValueError(f"Window size {window} is larger than the number of data points in the DataFrame.")

        volatility = compute_garman_klass_volatility(df, window=window)
        columns.append(volatility.rename(f'gk_std_{window}'))

    # 'original' uses a single window covering the whole frame
    columns.append(compute_garman_klass_volatility(df, window=n_rows).rename('original'))

    # Concatenate all features horizontally in one pass
    all_garman_klass_df = pd.concat(columns, axis=1)

    # DataFrame.bfill() replaces the deprecated fillna(method='bfill')
    return all_garman_klass_df.bfill()
    

In [None]:
# Load data
# TODO: fix this
series, train, test = load_and_prepare_data(
    ticker=ticker,
    period=period,
    time_frame=time_frame,
    use_returns=use_returns,
    train_percentage=train_percentage,
)

'''exogenous_data = yf.download(['^VIX', 'VIXY','VIXM','^VVIX'],period=period,interval='1d')
'''
# Daily history of the exogenous symbols over the same period
exogenous_data = yf.download(
    ['^VIX', '^VVIX'],
    period=period,
    interval='1d',
)

print(series)



In [None]:
#Autocorrelation analysis
import numpy as np
from statsmodels.tsa.stattools import acf, pacf
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Convert the series to a numpy array; squeeze so that a single-column frame
# becomes the 1-D vector the correlation functions work on
series_np = np.squeeze(series.to_numpy() if hasattr(series, 'to_numpy') else np.array(series))

# Step 1: Determine the maximum number of lags allowed for PACF.
# statsmodels' pacf requires nlags to be strictly below half the sample size.
max_lags_pacf = len(series_np) // 2 - 1

# Step 2: Compute the ACF and PACF values with adjusted lags
lags = min(len(series_np) - 1, max_lags_pacf)  # Use the smaller value to ensure it fits the limit
acf_values = acf(series_np, nlags=lags)
pacf_values = pacf(series_np, nlags=lags)

# Half-width of the 95% band: 1.96 / sqrt(number of observations)
conf_interval = 1.96 / np.sqrt(len(series_np))


def _first_lag_where(values, predicate):
    """Return the index of the first value satisfying `predicate`, or None."""
    return next((i for i, val in enumerate(values) if predicate(val)), None)


# First lag where the ACF enters the confidence band / the 75% / the 50% territory
first_within_conf_idx = _first_lag_where(acf_values, lambda val: -conf_interval < val < conf_interval)
first_below_75_idx = _first_lag_where(acf_values, lambda val: abs(val) <= 0.75)
first_below_50_idx = _first_lag_where(acf_values, lambda val: abs(val) <= 0.50)

# Step 3: Create subplots (ACF on top, PACF below)
fig = make_subplots(
    rows=2, cols=1,
    subplot_titles=('ACF Plot', 'PACF Plot'),
    vertical_spacing=0.3
)

lag_axis = np.arange(lags + 1)  # Lags from 0 to the specified number

# Settings shared by the confidence band of both subplots
conf_band = dict(
    type="rect",
    x0=0, y0=-conf_interval, x1=lags, y1=conf_interval,
    line=dict(color="red", dash="dash"),
    fillcolor="rgba(255, 0, 0, 0.2)",
)

# Settings shared by every arrow annotation on the ACF subplot
acf_arrow = dict(showarrow=True, arrowhead=2, ax=-40, ay=-30, row=1, col=1)

# ACF bars and confidence band on the first subplot
fig.add_trace(go.Bar(x=lag_axis, y=acf_values, name='ACF', marker_color='blue'), row=1, col=1)
fig.add_shape(row=1, col=1, **conf_band)

# Highlight the first lag where ACF enters the confidence interval if it exists
if first_within_conf_idx is not None:
    fig.add_trace(go.Scatter(
        x=[first_within_conf_idx],
        y=[acf_values[first_within_conf_idx]],
        mode='markers',
        marker=dict(color='red', size=10),
        name='First within 95% CI'
    ), row=1, col=1)

    fig.add_annotation(
        x=first_within_conf_idx,
        y=acf_values[first_within_conf_idx],
        text=f"First within 95% CI at lag {first_within_conf_idx}",
        **acf_arrow
    )

# Vertical marker and annotation at the first lag inside the 75% and 50% territories
acf_low, acf_high = min(acf_values), max(acf_values)
for threshold_idx, threshold_color, threshold_label in (
    (first_below_75_idx, "orange", "75%"),
    (first_below_50_idx, "purple", "50%"),
):
    if threshold_idx is None:
        continue

    fig.add_shape(
        type="line",
        x0=threshold_idx, x1=threshold_idx,
        y0=acf_low, y1=acf_high,
        line=dict(color=threshold_color, dash="dash"),
        row=1, col=1
    )
    fig.add_annotation(
        x=threshold_idx,
        y=acf_values[threshold_idx],
        text=f"First within {threshold_label} correlation at lag {threshold_idx}",
        **acf_arrow
    )

# PACF bars and confidence band on the second subplot
fig.add_trace(go.Bar(x=lag_axis, y=pacf_values, name='PACF', marker_color='green'), row=2, col=1)
fig.add_shape(row=2, col=1, **conf_band)

# Label for the confidence band, anchored to the top-right corner of the figure
fig.add_annotation(
    xref="paper", yref="paper",
    x=1, y=1,
    text="95% Confidence Interval",
    showarrow=False,
    align="right",
    font=dict(size=12, color="red")
)

# Update the layout to make it look like traditional ACF and PACF plots
fig.update_layout(
    title='ACF and PACF Plots',
    autosize=True,  # Enable autosizing to allow dynamic resizing
    height=1000,  # Initial height
    xaxis_title='Lags',
    yaxis_title='Autocorrelation',
    xaxis=dict(
        dtick=1,  # Set x-axis ticks to be at integer lags
        tickmode='linear'
    ),
    yaxis=dict(
        tickvals=np.linspace(-1, 1, 5)  # Set y-axis ticks for better readability
    ),
    xaxis2=dict(
        dtick=1,  # Set x-axis ticks for integer lags on the second subplot
        tickmode='linear'
    ),
    yaxis2=dict(
        tickvals=np.linspace(-1, 1, 5)  # Set y-axis ticks for better readability on the second subplot
    ),
    template='plotly_white'
)

# Show the plot
fig.show(config={'responsive': True})


In [None]:
#feature engineering
def compute_pairwise_differences(df):
    """
    Compute the pairwise differences between each pair of columns in a DataFrame.

    For every pair of columns, taken in column order, the earlier column minus
    the later column is stored under the name ``'<col1> - <col2>'``.

    Parameters:
    df (pd.DataFrame): DataFrame with columns to compute pairwise differences for.

    Returns:
    pd.DataFrame: DataFrame with pairwise differences between columns, indexed
        like ``df``. Columns that are entirely NaN are dropped. When ``df`` has
        fewer than two columns the result has no columns.
    """
    from itertools import combinations

    # Build every difference first and assemble the frame with one concat.
    # Inserting columns one at a time fragments the DataFrame (pandas emits a
    # PerformanceWarning) and is slow for the ~n^2/2 columns produced here.
    differences = {
        f'{col1} - {col2}': df[col1] - df[col2]
        for col1, col2 in combinations(df.columns, 2)
    }

    if not differences:
        # pd.concat raises on an empty mapping; return the index-only frame.
        return pd.DataFrame(index=df.index)

    pairwise_diff_df = pd.concat(differences, axis=1)

    # Drop columns that are completely NaN (if any)
    return pairwise_diff_df.dropna(axis=1, how='all')

def align_features_to_index(features_dict, reference_index):
    """
    Reindex every feature table onto a common index and forward-fill the gaps.

    Parameters:
    features_dict (dict): Mapping of feature-group name to a pandas object
        that supports ``reindex`` (DataFrames in this notebook).
    reference_index (pd.Index): Index every feature group is aligned to.

    Returns:
    dict: Same keys as ``features_dict``; each value is reindexed to
        ``reference_index`` with missing rows forward-filled. Leading rows
        that have no earlier value remain NaN.
    """
    # .ffill() replaces fillna(method='ffill'), which pandas has deprecated.
    return {
        key: df.reindex(reference_index).ffill()
        for key, df in features_dict.items()
    }
# Build every feature family from the main series, align them to the series
# index and concatenate them into one wide `all_features` table.

# Separate raw download used as input for the Garman-Klass features
# (presumably needs the OHLC columns — confirm against generate_garman_klass_features).
df_garman_klass = yf.download(ticker, period=period)

# Compute features
features_lags = compute_lags(series, time_frame)
features_lags_differences = compute_pairwise_differences(features_lags)
features_moving_averages = compute_moving_averages(series, windows=range(1,  time_frame, 1))
features_moving_averages_differences = compute_pairwise_differences(features_moving_averages)
features_garmanklass = generate_garman_klass_features(df_garman_klass, windows=range(1,time_frame, 1))
# NOTE: this un-aligned table is named ...difference (singular); the aligned
# copy extracted further down is ...differences (plural).
features_garmanklass_difference = compute_pairwise_differences(features_garmanklass)

features_seasonal = compute_seasonal_features(series)
features_holidays = compute_holiday_features(series=series, country='US')
# Windows run from 1 up to the length of the test set.
features_standard_deviations = compute_standard_deviations(series, windows=range(1, len(test) + 1, 1))

# Align all features to the index of the main series
features_dict = {
    'lags': features_lags,
    'lags_differences': features_lags_differences,
    'moving_averages': features_moving_averages,
    'moving_averages_differences': features_moving_averages_differences,
    'garman_klass': features_garmanklass,
    'garman_klass_differences': features_garmanklass_difference,
    'seasonal': features_seasonal,
    'holidays': features_holidays,
    'standard_deviations': features_standard_deviations,
}

# Ensure index alignment
reference_index = series.index
aligned_features = align_features_to_index(features_dict, reference_index)

# Extract aligned features (rebinding the names computed above)
features_lags = aligned_features['lags']
features_lags_differences = aligned_features['lags_differences']
features_moving_averages = aligned_features['moving_averages']
features_moving_averages_differences = aligned_features['moving_averages_differences']
features_garmanklass = aligned_features['garman_klass']
features_garmanklass_differences = aligned_features['garman_klass_differences']

features_seasonal = aligned_features['seasonal']
features_holidays = aligned_features['holidays']
features_standard_deviations = aligned_features['standard_deviations']

# The two triple-quoted strings below are disabled code kept as no-op string
# statements: an exogenous-feature variant and a smaller feature set.
#there are series mismatches here
'''features_exogenous_lags = generate_lagged_features(exogenous_data['Close'], num_lags=forecast_horizon)
features_exogenous_seasonal = generate_seasonal_features(exogenous_data['Close'])
features_exogenous_moving_average = generate_moving_averages_features(exogenous_data['Close'], windows=range(1, len(test)))
all_features = pd.concat([features_lags, features_seasonal, features_holidays, features_moving_averages,
                          features_exogenous_lags,features_exogenous_moving_average,features_exogenous_seasonal], axis=1)
'''
'''all_features = pd.concat([features_lags, features_seasonal, features_holidays, features_moving_averages,features_standard_deviations,features_garmanklass], axis=1)
'''
# Column-wise concatenation of all aligned feature families.
all_features = pd.concat([features_lags,features_lags_differences, 
                          features_moving_averages,
                          features_moving_averages_differences,
                          features_garmanklass,
                          features_garmanklass_differences,
                          features_seasonal,
                          features_holidays,
                          features_standard_deviations
                          ], axis=1)

'''plot_lags(features_lags)
plot_seasonal_features(features_seasonal)
plot_holiday_features(features_holidays)
plot_moving_averages(features_moving_averages)
plot_standard_deviations(features_standard_deviations)
plot_garman_klass_volatility(features_garmanklass)'''


In [None]:
#feature selection


import pandas as pd
from sklearn.feature_selection import SelectKBest, f_regression, mutual_info_regression

# Function to perform feature selection
def perform_feature_selection(X, y, score_func, k):
    """
    Select the ``k`` best columns of ``X`` according to ``score_func``.

    Parameters:
    X (pd.DataFrame): Candidate features, one column per feature.
    y: Target values, passed through to ``SelectKBest.fit``.
    score_func (callable): Univariate scoring function, e.g. ``f_regression``
        or ``mutual_info_regression``.
    k (int): Number of features to keep.

    Returns:
    tuple: ``(selected_features_df, selected_columns)``.
        ``selected_features_df`` holds the chosen columns of ``X`` with its
        index and column names preserved; ``selected_columns`` is the matching
        ``pd.Index`` of column names.
    """
    selector = SelectKBest(score_func=score_func, k=k)
    # Only the support mask is needed, so fit without materialising the
    # transformed array that fit_transform would return.
    selector.fit(X, y)

    selected_positions = selector.get_support(indices=True)
    selected_columns = X.columns[selected_positions]

    # Positional selection already keeps X's index and column labels, so the
    # frame does not need to be rebuilt through the DataFrame constructor.
    selected_features_df = X.iloc[:, selected_positions]

    return selected_features_df, selected_columns

# Number of features each selector keeps
k = 30

# Assuming all_features and series are your initial data.
# .bfill() replaces fillna(method='bfill'), which pandas has deprecated.
# NOTE(review): back-filling copies later observations into earlier rows;
# confirm this is acceptable for a forecasting feature matrix.
X = all_features.bfill().fillna(0)
y = series.bfill()

# Sanity checks on the selector inputs
print("X has NaNs:", X.isnull().values.any())
print("y has NaNs:", y.isnull().values.any())
print("Type of X:", type(X))
print("Length of y:", len(y))
print("Type of y:", type(y))

# Perform feature selection with f_regression
selected_features_df_regression, features_regression = perform_feature_selection(X, y, f_regression, k=k)
print("Selected features (regression):")
print(selected_features_df_regression.columns)

# Perform feature selection with mutual_info_regression
selected_features_df_mutual_info, features_mutual_info = perform_feature_selection(X, y, mutual_info_regression, k=k)
print("Selected features (mutual information):")
print(selected_features_df_mutual_info.columns)

# Find features unique to each method
features_regression_set = set(features_regression)
features_mutual_info_set = set(features_mutual_info)

unique_to_regression = features_regression_set - features_mutual_info_set
unique_to_mutual_info = features_mutual_info_set - features_regression_set

print("\nFeatures selected by f_regression but not by mutual_info_regression:")
print(unique_to_regression)

print("\nFeatures selected by mutual_info_regression but not by f_regression:")
print(unique_to_mutual_info)
'''df = features_moving_averages
ma_columns = [col for col in df.columns if col.startswith('ma_')]
ma_df = df[ma_columns]

difference_dfs = []
for i in range(len(ma_columns)):
    for j in range(i + 1, len(ma_columns)):
        diff_col = ma_df[ma_columns[i]] - ma_df[ma_columns[j]]
        diff_df = pd.DataFrame(diff_col, columns=[f'{ma_columns[i]}_minus_{ma_columns[j]}'])
        difference_dfs.append(diff_df)

difference_df = pd.concat(difference_dfs, axis=1)
difference_df_clean = difference_df.dropna()
series_clean = series.loc[difference_df_clean.index]
def pearson_correlation(x, y):
    # Ensure x and y have the same length
    assert len(x) == len(y)
    
    # Compute means
    mean_x = np.mean(x)
    mean_y = np.mean(y)
    
    # Compute covariance
    covariance = np.mean((x - mean_x) * (y - mean_y))
    
    # Compute standard deviations
    std_x = np.std(x, ddof=1)
    std_y = np.std(y, ddof=1)
    
    # Compute Pearson correlation coefficient
    correlation = covariance / (std_x * std_y)
    return correlation

# Calculate correlations manually
correlations = {}
for col in difference_df_clean.columns:
    correlations[col] = pearson_correlation(difference_df_clean[col],series_clean)

# Convert to DataFrame for plotting
correlation_df = pd.DataFrame(list(correlations.items()), columns=['Difference Column', 'Correlation'])

correlation_df.plot()'''

In [None]:
#Load and prepare features
series, train, test = load_and_prepare_data(
    ticker=ticker,
    period=period,
    time_frame=time_frame,
    use_returns=use_returns,
    train_percentage=train_percentage
)

# Convert to TimeSeries objects
series_ = TimeSeries.from_dataframe(series)
test_ = TimeSeries.from_dataframe(test)
train_ = TimeSeries.from_dataframe(train)

# Feature generation & selection (Data Transformation for fitting)
scaler_features = Scaler()

# Convert features to TimeSeries
all_features_ts = TimeSeries.from_dataframe(all_features)

# Normalize the features
# NOTE(review): both scalers are fitted on the full history, test period
# included — confirm this look-ahead is acceptable.
all_features_ts_normalized = scaler_features.fit_transform(all_features_ts)

# Split the normalized features into training and test sets.
# Use the notebook-wide train_percentage rather than a hard-coded 0.8 so this
# split follows the same fraction handed to load_and_prepare_data above.
split_point_features = int(len(all_features_ts_normalized) * train_percentage)
train_features = all_features_ts_normalized[:split_point_features]
test_features = all_features_ts_normalized[split_point_features:]

# Forward fill missing values in features
train_features = TimeSeries.from_dataframe(train_features.pd_dataframe().ffill())
test_features = TimeSeries.from_dataframe(test_features.pd_dataframe().ffill())

# Combine training and test features
combined_features_ts = train_features.concatenate(test_features)

# Normalize the main series using a separate scaler
scaler_series = Scaler()
normalized_series = scaler_series.fit_transform(series_)

# Split the normalized main series with the same fraction as the features
split_point_series = int(len(normalized_series) * train_percentage)
train_series = normalized_series[:split_point_series]
test_series = normalized_series[split_point_series:]

# Ensure train_features and train_series have the same index
train_features_df = train_features.pd_dataframe().reindex(train_series.pd_dataframe().index)
train_features = TimeSeries.from_dataframe(train_features_df.ffill())

# Set input_chunk_length (capped at 25 steps)
input_chunk_length = min(len(train_series), 25)


In [None]:
#Model: Exponential smoothing
from darts import TimeSeries
from darts.models import ExponentialSmoothing
import plotly.graph_objects as go

# Define the function to fit and forecast with a model
import pandas as pd
import plotly.graph_objects as go




# Example usage with ExponentialSmoothing model
# Assuming `train`, `test`, and `series` are TimeSeries objects
# Wrap the pandas train/test/full data as darts TimeSeries objects
train_ = TimeSeries.from_dataframe(train)
test_  = TimeSeries.from_dataframe(test)
series_ = TimeSeries.from_dataframe(series)

# Fit and forecast over the test period using the training data
model = ExponentialSmoothing()
forecast_test = fit_and_forecast_model(model, series=train_, forecast_horizon=len(test))

# Fit and forecast future values using the entire series.
# The horizon comes from the notebook-level forecast_horizon parameter
# instead of a hard-coded 30, so changing the parameter takes effect here.
forecast_future = fit_and_forecast_model(model, series=series_, forecast_horizon=forecast_horizon)

# Plot the results (fit_and_forecast_model returns None on failure)
if forecast_test is not None and forecast_future is not None:
    plot_forecast(train_, test_, forecast_test, forecast_future)

#Model: Exponential smoothing parameter optimization / MAPE
# With returns only the additive modes are searched; otherwise the
# multiplicative modes are tried as well.
parameters = {
    'trend': [ModelMode.ADDITIVE] if use_returns else [ModelMode.ADDITIVE, ModelMode.MULTIPLICATIVE],
    'seasonal': [SeasonalityMode.ADDITIVE] if use_returns else [SeasonalityMode.ADDITIVE, SeasonalityMode.MULTIPLICATIVE],
    'seasonal_periods': list(range(2, 48))
}

# Iterate through the parameter grid and keep the model with the lowest MAPE
best_mape = float('inf')
best_model = None
best_params = {}
train_ = TimeSeries.from_dataframe(train)
test_  = TimeSeries.from_dataframe(test)
for trend in parameters['trend']:
    for seasonal in parameters['seasonal']:
        for seasonal_periods in parameters['seasonal_periods']:
            model = ExponentialSmoothing(
                trend=trend,
                seasonal=seasonal,
                seasonal_periods=seasonal_periods
            )
            model.fit(train_)
            forecast = model.predict(len(test))
            error = mape(test_, forecast)

            if error < best_mape:
                best_mape = error
                best_model = model
                best_params = {
                    'trend': trend,
                    'seasonal': seasonal,
                    'seasonal_periods': seasonal_periods
                }

#print(f'Best Parameters: {best_params} - Best MAPE: {best_mape:.4f}')

# Refit the winning configuration and generate forecasts
if best_model is not None:
    # Forecast over the test period (reuses the TimeSeries built above)
    forecast_test = fit_and_forecast_model(best_model, series=train_, forecast_horizon=len(test))

    # Forecast future values beyond the dataset, using the notebook-level
    # forecast_horizon parameter rather than a hard-coded 30
    forecast_future = fit_and_forecast_model(
        best_model,
        series=TimeSeries.from_dataframe(series),
        forecast_horizon=forecast_horizon
    )

    # Plot the results
    if forecast_test is not None and forecast_future is not None:
        plot_forecast(train_, test_, forecast_test, forecast_future)



        #Model: Exponential smoothing / Correlation optimization
#Rolling Pearson Correlation of Forecast
import numpy as np
import plotly.graph_objects as go
from darts import TimeSeries
from scipy.stats import pearsonr

# Define rolling window size
rolling_window_size = 20  # Adjust this as needed

def rolling_pearson_corr(actual_series, forecast_series, window_size):
    """
    Pearson correlation between two sequences over a sliding window.

    Parameters:
        actual_series: Sliceable sequence of observed values.
        forecast_series: Sliceable sequence of forecast values.
        window_size (int): Number of consecutive points per window.

    Returns:
        list: One correlation per window start, from 0 to
        ``len(actual_series) - window_size``. Windows with fewer than two
        points yield ``np.nan``. The list is empty when ``window_size``
        exceeds ``len(actual_series)``.
    """
    def _correlation_at(first):
        # Same slice bounds are applied to both sequences.
        last = first + window_size
        observed = actual_series[first:last]
        predicted = forecast_series[first:last]

        # A correlation needs at least two points on each side.
        if len(observed) <= 1 or len(predicted) <= 1:
            return np.nan

        coefficient, _ = pearsonr(observed, predicted)
        return coefficient

    window_count = len(actual_series) - window_size + 1
    return [_correlation_at(first) for first in range(window_count)]

# Example usage
# Assuming `test_series` and `forecast_series` are your Darts TimeSeries objects
# Convert your time series data to Darts TimeSeries if not already
# train_series = TimeSeries.from_dataframe(train_df)
# test_series = TimeSeries.from_dataframe(test_df)
# forecast_series = best_model.predict(len(test_series))

# Convert TimeSeries to numpy arrays
# Use the best model's test-period forecast for the rolling correlation.
# The bare `forecast` variable is only whatever the last grid-search
# candidate produced, so it is kept purely as a fallback for the case where
# fit_and_forecast_model failed and returned None.
rolling_forecast = forecast_test if forecast_test is not None else forecast

# Flatten actual and forecast values to 1-D numpy arrays
test_np = test.values.flatten()
forecast_np = rolling_forecast.pd_dataframe().values.flatten()

# Compute rolling Pearson correlation
rolling_corrs = rolling_pearson_corr(test_np, forecast_np, rolling_window_size)

# Create a Plotly figure
fig = go.Figure()

# Add rolling Pearson correlation trace (x axis is the 1-based window number)
fig.add_trace(
    go.Scatter(
        x=list(range(1, len(rolling_corrs) + 1)),
        y=rolling_corrs,
        mode='lines+markers',
        name='Rolling Pearson Correlation',
        marker=dict(color='blue'),
        line=dict(color='blue')
    )
)

# Update layout for better visualization
fig.update_layout(
    title='Rolling Pearson Correlation of Forecast',
    xaxis_title='Rolling Window',
    yaxis_title='Pearson Correlation',
    template='plotly_white'
)

# Show the plot
fig.show()


# Grid of exponential-smoothing settings; multiplicative modes are only
# searched when the series is not expressed as returns.
parameters = {
    'trend': [ModelMode.ADDITIVE] if use_returns else [ModelMode.ADDITIVE, ModelMode.MULTIPLICATIVE],
    'seasonal': [SeasonalityMode.ADDITIVE] if use_returns else [SeasonalityMode.ADDITIVE, SeasonalityMode.MULTIPLICATIVE],
    'seasonal_periods': list(range(2, 48))
}

# Initialize variables for the best model
best_corr = -float('inf')  # Start with the lowest possible correlation
best_model = None
best_params = {}
train_ = TimeSeries.from_dataframe(train)
test_  = TimeSeries.from_dataframe(test)

# The actual values are the same for every candidate, so flatten them once
# instead of on every iteration.
test_np = test_.pd_dataframe().values.flatten()

# Iterate through the parameter grid
for trend in parameters['trend']:
    for seasonal in parameters['seasonal']:
        for seasonal_periods in parameters['seasonal_periods']:
            model = ExponentialSmoothing(
                trend=trend,
                seasonal=seasonal,
                seasonal_periods=seasonal_periods
            )
            model.fit(train_)
            forecast = model.predict(len(test_))

            # Flatten the forecast for the correlation calculation
            forecast_np = forecast.pd_dataframe().values.flatten()

            # Calculate Pearson correlation for this forecast
            if len(forecast_np) > 1 and len(test_np) > 1:  # Ensure enough data points
                corr, _ = pearsonr(test_np, forecast_np)
            else:
                corr = np.nan

            #print(f'Trend: {trend}, Seasonal: {seasonal}, Seasonal Periods: {seasonal_periods} - Pearson Correlation: {corr:.4f}')

            # A NaN correlation never compares greater, so such candidates
            # are skipped automatically.
            if corr > best_corr:
                best_corr = corr
                best_model = model
                best_params = {
                    'trend': trend,
                    'seasonal': seasonal,
                    'seasonal_periods': seasonal_periods
                }

print(f'Best Parameters: {best_params} - Best Pearson Correlation: {best_corr:.4f}')


# Forecast the test period using the chosen model
forecast_test = fit_and_forecast_model(best_model, series=train_, forecast_horizon=len(test))

# Fit and forecast future values using the entire series; the horizon is the
# notebook-level forecast_horizon parameter rather than a hard-coded 30
forecast_future = fit_and_forecast_model(
    best_model,
    series=TimeSeries.from_dataframe(series),
    forecast_horizon=forecast_horizon
)

# Plot the results
if forecast_test is not None and forecast_future is not None:
    plot_forecast(train_, test_, forecast_test, forecast_future)
import yfinance as yf
import pandas as pd
from darts import TimeSeries
from darts.models import ExponentialSmoothing
import plotly.graph_objs as go
from darts.utils.utils import ModelMode, SeasonalityMode
from darts.metrics import mape
from scipy.stats import spearmanr
import numpy as np
train_ = TimeSeries.from_dataframe(train)
test_  = TimeSeries.from_dataframe(test)
series_ = TimeSeries.from_dataframe(series)

# Define parameter grid
parameters = {
    'trend': [ModelMode.ADDITIVE] if use_returns else [ModelMode.ADDITIVE, ModelMode.MULTIPLICATIVE],
    'seasonal': [SeasonalityMode.ADDITIVE] if use_returns else [SeasonalityMode.ADDITIVE, SeasonalityMode.MULTIPLICATIVE],
    'seasonal_periods': list(range(2, 48))
}

# Initialize variables for the best model
best_score = -float('inf')  # Start with the lowest possible score
best_model = None
best_params = {}

# Initialize variables to keep track of model performance
model_performance = []

# Weights of the two metrics in the combined score (adjust as needed)
weight_mape = 0.5
weight_corr = 0.5

# The actual values are identical for every candidate; flatten them once.
test_np = test_.pd_dataframe().values.flatten()

# Iterate through the parameter grid
for trend in parameters['trend']:
    for seasonal in parameters['seasonal']:
        for seasonal_periods in parameters['seasonal_periods']:
            model = ExponentialSmoothing(
                trend=trend,
                seasonal=seasonal,
                seasonal_periods=seasonal_periods
            )
            model.fit(train_)
            forecast = model.predict(len(test_))

            # Flatten the forecast for the correlation calculation
            forecast_np = forecast.pd_dataframe().values.flatten()

            # Calculate Spearman correlation for this forecast
            if len(forecast_np) > 1 and len(test_np) > 1:  # Ensure enough data points
                corr, _ = spearmanr(test_np, forecast_np)
            else:
                corr = np.nan

            # Calculate MAPE for this forecast
            mape_value = mape(test_, forecast)

            # Combine metrics as a weighted sum. darts reports MAPE as a
            # percentage, so it is rescaled to a fraction first; otherwise the
            # MAPE term would dwarf the correlation, which lies in [-1, 1].
            score = weight_mape * (1 - mape_value / 100) + weight_corr * corr

            # Store model performance
            model_performance.append({
                'trend': trend,
                'seasonal': seasonal,
                'seasonal_periods': seasonal_periods,
                'MAPE': mape_value,
                'Correlation': corr,
                'Score': score
            })

            #print(f'Trend: {trend}, Seasonal: {seasonal}, Seasonal Periods: {seasonal_periods} - MAPE: {mape_value:.4f}, Correlation: {corr:.4f}, Score: {score:.4f}')

            # Update the best model if the current score is better
            # (a NaN score never compares greater and is skipped)
            if score > best_score:
                best_score = score
                best_model = model
                best_params = {
                    'trend': trend,
                    'seasonal': seasonal,
                    'seasonal_periods': seasonal_periods
                }

print(f'Best Parameters: {best_params} - Best Score: {best_score:.4f}')

# Refit the winning configuration and generate forecasts
if best_model is not None:
    # Forecast over the test period
    forecast_test = fit_and_forecast_model(best_model, train_, forecast_horizon=len(test_))

    # Forecast future values beyond the dataset, using the notebook-level
    # forecast_horizon parameter rather than a hard-coded 30
    forecast_future = fit_and_forecast_model(best_model, series_, forecast_horizon=forecast_horizon)

    # Plot the results
    if forecast_test is not None and forecast_future is not None:
        plot_forecast(train_, test_, forecast_test, forecast_future)


In [None]:
#Model: ARIMA
# NOTE(review): m=12 is the conventional period for monthly data with annual
# seasonality; the series here appears to be daily (yfinance default
# interval) — confirm the seasonal period.
best_model = AutoARIMA(seasonal=True, m=12)
series_ = TimeSeries.from_dataframe(series)
test_ = TimeSeries.from_dataframe(test)
train_ = TimeSeries.from_dataframe(train)

# Fit the model and generate forecasts
if best_model is not None:
    # Forecast over the test period
    forecast_test = fit_and_forecast_model(best_model, train_, forecast_horizon=len(test_))

    # Forecast future values beyond the dataset, using the notebook-level
    # forecast_horizon parameter rather than a hard-coded 30
    forecast_future = fit_and_forecast_model(best_model, series_, forecast_horizon=forecast_horizon)

    # Plot the results
    if forecast_test is not None and forecast_future is not None:
        plot_forecast(train_, test_, forecast_test, forecast_future)


In [10]:
#Model: Prophet

from darts.models import Prophet
from darts.metrics import mape
from darts import TimeSeries
from darts.utils.timeseries_generation import datetime_attribute_timeseries
import pandas as pd
from tqdm import tqdm
from IPython.display import clear_output  # Ensure clear_output is imported

# Define the parameter grid for the Prophet search below.
parameters = {
    'growth': ['linear','logistic'],
    'n_changepoints': [10, 25],
    'changepoint_range': [0.1, 0.5],
    'seasonality_mode': ['additive', 'multiplicative'],
    'changepoint_prior_scale': [0.01, 0.1],
    'cap': [None, 100],  # Only the second entry is used, and only if growth is logistic
    'floor': [None, 0],  # Only the second entry is used, and only if growth is logistic
    # NOTE(review): the three seasonality flags are looped over and recorded in
    # best_params, but are never passed to Prophet(...) below.
    'yearly_seasonality': [True], #[True, False],
    'weekly_seasonality':[True], #[True, False],
    'daily_seasonality':[True] #[True, False]
}

# Initialize variables
best_mape = float('inf')
best_model = None
best_params = {}

# Reload the data so this cell does not depend on earlier cells' train/test
series, train, test = load_and_prepare_data(
    ticker=ticker,
    period=period,
    time_frame=time_frame,
    use_returns=use_returns,
    train_percentage=train_percentage
)

# Convert to TimeSeries objects
series_ = TimeSeries.from_dataframe(series)
test_ = TimeSeries.from_dataframe(test)
train_ = TimeSeries.from_dataframe(train)

# Grid search: fit each configuration on train_ and score it by MAPE on test_
for growth in parameters['growth']:
    for n_changepoints in parameters['n_changepoints']:
        for changepoint_range in parameters['changepoint_range']:
            for seasonality_mode in parameters['seasonality_mode']:
                for changepoint_prior_scale in parameters['changepoint_prior_scale']:
                    for yearly_seasonality in parameters['yearly_seasonality']:
                        for weekly_seasonality in parameters['weekly_seasonality']:
                            for daily_seasonality in parameters['daily_seasonality']:
                                # Set cap and floor for logistic growth only
                                cap = parameters['cap'][1] if growth == 'logistic' else None
                                floor = parameters['floor'][1] if growth == 'logistic' else None
                                
                                # Create the model (seasonality flags are not forwarded here)
                                model = Prophet(
                                    growth=growth,
                                    n_changepoints=n_changepoints,
                                    changepoint_range=changepoint_range,
                                    seasonality_mode=seasonality_mode,
                                    changepoint_prior_scale=changepoint_prior_scale,
                                    cap=cap,
                                    floor=floor
                                )
                                
                                # Add seasonalities (placeholder: nothing is added)


                                
                                model.fit(train_)
                                forecast = model.predict(len(test_))
                                error = mape(test_, forecast)
                                clear_output(wait=True)  # Clear previous output

                                # Update the best model if the current one is better
                                if error < best_mape:
                                    best_mape = error
                                    best_model = model
                                    best_params = {
                                        'growth': growth,
                                        'n_changepoints': n_changepoints,
                                        'changepoint_range': changepoint_range,
                                        'seasonality_mode': seasonality_mode,
                                        'changepoint_prior_scale': changepoint_prior_scale,
                                        'cap': cap,
                                        'floor': floor,
                                        'yearly_seasonality': yearly_seasonality,
                                        'weekly_seasonality': weekly_seasonality,
                                        'daily_seasonality': daily_seasonality
                                    }

print(f'Best Parameters: {best_params} - Best MAPE: {best_mape:.4f}')

# Refit the best model and generate forecasts
if best_model:
    # Fit the model on the training series
    best_model.fit(train_)
    
    # Forecast the test period
    forecast_test = best_model.predict(len(test_))
    
    # Fit the model on the entire series and forecast future values
    best_model.fit(series_)
    # NOTE(review): the horizon here is len(test_), not the 30 days used by the
    # other model cells — confirm which one is intended.
    forecast_future = best_model.predict(len(test_))
    
    # Plot the results
    if forecast_test is not None and forecast_future is not None:
        plot_forecast(train_, test_, forecast_test, forecast_future)


08:05:26 - cmdstanpy - INFO - Chain [1] start processing
08:05:26 - cmdstanpy - INFO - Chain [1] done processing


Best Parameters: {'growth': 'logistic', 'n_changepoints': 10, 'changepoint_range': 0.01, 'seasonality_mode': 'multiplicative', 'changepoint_prior_scale': 0.01, 'cap': 100, 'floor': 0, 'yearly_seasonality': True, 'weekly_seasonality': True, 'daily_seasonality': True} - Best MAPE: 100.0000


08:05:30 - cmdstanpy - INFO - Chain [1] start processing
08:05:30 - cmdstanpy - INFO - Chain [1] done processing


In [None]:
#Model: N-BEATS
from darts.models import NBEATSModel
from darts import TimeSeries

def fit_and_forecast_model(model, series, forecast_horizon, past_covariates=None, future_covariates=None):
    """
    Fit a given model and forecast future values.

    Covariates are forwarded to both ``fit`` and ``predict`` when they are
    supplied and the model advertises support for them. Support is read from
    the model's ``supports_past_covariates`` / ``supports_future_covariates``
    flags; models without those flags fall back to the older check for a
    ``past_covariates`` / ``future_covariates`` attribute. Past and future
    covariates are handled independently of each other.

    Parameters:
        model: A Darts forecasting model.
        series (TimeSeries): The time series data to fit the model on.
        forecast_horizon (int): The number of steps to forecast into the future.
        past_covariates (TimeSeries, optional): Past covariates for models that support them.
        future_covariates (TimeSeries, optional): Future covariates for models that support them.

    Returns:
        forecast (TimeSeries): The forecasted time series data, or ``None``
        when fitting or predicting raised (the error is printed).
    """
    def _supports(kind):
        # Prefer the explicit capability flag; keep the legacy attribute
        # check for objects that do not expose it.
        flag = getattr(model, f'supports_{kind}', None)
        if flag is None:
            return hasattr(model, kind)
        return bool(flag)

    try:
        # Collect only the covariates the model can actually consume
        covariates = {}
        if past_covariates is not None and _supports('past_covariates'):
            covariates['past_covariates'] = past_covariates
        if future_covariates is not None and _supports('future_covariates'):
            covariates['future_covariates'] = future_covariates

        if hasattr(model, 'fit'):
            model.fit(series, **covariates)

        if not hasattr(model, 'predict'):
            raise ValueError("Model does not support prediction")

        # Predict with the same covariates the model was fitted with
        return model.predict(forecast_horizon, **covariates)
    except Exception as e:
        # Deliberate best-effort: callers test the result against None
        print(f"An error occurred: {e}")
        return None


# N-BEATS MODEL
def encode_year(idx):
    """Scale ``idx.year`` so that 1950 maps to 0.0 and 2000 maps to 1.0."""
    years_since_1950 = idx.year - 1950
    return years_since_1950 / 50

# Input window length as a multiple of the forecast horizon
n = 10

# N-BEATS looks back forecast_horizon * n steps and emits forecast_horizon
# steps per forward pass.
model = NBEATSModel(
    input_chunk_length=forecast_horizon * n,
    output_chunk_length=forecast_horizon,
    dropout=0.1,
    n_epochs=100,
    batch_size=32,
    optimizer_kwargs={'lr': 1e-3}
)

# Fit on the normalized training series and forecast the test period;
# train_features is handed over as past covariates.
forecast_test = fit_and_forecast_model(
    model,
    series=train_series,  # Fit the model on the training series
    forecast_horizon=len(test_series),  # Forecast horizon should match the length of the test series
    past_covariates=train_features  # Use past covariates corresponding to the training series
)

# Refit the same model object on the entire normalized series
forecast_future = fit_and_forecast_model(
    model,
    series=normalized_series,  # Fit the model on the entire series
    forecast_horizon=len(test_series),  # Horizon equals the test length here (not a fixed 30 days)
    past_covariates=combined_features_ts  # Use past covariates for forecasting
)

# Inverse transform the forecasts to the original scale
# (fit_and_forecast_model returns None when it failed)
if forecast_test is not None:
    forecast_test_original = scaler_series.inverse_transform(forecast_test)
else:
    forecast_test_original = None

if forecast_future is not None:
    forecast_future_original = scaler_series.inverse_transform(forecast_future)
else:
    forecast_future_original = None

# Inverse transform the train and test series to the original scale
train_series_original = scaler_series.inverse_transform(train_series)
test_series_original = scaler_series.inverse_transform(test_series)

# Plot the results only when both forecasts succeeded
if forecast_test_original is not None and forecast_future_original is not None:
    plot_forecast(
        train_series_original,  # Original scale of the training series
        test_series_original,   # Original scale of the test series
        forecast_test_original, # Forecast on the test period
        forecast_future_original # Forecast for future values
    )

In [None]:
#Model: N-HITS
from darts.models import NHiTSModel

from darts import TimeSeries

def fit_and_forecast_model(model, series, forecast_horizon, past_covariates=None, future_covariates=None):
    """
    Train ``model`` on ``series`` and return its forecast, or None on failure.

    Covariates are forwarded only when they are supplied AND the model object
    exposes an attribute of the same name (``past_covariates`` /
    ``future_covariates``). Future covariates reach ``fit`` only together with
    past covariates. Any exception is printed and turned into a None result.

    NOTE(review): confirm that the Darts models used in this notebook really
    expose attributes with these names; if they do not, the covariates are
    silently left out of both ``fit`` and ``predict``.

    Parameters:
        model: Object providing ``predict`` and, optionally, ``fit``.
        series (TimeSeries): Target series the model is fitted on.
        forecast_horizon (int): Number of steps passed to ``model.predict``.
        past_covariates (TimeSeries, optional): Past covariates for ``fit``.
        future_covariates (TimeSeries, optional): Future covariates for ``fit`` and ``predict``.

    Returns:
        The value returned by ``model.predict``, or None if anything raised.
    """
    try:
        # Training step (skipped entirely for objects without a fit method).
        if hasattr(model, 'fit'):
            fit_kwargs = {}
            if past_covariates is not None and hasattr(model, 'past_covariates'):
                fit_kwargs['past_covariates'] = past_covariates
                if future_covariates is not None and hasattr(model, 'future_covariates'):
                    fit_kwargs['future_covariates'] = future_covariates
            model.fit(series, **fit_kwargs)

        # Prediction step.
        if not hasattr(model, 'predict'):
            raise ValueError("Model does not support prediction")

        predict_kwargs = {}
        if future_covariates is not None and hasattr(model, 'future_covariates'):
            predict_kwargs['future_covariates'] = future_covariates
        return model.predict(forecast_horizon, **predict_kwargs)
    except Exception as error:
        print(f"An error occurred: {error}")
        return None


# N-HITS MODEL
def encode_year(idx):
    """Return the year of ``idx`` as an offset from 1950, in units of 50 years."""
    years_since_1950 = idx.year - 1950
    return years_since_1950 / 50

# The input window is n times as long as the forecast horizon.
n = 10

model = NHiTSModel(
    input_chunk_length=forecast_horizon * n,
    output_chunk_length=forecast_horizon,
    dropout=0.1,
    n_epochs=100,
    batch_size=32,
    optimizer_kwargs={'lr': 1e-3},
)

# Back-test: fit on the training split and request one forecast step per
# test observation. train_features is handed over as past covariates;
# fit_and_forecast_model decides whether the model actually receives them.
forecast_test = fit_and_forecast_model(
    model, train_series, len(test_series), past_covariates=train_features
)

# Second run: the same model instance is fitted on the full normalized
# series and again asked for len(test_series) steps.
forecast_future = fit_and_forecast_model(
    model, normalized_series, len(test_series), past_covariates=combined_features_ts
)

# Bring the forecasts back to the original scale. fit_and_forecast_model
# returns None when it fails, and that None is carried through unchanged.
forecast_test_original = (
    None if forecast_test is None else scaler_series.inverse_transform(forecast_test)
)
forecast_future_original = (
    None if forecast_future is None else scaler_series.inverse_transform(forecast_future)
)

# The train and test splits are un-scaled as well so everything is plotted
# in the same units.
train_series_original = scaler_series.inverse_transform(train_series)
test_series_original = scaler_series.inverse_transform(test_series)

# Plot only when both forecasts were produced.
if not (forecast_test_original is None or forecast_future_original is None):
    plot_forecast(
        train_series_original,
        test_series_original,
        forecast_test_original,
        forecast_future_original,
    )

In [None]:
#Model: RNN
from darts import TimeSeries
from pytorch_lightning.callbacks import EarlyStopping


def fit_and_forecast_model(model, series, forecast_horizon, past_covariates=None, future_covariates=None):
    """
    Train ``model`` on ``series`` and return its forecast, or None on failure.

    Covariates are forwarded only when they are supplied AND the model object
    exposes an attribute of the same name (``past_covariates`` /
    ``future_covariates``). Future covariates reach ``fit`` only together with
    past covariates. Any exception is printed and turned into a None result.

    NOTE(review): confirm that the Darts models used in this notebook really
    expose attributes with these names; if they do not, the covariates are
    silently left out of both ``fit`` and ``predict``.

    Parameters:
        model: Object providing ``predict`` and, optionally, ``fit``.
        series (TimeSeries): Target series the model is fitted on.
        forecast_horizon (int): Number of steps passed to ``model.predict``.
        past_covariates (TimeSeries, optional): Past covariates for ``fit``.
        future_covariates (TimeSeries, optional): Future covariates for ``fit`` and ``predict``.

    Returns:
        The value returned by ``model.predict``, or None if anything raised.
    """
    try:
        # Training step (skipped entirely for objects without a fit method).
        if hasattr(model, 'fit'):
            fit_kwargs = {}
            if past_covariates is not None and hasattr(model, 'past_covariates'):
                fit_kwargs['past_covariates'] = past_covariates
                if future_covariates is not None and hasattr(model, 'future_covariates'):
                    fit_kwargs['future_covariates'] = future_covariates
            model.fit(series, **fit_kwargs)

        # Prediction step.
        if not hasattr(model, 'predict'):
            raise ValueError("Model does not support prediction")

        predict_kwargs = {}
        if future_covariates is not None and hasattr(model, 'future_covariates'):
            predict_kwargs['future_covariates'] = future_covariates
        return model.predict(forecast_horizon, **predict_kwargs)
    except Exception as error:
        print(f"An error occurred: {error}")
        return None



# RNN MODEL
def encode_year(idx):
    """Return the year of ``idx`` as an offset from 1950, in units of 50 years.

    Passed to the model below as a custom past encoder via ``add_encoders``.
    """
    years_since_1950 = idx.year - 1950
    return years_since_1950 / 50

# Define the BlockRNNModel with named functions in add_encoders
'''
pl_trainer_kwargs = {
    "callbacks": [my_stopper],
}'''


# n scales the input window, the hidden size and the number of RNN layers.
n = 5

model = BlockRNNModel(
    input_chunk_length=forecast_horizon * n,
    output_chunk_length=forecast_horizon,
    hidden_dim=25 * n,
    n_rnn_layers=n,
    dropout=0.1,
    n_epochs=100,
    batch_size=32,
    optimizer_kwargs={'lr': 1e-3},
    # Encoder configuration: calendar encoders, the custom encode_year
    # function, and scaler_features as the encoders' transformer.
    add_encoders={
        'cyclic': {'future': ['month']},
        'datetime_attribute': {'future': ['hour', 'dayofweek']},
        'custom': {'past': [encode_year]},
        'transformer': scaler_features,
    },
)

# Back-test: fit on the training split and request one forecast step per
# test observation. train_features is handed over as past covariates;
# fit_and_forecast_model decides whether the model actually receives them.
forecast_test = fit_and_forecast_model(
    model, train_series, len(test_series), past_covariates=train_features
)

# Second run: the same model instance is fitted on the full normalized
# series and asked for a fixed 30 steps.
forecast_future = fit_and_forecast_model(
    model, normalized_series, 30, past_covariates=combined_features_ts
)

# Bring the forecasts back to the original scale. fit_and_forecast_model
# returns None when it fails, and that None is carried through unchanged.
forecast_test_original = (
    None if forecast_test is None else scaler_series.inverse_transform(forecast_test)
)
forecast_future_original = (
    None if forecast_future is None else scaler_series.inverse_transform(forecast_future)
)

# The train and test splits are un-scaled as well so everything is plotted
# in the same units.
train_series_original = scaler_series.inverse_transform(train_series)
test_series_original = scaler_series.inverse_transform(test_series)

# Plot only when both forecasts were produced.
if not (forecast_test_original is None or forecast_future_original is None):
    plot_forecast(
        train_series_original,
        test_series_original,
        forecast_test_original,
        forecast_future_original,
    )

In [None]:
#Model: LSTM
from darts import TimeSeries
from darts.models import RNNModel  # Replace with the LSTM model from darts


def fit_and_forecast_model(model, series, forecast_horizon, past_covariates=None, future_covariates=None):
    """
    Train ``model`` on ``series`` and return its forecast, or None on failure.

    Covariates are forwarded only when they are supplied AND the model object
    exposes an attribute of the same name (``past_covariates`` /
    ``future_covariates``). Future covariates reach ``fit`` only together with
    past covariates. Any exception is printed and turned into a None result.

    NOTE(review): confirm that the Darts models used in this notebook really
    expose attributes with these names; if they do not, the covariates are
    silently left out of both ``fit`` and ``predict``.

    Parameters:
        model: Object providing ``predict`` and, optionally, ``fit``.
        series (TimeSeries): Target series the model is fitted on.
        forecast_horizon (int): Number of steps passed to ``model.predict``.
        past_covariates (TimeSeries, optional): Past covariates for ``fit``.
        future_covariates (TimeSeries, optional): Future covariates for ``fit`` and ``predict``.

    Returns:
        The value returned by ``model.predict``, or None if anything raised.
    """
    try:
        # Training step (skipped entirely for objects without a fit method).
        if hasattr(model, 'fit'):
            fit_kwargs = {}
            if past_covariates is not None and hasattr(model, 'past_covariates'):
                fit_kwargs['past_covariates'] = past_covariates
                if future_covariates is not None and hasattr(model, 'future_covariates'):
                    fit_kwargs['future_covariates'] = future_covariates
            model.fit(series, **fit_kwargs)

        # Prediction step.
        if not hasattr(model, 'predict'):
            raise ValueError("Model does not support prediction")

        predict_kwargs = {}
        if future_covariates is not None and hasattr(model, 'future_covariates'):
            predict_kwargs['future_covariates'] = future_covariates
        return model.predict(forecast_horizon, **predict_kwargs)
    except Exception as error:
        print(f"An error occurred: {error}")
        return None


# LSTM MODEL
def encode_year(idx):
    """Return the year of ``idx`` as an offset from 1950, in units of 50 years."""
    years_since_1950 = idx.year - 1950
    return years_since_1950 / 50

# n scales both the input window and the hidden size.
n = 10

# The LSTM variant is selected through BlockRNNModel's `model` argument.
model = BlockRNNModel(
    input_chunk_length=forecast_horizon * n,
    output_chunk_length=forecast_horizon,
    hidden_dim=25 * n,
    dropout=0.1,
    n_epochs=100,
    batch_size=32,
    optimizer_kwargs={'lr': 1e-3},
    model='LSTM',
)

# Back-test: fit on the training split and request one forecast step per
# test observation. train_features is handed over as past covariates;
# fit_and_forecast_model decides whether the model actually receives them.
forecast_test = fit_and_forecast_model(
    model, train_series, len(test_series), past_covariates=train_features
)

# Second run: the same model instance is fitted on the full normalized
# series and again asked for len(test_series) steps.
forecast_future = fit_and_forecast_model(
    model, normalized_series, len(test_series), past_covariates=combined_features_ts
)

# Bring the forecasts back to the original scale. fit_and_forecast_model
# returns None when it fails, and that None is carried through unchanged.
forecast_test_original = (
    None if forecast_test is None else scaler_series.inverse_transform(forecast_test)
)
forecast_future_original = (
    None if forecast_future is None else scaler_series.inverse_transform(forecast_future)
)

# The train and test splits are un-scaled as well so everything is plotted
# in the same units.
train_series_original = scaler_series.inverse_transform(train_series)
test_series_original = scaler_series.inverse_transform(test_series)

# Plot only when both forecasts were produced.
if not (forecast_test_original is None or forecast_future_original is None):
    plot_forecast(
        train_series_original,
        test_series_original,
        forecast_test_original,
        forecast_future_original,
    )


In [None]:
#Model: Transformer
#TRANSFORMER MODEL
from darts import TimeSeries
from darts.models import TransformerModel


def fit_and_forecast_model(model, series, forecast_horizon, past_covariates=None, future_covariates=None):
    """
    Train ``model`` on ``series`` and return its forecast, or None on failure.

    Covariates are forwarded only when they are supplied AND the model object
    exposes an attribute of the same name (``past_covariates`` /
    ``future_covariates``). Future covariates reach ``fit`` only together with
    past covariates. Any exception is printed and turned into a None result.

    NOTE(review): confirm that the Darts models used in this notebook really
    expose attributes with these names; if they do not, the covariates are
    silently left out of both ``fit`` and ``predict``.

    Parameters:
        model: Object providing ``predict`` and, optionally, ``fit``.
        series (TimeSeries): Target series the model is fitted on.
        forecast_horizon (int): Number of steps passed to ``model.predict``.
        past_covariates (TimeSeries, optional): Past covariates for ``fit``.
        future_covariates (TimeSeries, optional): Future covariates for ``fit`` and ``predict``.

    Returns:
        The value returned by ``model.predict``, or None if anything raised.
    """
    try:
        # Training step (skipped entirely for objects without a fit method).
        if hasattr(model, 'fit'):
            fit_kwargs = {}
            if past_covariates is not None and hasattr(model, 'past_covariates'):
                fit_kwargs['past_covariates'] = past_covariates
                if future_covariates is not None and hasattr(model, 'future_covariates'):
                    fit_kwargs['future_covariates'] = future_covariates
            model.fit(series, **fit_kwargs)

        # Prediction step.
        if not hasattr(model, 'predict'):
            raise ValueError("Model does not support prediction")

        predict_kwargs = {}
        if future_covariates is not None and hasattr(model, 'future_covariates'):
            predict_kwargs['future_covariates'] = future_covariates
        return model.predict(forecast_horizon, **predict_kwargs)
    except Exception as error:
        print(f"An error occurred: {error}")
        return None


# TRANSFORMER MODEL
def encode_year(idx):
    """Return the year of ``idx`` as an offset from 1950, in units of 50 years."""
    years_since_1950 = idx.year - 1950
    return years_since_1950 / 50

# The input window is n times as long as the forecast horizon.
n = 10

model = TransformerModel(
    input_chunk_length=forecast_horizon * n,
    output_chunk_length=forecast_horizon,
    dropout=0.1,
    n_epochs=100,
    batch_size=32,
    optimizer_kwargs={'lr': 1e-3},
)

# Back-test: fit on the training split and request one forecast step per
# test observation. train_features is handed over as past covariates;
# fit_and_forecast_model decides whether the model actually receives them.
forecast_test = fit_and_forecast_model(
    model, train_series, len(test_series), past_covariates=train_features
)

# Second run: the same model instance is fitted on the full normalized
# series and again asked for len(test_series) steps.
forecast_future = fit_and_forecast_model(
    model, normalized_series, len(test_series), past_covariates=combined_features_ts
)

# Bring the forecasts back to the original scale. fit_and_forecast_model
# returns None when it fails, and that None is carried through unchanged.
forecast_test_original = (
    None if forecast_test is None else scaler_series.inverse_transform(forecast_test)
)
forecast_future_original = (
    None if forecast_future is None else scaler_series.inverse_transform(forecast_future)
)

# The train and test splits are un-scaled as well so everything is plotted
# in the same units.
train_series_original = scaler_series.inverse_transform(train_series)
test_series_original = scaler_series.inverse_transform(test_series)

# Plot only when both forecasts were produced.
if not (forecast_test_original is None or forecast_future_original is None):
    plot_forecast(
        train_series_original,
        test_series_original,
        forecast_test_original,
        forecast_future_original,
    )


In [None]:
#VOLATILITY: LOAD DATA
from dataclasses import dataclass
import plotly.express as px
from plotly.subplots import make_subplots
from plotly import graph_objects as go
import matplotlib.pyplot as plt
from collections import Counter
from IPython.display import display
from sklearn.preprocessing import StandardScaler

plt.rcParams["figure.figsize"] = (20, 7)

import yfinance as yf
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import coint
#from numpy_ext import rolling_apply
from arch import arch_model

#from Quantapp.Algorithm   import Algorithm
#from Quantapp.Computation import Computation
#from Quantapp.Plot        import Plot
#from Quantapp.DataManager import DataManager
#from Quantapp.Universe    import Universe

# Settings kept for the volatility section; neither name is used again in
# this cell.
time_frames = [10]

csv_file_paths = [
    'csv_files/S&P 500.csv',
]


# NOTE: this rebinds the notebook-level `period` defined in the first cell.
period = '1y'
interval = '1d'

# Retrieve benchmark data using yfinance and turn closes into simple returns;
# the first row has no predecessor, so its NaN is replaced by 0.
benchmark_ticker = 'SPY'
benchmark = yf.download(benchmark_ticker, period=period, interval=interval)
benchmark_returns = benchmark['Close'].pct_change().fillna(0)

# Retrieve risk-free rate data using yfinance.
# NOTE(review): this takes the percentage change of the ^IRX close. If ^IRX
# is quoted as a yield rather than a price, that is not a risk-free return -
# confirm the intended definition.
rf_ticker = '^IRX'
rf_data = yf.download(rf_ticker, period=period, interval=interval)
rf = rf_data['Close'].pct_change()

# Align rf with the benchmark dates FIRST and fill gaps afterwards: filling
# before reindexing would let dates missing from the ^IRX data come back as
# NaN. Reindexing on the index only also keeps rf's own column labels when
# 'Close' comes back as a DataFrame.
rf = rf.reindex(benchmark_returns.index).fillna(0)


In [None]:
#Model: LPPLS (disabled: all code in this cell is wrapped in string literals and does not run)

"""from lppls import lppls, data_loader
import numpy as np
import pandas as pd
from datetime import datetime as dt
import yfinance as yf
%matplotlib inline
# read example dataset into df 
#data = data_loader.nasdaq_dotcom()
data = yf.Ticker(ticker).history(period=period)
data['Date'] = pd.to_datetime(data.index).strftime("%Y-%m-%d")
# convert time to ordinal
time = [pd.Timestamp.toordinal(dt.strptime(t1, '%Y-%m-%d')) for t1 in data['Date']]

# create list of observation data
price = np.log(data['Close'].values)

# create observations array (expected format for LPPLS observations)
observations = np.array([time, price])

# set the max number for searches to perform before giving-up
# the literature suggests 25
MAX_SEARCHES = 25

# instantiate a new LPPLS model with the Nasdaq Dot-com bubble dataset
lppls_model = lppls.LPPLS(observations=observations)

# fit the model to the data and get back the params
tc, m, w, a, b, c, c1, c2, O, D = lppls_model.fit(MAX_SEARCHES)

# visualize the fit
lppls_model.plot_fit()

# should give a plot like the following...

# compute the confidence indicator
res = lppls_model.mp_compute_nested_fits(
    workers=8,
    window_size=120, 
    smallest_window_size=30, 
    outer_increment=1, 
    inner_increment=5, 
    max_searches=25,
    # filter_conditions_config={} # not implemented in 0.6.x
)

lppls_model.plot_confidence_indicators(res)
# should give a plot like the following...""
"""
"""from lppls import lppls_cmaes
lppls_model = lppls_cmaes.LPPLSCMAES(observations=observations)
tc, m, w, a, b, c, c1, c2, O, D = lppls_model.fit(max_iteration=2500, pop_size=4)"""

In [None]:
#VOLATILITY: PLOT
#am = arch_model(returns, p=1, o=1, q=1, power=1.0, dist="StudentsT")
# Candidate volatility specifications, all built on the benchmark returns.
arch_models = {
    'garch': arch_model(benchmark_returns),
    'egarch': arch_model(benchmark_returns, vol='EGARCH'),
    'gjrgarch': arch_model(benchmark_returns, p=1, o=1, q=1),
    'tgarch': arch_model(benchmark_returns, p=1, o=1, q=1, power=1.0),
    'studenttgarch': arch_model(benchmark_returns, p=1, o=1, q=1, power=1.0, dist="StudentsT"),
}

# Fit every specification (disp="off") and keep the results under the same keys.
arch_model_results = {}
for key, value in arch_models.items():
    arch_model_results.update({key: value.fit(disp="off")})

# Show the result plots for two of the fitted models.
arch_model_results["garch"].plot()
arch_model_results["gjrgarch"].plot()
#results = garch.fit(disp=off)
#results.summary()
#results.plot()