In [None]:
# Parameters / Functions / Classes
import yfinance as yf
import pandas as pd
from darts import TimeSeries
from darts.models import ExponentialSmoothing, ARIMA
import plotly.graph_objs as go
from darts.utils.utils import ModelMode, SeasonalityMode
from darts.metrics import mape
from scipy.stats import spearmanr
import numpy as np
from scipy.stats import pearsonr  # Import pearsonr
from IPython.display import clear_output, display

# Notebook-wide configuration (read by the functions and cells below)

# -- data selection --
ticker = 'SPY'            # symbol handed to yfinance
period = '2y'             # look-back window handed to yfinance

# -- series construction --
use_returns = False       # False: model raw closing prices; True: model percentage returns
time_frame = 21           # number of rows used as the pct_change look-back when use_returns is True

# -- evaluation / forecasting --
train_percentage = 0.8    # fraction of the series assigned to the training split
forecast_horizon = 30     # number of steps predicted beyond the last observation

def load_and_prepare_data(ticker='SPY', period='5y', time_frame=21, use_returns=False, train_percentage=0.8):
    """
    Load data from yfinance, prepare it for modeling, and split it into training and test sets.

    Closing prices are re-indexed to business-day frequency and any gaps that
    this introduces are forward-filled before the TimeSeries is built.

    Parameters:
        ticker (str): The stock ticker symbol.
        period (str): Data period, e.g., '5y' for 5 years.
        time_frame (int): Number of rows to look back when computing the percentage change.
        use_returns (bool): Whether to use returns instead of raw prices.
        train_percentage (float): The percentage of data to use for training (between 0 and 1).

    Returns:
        tuple: (series, train_series, test_series) where:
            - series is the complete TimeSeries object with either raw prices or percentage returns.
            - train_series holds the first int(len(series) * train_percentage) points.
            - test_series holds the remaining points.

    Raises:
        ValueError: If the download returns no rows, or if train_percentage would
            leave the training or the test split empty.
    """
    # Retrieve data from yfinance
    data = yf.download(ticker, period=period)
    if data.empty:
        raise ValueError(f"No data returned for ticker {ticker!r} with period {period!r}")

    # Depending on the yfinance version, data['Close'] is either a Series or a
    # one-column DataFrame keyed by ticker; normalize both to a 'Close' column.
    close = data['Close']
    if isinstance(close, pd.DataFrame):
        close = close.iloc[:, 0]
    df = close.rename('Close').to_frame()
    df.index = pd.to_datetime(df.index)
    df.index.name = 'Date'

    df = df.asfreq('B')  # 'B' stands for business day frequency
    df = df.ffill()  # Forward fill the rows introduced by asfreq

    # Calculate returns if requested
    if use_returns:
        df['Return'] = df['Close'].pct_change(periods=time_frame)
        # The first `time_frame` rows have no look-back value and are NaN
        df = df.dropna(subset=['Return'])
        value_column = 'Return'
    else:
        value_column = 'Close'

    series = TimeSeries.from_dataframe(df, value_cols=value_column, fill_missing_dates=True, freq='B')

    # Split data into training and test sets.
    # split_after() keeps the split timestamp in the first part, so splitting at
    # position split_index - 1 yields exactly split_index training points.
    split_index = int(len(series) * train_percentage)
    if not 0 < split_index < len(series):
        raise ValueError(
            f"train_percentage={train_percentage!r} leaves an empty training or test split "
            f"for a series of length {len(series)}"
        )
    train, test = series.split_after(series.time_index[split_index - 1])

    return series, train, test

def _period_rect(start, end, fillcolor):
    """Build a full-height background rectangle covering [start, end] on the x axis."""
    return dict(
        type="rect",
        xref="x",
        yref="paper",
        x0=start.strftime('%Y-%m-%d'),
        y0=0,
        x1=end.strftime('%Y-%m-%d'),
        y1=1,
        fillcolor=fillcolor,
        opacity=0.3,
        layer="below",
        line_width=0,
    )


def _period_annotation(x, y, text, font_color, bgcolor):
    """Build an arrow annotation labelling a period at data coordinates (x, y)."""
    return dict(
        x=x,
        y=y,
        xref="x",
        yref="y",
        text=text,
        showarrow=True,
        arrowhead=7,
        ax=0,
        ay=-40,
        font=dict(size=12, color=font_color),
        bgcolor=bgcolor,
        opacity=0.7,
    )


def plot_forecast(series, forecast, title='Price Forecast'):
    """
    Plot the actual and forecasted data with highlighted training, testing, and forecast periods.

    The training/testing boundary is derived from the module-level
    `train_percentage` setting, not from an argument.

    Parameters:
        series (TimeSeries): The full time series data.
        forecast (TimeSeries): The forecasted data.
        title (str): Title of the plot.

    Returns:
        fig (plotly.graph_objs.Figure): The plotly figure object.
    """
    # Period boundaries
    split_index = int(len(series) * train_percentage)
    train_end_date = series.time_index[split_index - 1]
    test_start_date = train_end_date + pd.DateOffset(days=1)
    test_end_date = series.time_index[-1]
    # Use the forecast's own first timestamp: "last observation + 1 calendar day"
    # can land on a weekend, before the forecast actually begins.
    forecast_start_date = forecast.time_index[0]
    forecast_end_date = forecast.time_index[-1]

    series_values = series.values().flatten()
    forecast_values = forecast.values().flatten()
    series_peak = series_values.max()
    forecast_peak = forecast_values.max()

    actual = go.Scatter(x=series.time_index, y=series_values, mode='lines', name='Actual')
    forecasted = go.Scatter(x=forecast.time_index, y=forecast_values, mode='lines', name='Forecast')

    layout = go.Layout(
        title=title,
        xaxis=dict(title='Date'),
        yaxis=dict(title='Price'),
        shapes=[
            _period_rect(series.time_index[0], train_end_date, "LightBlue"),
            _period_rect(test_start_date, test_end_date, "LightCoral"),
            _period_rect(forecast_start_date, forecast_end_date, "LightGreen"),
        ],
        annotations=[
            _period_annotation(train_end_date, series_peak, "Training Period", "blue", "LightBlue"),
            _period_annotation(test_end_date, series_peak, "Testing Period", "red", "LightCoral"),
            _period_annotation(forecast_start_date, forecast_peak, "Forecast Period", "green", "LightGreen"),
        ],
    )

    fig = go.Figure(data=[actual, forecasted], layout=layout)
    return fig

def fit_and_forecast_model(model, series, forecast_horizon=30):
    """
    Train `model` on the complete series and project it forward.

    Parameters:
        model: A Darts forecasting model; it is fitted in place.
        series (TimeSeries): The full time series data used for fitting.
        forecast_horizon (int): How many steps to predict past the end of `series`.

    Returns:
        future_forecast (TimeSeries): The predicted values, indexed by the
            business days that follow the last timestamp of `series`.
    """
    # Use every available observation for the final fit, then predict
    model.fit(series)
    predicted = model.predict(forecast_horizon)

    # Rebuild the index: business days starting after the last observed date
    first_candidate = series.time_index[-1] + pd.Timedelta(days=1)
    business_days = pd.date_range(start=first_candidate, periods=forecast_horizon, freq='B')

    return TimeSeries.from_times_and_values(business_days, predicted.values())

# Build the full series and its train/test split from the notebook-level settings
series, train, test = load_and_prepare_data(
    ticker=ticker,
    period=period,
    time_frame=time_frame,
    use_returns=use_returns,
    train_percentage=train_percentage,
)




In [None]:
# Baseline: exponential smoothing with the library's default settings
model = ExponentialSmoothing()

# Fit on the whole series and project `forecast_horizon` steps ahead
future_forecast = fit_and_forecast_model(
    model=model,
    series=series,
    forecast_horizon=forecast_horizon,
)

# Render the interactive Plotly chart
fig = plot_forecast(series=series, forecast=future_forecast)
fig.show()


In [None]:
# Exponential smoothing parameter optimization / MAPE
from itertools import product

# Candidate settings; only additive components are tried when modelling returns
parameters = {
    'trend': [ModelMode.ADDITIVE] if use_returns else [ModelMode.ADDITIVE, ModelMode.MULTIPLICATIVE],
    'seasonal': [SeasonalityMode.ADDITIVE] if use_returns else [SeasonalityMode.ADDITIVE, SeasonalityMode.MULTIPLICATIVE],
    'seasonal_periods': list(range(2, 48))
}

# Track the candidate with the lowest MAPE on the test split
best_mape = float('inf')
best_model = None
best_params = {}

for trend, seasonal, seasonal_periods in product(
    parameters['trend'], parameters['seasonal'], parameters['seasonal_periods']
):
    model = ExponentialSmoothing(
        trend=trend,
        seasonal=seasonal,
        seasonal_periods=seasonal_periods
    )
    model.fit(train)
    forecast = model.predict(len(test))
    error = mape(test, forecast)

    # Show only the most recent candidate; report the metric this cell optimizes
    clear_output(wait=True)
    print(f'Trend: {trend}, Seasonal: {seasonal}, Seasonal Periods: {seasonal_periods} - MAPE: {error:.4f}')

    if error < best_mape:
        best_mape = error
        best_model = model
        best_params = {
            'trend': trend,
            'seasonal': seasonal,
            'seasonal_periods': seasonal_periods
        }

# Fail with a clear message instead of an AttributeError inside fit_and_forecast_model
if best_model is None:
    raise RuntimeError('Grid search did not select a model (no candidate produced a comparable MAPE)')

print(f'Best Parameters: {best_params} - Best MAPE: {best_mape:.4f}')

# Forecast future values using the chosen model (it is refit on the full series)
future_forecast = fit_and_forecast_model(best_model, series, forecast_horizon)

# Plot the results using Plotly
fig = plot_forecast(series, future_forecast)
fig.show()

In [None]:
# Exponential smoothing grid search, ranked by Pearson correlation on the test split
parameters = dict(
    trend=[ModelMode.ADDITIVE] if use_returns else [ModelMode.ADDITIVE, ModelMode.MULTIPLICATIVE],
    seasonal=[SeasonalityMode.ADDITIVE] if use_returns else [SeasonalityMode.ADDITIVE, SeasonalityMode.MULTIPLICATIVE],
    seasonal_periods=list(range(2, 48)),
)

# Running winner; any real correlation beats negative infinity
best_corr = float('-inf')
best_model = None
best_params = {}

# Walk every (trend, seasonal, seasonal_periods) combination in grid order
for trend, seasonal, seasonal_periods in (
    (trend_mode, seasonal_mode, n_periods)
    for trend_mode in parameters['trend']
    for seasonal_mode in parameters['seasonal']
    for n_periods in parameters['seasonal_periods']
):
    model = ExponentialSmoothing(
        trend=trend,
        seasonal=seasonal,
        seasonal_periods=seasonal_periods
    )
    model.fit(train)
    forecast = model.predict(len(test))

    # Flatten both series to 1-D numpy arrays for scipy
    forecast_np = forecast.pd_dataframe().values.flatten()
    test_np = test.pd_dataframe().values.flatten()

    # A correlation needs at least two points on each side
    if len(forecast_np) <= 1 or len(test_np) <= 1:
        corr = np.nan
    else:
        corr, _ = pearsonr(test_np, forecast_np)

    clear_output(wait=True)  # keep only the latest progress line visible
    print(f'Trend: {trend}, Seasonal: {seasonal}, Seasonal Periods: {seasonal_periods} - Pearson Correlation: {corr:.4f}')
    display()

    # Not an improvement (this also covers NaN, which never compares greater)
    if not corr > best_corr:
        continue

    best_corr = corr
    best_model = model
    best_params = dict(
        trend=trend,
        seasonal=seasonal,
        seasonal_periods=seasonal_periods,
    )

print(f'Best Parameters: {best_params} - Best Pearson Correlation: {best_corr:.4f}')


# Refit the winner on the full series and project it forward
future_forecast = fit_and_forecast_model(best_model, series, forecast_horizon)

# Draw the Plotly chart
fig = plot_forecast(series, future_forecast)
fig.show()

In [None]:
#Rolling Pearson Correlation of Forecast
import numpy as np
import plotly.graph_objects as go
from darts import TimeSeries
from scipy.stats import pearsonr

# Number of consecutive observations in each rolling-correlation window;
# passed as `window_size` to rolling_pearson_corr below.
rolling_window_size = 20  # Adjust this as needed

def rolling_pearson_corr(actual_series, forecast_series, window_size):
    """
    Compute the Pearson correlation over every sliding window of two sequences.

    Parameters:
        actual_series: Sliceable sequence of observed values.
        forecast_series: Sliceable sequence of predicted values, sliced with
            the same positions as `actual_series`.
        window_size (int): Number of consecutive points per window.

    Returns:
        list: One correlation per window start, in order. A window with fewer
            than two points on either side yields NaN. The list is empty when
            `window_size` exceeds the length of `actual_series`.
    """
    def _window_corr(lo):
        # Correlation of the window that starts at position `lo`
        hi = lo + window_size
        observed = actual_series[lo:hi]
        predicted = forecast_series[lo:hi]
        if len(observed) <= 1 or len(predicted) <= 1:
            return np.nan  # too few points for a correlation
        r_value, _ = pearsonr(observed, predicted)
        return r_value

    last_start = len(actual_series) - window_size
    return [_window_corr(lo) for lo in range(last_start + 1)]

# Evaluate the selected configuration on the held-out test split.
# After a grid-search cell, `forecast` still holds the prediction of the LAST
# candidate tried (not the winner), and `best_model` has since been refit on the
# full series by fit_and_forecast_model. So rebuild the winning configuration
# from `best_params` and fit it on the training split only.
eval_model = ExponentialSmoothing(**best_params)
eval_model.fit(train)
forecast = eval_model.predict(len(test))

# Convert TimeSeries to 1-D numpy arrays
test_np = test.values().flatten()
forecast_np = forecast.values().flatten()

# Compute rolling Pearson correlation
rolling_corrs = rolling_pearson_corr(test_np, forecast_np, rolling_window_size)

# Create a Plotly figure
fig = go.Figure()

# Add rolling Pearson correlation trace (x is the 1-based window number)
fig.add_trace(
    go.Scatter(
        x=list(range(1, len(rolling_corrs) + 1)),
        y=rolling_corrs,
        mode='lines+markers',
        name='Rolling Pearson Correlation',
        marker=dict(color='blue'),
        line=dict(color='blue')
    )
)

# Update layout for better visualization
fig.update_layout(
    title='Rolling Pearson Correlation of Forecast',
    xaxis_title='Rolling Window',
    yaxis_title='Pearson Correlation',
    template='plotly_white'
)

# Show the plot
fig.show()


In [None]:
# Exponential smoothing parameter optimization / Spearman correlation
from itertools import product

# Candidate settings; only additive components are tried when modelling returns
parameters = {
    'trend': [ModelMode.ADDITIVE] if use_returns else [ModelMode.ADDITIVE, ModelMode.MULTIPLICATIVE],
    'seasonal': [SeasonalityMode.ADDITIVE] if use_returns else [SeasonalityMode.ADDITIVE, SeasonalityMode.MULTIPLICATIVE],
    'seasonal_periods': list(range(2, 48))
}

# Initialize variables for the best model
best_corr = -float('inf')  # Start with the lowest possible correlation
best_model = None
best_params = {}

# The actual values do not change between candidates, so extract them once
test_np = test.values().flatten()

# Iterate through the parameter grid
for trend, seasonal, seasonal_periods in product(
    parameters['trend'], parameters['seasonal'], parameters['seasonal_periods']
):
    model = ExponentialSmoothing(
        trend=trend,
        seasonal=seasonal,
        seasonal_periods=seasonal_periods
    )
    model.fit(train)
    forecast = model.predict(len(test))
    forecast_np = forecast.values().flatten()

    # Calculate Spearman correlation for this forecast
    if len(forecast_np) > 1 and len(test_np) > 1:  # Ensure enough data points
        corr, _ = spearmanr(test_np, forecast_np)
    else:
        corr = np.nan

    # Show only the most recent candidate, labelled with the metric actually computed
    clear_output(wait=True)
    print(f'Trend: {trend}, Seasonal: {seasonal}, Seasonal Periods: {seasonal_periods} - Spearman Correlation: {corr:.4f}')

    # A NaN correlation never compares greater, so such candidates are skipped
    if corr > best_corr:
        best_corr = corr
        best_model = model
        best_params = {
            'trend': trend,
            'seasonal': seasonal,
            'seasonal_periods': seasonal_periods
        }

# Fail with a clear message instead of an AttributeError inside fit_and_forecast_model
if best_model is None:
    raise RuntimeError('Grid search did not select a model (every correlation was NaN)')

print(f'Best Parameters: {best_params} - Best Spearman Correlation: {best_corr:.4f}')


# Forecast future values using the chosen model (it is refit on the full series)
future_forecast = fit_and_forecast_model(best_model, series, forecast_horizon)

# Plot the results using Plotly
fig = plot_forecast(series, future_forecast)
fig.show()

In [None]:
# Exponential smoothing parameter optimization / Weighted

import yfinance as yf
import pandas as pd
from darts import TimeSeries
from darts.models import ExponentialSmoothing
import plotly.graph_objs as go
from darts.utils.utils import ModelMode, SeasonalityMode
from darts.metrics import mape
from scipy.stats import spearmanr
import numpy as np

# Define parameter grid
from itertools import product

parameters = {
    'trend': [ModelMode.ADDITIVE] if use_returns else [ModelMode.ADDITIVE, ModelMode.MULTIPLICATIVE],
    'seasonal': [SeasonalityMode.ADDITIVE] if use_returns else [SeasonalityMode.ADDITIVE, SeasonalityMode.MULTIPLICATIVE],
    'seasonal_periods': list(range(2, 48))
}

# Weights of the two metrics in the combined score (adjust as needed)
weight_mape = 0.5
weight_corr = 0.5

# Initialize variables for the best model
best_score = -float('inf')  # Start with the lowest possible score
best_model = None
best_params = {}

# One record per candidate, kept for later inspection
model_performance = []

# The actual values do not change between candidates, so extract them once
test_np = test.values().flatten()

# Iterate through the parameter grid
for trend, seasonal, seasonal_periods in product(
    parameters['trend'], parameters['seasonal'], parameters['seasonal_periods']
):
    model = ExponentialSmoothing(
        trend=trend,
        seasonal=seasonal,
        seasonal_periods=seasonal_periods
    )
    model.fit(train)
    forecast = model.predict(len(test))
    forecast_np = forecast.values().flatten()

    # Calculate Spearman correlation for this forecast
    if len(forecast_np) > 1 and len(test_np) > 1:  # Ensure enough data points
        corr, _ = spearmanr(test_np, forecast_np)
    else:
        corr = np.nan

    # Calculate MAPE for this forecast (darts reports it as a percentage)
    mape_value = mape(test, forecast)

    # Combine metrics as a weighted sum. MAPE is converted from percent to a
    # fraction so that both terms live on a comparable scale; otherwise the
    # MAPE term would dwarf the correlation, which is bounded by [-1, 1].
    score = weight_mape * (1 - mape_value / 100) + weight_corr * corr

    # Store model performance
    model_performance.append({
        'trend': trend,
        'seasonal': seasonal,
        'seasonal_periods': seasonal_periods,
        'MAPE': mape_value,
        'Correlation': corr,
        'Score': score
    })

    clear_output(wait=True)  # Show only the most recent candidate
    print(f'Trend: {trend}, Seasonal: {seasonal}, Seasonal Periods: {seasonal_periods} - MAPE: {mape_value:.4f}, Correlation: {corr:.4f}, Score: {score:.4f}')

    # Update the best model if the current score is better.
    # A NaN score (from a NaN correlation) never compares greater and is skipped.
    if score > best_score:
        best_score = score
        best_model = model
        best_params = {
            'trend': trend,
            'seasonal': seasonal,
            'seasonal_periods': seasonal_periods
        }

# Fail with a clear message instead of an AttributeError inside fit_and_forecast_model
if best_model is None:
    raise RuntimeError('Grid search did not select a model (every score was NaN)')

print(f'Best Parameters: {best_params} - Best Score: {best_score:.4f}')


# Forecast future values using the chosen model (it is refit on the full series)
future_forecast = fit_and_forecast_model(best_model, series, forecast_horizon)

# Plot the results using Plotly
fig = plot_forecast(series, future_forecast)
fig.show()


In [None]:
# Seasonal ARIMA configuration
p, d, q = 1, 1, 1        # non-seasonal orders
P, D, Q = 1, 1, 1        # seasonal orders
seasonal_periods = 12    # season length, counted in time steps of the series

# Build the model; the seasonal part is passed as the tuple (P, D, Q, season length)
model = ARIMA(p=p, d=d, q=q, seasonal_order=(P, D, Q, seasonal_periods))

# Fit on the whole series and project `forecast_horizon` steps ahead
future_forecast = fit_and_forecast_model(
    model=model,
    series=series,
    forecast_horizon=forecast_horizon,
)

# Render the interactive Plotly chart
fig = plot_forecast(series=series, forecast=future_forecast)
fig.show()
