## Machine Learning Models

In [None]:
import yfinance as yf

prove = yf.download('EC', '2024-01-01', '2025-01-30')

print(prove.tail(10))


In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
import yfinance as yf
import logging
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Function to load and preprocess stock data
def load_data(ticker, start_date, end_date):
    try:
        data = yf.download(ticker, start=start_date, end=end_date)
        if 'Close' not in data.columns:
            raise ValueError("No 'Close' price found in the data.")
        
        data = data[['Close']].dropna().asfreq('B')
        data['Close'] = data['Close'].ffill().interpolate(method='time').interpolate(method='spline', order=3)
        return data
    except Exception as e:
        logger.error(f"Error loading data: {e}")
        return None

# Add time-based features
def add_features(data):
    data = data.copy()
    data['Day'] = data.index.day
    data['Month'] = data.index.month
    data['Year'] = data.index.year
    data['DayOfWeek'] = data.index.dayofweek
    data['SMA_10'] = data['Close'].rolling(window=10, min_periods=1).mean()
    data['SMA_30'] = data['Close'].rolling(window=30, min_periods=1).mean()
    return data

# Rolling window split
def split_data(data, window_size=0.8, step_size=5):
    n_samples = len(data)
    window_size = int(n_samples * window_size) if isinstance(window_size, float) else window_size
    window_size = max(1, min(window_size, n_samples - 1))
    
    splits = []
    for start in range(0, n_samples - window_size, step_size):
        train = data.iloc[start:start + window_size]
        test = data.iloc[start + window_size:start + window_size + step_size]
        if len(test) == 0:
            break
        splits.append((train, test))
    
    logger.info(f"Generated {len(splits)} rolling windows with size {window_size} and step {step_size}.")
    return splits

# Train model with rolling window
def train_model(model, param_grid, data_splits):
    all_y_test, all_y_pred = [], []
    best_model = None
    
    for i, (train, test) in enumerate(data_splits):
        X_train, y_train = train.drop(columns=['Close']), train[['Close']].squeeze()
        X_test, y_test = test.drop(columns=['Close']), test[['Close']].squeeze()

        if X_train.empty or X_test.empty:
            logger.error(f"Skipping window {i+1}: No valid features for training.")
            continue

        try:
            if param_grid:
                grid_search = GridSearchCV(model, param_grid, cv=3, scoring='neg_mean_absolute_error', n_jobs=-1)
                grid_search.fit(X_train, y_train)
                best_model = grid_search.best_estimator_
            else:
                best_model = model.fit(X_train, y_train)

            y_pred = best_model.predict(X_test)

            # Ensure values are arrays before extending lists
            all_y_test.extend(np.atleast_1d(y_test))
            all_y_pred.extend(np.atleast_1d(y_pred))
        except Exception as e:
            logger.error(f"Error training {type(model).__name__} on window {i+1}: {e}")
            continue

    return {'y_test_all': all_y_test, 'y_pred_all': all_y_pred, 'best_model': best_model}

# Evaluate model
def evaluate_model(y_test, y_pred, model_name="Model"):
    if len(y_test) == 0 or len(y_pred) == 0:
        logger.error(f"No valid predictions for {model_name}. Skipping evaluation.")
        return
    mae, mse, r2 = mean_absolute_error(y_test, y_pred), mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
    logger.info(f"{model_name} - MAE: {mae:.4f}, MSE: {mse:.4f}, R²: {r2:.4f}")

# Plot forecasts
def plot_forecast_single_step(data_splits, models):
    plt.figure(figsize=(12, 6))
    best_models = {}
    
    for name, (model, param_grid) in models.items():
        results = train_model(model, param_grid, data_splits)
        if not results['y_pred_all']:
            logger.error(f"No valid predictions for {name}. Skipping plot.")
            continue
        
        best_models[name] = results['best_model']
        test_dates = np.concatenate([test.index for _, test in data_splits])
        plt.plot(test_dates, results['y_pred_all'], linestyle='dashed', marker='x', label=f'{name} (Predicted)')
        evaluate_model(results['y_test_all'], results['y_pred_all'], name)
    
    full_test_dates = np.concatenate([test.index for _, test in data_splits])
    full_test_close = np.concatenate([test['Close'].values for _, test in data_splits])
    plt.plot(full_test_dates, full_test_close, label='Actual', marker='o', color='black', linewidth=2)
    
    # Plot moving averages
    plt.plot(full_test_dates, np.concatenate([test['SMA_10'].values for _, test in data_splits]), label='SMA 10', linestyle='dotted', color='blue')
    plt.plot(full_test_dates, np.concatenate([test['SMA_30'].values for _, test in data_splits]), label='SMA 30', linestyle='dotted', color='red')
    
    plt.title('Rolling Window Forecasts (Single-Step)', fontsize=14)
    plt.xlabel('Time', fontsize=12)
    plt.ylabel('Close Price', fontsize=12)
    plt.legend(fontsize=10)
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    plt.show(block=True)
    return best_models

# Define models and hyperparameters
models = {
    'DecisionTree': (DecisionTreeRegressor(random_state=42), {'max_depth': [5, 10], 'min_samples_split': [2, 5]}),
    'RandomForest': (RandomForestRegressor(random_state=42), {'n_estimators': [50, 100], 'max_depth': [3, 5]}),
    'LinearRegression': (LinearRegression(), {}),
    'SVM': (SVR(), {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']}),
    'PerceptronNN': (MLPRegressor(max_iter=500, random_state=42), {'hidden_layer_sizes': [(50,), (100,)], 'alpha': [0.0001, 0.001]})
}

# Run the pipeline
FinIns_close = add_features(load_data('EC', '2022-01-01', '2025-01-30'))
data_splits = split_data(FinIns_close, window_size=0.8, step_size=20)
plot_forecast_single_step(data_splits, models)


In [27]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import yfinance as yf
import logging
import json
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Function to load and preprocess stock data
def load_data(ticker, start_date, end_date):
    try:
        data = yf.download(ticker, start=start_date, end=end_date)
        if 'Close' not in data.columns:
            raise ValueError("No 'Close' price found in the data.")
        
        data = data[['Close']].dropna().asfreq('B')
        data['Close'] = data['Close'].ffill().interpolate(method='time').interpolate(method='spline', order=3)
        # Handle MultiIndex Columns
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = data.columns.get_level_values(0)
        return data
    except Exception as e:
        logger.error(f"Error loading data: {e}")
        return None

# Add time-based features
def add_features(data):
    data = data.copy()
    data['Day'] = data.index.day
    data['Month'] = data.index.month
    data['Year'] = data.index.year
    data['DayOfWeek'] = data.index.dayofweek
    data['SMA_10'] = data['Close'].rolling(window=10, min_periods=1).mean()
    data['SMA_30'] = data['Close'].rolling(window=30, min_periods=1).mean()
    return data

def create_lagged_features(data, n_lags=30, n_forecasts=10):
    """
    Transforms a time series into a supervised learning format, ensuring compatibility
    with time series train-test-validation splitting.

    Args:
        data (pd.Series or np.ndarray): Time series data.
        n_lags (int): Number of past observations to use as features.
        n_forecasts (int): Number of future steps to predict.

    Returns:
        pd.DataFrame: DataFrame containing lagged features and future targets.
    """
    if isinstance(data, np.ndarray):
        data = pd.Series(data)
    elif not isinstance(data, pd.Series):
        raise ValueError("Input data must be a Pandas Series or a NumPy array.")
    
    df = pd.DataFrame()
    
    # Create lagged features
    for i in range(1, n_lags + 1):
        df[f'lag_{i}'] = data.shift(i)
    
    # Create forecast targets
    for i in range(1, n_forecasts + 1):
        df[f'forecast_{i}'] = data.shift(-i)
    
    # Drop rows with NaN values
    df.dropna(inplace=True)
    
    return df

def split_data_multi(data: pd.DataFrame, train_size: float = 0.8, test_size: float = 0.1):
    """
    Splits time-series data into three sets: train, test, and validation for multi-step forecasting.
    
    Args:
        data (pd.DataFrame): DataFrame containing lagged features and future targets.
        train_size (float or int): Fraction or absolute number of samples for training.
        test_size (float or int): Fraction or absolute number of samples for testing.
        forecast_horizon (int): Number of future steps for final validation (out-of-sample).

    Returns:
        tuple: (train, test, validation) where:
            - train (pd.DataFrame): Training set with historical data.
            - test (pd.DataFrame): In-sample test set (lagged).
            - validation (pd.DataFrame): Final validation set (not lagged).
    """

    n_samples = len(data)

    # Convert train_size and test_size to absolute numbers
    if isinstance(train_size, float):
        train_size = int(n_samples * train_size)
    if isinstance(test_size, float):
        test_size = int(n_samples * test_size)

    # Perform the split
    train = data.iloc[:train_size]  # First portion for training
    test = data.iloc[train_size:train_size + test_size]  # Middle portion for in-sample testing
    validation = data.iloc[train_size + test_size:n_samples]  # Future (non-lagged)

    logger.info(f"Data split: Train ({len(train)} samples), Test ({len(test)} samples), Validation ({len(validation)} samples).")

    return train, test, validation

def train_model_multi(model, train, test, validation):
    """
    Trains a time-series forecasting model using lagged features for training and testing,
    while evaluating performance on a separate validation set (lagged as well).

    Args:
        model (sklearn model): The base forecasting model to be trained.
        train (pd.DataFrame): Lagged training dataset.
        test (pd.DataFrame): Lagged test dataset for in-sample evaluation.
        validation (pd.DataFrame): Lagged dataset for final validation.

    Returns:
        dict: A dictionary containing:
            - 'trained_models': List of trained models (one per step).
            - 'y_pred_train': Predictions for the training set.
            - 'y_pred_test': Predictions for the test set.
            - 'y_pred_validation': Predictions for the validation set.
            - 'metrics_test': Performance metrics on test data.
            - 'metrics_validation': Performance metrics on validation data.
    """
    X_train, y_train = train.iloc[:, :-1], train.iloc[:, -1]
    X_test, y_test = test.iloc[:, :-1], test.iloc[:, -1]
    X_val, y_val = validation.iloc[:, :-1], validation.iloc[:, -1]
    
    model.fit(X_train, y_train)
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)
    y_pred_val = model.predict(X_val)
    
    metrics_test = {
        'MAE': mean_absolute_error(y_test, y_pred_test),
        'MSE': mean_squared_error(y_test, y_pred_test),
        'R2' : r2_score(y_test, y_pred_test)
    }
    
    metrics_validation = {
        'MAE': mean_absolute_error(y_val, y_pred_val),
        'MSE': mean_squared_error(y_val, y_pred_val),
        'R2' : r2_score(y_val, y_pred_val)
    }
    
    return {
        'trained_model': model,
        'y_pred_train': y_pred_train,
        'y_pred_test': y_pred_test,
        'y_pred_validation': y_pred_val,
        'metrics_test': metrics_test,
        'metrics_validation': metrics_validation
    }

def forecast_future_values(model, data, n_forecasts=10):
    """
    Generates future forecasted values using a trained model.

    Args:
        model: Trained forecasting model.
        data (pd.DataFrame): Recent data to base forecasts on.
        n_forecasts (int): Number of future steps to predict.

    Returns:
        pd.DataFrame: DataFrame containing forecasted values.
    """
    last_known = data.iloc[-1, :-1].values.reshape(1, -1)
    forecasts = []
    
    for _ in range(n_forecasts):
        prediction = model.predict(last_known)[0]
        forecasts.append(prediction)
        last_known = np.roll(last_known, -1)
        last_known[0, -1] = prediction  # Update with the new prediction
    
    forecast_dates = pd.date_range(start=data.index[-1], periods=n_forecasts + 1, freq='B')[1:]
    forecast_df = pd.DataFrame({'Date': forecast_dates, 'Forecast': forecasts})
    return forecast_df

def multi_plot_forecasts(actual, model_results):
    """
    Plots actual vs predicted values for train, test, and validation sets using Plotly.
    Includes a 5% variance analysis for better visualization.

    Args:
        actual (pd.DataFrame): The dataset containing actual values.
        model_results (dict): The dictionary containing trained models and predictions.
    """
    # Extract actual and predicted values
    y_train_pred = model_results.get('y_pred_train')
    y_test_pred = model_results.get('y_pred_test')
    y_val_pred = model_results.get('y_pred_validation')
    
    if y_train_pred is None or y_test_pred is None or y_val_pred is None:
        raise ValueError("Missing required keys in model_results dictionary.")
    
    # Ensure predictions are 1D by flattening if necessary
    y_train_pred = np.array(y_train_pred).flatten()
    y_test_pred = np.array(y_test_pred).flatten()
    y_val_pred = np.array(y_val_pred).flatten()
    
    # Define lengths for each segment
    train_length = len(y_train_pred)
    test_length = len(y_test_pred)
    val_length = len(y_val_pred)
    
    # Split actual values to match the predictions
    y_train_actual = np.squeeze(actual.iloc[:train_length, -1])
    y_test_actual = np.squeeze(actual.iloc[train_length:train_length + test_length, -1])
    y_val_actual = np.squeeze(actual.iloc[train_length + test_length:train_length + test_length + val_length, -1])
    
    # Create time indices for plotting
    train_index = actual.index[:train_length]
    test_index = actual.index[train_length:train_length + test_length]
    val_index = actual.index[train_length + test_length:train_length + test_length + val_length]
    
    # Create Plotly figure
    fig = go.Figure()
    
    # Add actual vs predicted traces for train, test, and validation
    fig.add_trace(go.Scatter(
        x=train_index, y=y_train_actual, mode='lines+markers', name="Actual Train",
        line=dict(color='black', width=2), marker=dict(size=6)
    ))
    fig.add_trace(go.Scatter(
        x=train_index, y=y_train_pred, mode='lines', name="Predicted Train",
        line=dict(color='blue', width=2, dash='dash')
    ))
    
    fig.add_trace(go.Scatter(
        x=test_index, y=y_test_actual, mode='lines+markers', name="Actual Test",
        line=dict(color='red', width=2), marker=dict(size=6)
    ))
    fig.add_trace(go.Scatter(
        x=test_index, y=y_test_pred, mode='lines', name="Predicted Test",
        line=dict(color='orange', width=2, dash='dash')
    ))
    
    fig.add_trace(go.Scatter(
        x=val_index, y=y_val_actual, mode='lines+markers', name="Actual Validation",
        line=dict(color='green', width=2), marker=dict(size=6)
    ))
    fig.add_trace(go.Scatter(
        x=val_index, y=y_val_pred, mode='lines', name="Predicted Validation",
        line=dict(color='purple', width=2, dash='dash')
    ))
    
    # Add 5% variance bands
    for idx, (actual_vals, pred_vals) in enumerate(zip(
        [y_train_actual, y_test_actual, y_val_actual],
        [y_train_pred, y_test_pred, y_val_pred]
    )):
        upper_bound = pred_vals * 1.05  # 5% upper bound
        lower_bound = pred_vals * 0.95  # 5% lower bound
        x_vals = [train_index, test_index, val_index][idx]
        
        fig.add_trace(go.Scatter(
            x=x_vals, y=upper_bound, mode='lines', line=dict(width=0),
            showlegend=False, fillcolor='rgba(0,100,80,0.2)', fill='tonexty',
            name="5% Variance"
        ))
        fig.add_trace(go.Scatter(
            x=x_vals, y=lower_bound, mode='lines', line=dict(width=0),
            showlegend=False, fillcolor='rgba(0,100,80,0.2)', fill='tonexty'
        ))
    
    # Update layout for better visualization
    fig.update_layout(
        title="Actual vs Predicted Train, Test & Validation with 5% Variance Bands",
        xaxis_title="Time",
        yaxis_title="Value",
        legend=dict(x=0.02, y=0.98),
        template="plotly_white",
        hovermode="x unified"
    )
    
    # Add grid and improve axis formatting
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')
    
    # Show the plot
    fig.show()

def plot_forecast_vs_actual(actual, forecast_df):
    """
    Plots the actual vs predicted values for the last month using Plotly.

    Args:
        actual_data (pd.DataFrame): The full actual time series data.
        forecast_df (pd.DataFrame): The forecasted values.
    """
    last_month_actual = actual.loc[actual.index >= (actual.index[-1] - pd.DateOffset(days=30))]
    
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=last_month_actual.index, y=last_month_actual['Close'],
                             mode='lines+markers', name='Actual', line=dict(color='black')))
    fig.add_trace(go.Scatter(x=forecast_df['Date'], y=forecast_df['Forecast'],
                             mode='lines+markers', name='Predicted', line=dict(color='red', dash='dash')))
    
    fig.update_layout(title='Actual vs Predicted Stock Prices (Last Month)',
                      xaxis_title='Date', yaxis_title='Stock Price',
                      template='plotly_white', hovermode='x unified')
    
    fig.show()


FinIns_close = add_features(load_data('OXY', '2022-01-01', '2025-01-30')) #240 BD in a year 40 is 20%
#print(FinIns_close.tail())
lagged_df = create_lagged_features(FinIns_close['Close'])
#print(lagged_df.tail())
train, test, validation = split_data_multi(lagged_df)
#print(validation.tail())
# Train and evaluate the model
model = SVR(C=0.1, kernel='linear')  # User-specified hyperparameters
results = train_model_multi(model, train, test, validation)
forecast_df = forecast_future_values(results['trained_model'], lagged_df, n_forecasts=10)

# Print performance metrics
import json
print("Test Set Metrics (Lagged Features):")
print(json.dumps(results["metrics_test"], indent=4))
print("\nValidation Set Metrics (Lagged Features):")
print(json.dumps(results["metrics_validation"], indent=4))
#print("First few predicted values on validation set:\n", results['y_pred_validation'][:-5])

#Plotting
multi_plot_forecasts(lagged_df, results)
plot_forecast_vs_actual(FinIns_close, forecast_df)


[*********************100%***********************]  1 of 1 completed
2025-02-02 23:18:40,709 - INFO - Data split: Train (610 samples), Test (76 samples), Validation (77 samples).


Test Set Metrics (Lagged Features):
{
    "MAE": 0.6911556621754116,
    "MSE": 0.8102924224334751,
    "R2": 0.9424472430944802
}

Validation Set Metrics (Lagged Features):
{
    "MAE": 0.7334658400049481,
    "MSE": 0.8273089670736374,
    "R2": 0.6627595467170545
}



X does not have valid feature names, but SVR was fitted with feature names


X does not have valid feature names, but SVR was fitted with feature names


X does not have valid feature names, but SVR was fitted with feature names


X does not have valid feature names, but SVR was fitted with feature names


X does not have valid feature names, but SVR was fitted with feature names


X does not have valid feature names, but SVR was fitted with feature names


X does not have valid feature names, but SVR was fitted with feature names


X does not have valid feature names, but SVR was fitted with feature names


X does not have valid feature names, but SVR was fitted with feature names


X does not have valid feature names, but SVR was fitted with feature names



In [25]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

def plot_forecasts(actual, model_results=None, forecast_df=None, plot_type="full"):
    """
    Plots actual vs predicted values for train, test, and validation sets or the last month's forecast using Plotly.
    Includes a 5% variance analysis for better visualization when plotting the full forecast.

    Args:
        actual (pd.DataFrame): The dataset containing actual values.
        model_results (dict, optional): The dictionary containing trained models and predictions. Required if plot_type is "full".
        forecast_df (pd.DataFrame, optional): The forecasted values. Required if plot_type is "last_month".
        plot_type (str): The type of plot to generate. Options are "full" (default) or "last_month".
    """
    if plot_type not in ["full", "last_month"]:
        raise ValueError("plot_type must be either 'full' or 'last_month'.")
    
    if plot_type == "full" and model_results is None:
        raise ValueError("model_results must be provided when plot_type is 'full'.")
    
    if plot_type == "last_month" and forecast_df is None:
        raise ValueError("forecast_df must be provided when plot_type is 'last_month'.")
    
    if plot_type == "full":
        # Extract actual and predicted values
        y_train_pred = model_results.get('y_pred_train')
        y_test_pred = model_results.get('y_pred_test')
        y_val_pred = model_results.get('y_pred_validation')
        
        if y_train_pred is None or y_test_pred is None or y_val_pred is None:
            raise ValueError("Missing required keys in model_results dictionary.")
        
        # Ensure predictions are 1D by flattening if necessary
        y_train_pred = np.array(y_train_pred).flatten()
        y_test_pred = np.array(y_test_pred).flatten()
        y_val_pred = np.array(y_val_pred).flatten()
        
        # Define lengths for each segment
        train_length = len(y_train_pred)
        test_length = len(y_test_pred)
        val_length = len(y_val_pred)
        
        # Split actual values to match the predictions
        y_train_actual = np.squeeze(actual.iloc[:train_length, -1])
        y_test_actual = np.squeeze(actual.iloc[train_length:train_length + test_length, -1])
        y_val_actual = np.squeeze(actual.iloc[train_length + test_length:train_length + test_length + val_length, -1])
        
        # Create time indices for plotting
        train_index = actual.index[:train_length]
        test_index = actual.index[train_length:train_length + test_length]
        val_index = actual.index[train_length + test_length:train_length + test_length + val_length]
        
        # Create Plotly figure
        fig = go.Figure()
        
        # Add actual vs predicted traces for train, test, and validation
        fig.add_trace(go.Scatter(
            x=train_index, y=y_train_actual, mode='lines+markers', name="Actual Train",
            line=dict(color='black', width=2), marker=dict(size=6)
        ))
        fig.add_trace(go.Scatter(
            x=train_index, y=y_train_pred, mode='lines', name="Predicted Train",
            line=dict(color='blue', width=2, dash='dash')
        ))
        
        fig.add_trace(go.Scatter(
            x=test_index, y=y_test_actual, mode='lines+markers', name="Actual Test",
            line=dict(color='red', width=2), marker=dict(size=6)
        ))
        fig.add_trace(go.Scatter(
            x=test_index, y=y_test_pred, mode='lines', name="Predicted Test",
            line=dict(color='orange', width=2, dash='dash')
        ))
        
        fig.add_trace(go.Scatter(
            x=val_index, y=y_val_actual, mode='lines+markers', name="Actual Validation",
            line=dict(color='green', width=2), marker=dict(size=6)
        ))
        fig.add_trace(go.Scatter(
            x=val_index, y=y_val_pred, mode='lines', name="Predicted Validation",
            line=dict(color='purple', width=2, dash='dash')
        ))
        
        # Add 5% variance bands
        for idx, (actual_vals, pred_vals) in enumerate(zip(
            [y_train_actual, y_test_actual, y_val_actual],
            [y_train_pred, y_test_pred, y_val_pred]
        )):
            upper_bound = pred_vals * 1.05  # 5% upper bound
            lower_bound = pred_vals * 0.95  # 5% lower bound
            x_vals = [train_index, test_index, val_index][idx]
            
            fig.add_trace(go.Scatter(
                x=x_vals, y=upper_bound, mode='lines', line=dict(width=0),
                showlegend=False, fillcolor='rgba(0,100,80,0.2)', fill='tonexty',
                name="5% Variance"
            ))
            fig.add_trace(go.Scatter(
                x=x_vals, y=lower_bound, mode='lines', line=dict(width=0),
                showlegend=False, fillcolor='rgba(0,100,80,0.2)', fill='tonexty'
            ))
        
        # Update layout for better visualization
        fig.update_layout(
            title="Actual vs Predicted Train, Test & Validation with 5% Variance Bands",
            xaxis_title="Time",
            yaxis_title="Value",
            legend=dict(x=0.02, y=0.98),
            template="plotly_white",
            hovermode="x unified"
        )
        
    elif plot_type == "last_month":
        last_month_actual = actual.loc[actual.index >= (actual.index[-1] - pd.DateOffset(days=30))]
        
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=last_month_actual.index, y=last_month_actual['Close'],
                                 mode='lines+markers', name='Actual', line=dict(color='black')))
        fig.add_trace(go.Scatter(x=forecast_df['Date'], y=forecast_df['Forecast'],
                                 mode='lines+markers', name='Predicted', line=dict(color='red', dash='dash')))
        
        fig.update_layout(title='Actual vs Predicted Stock Prices (Last Month)',
                          xaxis_title='Date', yaxis_title='Stock Price',
                          template='plotly_white', hovermode='x unified')
    
    # Add grid and improve axis formatting
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')
    
    # Show the plot
    fig.show()

plot_forecasts(lagged_df, model_results=results, plot_type="full")
plot_forecasts(lagged_df, forecast_df=forecast_df, plot_type="last_month")

KeyError: 'Close'