In [None]:
import pandas as pd
import numpy as np

In [None]:
def mape(y_true, y_pred):
    """
    Mean absolute percentage error, expressed in percent.

    Parameters:
    -----------
    y_true : array-like of observed values
    y_pred : array-like (or scalar) of predicted values, broadcastable to y_true

    Returns:
    --------
    float : mean of |(y_true - y_pred) / y_true| * 100 over the observations
            whose actual value is non-zero; np.inf when there is no such
            observation (the percentage error is undefined there).
    """
    # Work on plain float arrays so lists are accepted and pandas objects are
    # compared positionally instead of being re-aligned on their index.
    y_true, y_pred = np.broadcast_arrays(
        np.asarray(y_true, dtype=float), np.asarray(y_pred, dtype=float)
    )

    # Zero actuals would divide by zero; skip them, mirroring the masked MAPE
    # used by plot_comprehensive_cv_results.
    nonzero = y_true != 0
    if not nonzero.any():
        return np.inf

    relative_error = (y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero]
    return np.mean(np.abs(relative_error)) * 100

def mae(y_true, y_pred):
    """Mean absolute error: the average of |y_true - y_pred|."""
    absolute_errors = np.abs(y_true - y_pred)
    return np.mean(absolute_errors)

def rmse(y_true, y_pred):
    """Root mean squared error: sqrt of the average squared residual."""
    residuals = y_true - y_pred
    mean_squared_error = np.mean(residuals ** 2)
    return np.sqrt(mean_squared_error)


def split_train_test(df, test_size):
    """
    Chronological train/test split.

    The frame is ordered by its 'ds' column and re-indexed from 0; the first
    int(len(df) * (1 - test_size)) rows form the training set and the
    remaining rows form the test set.

    Parameters:
    -----------
    df : DataFrame with a 'ds' column
    test_size : fraction of rows to hold out at the end of the series

    Returns:
    --------
    (train_df, test_df) tuple of DataFrames
    """
    ordered = df.sort_values('ds').reset_index(drop=True)
    cutoff = int(len(ordered) * (1 - test_size))
    train_part = ordered.iloc[:cutoff]
    test_part = ordered.iloc[cutoff:]
    return train_part, test_part

In [None]:


def plot_comprehensive_cv_results(df, forecast_func, n_splits=5, title="Time Series Cross-Validation Results",
                                   metric='mape', save_to_json='cv_results.json', method_name=None, dataset_name=None):
    """
    Run time-series cross-validation with sklearn's TimeSeriesSplit, draw the
    whole series with every fold's forecast and test window on a single
    figure, and optionally store the scores in a JSON file.

    Parameters:
    -----------
    df : DataFrame with 'ds' and 'y' columns
    forecast_func : function that takes (train_df, test_df) and returns predictions array
    n_splits : number of CV folds
    title : title for the plot
    metric : str, either 'mape' or 'rmse' (default: 'mape')
    save_to_json : str, path to JSON file to save results; pass None to skip saving
    method_name : str, name of the forecasting method (defaults to
        forecast_func.__name__, or 'unknown_method' when it has none)
    dataset_name : str, name of the dataset (defaults to a string `df.name`
        attribute, then `df.attrs['name']`, then 'unknown_dataset')

    Returns:
    --------
    None. The results are shown as a figure, printed, and (when save_to_json
    is set) merged into the JSON file as {method_name: [dataset result, ...]}.

    Raises:
    -------
    ValueError if metric is not 'mape' or 'rmse'.

    Notes:
    ------
    A fold whose forecast raises (or returns fewer predictions than test rows)
    is reported on stdout/stderr, scored as np.inf, excluded from the mean/std
    and left out of the plot. Failures while writing the JSON file are
    reported as a warning and do not raise.
    """
    from sklearn.model_selection import TimeSeriesSplit
    import matplotlib.pyplot as plt
    import numpy as np
    import json
    import os
    import traceback

    # Validate metric parameter
    if metric not in ('mape', 'rmse'):
        raise ValueError("metric must be either 'mape' or 'rmse'")

    # Auto-infer method name from function if not provided
    if save_to_json is not None and method_name is None:
        method_name = getattr(forecast_func, '__name__', 'unknown_method')
        print(f"Auto-inferred method name: '{method_name}'")

    # Auto-infer dataset name if not provided. A plain DataFrame has no
    # `.name`, and `df.name` returns a Series when a column is called 'name',
    # so only a string value is accepted before falling back.
    if save_to_json is not None and dataset_name is None:
        inferred = getattr(df, 'name', None)
        if not isinstance(inferred, str):
            inferred = getattr(df, 'attrs', {}).get('name')
        dataset_name = inferred if isinstance(inferred, str) and inferred else 'unknown_dataset'
        print(f"Auto-inferred dataset name: '{dataset_name}'")

    # Chronological order; each test window is ~10% of the series (at least one row).
    df = df.sort_values('ds').reset_index(drop=True)
    fold_test_size = max(1, round(len(df) * 0.1))
    tscv = TimeSeriesSplit(n_splits=n_splits, test_size=fold_test_size)

    scores = []        # exactly one entry per fold, np.inf for a failed fold
    fold_results = []  # only the folds that produced a forecast

    # Metric name/unit for display
    metric_name = metric.upper()
    metric_unit = '%' if metric == 'mape' else ''

    # Run the cross-validation
    for fold, (train_idx, test_idx) in enumerate(tscv.split(df)):
        train_df = df.iloc[train_idx].copy()
        test_df = df.iloc[test_idx].copy()

        try:
            predictions = np.array(forecast_func(train_df, test_df))
            actual = test_df['y'].values

            if len(predictions) < len(actual):
                raise ValueError(
                    f"forecast returned {len(predictions)} predictions for {len(actual)} test rows"
                )
            # Extra predictions beyond the test window are ignored.
            predictions = predictions[:len(actual)]

            if metric == 'mape':
                # Percentage error is undefined where the actual value is 0.
                mask = actual != 0
                if mask.any():
                    actual_masked = actual[mask]
                    predictions_masked = predictions[mask]
                    fold_score = np.mean(np.abs((actual_masked - predictions_masked) / actual_masked)) * 100
                else:
                    fold_score = np.inf
            else:  # 'rmse'
                fold_score = np.sqrt(np.mean((actual - predictions) ** 2))

            fold_entry = {
                'fold': fold + 1,
                'train_dates': (train_df['ds'].min(), train_df['ds'].max()),
                'test_dates': (test_df['ds'].min(), test_df['ds'].max()),
                'predictions': predictions,
                'actual': actual,
                'test_df': test_df
            }
            # Append only once everything above succeeded, so `scores` keeps
            # exactly one entry per fold and scores[fold - 1] stays valid.
            scores.append(fold_score)
            fold_results.append(fold_entry)

        except Exception as e:
            print(f"  Error in fold {fold+1}: {e}")
            traceback.print_exc()
            scores.append(np.inf)

    # Summary statistics over the folds with a finite score
    valid_scores = [s for s in scores if np.isfinite(s)]
    mean_score = np.mean(valid_scores) if valid_scores else np.inf
    std_score = np.std(valid_scores) if valid_scores else 0

    # Create the visualization - single plot
    fig, ax = plt.subplots(1, 1, figsize=(16, 8))

    # Plot the entire time series as background (lighter); df is already sorted
    ax.plot(df['ds'], df['y'],
            color='lightgray', alpha=0.5, linewidth=1, label='Complete Time Series')

    # Color palette: one color per fold number
    colors = plt.cm.tab10(np.linspace(0, 1, n_splits))

    # Plot each successful fold's results
    for plot_idx, fold_data in enumerate(fold_results):
        fold_num = fold_data['fold']
        # Look up by fold number, not by position in fold_results: the two
        # differ as soon as an earlier fold failed.
        fold_color = colors[fold_num - 1]
        fold_score = scores[fold_num - 1]
        test_start, test_end = fold_data['test_dates']
        test_df = fold_data['test_df']

        # Actual values for this test period (thick black line); labelled once
        ax.plot(test_df['ds'], test_df['y'],
                color='black', linewidth=4, alpha=0.9,
                label='Actual Values (Test Periods)' if plot_idx == 0 else "")

        # Predictions for this test period
        ax.plot(test_df['ds'], fold_data['predictions'],
                color=fold_color, linewidth=3, linestyle='--',
                marker='o', markersize=6, alpha=0.9,
                label=f'Fold {fold_num} Forecast')

        # Shaded region for the test period
        ax.axvspan(test_start, test_end, alpha=0.15, color=fold_color)

        # Performance annotation near the bottom of the current y-range
        if np.isfinite(fold_score):
            mid_date = test_start + (test_end - test_start) / 2
            y_low, y_high = ax.get_ylim()
            y_pos = y_low + (y_high - y_low) * 0.05
            score_text = f'{metric_name}: {fold_score:.1f}{metric_unit}'
            ax.text(mid_date, y_pos, score_text,
                    ha='center', va='bottom', fontsize=10, weight='bold',
                    bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.9, edgecolor=fold_color))

        # Fold boundary line
        ax.axvline(test_start, color=fold_color, alpha=0.7, linestyle=':', linewidth=2)

    # Formatting
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Value', fontsize=12)
    title_text = f'{title}\nMean {metric_name}: {mean_score:.2f}{metric_unit} ± {std_score:.2f}{metric_unit}'
    ax.set_title(title_text, fontsize=14, pad=20)
    ax.legend(loc='upper left', bbox_to_anchor=(1.02, 1))
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Save to JSON file if requested (best effort: failures only warn)
    if save_to_json is not None:
        try:
            # Read existing JSON file or start from an empty mapping
            if os.path.exists(save_to_json):
                with open(save_to_json, 'r') as f:
                    existing_data = json.load(f)
            else:
                existing_data = {}

            # Convert fold_results to JSON-serializable format
            json_fold_results = []
            for fold_data in fold_results:
                train_start, train_end = fold_data['train_dates']
                test_start, test_end = fold_data['test_dates']
                predictions = fold_data['predictions']
                actual = fold_data['actual']
                json_fold_results.append({
                    'fold': fold_data['fold'],
                    'train_dates': [train_start.isoformat(), train_end.isoformat()],
                    'test_dates': [test_start.isoformat(), test_end.isoformat()],
                    'predictions': predictions.tolist() if hasattr(predictions, 'tolist') else list(predictions),
                    'actual': actual.tolist() if hasattr(actual, 'tolist') else list(actual),
                    'test_df_dates': fold_data['test_df']['ds'].dt.strftime('%Y-%m-%d').tolist(),
                    'test_df_values': fold_data['test_df']['y'].tolist()
                })

            # Non-finite scores are stored as null so the file stays valid JSON
            dataset_results = {
                'dataset_name': dataset_name,
                'metric': metric,
                'n_splits': n_splits,
                'mean_score': float(mean_score) if np.isfinite(mean_score) else None,
                'std_score': float(std_score) if np.isfinite(std_score) else None,
                'scores': [float(s) if np.isfinite(s) else None for s in scores],
                'fold_results': json_fold_results
            }

            # File layout: method_name -> list of per-dataset results.
            # Replace the entry for this dataset if present, else append.
            method_entries = existing_data.setdefault(method_name, [])
            for idx, existing_dataset in enumerate(method_entries):
                if existing_dataset['dataset_name'] == dataset_name:
                    method_entries[idx] = dataset_results
                    break
            else:
                method_entries.append(dataset_results)

            # Serialize before opening the file for writing: a serialization
            # error must not truncate the results that are already stored.
            payload = json.dumps(existing_data, indent=2)
            with open(save_to_json, 'w') as f:
                f.write(payload)

            print(f"\nResults saved to {save_to_json} under method '{method_name}' for dataset '{dataset_name}'")

        except Exception as e:
            print(f"\nWarning: Failed to save results to JSON file: {e}")

    return None