In [None]:

def top_n_categories(y_true, y_pred, features_df, feature_col, top_n=5, error_type="mae"):
    """
    Print the best and worst categories of a feature, ranked by average error.

    The per-sample error is grouped by ``features_df[feature_col]``; for every
    category the mean error and the number of samples (frequency) are computed.
    The ``top_n`` categories with the lowest and with the highest mean error
    are then printed as two tables, both sorted by ascending mean error.

    Parameters
    ----------
    y_true : pandas Series (array-like)
        Ground truth values
    y_pred : pandas Series (array-like)
        Model predictions
    features_df : DataFrame
        DataFrame with features (row order aligned with y_true/y_pred)
    feature_col : str
        Name of the column in ``features_df`` holding the categories to group by
    top_n : int, default=5
        Number of best and worst categories to print
    error_type : str, default="mae"
        Error metric: mae (mean absolute error) or mse (mean squared error)

    Returns
    -------
    None
        The two tables are printed to stdout.

    Raises
    ------
    ValueError
        If ``error_type`` is neither "mae" nor "mse", or if ``y_true``,
        ``y_pred`` and the feature column do not have the same length.
    """
    if error_type not in ("mae", "mse"):
        raise ValueError(
            f"Unsupported error_type {error_type!r}; expected 'mae' or 'mse'."
        )

    # The feature column is attached by position (``.values``), so the targets
    # are compared by position as well. Subtracting two Series directly would
    # align them on index labels and silently produce NaNs or mismatched rows
    # when their indexes differ.
    truth = pd.Series(y_true).to_numpy()
    preds = pd.Series(y_pred).to_numpy()
    categories = features_df[feature_col].values

    if not (len(truth) == len(preds) == len(categories)):
        raise ValueError(
            "y_true, y_pred and features_df[feature_col] must have the same "
            f"length (got {len(truth)}, {len(preds)} and {len(categories)})."
        )

    residual = truth - preds
    per_sample_error = abs(residual) if error_type == "mae" else residual ** 2

    df = pd.DataFrame({
        "y_true": truth,
        "y_pred": preds,
        "err": per_sample_error,
    })
    df[feature_col] = categories

    grouped = df.groupby(feature_col).agg(
        avg_error=("err", "mean"),
        frequency=("err", "count"),
    )

    # Ascending order: the head holds the most accurate categories, the tail
    # the least accurate ones.
    ranked = grouped.sort_values("avg_error")
    best = ranked.head(top_n).reset_index()
    worst = ranked.tail(top_n).reset_index()

    print(f"Top {top_n} Best Categories:")
    print(best)

    print(f"\nTop {top_n} Worst Categories:")
    print(worst)

In [None]:
def top_n_accuracy(y_true, y_pred, top_n=5):
    """
    Print the most and least accurately predicted samples by absolute error.

    The absolute error ``|y_true - y_pred|`` is computed per sample; the
    ``top_n`` samples with the smallest and with the largest error are printed
    as two tables, both sorted by ascending error.

    Parameters
    ----------
    y_true : pandas Series (array-like)
        Ground truth values
    y_pred : pandas Series (array-like)
        Model predictions, in the same row order as ``y_true``
    top_n : int, default=5
        Number of most and least accurate samples to print

    Returns
    -------
    None
        The two tables are printed to stdout.
    """
    # Keep the caller's row labels so printed samples can be traced back to
    # the source data; fall back to a default RangeIndex for plain arrays.
    if isinstance(y_true, pd.Series):
        row_labels = y_true.index
    elif isinstance(y_pred, pd.Series):
        row_labels = y_pred.index
    else:
        row_labels = None

    # Compare by position: subtracting two Series aligns on index labels and
    # would yield NaNs or mismatched rows when the indexes differ.
    truth = np.asarray(y_true)
    preds = np.asarray(y_pred)

    results = pd.DataFrame(
        {
            "y_true": truth,
            "y_pred": preds,
            "error": np.abs(truth - preds),
        },
        index=row_labels,
    )

    # Ascending order: the head holds the smallest errors, the tail the largest.
    ranked = results.sort_values(by="error")
    most_accurate = ranked.head(top_n)
    least_accurate = ranked.tail(top_n)

    print(f"Top {top_n} Most Accurate Predictions:")
    print(most_accurate)

    print(f"\nTop {top_n} Least Accurate Predictions:")
    print(least_accurate)