# 4444

## What's new:

1. Using SciPy for labeling 2


In [1]:
import numpy as np
import pandas as pd
np.random.seed(42)   # seed NumPy's global RNG so the np.random draws below are reproducible on a fresh run

In [3]:
# ================================================================
# PART 1: Build a 10-row random demo dataset and show forecast_class
# ================================================================
N_ROWS = 10  # number of hourly rows in the demo frame

# The dict entries are evaluated top to bottom, so NumPy's global RNG is
# consumed in the order forecast_class -> prob_0 -> prob_1 -> prob_2.
predicted_df = pd.DataFrame(
    {
        "DATETIME": pd.date_range(start="2025-08-13 22:00:00", periods=N_ROWS, freq="h"),
        "forecast_class": np.random.choice([0, 1, 2], size=N_ROWS),
        "prob_0": np.random.rand(N_ROWS),
        "prob_1": np.random.rand(N_ROWS),
        "prob_2": np.random.rand(N_ROWS),
    }
)

# Show the class sequence as a bracketed, space-separated list (no commas).
vals = predicted_df["forecast_class"].tolist()
print("[" + " ".join(map(str, vals)) + "]")

# ================================================================
# PART 2: Filtering rules
# ================================================================

def filter_forecast_classes(fc, signal_classes=(1, 2)):
    """Keep only the last occurrence of each streak of a signal class.

    A streak is a series of occurrences of the same signal class that are
    adjacent or separated only by zeros. Every occurrence in a streak except
    the final one is replaced with 0. This single rule covers both filtering
    rules of the notebook:

    * Rule A: in a consecutive run ``v v v`` only the last ``v`` is kept.
    * Rule B/C: in ``v 0 ... 0 v`` the earlier ``v`` is set to 0.

    Any other non-zero label between two occurrences (a different signal
    class, or a label that is not a signal class) ends the streak, so both
    occurrences are kept. Labels outside ``signal_classes`` are never changed.

    Parameters
    ----------
    fc : list
        Class labels in order. The object must provide ``copy()``, ``len()``
        and integer item access/assignment; the notebook passes a plain list.
    signal_classes : iterable, default (1, 2)
        Labels that are subject to filtering. The default reproduces the
        previously hard-coded classes 1 and 2.

    Returns
    -------
    A copy of ``fc`` with the superseded occurrences set to 0. The input
    object itself is not modified.
    """
    fc = fc.copy()
    classes = tuple(signal_classes)

    # Scan right to left while remembering the nearest non-zero label to the
    # right. An occurrence is superseded exactly when that label is the same
    # signal class, i.e. only zeros (or nothing) lie between the two.
    next_nonzero = None
    for i in range(len(fc) - 1, -1, -1):
        label = fc[i]
        if label == 0:
            # Zeros neither start nor break a streak.
            continue
        if label in classes and label == next_nonzero:
            # A later occurrence of the same class follows: drop this one.
            # next_nonzero stays unchanged, so earlier members of the same
            # streak are dropped as well.
            fc[i] = 0
        else:
            next_nonzero = label

    return fc



# Filter the class sequence and print it in bracketed, space-separated form.
filtered = filter_forecast_classes(predicted_df["forecast_class"].tolist())
print("[" + " ".join(map(str, filtered)) + "]")


[0 1 0 1 2 2 0 2 2 1]
[0 0 0 1 0 0 0 0 2 1]


In [4]:
# -----------------------------
# Part 3: Apply filtering
# -----------------------------
# Filter the current class column and store the result back into
# predicted_df, replacing the unfiltered labels.
filtered = filter_forecast_classes(predicted_df["forecast_class"].tolist())
predicted_df["forecast_class"] = filtered

# Echo the stored column as a bracketed, space-separated list (no commas).
vals = predicted_df["forecast_class"].tolist()
print("[" + " ".join(map(str, vals)) + "]")

# Final expression of the cell: renders the full dataframe.
predicted_df

[0 0 0 1 0 0 0 0 2 1]


Unnamed: 0,DATETIME,forecast_class,prob_0,prob_1,prob_2
0,2025-08-13 22:00:00,0,0.391061,0.926659,0.965255
1,2025-08-13 23:00:00,0,0.182236,0.727272,0.607034
2,2025-08-14 00:00:00,0,0.755361,0.326541,0.275999
3,2025-08-14 01:00:00,1,0.425156,0.570444,0.296274
4,2025-08-14 02:00:00,0,0.207942,0.520834,0.165267
5,2025-08-14 03:00:00,0,0.5677,0.961172,0.015636
6,2025-08-14 04:00:00,0,0.031313,0.844534,0.423401
7,2025-08-14 05:00:00,0,0.842285,0.74732,0.394882
8,2025-08-14 06:00:00,2,0.449754,0.539692,0.293488
9,2025-08-14 07:00:00,1,0.39515,0.586751,0.01408
