# Simulated Annealing Imputation for Time Series Gap with Bidirectional Weekly Seasonality and Stochastic Noise

This notebook imputes a gap in a time series spanning **October 1, 2024** to **January 31, 2025**, sampled at **10‑minute** intervals (the original seconds offset of the timestamps is preserved). It builds a bidirectional weekly seasonal profile from the 28 days before and the 28 days after the gap and blends the two linearly across the gap, adds a scaled stochastic noise component, and then formulates a QUBO (quadratic unconstrained binary optimization) problem that softly enforces trend, seasonality, and smoothness. The QUBO is solved via simulated annealing.

## Steps
1. Setup & Imports
2. Load Data & Define Gap
3. Bidirectional Seasonal Profile (mean & std)
4. Build Expected Values (trend + seasonality + noise)
5. QUBO Construction & Annealing
6. Integrate & Save

In [20]:
# 1. Setup & Imports
import pandas as pd
import numpy as np
from datetime import timedelta
import matplotlib.pyplot as plt
import random, math

# Prefer D-Wave's `neal` sampler; record whether the import succeeded so that
# later cells can choose the solver path accordingly.
try:
    from neal import SimulatedAnnealingSampler
except ImportError:
    annealer_available = False
    print('neal not available; using custom SA')
else:
    annealer_available = True

print('Annealer available:', annealer_available)

Annealer available: True


In [21]:
# 2. Load Data & Define Gap
csv_file = 'router_metrics_timeseries_patchtst_expsmooth.csv'
df = pd.read_csv(csv_file, index_col=0, parse_dates=True)
df.sort_index(inplace=True)

# Boundary timestamps: the last observation strictly before the gap and the
# first observation strictly after it.
Y_start_date = df.index[df.index < '2024-10-01'].max()
Y_end_date   = df.index[df.index > '2025-01-31'].min()
if pd.isna(Y_start_date) or pd.isna(Y_end_date):
    # .max()/.min() of an empty selection yields NaT; fail here with a clear
    # message instead of an opaque IndexError further down.
    raise ValueError(
        'The data must contain observations both before 2024-10-01 and after 2025-01-31.'
    )

# Infer sampling interval from the last two observations preceding the boundary
pre_idx = df.index[df.index < Y_start_date]
if len(pre_idx) < 2:
    raise ValueError(
        'Need at least two observations before the gap boundary to infer the sampling interval.'
    )
interval = pre_idx[-1] - pre_idx[-2]
if interval <= pd.Timedelta(0):
    # Duplicate timestamps would give a zero step, which cannot drive date_range.
    raise ValueError(f'Inferred sampling interval must be positive, got {interval}.')
print(f'Inferred interval: {interval}')

# Adjusted gap boundaries: first and last timestamps that actually need imputing
adjusted_gap_start = Y_start_date + interval
adjusted_gap_end   = Y_end_date   - interval
missing_dates = pd.date_range(start=adjusted_gap_start, end=adjusted_gap_end, freq=interval)
N = len(missing_dates)
if N == 0:
    raise ValueError('No timestamps to impute between the gap boundaries.')
print(f'Gap from {adjusted_gap_start} to {adjusted_gap_end}, {N} points')

Inferred interval: 0 days 00:10:00
Gap from 2024-10-01 00:00:07 to 2025-01-30 23:50:07, 17568 points


In [22]:
# 3. Bidirectional Seasonal Profile (mean & std)
# 28-day windows on either side of the gap; both include their boundary sample.
pre_window_start  = Y_start_date - timedelta(days=28)
pre_window_end    = Y_start_date
post_window_start = Y_end_date
post_window_end   = Y_end_date + timedelta(days=28)

pre_seg  = df.loc[pre_window_start:pre_window_end]
post_seg = df.loc[post_window_start:post_window_end]

# Linear trend per column: straight line from the value at Y_start_date to the
# value at Y_end_date, expressed as an increment per sampling step.
total_steps = N + 1
trend_info = {}
for col in df.columns:
    y0 = df.loc[Y_start_date, col]
    y1 = df.loc[Y_end_date, col]
    d_step = (y1 - y0) / total_steps
    trend_info[col] = {'y0': y0, 'y1': y1, 'd': d_step}

sec_per_week = 7 * 24 * 3600


def _detrended_residuals(segment):
    """Return `segment` minus the per-column linear trend anchored at Y_start_date.

    The step count of every row is measured from Y_start_date in units of
    `interval` (negative before the gap), so the same trend line is
    extrapolated into both the pre- and post-gap windows.
    """
    steps = ((segment.index - Y_start_date) / pd.Timedelta(interval)).astype(int)
    residuals = pd.DataFrame(index=segment.index, columns=df.columns)
    for col in df.columns:
        info = trend_info[col]
        residuals[col] = segment[col] - (info['y0'] + steps * info['d'])
    return residuals


def _seconds_of_week(index):
    """Return whole seconds since the epoch, modulo one week, for each timestamp.

    Computed through timedelta arithmetic rather than by viewing the index as
    raw int64, so the result does not depend on the index's stored datetime
    resolution and stays comparable with keys derived from `Timestamp.value`.
    """
    epoch = pd.Timestamp(0, tz=index.tz)
    whole_seconds = (index - epoch) // pd.Timedelta(seconds=1)
    return np.asarray(whole_seconds % sec_per_week)


res_pre  = _detrended_residuals(pre_seg)
res_post = _detrended_residuals(post_seg)

tow_pre  = _seconds_of_week(res_pre.index)
tow_post = _seconds_of_week(res_post.index)
res_pre['_tow']  = tow_pre
res_post['_tow'] = tow_post

# Mean and standard deviation of the residuals for every time-of-week slot.
weekly_profile_pre  = res_pre.groupby('_tow').mean()
weekly_profile_post = res_post.groupby('_tow').mean()
weekly_std_pre      = res_pre.groupby('_tow').std()
weekly_std_post     = res_post.groupby('_tow').std()

print('Computed weekly profiles and stds')

Computed weekly profiles and stds


In [23]:
# 4. Build Expected Values: Trend + Seasonality + Reduced Noise + Non‑negative clamp
np.random.seed(42)
beta = 0.4   # reduce noise amplitude


def _profile_value(profile, key):
    """Exact-match lookup of `key` in a weekly-profile Series.

    Returns 0 when the key is absent or when the stored statistic is NaN
    (for example a standard deviation computed from a single sample), so that
    a missing statistic contributes nothing instead of turning the expected
    value into NaN, which the non-negative clamp would silently map to 0.
    """
    value = profile.get(key, 0)
    return 0 if pd.isna(value) else value


gap_imputed = pd.DataFrame(index=missing_dates, columns=df.columns)

# Quantities that depend only on the timestamp, not on the column.
tow_keys = [int((ts.value // 1_000_000_000) % sec_per_week) for ts in missing_dates]
blend_weights = [(i + 1) / (N + 1) for i in range(N)]

for col in df.columns:
    info   = trend_info[col]
    y0     = info['y0']
    d_step = info['d']
    mean_pre, mean_post = weekly_profile_pre[col], weekly_profile_post[col]
    sd_pre, sd_post     = weekly_std_pre[col], weekly_std_post[col]

    exp_vals = []
    for i, (key, alpha) in enumerate(zip(tow_keys, blend_weights)):
        # linear trend
        lin = y0 + (i+1)*d_step

        # seasonal components: exact time-of-week match in each profile
        val_pre, std_pre   = _profile_value(mean_pre, key),  _profile_value(sd_pre, key)
        val_post, std_post = _profile_value(mean_post, key), _profile_value(sd_post, key)

        # weight shifts linearly from the pre-gap profile to the post-gap one
        seas = (1-alpha)*val_pre + alpha*val_post
        blended_std = (1-alpha)*std_pre + alpha*std_post

        # stochastic noise (scaled); exactly one draw per point
        noise = beta * np.random.normal(0, blended_std)

        raw = lin + seas + noise
        # clamp negatives to zero
        exp_vals.append(max(0, raw))

    gap_imputed[col] = exp_vals

print("Built gap_imputed with non‑negative, reduced stochastic component (β=", beta, ")")

Built gap_imputed with non‑negative, reduced stochastic component (β= 0.4 )


In [24]:
# 5. QUBO construction and simulated annealing, with a heavier penalty on
#    candidates above the historical maximum than on ones below the minimum.
P = 1e6            # weight of the per-time-slot one-hot terms
gamma_high = 1e6   # multiplies the quartic penalty above the historical max
gamma_low = 1e4    # multiplies the quadratic penalty below the historical min
Np = len(missing_dates)   # number of time slots to impute
imputed_columns = {}      # column name -> list of imputed values

def add_qubo_bias(Q, i, j, bias):
    """Accumulate `bias` onto the QUBO entry for the unordered pair (i, j).

    The entry is stored under the key (smaller index, larger index), so
    (i, j) and (j, i) address the same coefficient; i == j is a linear term.
    `Q` is modified in place.
    """
    lo, hi = (i, j) if i <= j else (j, i)
    Q[(lo, hi)] = Q.get((lo, hi), 0) + bias

def _anneal_qubo_fallback(Q, num_vars, num_sweeps=200, t_start=None, t_end=1e-2, seed=None):
    """Minimise a QUBO with plain single-bit-flip simulated annealing.

    Pure-Python solver used only when `neal` is not installed.

    Parameters
    ----------
    Q : dict mapping (i, j) with i <= j to a coefficient; i == j is a linear term.
    num_vars : number of binary variables, indexed 0 .. num_vars - 1.
    num_sweeps : number of full passes over all variables.
    t_start : initial temperature; defaults to the largest |coefficient| in Q.
    t_end : final temperature of the geometric cooling schedule.
    seed : seed for the private random generator.

    Returns
    -------
    dict mapping variable index to 0 or 1.
    """
    rng = random.Random(seed)

    # Split Q into linear terms and per-variable neighbour lists.
    linear = [0.0] * num_vars
    couplings = [[] for _ in range(num_vars)]
    for (a, b), weight in Q.items():
        if a == b:
            linear[a] += weight
        else:
            couplings[a].append((b, weight))
            couplings[b].append((a, weight))

    if t_start is None:
        t_start = max((abs(w) for w in Q.values()), default=1.0) or 1.0
    t_end = min(t_end, t_start)

    state = [rng.randint(0, 1) for _ in range(num_vars)]
    for sweep in range(num_sweeps):
        frac = sweep / max(1, num_sweeps - 1)
        temperature = t_start * (t_end / t_start) ** frac
        for k in range(num_vars):
            # Energy change of flipping bit k given its currently-set neighbours.
            field = linear[k] + sum(w for j, w in couplings[k] if state[j])
            delta = -field if state[k] else field
            if delta <= 0 or rng.random() < math.exp(-delta / temperature):
                state[k] = 1 - state[k]
    return dict(enumerate(state))


SA_SEED = 42  # fixed seed so repeated runs of this cell give the same imputation
sampler = SimulatedAnnealingSampler() if annealer_available else None

for col in df.columns:
    print(f"Processing column: {col}")
    Q = {}
    var_idx = {}   # (time slot, candidate value) -> binary variable id
    idx_var = {}   # binary variable id -> (time slot, candidate value)
    v = 0
    expv = gap_imputed[col].values
    cand = []

    # Discretize each expected continuous value into integer candidates:
    # floor and ceil, or x-1, x, x+1 when the expected value is already integral.
    for i, e in enumerate(expv):
        opts = {np.floor(e), np.ceil(e)}
        if len(opts) == 1:
            x = next(iter(opts))
            opts |= {x - 1, x + 1}
        opts = sorted(x for x in opts if x >= 0)
        cand.append(opts)
        for x in opts:
            var_idx[(i, x)] = v
            idx_var[v] = (i, x)
            v += 1

    # Historical bounds
    min_val = df[col].min()
    max_val = df[col].max()

    # (a) Deviation from the expected value & out-of-range penalties
    for (i, x), qi in var_idx.items():
        add_qubo_bias(Q, qi, qi, (x - expv[i])**2)
        # quartic penalty above the historical max
        if x > max_val:
            add_qubo_bias(Q, qi, qi, gamma_high * (x - max_val)**4)
        # quadratic penalty below the historical min
        if x < min_val:
            add_qubo_bias(Q, qi, qi, gamma_low * (min_val - x)**2)

    # (b) Boundary conditions: first/last slot should sit one trend step away
    #     from the observed values on either side of the gap.
    info = trend_info[col]
    y0, y1, d_step = info['y0'], info['y1'], info['d']
    for x in cand[0]:
        add_qubo_bias(Q, var_idx[(0, x)], var_idx[(0, x)], (x - y0 - d_step)**2)
    for x in cand[-1]:
        add_qubo_bias(Q, var_idx[(Np - 1, x)], var_idx[(Np - 1, x)], (y1 - x - d_step)**2)

    # (c) Smoothness between consecutive points
    for i in range(Np - 1):
        for xi in cand[i]:
            vi = var_idx[(i, xi)]
            for xj in cand[i + 1]:
                vj = var_idx[(i + 1, xj)]
                add_qubo_bias(Q, vi, vj, ((xj - xi) - d_step)**2)

    # (d) One-hot constraint per time slot.
    #     P * (sum_a x_a - 1)^2 expands (using x^2 = x, constant dropped) to
    #     -P on every diagonal and +2P on every pair. A diagonal of -2P would
    #     give "one selected" and "two selected" the same penalty energy.
    for i, opts in enumerate(cand):
        for a in range(len(opts)):
            va = var_idx[(i, opts[a])]
            add_qubo_bias(Q, va, va, -P)
            for b in range(a + 1, len(opts)):
                vb = var_idx[(i, opts[b])]
                add_qubo_bias(Q, va, vb, 2 * P)

    # Solve the QUBO
    if sampler is not None:
        sol = sampler.sample_qubo(Q, num_reads=50, seed=SA_SEED).first.sample
    else:
        sol = _anneal_qubo_fallback(Q, v, seed=SA_SEED)

    # Decode: collect the selected candidates of every slot.
    selected = {}
    for vid, bit in sol.items():
        if bit == 1:
            slot, val = idx_var[vid]
            selected.setdefault(slot, []).append(val)

    imputed_vals = []
    for i in range(Np):
        picks = selected.get(i)
        if picks:
            # If the one-hot constraint was violated, keep the candidate
            # closest to the expected value rather than an arbitrary one.
            imputed_vals.append(min(sorted(picks), key=lambda x: abs(x - expv[i])))
        else:
            # No candidate selected: fall back to rounding the expected value.
            imputed_vals.append(int(round(expv[i])))

    imputed_columns[col] = imputed_vals
    print(f"Done {col}")

imputed_df = pd.DataFrame(imputed_columns, index=missing_dates)
print("QUBO imputation complete; imputed_df shape:", imputed_df.shape)

Processing column: ifInMulticastPkts
Done ifInMulticastPkts
Processing column: ifInBroadcastPkts
Done ifInBroadcastPkts
Processing column: ifOutMulticastPkts
Done ifOutMulticastPkts
Processing column: ifOutBroadcastPkts
Done ifOutBroadcastPkts
Processing column: ifHCInOctets
Done ifHCInOctets
Processing column: ifHCInUcastPkts
Done ifHCInUcastPkts
Processing column: ifHCInMulticastPkts
Done ifHCInMulticastPkts
Processing column: ifHCInBroadcastPkts
Done ifHCInBroadcastPkts
Processing column: ifHCOutOctets
Done ifHCOutOctets
Processing column: ifHCOutUcastPkts
Done ifHCOutUcastPkts
Processing column: ifHCOutMulticastPkts
Done ifHCOutMulticastPkts
Processing column: ifHCOutBroadcastPkts
Done ifHCOutBroadcastPkts
Processing column: ifHighSpeed
Done ifHighSpeed
QUBO imputation complete; imputed_df shape: (17568, 13)


In [None]:
# 6. Integrate & Save
# Drop (in place) any rows of the original frame that fall inside the gap, so
# they are not duplicated by the imputed rows appended below.
rows_inside_gap = df.loc[adjusted_gap_start:adjusted_gap_end].index
df.drop(index=rows_inside_gap, inplace=True)

df_filled = pd.concat([df, imputed_df]).sort_index()
df_filled.index.name = "date"

# Show the filled region together with one step of context on each side.
context_start = Y_start_date - interval
context_end = Y_end_date + interval
print(df_filled.loc[context_start:context_end])

df_filled.to_csv('router_metrics_Simulated_annealing.csv')
print('Saved filled CSV.')