# Hurst Exponent Derivation: Complete Mathematical Proof

**Drop in your data → See the math → Get the result**

---

## ⚙️ Configuration - Change These!

In [None]:
# ============================================================
# ⚙️ USER CONFIGURABLE PARAMETERS
# ============================================================
# Every later cell reads these names; edit them here and re-run the notebook.

# Option 1: Load from PRISM battery data
DATA_SOURCE = 'prism'  # 'prism' or 'csv' (selects the loading branch in Step 0)
ENTITY_ID = 'B0047'    # Battery: B0045, B0046, B0047, B0048 (only read when DATA_SOURCE == 'prism')

# Option 2: Load from CSV (uncomment BOTH lines below and set the path;
# CSV_PATH is not defined otherwise, and the CSV loader requires it)
# DATA_SOURCE = 'csv'
# CSV_PATH = 'your_data.csv'  # CSV with 'value' column

# Window for detailed calculation demo (Steps 1-7 slice X[WINDOW_START:WINDOW_START + WINDOW_SIZE];
# the multi-scale analysis in Step 8 uses the full series instead)
WINDOW_START = 19      # Starting index (0-based)
WINDOW_SIZE = 8        # Window size for detailed calc

# ============================================================

## Step 0: Load Data

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Compact, non-scientific float formatting for every array printed below.
np.set_printoptions(precision=6, suppress=True)

# Set plot style
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['figure.figsize'] = (12, 4)
plt.rcParams['font.size'] = 11

# Load the series X (1-D array of values) plus the labels used by the plots.
# The heavy I/O libraries are imported lazily so only the selected backend
# has to be installed.
if DATA_SOURCE == 'prism':
    import polars as pl
    obs = pl.read_parquet('../../data/battery_45_48/observations.parquet')
    # Keep only the capacity signal of the chosen entity, in time order.
    battery = obs.filter(
        (pl.col('entity_id') == ENTITY_ID) &
        (pl.col('signal_id') == 'capacity')
    ).sort('timestamp')
    X = battery['value'].to_numpy()
    data_label = f'Battery {ENTITY_ID} Capacity'
    x_label = 'Cycle'
    y_label = 'Capacity (Ah)'
elif DATA_SOURCE == 'csv':
    import pandas as pd
    df = pd.read_csv(CSV_PATH)
    X = df['value'].values
    data_label = 'Your Data'
    x_label = 'Time'
    y_label = 'Value'
else:
    # Previously any unknown value fell through to the CSV branch and failed
    # with a confusing NameError on CSV_PATH; reject it explicitly instead.
    raise ValueError(
        f"Unknown DATA_SOURCE {DATA_SOURCE!r}; expected 'prism' or 'csv'"
    )

n = len(X)
if n == 0:
    # min()/max() below would raise an opaque reduction error on empty input.
    raise ValueError(
        f"No observations loaded for DATA_SOURCE={DATA_SOURCE!r}; "
        "check the entity/signal filter or the input file"
    )

print(f"Loaded {n} observations")
print(f"  Range: [{X.min():.4f}, {X.max():.4f}]")
print(f"  Mean:  {X.mean():.4f}")
print(f"  Std:   {X.std():.4f}")

## 📊 Data Overview

In [None]:
# Overview figure: the full series with the analysis window highlighted.
fig, ax = plt.subplots(figsize=(14, 5))

# Plot full series
ax.plot(range(n), X, 'b-', linewidth=1.5, label=data_label)

# Highlight analysis window
ax.axvspan(WINDOW_START, WINDOW_START + WINDOW_SIZE,
           alpha=0.3, color='orange', label=f'Analysis Window (cycles {WINDOW_START+1}-{WINDOW_START+WINDOW_SIZE})')

# Mark window data points. The x positions are derived from the slice that
# was actually obtained: if the window runs past the end of the data the
# slice is shorter than WINDOW_SIZE, and scatter() would otherwise raise on
# mismatched x/y lengths.
window_y = X[WINDOW_START:WINDOW_START + WINDOW_SIZE]
window_x = range(WINDOW_START, WINDOW_START + len(window_y))
ax.scatter(window_x, window_y, color='red', s=50, zorder=5, label='Window Data Points')

ax.set_xlabel(x_label, fontsize=12)
ax.set_ylabel(y_label, fontsize=12)
ax.set_title(f'{data_label} - Full Series with Analysis Window', fontsize=14, fontweight='bold')
ax.legend(loc='best')
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Indices are 0-based; "cycles" are the same positions counted from 1.
print(f"\n📍 Analysis Window: indices {WINDOW_START} to {WINDOW_START + WINDOW_SIZE - 1} (cycles {WINDOW_START+1} to {WINDOW_START+WINDOW_SIZE})")

---

## Step 1: Extract Window Data

We extract the specific window for our detailed R/S calculation.

In [None]:
# Step 1: slice the analysis window out of the full series X.
s = WINDOW_SIZE
start_idx = WINDOW_START
end_idx = start_idx + s

# Fail fast on an unusable window. NumPy slicing silently truncates at the
# end of the array, and the later steps still divide by s, so a window that
# does not fit would produce a wrong mean rather than an error. s >= 2 is
# required because S in Step 6 uses the (s - 1) divisor.
if s < 2 or start_idx < 0 or end_idx > len(X):
    raise ValueError(
        f"Window [{start_idx}, {end_idx}) does not fit the data: need "
        f"WINDOW_SIZE >= 2, WINDOW_START >= 0 and "
        f"WINDOW_START + WINDOW_SIZE <= {len(X)}"
    )

window = X[start_idx:end_idx]

print("="*60)
print("GIVEN DATA FOR THIS WINDOW")
print("="*60)
print(f"\nWindow: cycles {start_idx+1} to {end_idx} (size s = {s})")
print(f"\n  x = [{', '.join([f'{v:.6f}' for v in window])}]")
print(f"\n  {'i':<4} {'Cycle':<8} {'xᵢ':<14}")
print(f"  {'-'*4} {'-'*8} {'-'*14}")
for i, v in enumerate(window):
    # i is the 0-based index inside the window; the cycle number is 1-based.
    print(f"  {i:<4} {start_idx+i+1:<8} {v:<14.6f}")

---

## Step 2: Compute the Mean

$$\bar{x} = \frac{1}{s} \sum_{i=0}^{s-1} x_i$$

In [None]:
# Step 2: arithmetic mean of the window, shown term by term.
print("Step 2: Compute the Mean")
print("="*60)

total = np.sum(window)
x_bar = total / s

print(f"\n  x̄ = (1/s) × Σxᵢ")
print(f"\n  x̄ = (1/{s}) × ({' + '.join([f'{v:.4f}' for v in window])})")
print(f"\n  x̄ = (1/{s}) × {total:.6f}")

# Result box. The border is sized from the text so it stays aligned for any
# number of digits. "x̄" is 'x' plus a combining macron, which takes no
# column on screen, hence the -1 when measuring the visible width.
result_text = f"  x̄ = {x_bar:.6f}"
visible_width = len(result_text) - 1
box_width = max(29, visible_width + 2)
print(f"\n  ┌{'─' * box_width}┐")
print(f"  │{result_text}{' ' * (box_width - visible_width)}│")
print(f"  └{'─' * box_width}┘")

---

## Step 3: Compute Deviations from Mean

$$y_i = x_i - \bar{x}$$

In [None]:
# Step 3: centre the window by subtracting its mean.
print("Step 3: Compute Deviations from Mean")
print("="*60)

y = window - x_bar

print(f"\n  yᵢ = xᵢ - x̄  (where x̄ = {x_bar:.6f})")
# 'x̄' contains a zero-width combining macron, so it is padded to 13 code
# points to occupy the same 12 columns as the other headers.
print(f"\n  {'i':<4} {'xᵢ':<12} {'-':<3} {'x̄':<13} {'=':<3} {'yᵢ':<12}")
print(f"  {'-'*4} {'-'*12} {'-'*3} {'-'*12} {'-'*3} {'-'*12}")
for i, (x_i, y_i) in enumerate(zip(window, y)):
    print(f"  {i:<4} {x_i:<12.6f} {'-':<3} {x_bar:<12.6f} {'=':<3} {y_i:+12.6f}")

print(f"\n  y = [{', '.join([f'{v:+.6f}' for v in y])}]")
# Deviations from the mean sum to zero up to floating-point rounding.
print(f"\n  ✓ Verification: Σyᵢ = {np.sum(y):.2e} ≈ 0")

---

## Step 4: Compute Cumulative Deviation Series

$$Z_k = \sum_{i=0}^{k} y_i$$

In [None]:
# Step 4: running sum of the deviations (the cumulative deviation profile).
print("Step 4: Compute Cumulative Deviation Series")
print("="*60)

Z = np.cumsum(y)

print(f"\n  Zₖ = Σᵢ₌₀ᵏ yᵢ (running sum)")
print(f"\n  {'k':<4} {'Calculation':<40} {'Zₖ':<12}")
print(f"  {'-'*4} {'-'*40} {'-'*12}")

# The first row has no predecessor; it goes through the same column format
# as the other rows so the Zₖ column lines up.
first_calc = f"y₀ = {y[0]:+.6f}"
print(f"  {0:<4} {first_calc:<40} {Z[0]:+.6f}")
for k in range(1, s):
    calc = f"Z{k-1} + y{k} = {Z[k-1]:+.6f} + ({y[k]:+.6f})"
    print(f"  {k:<4} {calc:<40} {Z[k]:+.6f}")

print(f"\n  Z = [{', '.join([f'{v:+.6f}' for v in Z])}]")

### 📊 Cumulative Deviation Profile

In [None]:
# Two-panel figure: deviations y (left) and their cumulative sum Z (right).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Left: Deviations y (green = above the mean, red = below)
colors = ['green' if v >= 0 else 'red' for v in y]
ax1.bar(range(s), y, color=colors, alpha=0.7, edgecolor='black')
ax1.axhline(y=0, color='black', linestyle='-', linewidth=1)
ax1.set_xlabel('Index i', fontsize=12)
# Mathtext renders the subscript with any font; the previous label held
# mis-encoded characters.
ax1.set_ylabel('Deviation $y_i$', fontsize=12)
ax1.set_title('Step 3: Deviations from Mean', fontsize=13, fontweight='bold')
ax1.set_xticks(range(s))
for i, v in enumerate(y):
    # Put the value label above positive bars and below negative ones.
    ax1.annotate(f'{v:+.4f}', (i, v), textcoords="offset points",
                 xytext=(0, 5 if v >= 0 else -15), ha='center', fontsize=9)

# Right: Cumulative deviations Z
ax2.plot(range(s), Z, 'b-o', linewidth=2, markersize=8, label='Cumulative Z')
ax2.fill_between(range(s), Z, alpha=0.3)
ax2.axhline(y=0, color='black', linestyle='-', linewidth=1)

# Mark max and min
max_idx, min_idx = np.argmax(Z), np.argmin(Z)
ax2.scatter([max_idx], [Z[max_idx]], color='green', s=150, zorder=5, label=f'Max: Z{max_idx}={Z[max_idx]:.4f}')
ax2.scatter([min_idx], [Z[min_idx]], color='red', s=150, zorder=5, label=f'Min: Z{min_idx}={Z[min_idx]:.4f}')

# Draw range R as a double-headed arrow just right of the last point
ax2.annotate('', xy=(s-0.5, Z[max_idx]), xytext=(s-0.5, Z[min_idx]),
            arrowprops=dict(arrowstyle='<->', color='purple', lw=2))
ax2.text(s-0.3, (Z[max_idx]+Z[min_idx])/2, f'R = {Z[max_idx]-Z[min_idx]:.4f}',
         fontsize=11, color='purple', fontweight='bold')

ax2.set_xlabel('Index k', fontsize=12)
ax2.set_ylabel('Cumulative Deviation $Z_k$', fontsize=12)
ax2.set_title('Step 4: Cumulative Deviation Profile', fontsize=13, fontweight='bold')
ax2.set_xticks(range(s))
ax2.legend(loc='best')

plt.tight_layout()
plt.show()

---

## Step 5: Compute the Range R

$$R = \max(Z) - \min(Z)$$

In [None]:
# Step 5: range of the cumulative deviation profile.
print("Step 5: Compute the Range R")
print("="*60)

Z_max = np.max(Z)
Z_min = np.min(Z)
R = Z_max - Z_min

print(f"\n  From Z = [{', '.join([f'{v:+.4f}' for v in Z])}]")
print(f"\n  max(Z) = {Z_max:+.6f}  (at k={np.argmax(Z)})")
print(f"  min(Z) = {Z_min:+.6f}  (at k={np.argmin(Z)})")
print(f"\n  R = max(Z) - min(Z)")
print(f"    = {Z_max:+.6f} - ({Z_min:+.6f})")

# Result box, sized from its content so the right border always lines up.
result_text = f"  R = {R:.6f}"
box_width = max(29, len(result_text) + 2)
print(f"\n  ┌{'─' * box_width}┐")
print(f"  │{result_text:<{box_width}}│")
print(f"  └{'─' * box_width}┘")

---

## Step 6: Compute Standard Deviation S

$$S = \sqrt{\frac{1}{s-1} \sum_{i=0}^{s-1} y_i^2}$$

In [None]:
# Step 6: sample standard deviation of the window (divisor s - 1).
print("Step 6: Compute Standard Deviation S")
print("="*60)

if s < 2:
    # The (s - 1) divisor is zero or negative for fewer than two samples.
    raise ValueError("WINDOW_SIZE must be at least 2 to compute the sample standard deviation")

y_sq = y ** 2
sum_y_sq = np.sum(y_sq)
variance = sum_y_sq / (s - 1)
S = np.sqrt(variance)

print(f"\n  S = √[ (1/(s-1)) × Σyᵢ² ]")
print(f"\n  {'i':<4} {'yᵢ':<14} {'yᵢ²':<14}")
print(f"  {'-'*4} {'-'*14} {'-'*14}")
for i, (y_i, y_i_sq) in enumerate(zip(y, y_sq)):
    print(f"  {i:<4} {y_i:+.6f}      {y_i_sq:.8f}")
print(f"  {'-'*4} {'-'*14} {'-'*14}")
# Same column layout as the rows above so the total sits under yᵢ².
print(f"  {'Σ':<4} {'':<14} {sum_y_sq:.8f}")

print(f"\n  Variance = {sum_y_sq:.8f} / {s-1} = {variance:.8f}")

# Result box, sized from its content so the right border always lines up.
result_text = f"  S = √{variance:.8f} = {S:.6f}"
box_width = max(29, len(result_text) + 2)
print(f"\n  ┌{'─' * box_width}┐")
print(f"  │{result_text:<{box_width}}│")
print(f"  └{'─' * box_width}┘")

---

## Step 7: Compute Rescaled Range (R/S)

$$\frac{R}{S} = \frac{\max(Z) - \min(Z)}{S}$$

In [None]:
# Step 7: rescaled range of the demo window.
print("Step 7: Compute Rescaled Range (R/S)")
print("="*60)

# A constant window has S == 0 (and R == 0); R/S is then undefined. Report
# NaN explicitly instead of relying on a 0/0 floating-point warning.
RS = R / S if S > 0 else float('nan')

print(f"\n  (R/S) = R / S")
print(f"        = {R:.6f} / {S:.6f}")
if not S > 0:
    print("\n  Note: S is not positive, so R/S is undefined for this window.")

# Result box, sized from its content so the right border always lines up.
result_text = f"  (R/S) for window size {s} = {RS:.6f}"
box_width = max(53, len(result_text) + 2)
print(f"\n  ╔{'═' * box_width}╗")
print(f"  ║{result_text:<{box_width}}║")
print(f"  ╚{'═' * box_width}╝")

---

## Step 8: Multi-Scale R/S Analysis

Compute R/S for multiple window sizes to estimate H via regression.

In [None]:
# Section banner for the multi-scale stage: the cells below repeat the R/S
# computation over the full series X at several window sizes.
print("Step 8: Compute R/S for Multiple Window Sizes")
print("="*60)

def compute_rs(series, window_size):
    """Return the mean rescaled range (R/S) over non-overlapping windows.

    The series is cut into ``len(series) // window_size`` consecutive
    windows; trailing samples that do not fill a whole window are ignored.
    For each window, R is the range (max - min) of the cumulative sum of
    deviations from the window mean, and S is the sample standard deviation
    (``ddof=1``). Windows whose S is not strictly positive (e.g. constant
    windows) are skipped.

    Args:
        series: 1-D sequence of numbers (list, tuple or NumPy array).
        window_size: Number of samples per window.

    Returns:
        The mean of R/S over the usable windows, or 0 when there is none:
        the series is shorter than one window, ``window_size`` is below 2,
        or every window has zero spread. Callers that take a logarithm of
        the result must check for this 0 first.
    """
    # Accept plain Python sequences as well as arrays; the arithmetic below
    # (w - mean) is not defined for lists.
    values = np.asarray(series, dtype=float)

    # A sample standard deviation needs at least two points. Returning early
    # also avoids the integer division by zero for window_size == 0 and the
    # NumPy degrees-of-freedom warnings for window_size == 1.
    if window_size < 2:
        return 0

    n_win = len(values) // window_size
    rs_vals = []
    for i in range(n_win):
        w = values[i * window_size:(i + 1) * window_size]
        w_cum = np.cumsum(w - np.mean(w))
        w_R = np.max(w_cum) - np.min(w_cum)
        w_S = np.std(w, ddof=1)
        if w_S > 0:
            rs_vals.append(w_R / w_S)
    return np.mean(rs_vals) if rs_vals else 0

# Window sizes (candidate scales for the log-log regression)
window_sizes = [8, 11, 14, 17, 22]
results = [(ws, compute_rs(X, ws)) for ws in window_sizes]

# compute_rs returns 0 when a scale has no usable window (series shorter
# than the window, or zero variance everywhere). log(0) = -inf would poison
# the table and the regression, so such scales are dropped and reported.
skipped = [ws for ws, rs in results if not rs > 0]
results = [(ws, rs) for ws, rs in results if rs > 0]
window_sizes = [ws for ws, _ in results]
if skipped:
    print(f"\n  Skipped window sizes with no valid R/S: {skipped}")
if len(results) < 2:
    raise ValueError(
        "Need at least two window sizes with a valid R/S value to fit the "
        f"Hurst exponent; got {len(results)} from {len(X)} observations"
    )

print(f"\n  {'Window s':<12} {'R/S':<14} {'log(s)':<12} {'log(R/S)':<12}")
print(f"  {'-'*12} {'-'*14} {'-'*12} {'-'*12}")
for ws, rs in results:
    print(f"  {ws:<12} {rs:<14.6f} {np.log(ws):<12.6f} {np.log(rs):<12.6f}")

---

## Step 9: Linear Regression → Hurst Exponent

$$\log(R/S) = H \cdot \log(s) + c$$

The slope **H** is the Hurst exponent.

In [None]:
# Step 9: ordinary least squares of log(R/S) on log(s); the slope is H.
print("Step 9: Linear Regression for Hurst Exponent")
print("="*60)

# Only strictly positive R/S values have a finite logarithm.
valid_points = [(ws, rs) for ws, rs in results if rs > 0]
if len(valid_points) < 2:
    raise ValueError(
        "Need at least two window sizes with a positive R/S value to fit a line"
    )

log_s = np.log(np.array([ws for ws, _ in valid_points], dtype=float))
log_rs = np.log(np.array([rs for _, rs in valid_points], dtype=float))

# OLS regression: slope = cov(x, y) / var(x), intercept from the means.
x_mean = np.mean(log_s)
y_mean = np.mean(log_rs)
numerator = np.sum((log_s - x_mean) * (log_rs - y_mean))
denominator = np.sum((log_s - x_mean) ** 2)
if denominator == 0:
    # All window sizes identical: the slope is undefined.
    raise ValueError("Window sizes must not all be equal; cannot fit a slope")
H = numerator / denominator
c = y_mean - H * x_mean

print(f"\n  Regression: log(R/S) = H·log(s) + c")
print(f"\n  H = Σ(x-x̄)(y-ȳ) / Σ(x-x̄)²")
print(f"    = {numerator:.6f} / {denominator:.6f}")

# Result box; every row is padded to the same width so the border lines up.
box_width = 55
hurst_row = f"   HURST EXPONENT:  H = {H:.6f}"
print(f"\n  ╔{'═' * box_width}╗")
print(f"  ║{'':<{box_width}}║")
print(f"  ║{hurst_row:<{box_width}}║")
print(f"  ║{'':<{box_width}}║")
print(f"  ╚{'═' * box_width}╝")

### 📊 Log-Log Regression Plot

In [None]:
# Log-log scatter of R/S against window size with the fitted line.
fig, ax = plt.subplots(figsize=(10, 6))

# Data points
ax.scatter(log_s, log_rs, s=100, c='blue', zorder=5, label='Data points')

# Regression line, extended slightly beyond the data on both sides
x_line = np.linspace(log_s.min() - 0.1, log_s.max() + 0.1, 100)
y_line = H * x_line + c
ax.plot(x_line, y_line, 'r-', linewidth=2, label=f'Fit: log(R/S) = {H:.4f}·log(s) + {c:.4f}')

# Annotate points (scales without a positive R/S have no finite logarithm)
for ws, rs in results:
    if rs > 0:
        ax.annotate(f's={ws}', (np.log(ws), np.log(rs)),
                    textcoords="offset points", xytext=(5, 5), fontsize=10)

ax.set_xlabel('log(s) - Log Window Size', fontsize=12)
ax.set_ylabel('log(R/S) - Log Rescaled Range', fontsize=12)
ax.set_title(f'Hurst Exponent Estimation: H = {H:.4f}', fontsize=14, fontweight='bold')
ax.legend(loc='lower right', fontsize=11)
ax.grid(True, alpha=0.3)

# Add interpretation box. A non-finite H is handled first: NaN fails both
# ">" and "<", so it used to be labelled as a random walk. The "≈ 0.5" case
# uses a floating-point tolerance instead of exact equality.
if not np.isfinite(H):
    interp = 'H is undefined\n(regression failed)'
    color = 'gray'
elif np.isclose(H, 0.5):
    interp = f'H = {H:.3f} ≈ 0.5\nRANDOM WALK'
    color = 'gray'
elif H > 0.5:
    interp = f'H = {H:.3f} > 0.5\nPERSISTENT (Trending)'
    color = 'green'
else:
    interp = f'H = {H:.3f} < 0.5\nANTI-PERSISTENT (Mean-reverting)'
    color = 'red'

ax.text(0.05, 0.95, interp, transform=ax.transAxes, fontsize=12,
        verticalalignment='top', bbox=dict(boxstyle='round', facecolor=color, alpha=0.2))

plt.tight_layout()
plt.show()

---

## 🎯 Final Result

In [None]:
# Interpretation of H. A non-finite H is checked first: NaN fails every
# ordered comparison, so it used to be reported as "RANDOM WALK". The
# random-walk case uses a floating-point tolerance instead of exact equality.
if not np.isfinite(H):
    behavior, meaning = "UNDEFINED", "Regression failed (H is not finite)"
elif H > 0.9:
    behavior, meaning = "STRONGLY PERSISTENT", "Near-deterministic trend"
elif np.isclose(H, 0.5):
    behavior, meaning = "RANDOM WALK", "No memory"
elif H > 0.5:
    behavior, meaning = "PERSISTENT", "Trending behavior"
else:
    behavior, meaning = "ANTI-PERSISTENT", "Mean-reverting behavior"

# Result box: every row is padded to one common width so the right border
# lines up (the rows previously had different hard-coded widths).
box_width = 62
box_rows = [
    "",
    f"   HURST EXPONENT:  H = {H:.6f}",
    "",
    f"   Behavior: {behavior}",
    f"   Meaning:  {meaning}",
    "",
]
result_box = "\n".join(
    [f"  ╔{'═' * box_width}╗"]
    + [f"  ║{row:<{box_width}}║" for row in box_rows]
    + [f"  ╚{'═' * box_width}╝"]
)

print("\n" + "="*70)
print("                         FINAL RESULT")
print("="*70)
print(f"""
  INPUT:
    • Data: {data_label}
    • Observations: n = {n}
    • Range: [{X.min():.4f}, {X.max():.4f}]

  METHOD: Rescaled Range (R/S) Analysis

  DETAILED WINDOW (cycles {start_idx+1}-{start_idx+s}):
    • Mean: x̄ = {x_bar:.6f}
    • Range: R = {R:.6f}
    • Std: S = {S:.6f}
    • R/S = {RS:.6f}

  MULTI-SCALE REGRESSION:
    • Window sizes: {window_sizes}
    • Slope H = {numerator:.6f} / {denominator:.6f}

{result_box}
""")

---

## 🔬 Try Different Windows!

Change `WINDOW_START` in the configuration cell to see the calculation at different points:

| Phase | WINDOW_START | Description |
|-------|-------------|-------------|
| Early life | 0 | Fresh, minimal degradation |
| Mid life | 30 | Active degradation |
| Late life | 55 | Near end-of-life |

---

*PRISM Behavioral Geometry Engine - Mathematical Derivation Proof*