# QUBO Portfolio Optimisation for a Single Football Match

The odds and implied probability of twenty different markets related to a single football match are pulled via API. We have taken an hourly snapshot of the odds for these markets, and the correlation analysis of these for 3 days up to the time of the match was carried out. 

With odds and implied probability, an expectation value of the bet put is then calculated and assigned to the nodes of the graph we are building as w_i.

In [36]:
import numpy as np
import pandas as pd
import itertools
import time
import orbit 

In [37]:
def expected_value(p, odds):
    """
    Expected profit per unit stake:
    EV = p * odds - 1
    """
    return p * odds - 1.0

def portfolio_energy_qubo(x, w, Q):
    """
    QUBO energy:
    E(x) = sum_i w_i x_i + sum_{i<j} Q_ij x_i x_j
    x : 1D numpy array of 0/1 of length N
    """
    linear = np.dot(w, x)
    quadratic = 0.0
    N = len(x)
    for i in range(N):
        for j in range(i+1, N):
            quadratic += Q[i, j] * x[i] * x[j]
    return linear + quadratic

In [47]:
#TO BE REPLACED BY THE API DATA
def generate_betting_timeseries(
    match_time="2025-01-10 20:00",
    n_days_before=3,
    n_markets=21,
    seed=42
):
    """
    Create a time series DataFrame of hourly odds and implied probabilities
    for multiple betting markets in the days leading up to a match.
    
    Parameters
    ----------
    match_time : str or pd.Timestamp
        Kick-off time of the match.
    n_days_before : int
        Number of days before the match to include (hourly intervals).
    n_markets : int
        Number of distinct betting markets.
    seed : int
        Random seed for reproducibility.
    
    Returns
    -------
    df : pd.DataFrame
        Long-format DataFrame with columns:
        ['timestamp', 'market', 'odds', 'implied_prob'].
    """
    np.random.seed(seed)
    
    # Time index: hourly from (match_time - n_days_before) to match_time
    match_time = pd.to_datetime(match_time)
    start_time = match_time - pd.Timedelta(days=n_days_before)
    time_index = pd.date_range(start=start_time, end=match_time, freq="H")
    
    # Create some market names (you can replace with real ones)
    markets = [f"Market_{i+1:02d}" for i in range(n_markets)]
    
    # DataFrame to hold odds paths (wide format first)
    odds_wide = pd.DataFrame(index=time_index, columns=markets, dtype=float)
    
    for m in markets:
        # Random initial odds between 1.5 and 6.0
        start_odds = np.random.uniform(1.5, 6.0)
        
        # Random walk on log-odds to keep them positive and fairly smooth
        n_steps = len(time_index)
        noise = np.random.normal(loc=0.0, scale=0.03, size=n_steps)  # step volatility
        log_odds = np.log(start_odds) + np.cumsum(noise)
        series_odds = np.exp(log_odds)
        
        # Clip odds to a sensible range
        series_odds = np.clip(series_odds, 1.01, 20.0)
        
        odds_wide[m] = series_odds
    
    # Long / tidy format
    df = (
        odds_wide
        .reset_index()
        .melt(id_vars="index", var_name="market", value_name="odds")
        .rename(columns={"index": "timestamp"})
    )
    
    # Implied probability from decimal odds: p = 1 / odds
    df["implied_prob"] = 1.0 / df["odds"]
    
    return df


if __name__ == "__main__":
    df = generate_betting_timeseries()
    print(df.head())
    print("\nShape:", df.shape)


            timestamp     market      odds  implied_prob
0 2025-01-07 20:00:00  Market_01  3.080929      0.324577
1 2025-01-07 21:00:00  Market_01  3.110546      0.321487
2 2025-01-07 22:00:00  Market_01  3.136694      0.318807
3 2025-01-07 23:00:00  Market_01  3.233240      0.309287
4 2025-01-08 00:00:00  Market_01  3.177385      0.314724

Shape: (1533, 4)


  time_index = pd.date_range(start=start_time, end=match_time, freq="H")


In [48]:
def compute_market_correlation(df, value_col="implied_prob"):
    """
    Compute the correlation matrix of N betting markets based on
    implied probabilities (or odds).

    Parameters
    ----------
    df : pd.DataFrame
        Must contain columns ['timestamp', 'market', value_col]
    value_col : str
        Column name to compute correlation on ("implied_prob" or "odds")

    Returns
    -------
    corr_matrix : pd.DataFrame
        N×N correlation matrix of markets.
    wide_df : pd.DataFrame
        Time-indexed wide DataFrame used for correlation.
    """

    wide_df = df.pivot_table(
        index="timestamp",
        columns="market",
        values=value_col
    )
    corr_matrix = wide_df.corr()
    return corr_matrix, wide_df

if __name__ == "__main__":
    corr, wide = compute_market_correlation(df)
    print("\n=== Correlation Matrix (20×20) ===")
    print(corr)


=== Correlation Matrix (20×20) ===
market     Market_01  Market_02  Market_03  Market_04  Market_05  Market_06  \
market                                                                        
Market_01   1.000000  -0.020199  -0.082113  -0.123655   0.151383  -0.259624   
Market_02  -0.020199   1.000000  -0.318134  -0.085121  -0.347548   0.410202   
Market_03  -0.082113  -0.318134   1.000000  -0.639656   0.844664   0.314821   
Market_04  -0.123655  -0.085121  -0.639656   1.000000  -0.633921  -0.641922   
Market_05   0.151383  -0.347548   0.844664  -0.633921   1.000000   0.182712   
Market_06  -0.259624   0.410202   0.314821  -0.641922   0.182712   1.000000   
Market_07   0.125228   0.547869  -0.722832   0.179351  -0.636206   0.041396   
Market_08   0.649443   0.197102  -0.331304   0.129960  -0.192612  -0.298146   
Market_09  -0.349277   0.097443  -0.173095   0.091511  -0.237146   0.379116   
Market_10  -0.170570   0.157804  -0.718524   0.455711  -0.650256  -0.234231   
Market_11   0.30

In [49]:
#Taking the lastest odds from the API
final_snapshot = (
    df.sort_values("timestamp")
      .groupby("market")
      .tail(1)
      .set_index("market")
)
final_snapshot

Unnamed: 0_level_0,timestamp,odds,implied_prob
market,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Market_08,2025-01-10 20:00:00,2.773593,0.360543
Market_13,2025-01-10 20:00:00,2.28578,0.437487
Market_03,2025-01-10 20:00:00,3.023226,0.330773
Market_18,2025-01-10 20:00:00,2.017618,0.495634
Market_10,2025-01-10 20:00:00,5.000613,0.199975
Market_05,2025-01-10 20:00:00,4.084994,0.244798
Market_19,2025-01-10 20:00:00,1.715686,0.582857
Market_02,2025-01-10 20:00:00,2.438868,0.410026
Market_11,2025-01-10 20:00:00,1.774221,0.563628
Market_14,2025-01-10 20:00:00,2.257435,0.442981


In [50]:
p_final = final_snapshot["implied_prob"].values
odds_final = final_snapshot["odds"].values

# Expected value per market
EV = p_final * odds_final - 1.0

# Linear QUBO term
w = -EV

In [51]:
wide = df.pivot_table(
    index="timestamp",
    columns="market",
    values="implied_prob"
).sort_index()

rho = wide.corr().values  # 20 × 20 matrix
Q = rho

In [52]:
#Brute-Force Calculations
N = len(final_snapshot)
all_results = []

start_time = time.perf_counter()  # start timer

for bits in itertools.product([0, 1], repeat=N):
    x = np.array(bits)
    E = -1 * portfolio_energy_qubo(x, w, Q)
    all_results.append({
        "x": bits,
        "E": E,
        "num_bets": x.sum()
    })

end_time = time.perf_counter()  # end timer
elapsed_bf = end_time - start_time

results_df = (
    pd.DataFrame(all_results)
      .sort_values("E", ascending=True)
      .reset_index(drop=True)
)

print(results_df.head(32))
print(f"\nChecked {2**N} configurations for N={N}.")
print(f"Time to find ground state (brute force): {elapsed_bf:.4f} seconds")

                                                    x          E  num_bets
0   (0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, ... -20.578048        11
1   (0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, ... -20.338205        12
2   (1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, ... -19.740193        12
3   (1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, ... -19.151073        13
4   (0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, ... -18.968852        10
5   (0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, ... -18.847911        12
6   (0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, ... -18.744355        12
7   (1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, ... -18.623191        11
8   (0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, ... -18.601955        13
9   (0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, ... -18.465020        10
10  (1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, ... -18.220471        13
11  (1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, ... -18.115095        11
12  (0, 0, 1, 0, 1, 1, 0,

In [None]:
#Orbit Calculations
N = len(final_snapshot)
bets = final_snapshot.index.tolist()  

# Standard QUBO -> Ising mapping 
J0 = Q / 4.0
h0 = np.zeros(N)
for i in range(N):
    h0[i] = w[i] / 2.0 + 0.25 * np.sum(Q[i, :])

ising_J = J0
ising_h = h0

start_time = time.perf_counter()
result = orbit.optimize_ising(
    -1*ising_J,
    -1*ising_h,
    n_replicas=3,
    full_sweeps=50_000,
    beta_initial=0.35,
    beta_end=3.5,
    beta_step_interval=1,
)

elapsed_orb = time.perf_counter() - start_time

s_star = np.array(result.min_state)  
x_star = (1 + s_star) // 2                 

E_orbit = 1 * portfolio_energy_qubo(x_star, w, Q)

chosen_bets = [b for b, bit in zip(bets, x_star) if bit == 1]

print("=== ORBIT optimisation result ===")
print(f"Time to (approximately) find ground state: {elapsed_orb:.4f} seconds\n")

print("Spins (s*):", s_star.tolist())
print("Bits  (x*):", x_star.tolist())
print("Number of bets in portfolio:", int(x_star.sum()))
print("Selected bets:", chosen_bets)

print("\nORBIT reported min_cost:", result.min_cost)
print("Objective E = -portfolio_energy_qubo(x*, w, Q):", E_orbit)

# Optional: compare with brute-force ground state if you still have results_df
if 'results_df' in globals():
    print("\n=== Comparison with brute force ===")
    print("Brute-force best E:", results_df.loc[0, 'E'])
    print("Brute-force best x:", results_df.loc[0, 'x'])

[2025-12-05 16:23:56] INFO - orbit.simulator: Simulation starting...


In [None]:
# --- Check ORBIT solution against brute-force results and rank it ---


orbit_bits = tuple(int(b) for b in x_star)
matches = results_df[results_df["x"] == orbit_bits]

if matches.empty:
    print("⚠ ORBIT bitstring not found in brute-force results (this should not happen if N matches).")
else:
    match_idx = matches.index[0]   # 0-based index in results_df
    rank = match_idx + 1           # human-friendly rank (1 = best)

    E_best = results_df.loc[0, "E"]
    x_best = results_df.loc[0, "x"]
    E_orbit_bruteforce = matches.iloc[0]["E"]

    print("\n=== ORBIT vs Brute Force ===")
    print(f"ORBIT bitstring: {orbit_bits}")
    print(f"ORBIT energy from brute-force table: {E_orbit_bruteforce:.6f}")
    print(f"Ground state energy (brute force):   {E_best:.6f}")
    print(f"Ground state bitstring:              {x_best}")

    print(f"\nRank of ORBIT state among all 2^{N} configs: {rank} (1 = ground state)")
    print(f"Energy gap to ground state: {E_orbit_bruteforce - E_best:.6f}")

    # Optional: how many configs are strictly better / equal
    n_better = (results_df["E"] < E_orbit_bruteforce).sum()
    n_equal  = (results_df["E"] == E_orbit_bruteforce).sum()
    print("")
    print(f"Time to the lowest state using orbit: {elapsed_orb:.4f} seconds\n")
    print(f"Time to find the ground state using brute-force  : {elapsed_bf:.4f} seconds\n")