# End-to-end Example

<a href="https://colab.research.google.com/drive/1GjrDG_iq_9_lxEQK_aBmr-jCCCnFt0v7?usp=sharing" target="_blank">
    <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open in Colab">
</a>
<br><br>

In [3]:
import os
import tempfile

import pandas as pd
import penaltyblog as pb

## Download data from football-data.co.uk

In [None]:
# Fetch one fixtures frame per season from football-data.co.uk and stack them.
COMPETITION = "ENG Premier League"
SEASONS = ["2021-2022", "2022-2023", "2023-2024"]

season_fixtures = [
    pb.scrapers.FootballData(COMPETITION, season).get_fixtures()
    for season in SEASONS
]
df = pd.concat(season_fixtures)

df.head()

Unnamed: 0_level_0,date,datetime,season,competition,div,time,team_home,team_away,fthg,ftag,...,b365_cahh,b365_caha,pcahh,pcaha,max_cahh,max_caha,avg_cahh,avg_caha,goals_home,goals_away
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1628812800---brentford---arsenal,2021-08-13,2021-08-13 20:00:00,2021-2022,ENG Premier League,E0,20:00,Brentford,Arsenal,2,0,...,1.75,2.05,1.81,2.13,2.05,2.17,1.8,2.09,2,0
1628899200---burnley---brighton,2021-08-14,2021-08-14 15:00:00,2021-2022,ENG Premier League,E0,15:00,Burnley,Brighton,1,2,...,1.79,2.15,1.81,2.14,1.82,2.19,1.79,2.12,1,2
1628899200---chelsea---crystal_palace,2021-08-14,2021-08-14 15:00:00,2021-2022,ENG Premier League,E0,15:00,Chelsea,Crystal Palace,3,0,...,2.05,1.75,2.12,1.81,2.16,1.93,2.06,1.82,3,0
1628899200---everton---southampton,2021-08-14,2021-08-14 15:00:00,2021-2022,ENG Premier League,E0,15:00,Everton,Southampton,3,1,...,2.05,1.88,2.05,1.88,2.08,1.9,2.03,1.86,3,1
1628899200---leicester---wolves,2021-08-14,2021-08-14 15:00:00,2021-2022,ENG Premier League,E0,15:00,Leicester,Wolves,1,0,...,2.02,1.91,2.01,1.92,2.05,1.95,1.99,1.89,1,0


## Create time-decay weights so more recent data have more influence on the model's fit

In [5]:
# xi is forwarded to `dixon_coles_weights` along with the match dates.
# Presumably a larger xi discounts older fixtures more sharply — confirm in the penaltyblog docs.
xi = 0.001
match_dates = df["date"]
weights = pb.models.dixon_coles_weights(match_dates, xi=xi)

## Build the model's inputs

In [6]:
# Pull the goal counts and team names out of the frame as plain arrays,
# in the (home, away) order the model constructor is given below.
goal_columns = ("goals_home", "goals_away")
team_columns = ("team_home", "team_away")

gh, ga = (df[column].values for column in goal_columns)
th, ta = (df[column].values for column in team_columns)

## Fit a Dixon & Coles model

- `use_gradient=True` for faster convergence
- `minimizer_options` is passed directly to `scipy.optimize.minimize(options=...)`

In [16]:
# Solver settings (optional); handed on to `scipy.optimize.minimize` through `minimizer_options`.
solver_options = {
    "maxiter": 3000,   # more iterations if needed
    "gtol": 1e-8,      # gradient tolerance
    "ftol": 1e-9,      # function tolerance
    "disp": False,     # silence optimiser output
}

model = pb.models.DixonColesGoalModel(gh, ga, th, ta, weights=weights)

# use_gradient is optional; it can be False for back-compat.
model.fit(use_gradient=True, minimizer_options=solver_options)

# Summarise the fit: status, goodness-of-fit figures and a peek at the parameters.
print("Fitted:", model.fitted)
print("Log-likelihood:", model.loglikelihood)
print("AIC:", model.aic)
print("Number of params:", model.n_params)
leading_params = list(model.params.items())[:5]
print("First few params:", leading_params)

Fitted: True
Log-likelihood: -2164.74110313536
AIC: 4433.48220627072
Number of params: 52
First few params: [('attack_Arsenal', np.float64(1.4615514952216384)), ('attack_Aston Villa', np.float64(1.184047741206666)), ('attack_Bournemouth', np.float64(0.9147685302294655)), ('attack_Brentford', np.float64(1.0526808596367745)), ('attack_Brighton', np.float64(1.090265129154662))]


## Predict a specific fixture and get a `FootballProbabilityGrid` back

In [20]:
home_team, away_team = "Man City", "Liverpool"

# max_goals=15 and normalize=True are the defaults; they are spelled out here for visibility.
pred = model.predict(
    home_team,
    away_team,
    max_goals=15,
    normalize=True,
)

### Core markets (1x2)

In [21]:
# `home_draw_away` bundles the three outcome probabilities in home, draw, away order;
# the individual attributes below expose the same values one at a time.
print("P(Home win), P(Draw), P(Away win):", pred.home_draw_away)
print("P(Home win):", pred.home_win)
print("P(Draw):", pred.draw)
print("P(Away win):", pred.away_win)

P(Home win), P(Draw), P(Away win): [0.5679744323673176, 0.21833778831586806, 0.21368777931681426]
P(Home win): 0.5679744323673176
P(Draw): 0.21833778831586806
P(Away win): 0.21368777931681426


## Goal Expectancy

In [22]:
# Goal expectation for each side, read from the prediction for this fixture.
print("Home xG:", pred.home_goal_expectation)
print("Away xG:", pred.away_goal_expectation)

Home xG: 1.9358658956305108
Away xG: 1.103726188219536


## Both teams to score (BTTS)

In [None]:
# Both-teams-to-score market; the two recorded probabilities are complements (they sum to 1).
print("BTTS (Yes):", pred.btts_yes)
print("BTTS (No):", pred.btts_no)

BTTS (Yes): 0.5713026487032481
BTTS (No): 0.4286973512967519


## Totals: Over/Under with push handling

In [24]:
# Whole-number line: a total landing exactly on the line is a push
under, push, over = pred.totals(2.0)
print("Totals 2.0  -> Under, Push, Over:", (under, push, over))

# Half-goal line: no push is possible, so that slot comes back as 0.0
under, push, over = pred.totals(2.5)
print("Totals 2.5  -> Under, Push, Over:", (under, push, over))

# Back-compat helper: a single probability, no push component returned
over_two_and_a_half = pred.total_goals("over", 2.5)
print("P(Over 2.5):", over_two_and_a_half)


Totals 2.0  -> Under, Push, Over: (0.1939317196938681, 0.22044686250601725, 0.5856214178001146)
Totals 2.5  -> Under, Push, Over: (0.4143785821998854, 0.0, 0.5856214178001146)
P(Over 2.5): 0.5856214178001146


## Asian handicap (integer / half / quarter lines)

In [25]:
# `asian_handicap` returns a single win probability, while `asian_handicap_probs`
# returns a dict with 'win', 'push' and 'lose' entries.
# NOTE(review): in the recorded output, Home -0.5 (0.786) equals double chance 1X
# (home win + draw) rather than P(home win) (0.568) — confirm the sign convention
# the library uses for the handicap line before relying on these numbers.
print("AH Home -0.5  (win prob only):", pred.asian_handicap("home", -0.5))
print("AH Home -0.25 (Win/Push/Lose):", pred.asian_handicap_probs("home", -0.25))
print("AH Away +1.0  (Win/Push/Lose):", pred.asian_handicap_probs("away", +1.0))

AH Home -0.5  (win prob only): 0.7863122206831856
AH Home -0.25 (Win/Push/Lose): {'win': 0.6771433265252516, 'push': 0.10916889415793403, 'lose': 0.21368777931681426}
AH Away +1.0  (Win/Push/Lose): {'win': 0.07973956221991603, 'push': 0.13394821709689822, 'lose': 0.7863122206831856}


## Double chance & Draw No Bet

In [26]:
# Double chance covers two of the three outcomes:
# 1X = home win or draw, X2 = draw or away win, 12 = home win or away win.
# Draw-no-bet values are win probabilities conditional on the match not being drawn.
print("Double chance 1X:", pred.double_chance_1x)
print("Double chance X2:", pred.double_chance_x2)
print("Double chance 12:", pred.double_chance_12)
print("DNB Home (conditional win prob):", pred.draw_no_bet_home)
print("DNB Away (conditional win prob):", pred.draw_no_bet_away)

Double chance 1X: 0.7863122206831856
Double chance X2: 0.4320255676326823
Double chance 12: 0.7816622116841319
DNB Home (conditional win prob): 0.7266238841757325
DNB Away (conditional win prob): 0.27337611582426735


## Exact scores & distributions

In [27]:
# Probability of one specific scoreline; the recorded value is consistent with
# the arguments being (home goals, away goals) — confirm against the library docs.
# The three distribution helpers return one probability per goal count k.
print("P(Exact score 2-1):", pred.exact_score(2, 1))
print("Home goal distribution (P(H=k)):", pred.home_goal_distribution())
print("Away goal distribution (P(A=k)):", pred.away_goal_distribution())
print("Total goals distribution (P(T=k)):", pred.total_goals_distribution())

P(Exact score 2-1): 0.09897005717496646
Home goal distribution (P(H=k)): [1.44299267e-01 2.79344030e-01 2.70386290e-01 1.74477199e-01
 8.44411149e-02 3.26933349e-02 1.05483187e-02 2.91716148e-03
 7.05904178e-04 1.51837314e-04 2.93936677e-05 5.17292718e-06
 8.34507775e-07 1.24268857e-07 1.71834173e-08]
Away goal distribution (P(A=k)): [3.31633051e-01 3.66032084e-01 2.01999598e-01 7.43174155e-02
 2.05065194e-02 4.52671651e-03 8.32709259e-04 1.31297574e-04
 1.81145713e-05 2.22150297e-06 2.45193101e-07 2.46023679e-08
 2.26285648e-09 1.92121073e-10 1.51463614e-11]
Total goals distribution (P(T=k)): [4.72349671e-02 1.46696753e-01 2.20446863e-01 2.23983794e-01
 1.70204841e-01 1.03470658e-01 5.24180987e-02 2.27613768e-02
 8.64816260e-03 2.92076518e-03 8.87793471e-04 2.45320910e-04
 6.21396246e-05 1.45291624e-05 3.15448050e-06 6.38486656e-07
 1.20534896e-07 2.11560541e-08 3.43503914e-09 5.13555845e-10
 7.04936725e-11 8.87598896e-12 1.02559236e-12 1.08826264e-13
 1.05979703e-14 9.42632896e-16 7.

## Team-centric analytics

In [28]:
# Win-to-nil and expected-points figures for each side.
# The recorded expected points match 3 * P(win) + 1 * P(draw) for the respective team.
print("Home win to nil:", pred.win_to_nil_home())
print("Away win to nil:", pred.win_to_nil_away())
print("Expected points (Home):", pred.expected_points_home())
print("Expected points (Away):", pred.expected_points_away())

Home win to nil: 0.2843980843256169
Away win to nil: 0.09706429990582308
Expected points (Home): 1.922261085417821
Expected points (Away): 0.8594011262663108


## Normalisation & truncation notes

In [32]:
# Total probability mass of the grid produced with normalize=True
normalised_total = pred.grid.sum()
print("Grid sum (should be 1.0 if normalize=True):", round(normalised_total, 6))

# To audit truncation effects, rebuild the same prediction with normalize=False
pred_raw = model.predict(home_team, away_team, max_goals=15, normalize=False)
raw_total = pred_raw.grid.sum()
print("Raw grid sum (<= 1.0):", raw_total)

Grid sum (should be 1.0 if normalize=True): 1.0
Raw grid sum (<= 1.0): 0.9999999974786222


## Save & load a fitted model

In [34]:
# Write into the platform's temp directory instead of a hard-coded "/tmp",
# which does not exist on every operating system (e.g. Windows).
file_path = os.path.join(tempfile.gettempdir(), "dixon_coles.pkl")

model.save(file_path)

# NOTE(review): the ".pkl" name suggests save/load are pickle-based — if so,
# only load model files that come from a source you trust.
loaded = pb.models.DixonColesGoalModel.load(file_path)
pred2 = loaded.predict(home_team, away_team)  # same API as the in-memory model
print("Loaded model 1X2:", pred2.home_draw_away)

Loaded model 1X2: [0.5679744323673176, 0.21833778831586806, 0.21368777931681426]
