In [None]:
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import pymc as pm
import arviz as az


# load
data = pd.read_csv('./cleaned_data/final_dataset.csv')

# simplify to numerical encoding
data['result_code'] = data['result'].map(
    {'Home Win': 0, 'Draw': 1, 'Away Win': 2})

# standardice numeric features (important for convergence)

scaler_home = StandardScaler()
scaler_away = StandardScaler()
features_home = ['home_avg_market_value',
                 'home_nationalities', 'home_avg_age', 'home_total_minutes']
features_away = ['away_avg_market_value',
                 'away_nationalities', 'away_avg_age', 'away_total_minutes']

X_home = scaler_home.fit_transform(data[features_home])
X_away = scaler_away.fit_transform(data[features_away])

# After standardization explicitly clip to ensure numeric stability
X_home = np.clip(X_home, -3, 3)
X_away = np.clip(X_away, -3, 3)

# outcome
# explicitly ensure correct type and values
y = data['result_code'].values.astype(int)

In [None]:
import numpy as np

print("X_home stats:", np.min(X_home), np.max(X_home), np.mean(X_home))
print("X_away stats:", np.min(X_away), np.max(X_away), np.mean(X_away))

print("Are there NaNs or Infs?")
print("X_home NaNs:", np.isnan(X_home).sum(), "Infs:", np.isinf(X_home).sum())
print("X_away NaNs:", np.isnan(X_away).sum(), "Infs:", np.isinf(X_away).sum())

print("Explicit y data type:", y.dtype)
print("Explicit y unique values:", np.unique(y))
print("Outcome distribution:", np.bincount(y))

X_home stats: -3.0 3.0 0.009212097084674952
X_away stats: -3.0 3.0 0.00915566020318918
Are there NaNs or Infs?
X_home NaNs: 0 Infs: 0
X_away NaNs: 0 Infs: 0
Outcome distribution: [26891 14285 18429]


In [43]:
# define model
with pm.Model() as model:
    # Priors for team feature weights
    weights_home = pm.Normal('weights_home', mu=0,
                             sigma=0.1, shape=X_home.shape[1])
    weights_away = pm.Normal('weights_away', mu=0,
                             sigma=0.1, shape=X_away.shape[1])

    # Latent team strengths
    strength_home = pm.math.dot(X_home, weights_home)
    strength_away = pm.math.dot(X_away, weights_away)

    # reshape for safe stacking
    strength_home_exp = strength_home.dimshuffle(0, 'x')
    strength_away_exp = strength_away.dimshuffle(0, 'x')

    # Bias terms for match results (home, draw, away)
    bias = pm.Normal('bias', mu=0, sigma=0.1, shape=3)

    # Explicitly define logits in numerically stable form
    logits = pm.math.concatenate([
        strength_home_exp + bias[0],
        (strength_home_exp + strength_away_exp) / 2 + bias[1],
        strength_away_exp + bias[2]
    ], axis=1)

    # Safe softmax with clipping
    eps = 1e-8
    outcome_probs = pm.Deterministic(
        'outcome_probs',
        pm.math.clip(pm.math.softmax(logits), eps, 1-eps)
    )

    # Observed categorical outcomes
    outcome_obs = pm.Categorical('outcome_obs', p=outcome_probs, observed=y)

# Explicit stable initial values clearly defined:
safe_init = {
    'weights_home': np.zeros(X_home.shape[1]),
    'weights_away': np.zeros(X_away.shape[1]),
    'bias': np.zeros(3)
}

with model:
    trace = pm.sample(
        500, tune=500,
        target_accept=0.95,
        init='adapt_diag',
        initvals=safe_init,
        cores=1,               # <-- explicitly single-core to simplify debugging
        chains=1,              # <-- explicitly single-chain to simplify initialization
        progressbar=True       # explicitly shows clear progress
    )

az.plot_trace(trace)
print(az.summary(trace))

az.plot_trace(trace)
az.summary(trace)

Initializing NUTS using adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [weights_home, weights_away, bias]


SamplingError: Bad initial energy: SamplerWarning(kind=<WarningType.BAD_ENERGY: 8>, message='Bad initial energy, check any log probabilities that are inf or -inf, nan or very small:\n[-inf]\n.Try model.debug() to identify parametrization problems.', level='critical', step=0, exec_info=None, extra=None, divergence_point_source=None, divergence_point_dest=None, divergence_info=None)

In [None]:
# running inference
# Example new match features (standardized exactly like training data)
new_home = scaler.transform([[3000000, 8, 26, 990]])
new_away = scaler.transform([[2800000, 7, 25, 990]])

with model:
    posterior_predictive = pm.sample_posterior_predictive(
        trace,
        var_names=['weights_home', 'weights_away', 'bias']
    )

# Compute strengths
strength_home_pred = np.dot(new_home, posterior_predictive['weights_home'].T)
strength_away_pred = np.dot(new_away, posterior_predictive['weights_away'].T)

# Compute outcome probabilities clearly
logits = np.vstack([
    strength_home_pred + posterior_predictive['bias'][:, 0],
    (strength_home_pred + strength_away_pred) /
    2 + posterior_predictive['bias'][:, 1],
    strength_away_pred + posterior_predictive['bias'][:, 2]
])

outcome_prob_pred = np.exp(logits) / np.sum(np.exp(logits), axis=0)

# Clear probabilities for the match outcome:
mean_outcome_probs = np.mean(outcome_prob_pred, axis=1)
print("Home win probability:", mean_outcome_probs[0])
print("Draw probability:", mean_outcome_probs[1])
print("Away win probability:", mean_outcome_probs[2])



NameError: name 'trace' is not defined