# League Table and Season Prediction

This notebook demonstrates:
1. Computing league tables from match results
2. Different bonus point systems (URC, Premiership, Top14)
3. Predicting final season standings with Monte Carlo simulation
4. Playoff qualification probabilities
5. Position probability distributions

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd

from rugby_ranking.notebook_utils import setup_notebook_environment, load_model_and_trace
from rugby_ranking.model.league_table import LeagueTable, BonusPointRules, format_table
from rugby_ranking.model.season_predictor import SeasonPredictor
from rugby_ranking.model.predictions import MatchPredictor
# NOTE(review): later cells also use `plt`, `MatchDataset` and `load_checkpoint`,
# none of which are imported in this notebook - confirm where they come from.

# Setup: load data and configure plots
# NOTE(review): `dataset` is rebound by the "Load Data" cell below, and `df` /
# `model_dir` are not referenced again in this notebook - confirm whether the
# manual reload in that cell is still needed.
dataset, df, model_dir = setup_notebook_environment()

## 1. Load Data

First, load match data from the Rugby-Data repository.

In [None]:
# Path to Rugby-Data repository (adjust as needed)
DATA_DIR = Path("../../Rugby-Data")

# Load dataset
print("Loading data...")
dataset = MatchDataset(DATA_DIR)
dataset.load_json_files()

print(f"Loaded {len(dataset.matches)} matches")

# Collect the dates once. min()/max() raise ValueError on an empty sequence,
# which would hide the real problem (nothing was loaded from DATA_DIR), so the
# empty case gets its own message instead.
match_dates = [m.date for m in dataset.matches]
if match_dates:
    print(f"Date range: {min(match_dates).date()} to {max(match_dates).date()}")
else:
    print(f"No matches loaded - check that DATA_DIR ({DATA_DIR}) points at the Rugby-Data repository")

## 2. League Table Computation

Let's compute a league table for a specific season and competition.

In [None]:
# Season and competition the league table is built for
SEASON = "2024-2025"
COMPETITION = "celtic"  # or "premiership", "top14", etc.


def _in_selected_league(match):
    """Return True when *match* is in SEASON and its competition name contains COMPETITION.

    The competition comparison is a case-insensitive substring test.
    """
    return match.season == SEASON and COMPETITION.lower() in match.competition.lower()


# Keep only the fixtures of the selected league that have already been played
season_matches = [m for m in dataset.matches if _in_selected_league(m) and m.is_played]

print(f"Found {len(season_matches)} played matches for {COMPETITION.upper()} {SEASON}")

In [None]:
# Convert to DataFrame format for league table
# Need one row per team per match

def count_tries(scores):
    """Count the try events in a list of scoring events.

    Args:
        scores: Iterable of dict-like scoring events, or a falsy value
            (``None`` / empty) when no events were recorded. An event counts
            as a try when its ``'type'`` entry equals ``'try'`` or ``'t'``,
            compared case-insensitively.

    Returns:
        The number of try events; 0 for missing or empty input.
    """
    if not scores:
        return 0
    # `or ''` covers events whose 'type' key is present but None (e.g. a JSON
    # null); dict.get's default only applies when the key is absent.
    return sum(
        1 for event in scores
        if str(event.get('type') or '').lower() in ('try', 't')
    )

# Schema of the per-team match table. Passing it to the DataFrame constructor
# keeps the columns in place even when no matches were selected (an empty list
# of rows would otherwise produce a frame with no columns at all).
MATCH_ROW_COLUMNS = [
    'team', 'opponent', 'score', 'opponent_score',
    'tries', 'opponent_tries', 'is_home', 'date',
]

match_rows = []
for match in season_matches:
    home_side = (match.home_team, match.home_score, count_tries(match.home_scores))
    away_side = (match.away_team, match.away_score, count_tries(match.away_scores))

    # Emit the match twice: once from the home perspective, once from the away one
    for own, other, is_home in ((home_side, away_side, True), (away_side, home_side, False)):
        team, score, tries = own
        opponent, opponent_score, opponent_tries = other
        match_rows.append({
            'team': team,
            'opponent': opponent,
            'score': score,
            'opponent_score': opponent_score,
            'tries': tries,
            'opponent_tries': opponent_tries,
            'is_home': is_home,
            'date': match.date,
        })

matches_df = pd.DataFrame(match_rows, columns=MATCH_ROW_COLUMNS)
print(f"Created DataFrame with {len(matches_df)} rows (2 per match)")
matches_df.head()

In [None]:
# Compute league table with URC rules
table = LeagueTable(bonus_rules=BonusPointRules.URC)
# 'opponent_tries' is the matches_df column holding the tries scored by the
# other side in each row (presumably needed by the bonus-point rules - confirm
# against LeagueTable.compute_standings).
standings = table.compute_standings(matches_df, opponent_tries_col='opponent_tries')

# max_teams presumably caps the number of rows printed - TODO confirm
print(format_table(standings, max_teams=20))

### Bonus Point Breakdown

Let's analyze how different teams earn bonus points.

In [None]:
# Bonus point figure: grouped bars for the top 10 (left) and a
# match-points vs bonus-points scatter for every team (right)
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
bar_ax, scatter_ax = axes

top_teams = standings.head(10)
x = np.arange(len(top_teams))
width = 0.35

# Left panel: try bonus and losing bonus side by side for each team
bar_series = (
    (-width / 2, 'try_bonus', 'Try Bonus'),
    (width / 2, 'losing_bonus', 'Losing Bonus'),
)
for offset, column, label in bar_series:
    bar_ax.bar(x + offset, top_teams[column], width, label=label, alpha=0.8)
bar_ax.set_xlabel('Team')
bar_ax.set_ylabel('Bonus Points')
bar_ax.set_title('Bonus Points Breakdown (Top 10)')
bar_ax.set_xticks(x)
bar_ax.set_xticklabels(top_teams['team'], rotation=45, ha='right')
bar_ax.legend()

# Right panel: how much of each team's haul comes from bonus points
scatter_ax.scatter(standings['match_points'], standings['bonus_points'], alpha=0.6, s=100)
scatter_ax.set_xlabel('Match Points (Wins/Draws)')
scatter_ax.set_ylabel('Total Bonus Points')
scatter_ax.set_title('Match Points vs Bonus Points')

# Label only the top teams to keep the scatter readable
labelled_points = zip(top_teams['team'], top_teams['match_points'], top_teams['bonus_points'])
for team_name, match_pts, bonus_pts in labelled_points:
    scatter_ax.annotate(team_name, (match_pts, bonus_pts), fontsize=8, alpha=0.7)

plt.tight_layout()
plt.show()

### Different Bonus Point Systems

Let's compare how the table would look under different bonus point systems.

In [None]:
# Compare different bonus systems
systems = {
    'URC': BonusPointRules.URC,
    'Premiership': BonusPointRules.PREMIERSHIP,
    'Top14': BonusPointRules.TOP14,
}

comparison = []
for name, rules in systems.items():
    # Use loop-local names here: rebinding the notebook-level `table` /
    # `standings` would leave them pointing at the last system (Top14), so
    # re-running the URC plots above would silently show the wrong table.
    system_table = LeagueTable(bonus_rules=rules)
    system_standings = system_table.compute_standings(matches_df, opponent_tries_col='opponent_tries')

    # Record the top 8 teams under this system
    for _, row in system_standings.head(8).iterrows():
        comparison.append({
            'System': name,
            'Position': row['position'],
            'Team': row['team'],
            'Total Points': row['total_points'],
            'Bonus Points': row['bonus_points'],
        })

comparison_df = pd.DataFrame(comparison)
print("\nTop 8 teams under different bonus systems:")
print(comparison_df.to_string())

## 3. Season Prediction with Monte Carlo

Now let's predict the final season standings using our model.

In [None]:
# Load trained model
CHECKPOINT = "latest"

print(f"Loading model checkpoint: {CHECKPOINT}")
# NOTE(review): `load_checkpoint` is not imported anywhere in this notebook,
# while the first cell imports `load_model_and_trace` and never uses it -
# confirm which helper is intended and that its signature matches this call.
model, trace = load_checkpoint(CHECKPOINT, verbose=True)

In [None]:
# Get remaining fixtures (unplayed matches)
remaining_matches = [
    m for m in dataset.matches
    if m.season == SEASON and COMPETITION.lower() in m.competition.lower() and not m.is_played
]

print(f"Found {len(remaining_matches)} remaining fixtures")

# Convert to fixtures format (one row per match, not per team).
# The columns are named explicitly so the frame keeps its schema when the
# season is finished and there are no fixtures left; without them an empty
# list produces a column-less frame and fixtures_df['home_team'] raises KeyError.
FIXTURE_COLUMNS = ['home_team', 'away_team', 'date']
fixtures_df = pd.DataFrame(
    [
        {'home_team': match.home_team, 'away_team': match.away_team, 'date': match.date}
        for match in remaining_matches
    ],
    columns=FIXTURE_COLUMNS,
)

print("\nNext few fixtures:")
# Sort for display only, so "next" really means chronologically next;
# fixtures_df itself keeps the order in which the matches were loaded.
print(fixtures_df.sort_values('date', kind='stable').head(10))

In [None]:
# Create predictors
# The match-level predictor wraps the model and trace loaded above; the season
# predictor is built on top of it.
match_predictor = MatchPredictor(model, trace)
# NOTE(review): the bonus rules and playoff_spots are hard-coded for the URC
# and are not derived from COMPETITION - change them together with it.
season_predictor = SeasonPredictor(
    match_predictor=match_predictor,
    competition=BonusPointRules.URC,  # or PREMIERSHIP, TOP14
    playoff_spots=8,  # URC has 8 playoff spots
)

print("Predictors created successfully")

In [None]:
# Run season prediction
# Note: This may take a few minutes depending on n_simulations
print("Running Monte Carlo simulation...")
# The "1000" in this message is a literal; update it if n_simulations changes.
print("This may take 2-5 minutes for 1000 simulations...\n")

# played_matches is the two-rows-per-match table built earlier;
# remaining_fixtures has one row per unplayed match.
season_pred = season_predictor.predict_season(
    played_matches=matches_df,
    remaining_fixtures=fixtures_df,
    season=SEASON,
    n_simulations=1000,  # Increase for more accuracy (but slower)
)

print("\nSimulation complete!")

In [None]:
# Display formatted predictions
# format_predictions is given the result object returned by predict_season above.
print(season_predictor.format_predictions(season_pred))

## 4. Visualize Season Predictions

In [None]:
# Position probabilities heatmap
# position_probabilities is indexed by team. Naming the index before resetting
# it guarantees a 'team' column whether or not the index already had a name
# (reset_index() only produces a column called 'index' for an unnamed index).
pos_probs = season_pred.position_probabilities.rename_axis('team').reset_index()

# Get position columns (assumed to appear in position order - they are
# labelled 1..n on the x axis below)
pos_cols = [c for c in pos_probs.columns if str(c).startswith('P(pos')]
n_positions = len(pos_cols)

# Create heatmap data: one row per team, one column per final position
heatmap_data = pos_probs[pos_cols].values
teams = pos_probs['team'].values

# Plot
fig, ax = plt.subplots(figsize=(14, 10))
im = ax.imshow(heatmap_data, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)

# Set ticks
ax.set_xticks(np.arange(n_positions))
ax.set_yticks(np.arange(len(teams)))
ax.set_xticklabels([f"{i+1}" for i in range(n_positions)])
ax.set_yticklabels(teams)

# Labels
ax.set_xlabel('Final Position', fontsize=12)
ax.set_ylabel('Team', fontsize=12)
ax.set_title('Position Probability Distribution', fontsize=14, fontweight='bold')

# Add colorbar
cbar = plt.colorbar(im, ax=ax)
cbar.set_label('Probability', rotation=270, labelpad=20)

# Add probability values in cells; anything at or below 5% is left blank to
# keep the grid readable
for (row_idx, col_idx), prob in np.ndenumerate(heatmap_data):
    if prob > 0.05:
        ax.text(col_idx, row_idx, f'{prob:.0%}',
                ha="center", va="center",
                color="black" if prob < 0.5 else "white",
                fontsize=8)

plt.tight_layout()
plt.show()

In [None]:
# Horizontal bar chart of each team's playoff qualification probability
playoff_probs = season_pred.playoff_probabilities.sort_values('playoff_probability', ascending=False)


def _probability_colour(p):
    """Map a probability to a traffic-light colour: >=0.5 green, >=0.25 orange, else red."""
    if p >= 0.5:
        return 'green'
    if p >= 0.25:
        return 'orange'
    return 'red'


colors = [_probability_colour(p) for p in playoff_probs['playoff_probability']]

fig, ax = plt.subplots(figsize=(12, 8))
bars = ax.barh(playoff_probs['team'], playoff_probs['playoff_probability'], color=colors, alpha=0.7)
ax.set_xlabel('Playoff Qualification Probability', fontsize=12)
ax.set_title(f'Playoff Probabilities (Top {season_predictor.playoff_spots})', fontsize=14, fontweight='bold')
ax.set_xlim(0, 1)

# Write the percentage just to the right of each bar; bars sit at y = 0, 1, 2, ...
# in the same order as the sorted rows
for bar_index, prob in enumerate(playoff_probs['playoff_probability']):
    ax.text(prob + 0.02, bar_index, f'{prob:.1%}', va='center', fontsize=10)

# Light vertical grid lines
ax.grid(axis='x', alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Current points next to expected final points, per team
current = season_pred.current_standings[['team', 'total_points']].set_index('team')
predicted = season_pred.predicted_standings[['team', 'expected_points']].set_index('team')

# Outer join keeps teams that appear in only one table; their missing value becomes 0
comparison = (
    current.join(predicted, how='outer')
    .fillna(0)
    .assign(points_to_gain=lambda frame: frame['expected_points'] - frame['total_points'])
    .sort_values('expected_points', ascending=False)
)

fig, ax = plt.subplots(figsize=(12, 8))

x = np.arange(len(comparison))
width = 0.35

# One pair of bars per team: current on the left, expected final on the right
bar_series = (
    (-width / 2, 'total_points', 'Current Points'),
    (width / 2, 'expected_points', 'Expected Final Points'),
)
for offset, column, label in bar_series:
    ax.bar(x + offset, comparison[column], width, label=label, alpha=0.8)

ax.set_xlabel('Team')
ax.set_ylabel('Points')
ax.set_title('Current vs Expected Final Points', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(comparison.index, rotation=45, ha='right')
ax.legend()
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

## 5. Analyze Specific Teams

In [None]:
# Team to inspect in detail (change to analyze a different team)
TEAM_NAME = "Leinster Rugby"

# pos_probs (built in the heatmap cell) has one row per team with a 'team' column
team_probs_df = pos_probs[pos_probs['team'] == TEAM_NAME]

if team_probs_df.empty:
    print(f"Team '{TEAM_NAME}' not found in predictions")
else:
    team_probs = team_probs_df.iloc[0]

    # Probability of finishing in each position, 1..n_positions
    positions = list(range(1, n_positions + 1))
    pos_prob_values = [team_probs[f'P(pos {position})'] for position in positions]

    # Bar chart of the position distribution
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.bar(positions, pos_prob_values, alpha=0.7, color='steelblue')
    ax.set_xlabel('Final Position', fontsize=12)
    ax.set_ylabel('Probability', fontsize=12)
    ax.set_title(f'{TEAM_NAME} - Final Position Distribution', fontsize=14, fontweight='bold')
    ax.set_xticks(positions)
    ax.set_ylim(0, max(pos_prob_values) * 1.2)
    ax.grid(axis='y', alpha=0.3)

    # Label every bar above 1% with its percentage
    for position, prob in zip(positions, pos_prob_values):
        if prob > 0.01:
            ax.text(position, prob + 0.01, f'{prob:.1%}',
                    ha='center', va='bottom', fontsize=9)

    plt.tight_layout()
    plt.show()

    # Text summary
    print(f"\n{TEAM_NAME} Summary:")
    print(f"Most likely position: {int(team_probs['most_likely_position'])}")
    is_team = playoff_probs['team'] == TEAM_NAME
    playoff_prob = playoff_probs.loc[is_team, 'playoff_probability'].values
    if len(playoff_prob) > 0:
        print(f"Playoff probability: {playoff_prob[0]:.1%}")

## 6. Remaining Fixtures Impact

Let's list the upcoming fixtures that involve teams whose playoff qualification is still uncertain (a playoff probability between 10% and 90%).

In [None]:
# Teams "on the playoff bubble": qualification is neither near-certain nor
# near-impossible, i.e. playoff probability strictly between 10% and 90%
on_bubble = (
    (playoff_probs['playoff_probability'] > 0.1) &
    (playoff_probs['playoff_probability'] < 0.9)
)
bubble_teams = playoff_probs[on_bubble]['team'].tolist()

print(f"Teams on playoff bubble: {', '.join(bubble_teams)}")

# Find fixtures involving bubble teams. An empty fixtures_df may have no
# columns at all, so skip the column lookups in that case.
if fixtures_df.empty:
    bubble_fixtures = fixtures_df
else:
    involves_bubble_team = (
        fixtures_df['home_team'].isin(bubble_teams) |
        fixtures_df['away_team'].isin(bubble_teams)
    )
    # Sort by date first so head(10) really is the *next* 10 fixtures
    bubble_fixtures = fixtures_df[involves_bubble_team].sort_values('date', kind='stable').head(10)

print("\nKey remaining fixtures (next 10):")
for idx, row in bubble_fixtures.iterrows():
    print(f"  {row['home_team']} vs {row['away_team']} ({row['date'].strftime('%Y-%m-%d')})")

## 7. Export Results

Save the predictions for later use.

In [None]:
# Save predictions to CSV
output_dir = Path("../outputs")
# parents=True so a missing intermediate directory does not abort the export
output_dir.mkdir(parents=True, exist_ok=True)

# Every file shares the "<competition>_<season>_" prefix
file_prefix = f"{COMPETITION}_{SEASON}"

tables_to_save = {
    'current_standings': season_pred.current_standings,
    'predicted_standings': season_pred.predicted_standings,
    'playoff_probabilities': season_pred.playoff_probabilities,
    # Position probabilities has teams as index. Name the index before
    # resetting it so the saved column is always called 'team', whether or
    # not the index already carried a name.
    'position_probabilities': season_pred.position_probabilities.rename_axis('team').reset_index(),
}

for table_name, frame in tables_to_save.items():
    frame.to_csv(output_dir / f"{file_prefix}_{table_name}.csv", index=False)

print(f"Results saved to {output_dir}")

## Summary

This notebook demonstrated:
- Computing league tables with different bonus point systems
- Predicting final season standings using Monte Carlo simulation
- Calculating playoff probabilities and position distributions
- Visualizing uncertainty in season outcomes

The predictions account for:
- Individual match uncertainties
- Team strength variations
- Bonus point impacts
- Schedule strength

These predictions can be updated weekly as new matches are played.