# Point Importance in Tennis

This notebook explores the concept of **point importance** in tennis - a measure of 
how much a single point affects Player 1's probability of winning the match.

**Definition**: Point importance = P(win match | win point) - P(win match | lose point)

Key insights:
- Not all points are equally important
- Match points and break points have high importance
- Points early in games/sets have lower importance
- Understanding point importance helps explain "clutch" performance

## Setup and Imports

In [None]:
from matplotlib  import pyplot as plt
from scipy       import optimize
from typing      import Literal
import numpy as np
import os, random, sys

# Make the project's src/ directory (one level above the notebook's working
# directory) importable, without adding it twice when the cell is re-run.
PROJECT_ROOT = os.path.abspath(os.pardir)
SRC_DIR = os.path.join(PROJECT_ROOT, 'src')
srcAlreadyOnPath = SRC_DIR in sys.path
if not srcAlreadyOnPath:
    sys.path.append(SRC_DIR)

from tennis_lab.core.match_format       import MatchFormat
from tennis_lab.core.match              import Match
from tennis_lab.core.match_score        import MatchScore
from tennis_lab.paths.match_probability import probabilityP1WinsMatch

## Core Functions

### Point Importance Calculation

The `calcPointImportance` function computes how much winning vs losing the next 
point affects Player 1's probability of winning the match.

The `calcPointImportanceAdaptive` function handles the case where serve-win 
probability itself depends on point importance (creating a fixed-point problem 
solved via root-finding).

In [None]:
def calcPointImportance(P1: float, P2: float, playerToServe: Literal[1,2], score: MatchScore):
    """
    Return the importance of the next point from Player1's point of view:

        P(P1 wins match | P1 wins the point) - P(P1 wins match | P1 loses the point)

    P1            - probability that Player1 wins a point when serving
    P2            - probability that Player2 wins a point when serving
    playerToServe - which player (1 or 2) serves the point being evaluated
    score         - current score

    The result is converted to a Python scalar with `.item()`.
    """
    assert playerToServe in (1,2)

    def matchWinProbAfter(pointWinner):
        # Build a Match from the current score, record the hypothetical
        # point, then evaluate P1's match-win probability from there.
        branch = Match(playerToServe, score._matchFormat, score)
        branch.recordPoint(pointWinner=pointWinner)
        return probabilityP1WinsMatch(branch.score, branch.servesNext, [P1], P2)

    probIfP1WinsPoint  = matchWinProbAfter(1)
    probIfP1LosesPoint = matchWinProbAfter(2)

    # importance := gap between the two conditional match-win probabilities
    return (probIfP1WinsPoint - probIfP1LosesPoint).item()

def calcPointImportanceAdaptive(P1fction, P2fction, playerToServe: Literal[1,2], score: MatchScore):
    """
    Calculates the importance of a point for Player1, defined as the
    difference in the probability that P1 wins the match conditional
    on P1 winning vs losing the next point.

    Unlike `calcPointImportance`, the serve-win probabilities here are
    allowed to depend on the point importance itself. That makes the
    importance the solution of the fixed-point equation

        imp = clamp(calcPointImportance(P1fction(imp), P2fction(imp), ...), 0, 1)

    which is solved with `scipy.optimize.root` (method 'hybr') starting
    from an initial guess of 0.1.

    P1fction      - function that calculates the probability that Player1 wins a point when serving given the point importance
    P2fction      - function that calculates the probability that Player2 wins a point when serving given the point importance
    playerToServe - which player is serving the point whose importance is calculated
    score         - current score

    Returns the solver's final iterate as a Python float. If the solver
    reports non-convergence a RuntimeWarning is emitted and the last
    iterate is still returned, so long simulations are not aborted.
    """
    import warnings  # local import: keeps this cell self-contained

    def objective(imp):
        # the solver passes a length-1 array; accept a bare scalar too
        imp1 = imp[0] if hasattr(imp, '__len__') else imp
        imp2 = calcPointImportance(P1fction(imp1), P2fction(imp1), playerToServe, score)
        # keep the implied importance inside [0, 1] before forming the residual
        imp2 = max(0, min(imp2, 1))
        return imp2 - imp1

    initGuess = 0.1
    result = optimize.root(objective, initGuess, method='hybr')
    if not result.success:
        warnings.warn(
            f"calcPointImportanceAdaptive: root finding did not converge "
            f"({result.message}); returning last iterate {result.x.item():.6g}",
            RuntimeWarning,
            stacklevel=2,
        )
    return result.x.item()

## Point Importance Across All Scores

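Here we calculate point importance for every score produced by the enumeration
below: each undecided set and game score, the in-game point scores from 0-0 up
to deuce, and the start of a tiebreak at 6-6. Player 1 is assumed to be serving
in every case. This shows the distribution of importance values and identifies
which game situations carry the most weight.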

In [None]:
# ==================
# Match Configuration
# ==================

from tennis_lab.core.set_score  import SetScore
from tennis_lab.core.game_score import GameScore

BESTOF         = 5      # Match format (best of 3 or 5 sets); passed to MatchFormat below
P1             = 0.50   # Probability Player 1 wins point when serving
P2             = 0.50   # Probability Player 2 wins point when serving
PLAYER_SERVING = 1      # Which player is serving (used as the server for every score evaluated)
matchFormat    = MatchFormat(bestOfSets=BESTOF)

# ===========================
# Generate All Valid Scores
# ===========================

def generateAllValidScores(matchFormat: MatchFormat) -> list[MatchScore]:
    """
    Enumerate MatchScore instances for the given match format.

    What is generated:
      * one score for each decided match (one player has the required sets);
      * for each undecided set score, every game score that has not already
        ended the set, combined with every point score from 0-0 up to 3-3;
      * at 6-6 in games, a single score for the start of the tiebreak.

    Advantage states and points inside a tiebreak are not enumerated.
    Point/game combinations that the score classes reject with ValueError
    are skipped.
    """
    setsToWin = matchFormat.bestOfSets // 2 + 1
    allScores = []

    for setsP1 in range(setsToWin + 1):
        for setsP2 in range(setsToWin + 1):
            p1HasWon = setsP1 == setsToWin
            p2HasWon = setsP2 == setsToWin

            # Both players cannot have won the match
            if p1HasWon and p2HasWon:
                continue

            # Match is over: record the final set score only
            if p1HasWon or p2HasWon:
                allScores.append(MatchScore(setsP1, setsP2, matchFormat))
                continue

            # NOTE(review): this flags the set as "final" as soon as EITHER
            # player is one set from winning; a deciding set normally requires
            # both players to be. Confirm against what SetScore expects.
            isFinalSet = (setsP1 == setsToWin - 1) or (setsP2 == setsToWin - 1)

            # Game scores within the current set (0..6 each)
            for gamesP1 in range(7):
                for gamesP2 in range(7):
                    # 6-x or x-6 with x < 5 means the set is already won
                    if gamesP1 == 6 and gamesP2 < 5:
                        continue
                    if gamesP2 == 6 and gamesP1 < 5:
                        continue

                    # Tiebreak at 6-6: only its starting score is generated
                    if gamesP1 == 6 and gamesP2 == 6:
                        setScore = SetScore(6, 6, isFinalSet, matchFormat)
                        allScores.append(MatchScore(setsP1, setsP2, matchFormat, setScore))
                        continue

                    # Point scores within the current game (0..3 each; 3-3 is deuce)
                    for pointsP1 in range(4):
                        for pointsP2 in range(4):
                            try:
                                gameScore = GameScore(pointsP1, pointsP2, matchFormat)
                                setScore = SetScore(gamesP1, gamesP2, isFinalSet, matchFormat, gameScore)
                                score = MatchScore(setsP1, setsP2, matchFormat, setScore)
                                allScores.append(score)
                            except ValueError:
                                # combination rejected by the score classes
                                continue

    return allScores

# =============================
# Calculate Point Importance
# =============================

allScores = generateAllValidScores(matchFormat)
print(f"Generated {len(allScores)} valid scores")

# Map each non-final score to its point importance. Scores for which the
# calculation raises are still skipped (best effort), but they are recorded
# and reported instead of being dropped silently.
pointImportance = {}
failedScores = []   # (score, exception) pairs
for score in allScores:
    if score.isFinal:
        continue    # no next point to evaluate once the match is decided
    try:
        pointImportance[score] = calcPointImportance(P1, P2, PLAYER_SERVING, score)
    except Exception as exc:
        failedScores.append((score, exc))

print(f"Calculated importance for {len(pointImportance)} scores")
if failedScores:
    firstScore, firstError = failedScores[0]
    print(f"Skipped {len(failedScores)} scores due to errors "
          f"(first: {firstScore} -> {firstError!r})")

### Distribution of Point Importance

Most points have relatively low importance (~0.02-0.05), but a small number 
of critical points can have importance values exceeding 0.15.

In [None]:
# Histogram of the importance of every evaluated score
importanceValues = list(pointImportance.values())

fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(importanceValues, bins=50, edgecolor='black', alpha=0.7, color='steelblue')
ax.set_title(f"Distribution of Point Importance Across All Possible Scores (Best of {BESTOF} sets)", fontsize=12)
ax.set_xlabel("Point Importance", fontsize=10)
ax.set_ylabel("Frequency", fontsize=10)
ax.grid(alpha=0.3, linewidth=0.5)
plt.tight_layout()
plt.show()

# Summary statistics, printed in aligned columns
summaryStats = (
    ("Min importance:  ", min(importanceValues)),
    ("Max importance:  ", max(importanceValues)),
    ("Mean importance: ", np.mean(importanceValues)),
)
for label, value in summaryStats:
    print(f"{label}{value:.4f}")

### Top 100 Most Important Points

Ranking all possible scores by point importance shows that a relatively small 
number of game situations carry disproportionate weight.

In [None]:
# Rank (score, importance) pairs from most to least important
sortedImportance = sorted(pointImportance.items(), key=lambda pair: pair[1], reverse=True)
importanceValues = [imp for _, imp in sortedImportance[:100]]

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(importanceValues, marker='o', linestyle='None', markersize=3, color='darkred')
ax.set_title(f"Top 100 Most Important Points (Best of {BESTOF} sets)", fontsize=12)
ax.set_xlabel("Rank (by importance)", fontsize=10)
ax.set_ylabel("Point Importance", fontsize=10)
ax.grid(linewidth=0.3, alpha=0.7)
plt.tight_layout()
plt.show()

# List the five highest-ranked scores
print("Top 5 most important scores:")
for rank, (score, imp) in enumerate(sortedImportance[:5], start=1):
    print(f"  {rank}. {score} → importance = {imp:.4f}")

## Match Simulation with Point Importance

Now we simulate complete matches and track how point importance evolves 
throughout. This shows how important moments cluster around break points, 
set points, and match points.

The `simulatePointImportance` function allows serve-win probability to 
depend on importance (via `P1func` and `P2func`).

In [None]:
def simulatePointImportance(P1func, P2func, matchFormat: MatchFormat):
    """
    Play out one simulated match, recording the importance of every point.

    Before each point its importance is computed with
    `calcPointImportanceAdaptive`; the server's win probability for that
    point is then obtained by passing this importance to the server's
    probability function.

    Parameters:
    -----------
    P1func      - maps the importance of the next point to the probability
                  that Player1 wins the point when serving
    P2func      - maps the importance of the next point to the probability
                  that Player2 wins the point when serving
    matchFormat - match format

    Returns:
    --------
    pointImportance - list with the importance of each point, in playing order
    winRecord       - list of 2-tuples, one per point:
                      (Player1 won the point, Player1 was serving)
    """
    importances = []
    outcomes    = []

    # Player 1 serves first
    match = Match(playerServing=1, matchFormat=matchFormat)
    while True:
        server = match.servesNext
        p1Serving = (server == 1)

        # importance of the point about to be played
        importance = calcPointImportanceAdaptive(P1func, P2func, server, match.score)
        importances.append(importance)

        # draw the outcome using the server's (importance-dependent) probability
        serveWinProb = P1func(importance) if p1Serving else P2func(importance)
        serverWonPoint = bool(random.random() < serveWinProb)
        p1Won = serverWonPoint if p1Serving else not serverWonPoint
        match.recordPoint(1 if p1Won else 2)

        # keep who won and who served, from Player1's perspective
        outcomes.append((p1Won, p1Serving))

        if match.isOver:
            break

    return importances, outcomes

### Single Match Visualization

Simulate one match with constant serve-win probabilities. Points whose 
importance exceeds the threshold are marked and colored by outcome: 
green = Player 1 won, red = Player 1 lost. Shape indicates whether 
Player 1 was serving (○) or receiving (×).

In [None]:
# ==================
# Match Configuration
# ==================

matchFormat = MatchFormat(bestOfSets=3)
P1 = 0.67  # Player 1's probability of winning a point on serve
P2 = 0.67  # Player 2's probability of winning a point on serve

# ==============
# Run Simulation
# ==============

# Constant serve probabilities: both lambdas ignore the importance argument
pointImportance, winRecord = simulatePointImportance(lambda x: P1, lambda x: P2, matchFormat)
pointImportance = np.array(pointImportance)

# =================
# Visualize Results
# =================

fig, ax = plt.subplots(figsize=(12, 5))

ax.plot(pointImportance, linewidth=0.5, color='gray', alpha=0.7)
ax.set_title("Point Importance During a Tennis Match", fontsize=12)
ax.set_xlabel("Point Number", fontsize=10)
ax.set_ylabel("Point Importance", fontsize=10)
ax.grid(linewidth=0.25, alpha=0.7)

# Mark the points whose importance exceeds the threshold
THRESHOLD = 0.1
hiImportIdxs = [i for i, imp in enumerate(pointImportance) if imp > THRESHOLD]

# (P1 won the point, P1 was serving) -> marker, color, legend label.
# The tuple order determines the order in which the series are drawn.
markerSpecs = (
    ((True,  True),  'o', 'green', "Won serving"),
    ((True,  False), 'x', 'green', "Won receiving"),
    ((False, True),  'o', 'red',   "Lost serving"),
    ((False, False), 'x', 'red',   "Lost receiving"),
)
for outcome, marker, color, label in markerSpecs:
    outcomeIdxs = [i for i in hiImportIdxs if winRecord[i] == outcome]
    ax.scatter(outcomeIdxs, pointImportance[outcomeIdxs], marker=marker, color=color, s=20, label=label)

ax.axhline(THRESHOLD, color='orange', linestyle='--', linewidth=0.8, alpha=0.7, label=f"Threshold = {THRESHOLD}")
ax.legend(fontsize=8, loc='upper right')

plt.tight_layout()
plt.show()

print(f"Total points played: {len(pointImportance)}")
print(f"High-importance points (>{THRESHOLD}): {len(hiImportIdxs)}")

## Importance-Dependent Serve Performance

What if a player's serve-win probability depends on point importance?

Here we model a "choking" scenario where Player 1's serve probability 
decreases as point importance increases. Player 2 has constant serve 
probability. We run multiple matches to gather statistics.

In [None]:
# =============
# Configuration
# =============

# Number of matches simulated by the loop in this cell
NUM_MATCHES = 10
# Every simulated match uses a best-of-3-sets format
matchFormat = MatchFormat(bestOfSets=3)

def P1func(importance):
    """
    'Choking' model for Player 1's serve: the point-win probability falls
    linearly from Pmax at importance 0 to Pmin at importance 1, and the
    result is clamped to [0, 1].
    """
    Pmax = 0.67  # serve-win probability when importance is 0
    Pmin = 0.01  # serve-win probability when importance is 1
    composure = 1 - importance
    P = Pmin + composure * (Pmax - Pmin)
    capped = min(P, 1.0)
    return max(0.0, capped)

def P2func(importance):
    """Player 2's serve-win probability: a constant that ignores `importance`."""
    constantServeProb = 0.60
    return constantServeProb

# ==============
# Run Simulation
# ==============

# Pool the per-point data of all simulated matches into two flat sequences
pointImportance = []
winRecord = []

for _ in range(NUM_MATCHES):
    matchImportance, matchRecord = simulatePointImportance(P1func, P2func, matchFormat)
    pointImportance += matchImportance
    winRecord += matchRecord

# Convert to arrays for the analysis that follows
pointImportance = np.array(pointImportance)
winRecord = np.array(winRecord)

print(f"Simulated {NUM_MATCHES} matches")
print(f"Total points: {len(pointImportance)}")

### Large-Scale Simulation with Parallel Execution

For more robust statistics, we run 1000+ matches using joblib for 
parallel execution.

In [None]:
from joblib import Parallel, delayed

# Total number of matches to simulate, split across the parallel batches below
NUM_MATCHES = 4000
# Every simulated match uses a best-of-3-sets format
matchFormat = MatchFormat(bestOfSets=3)

def simulate(P1func, P2func, matchFormat):
    """
    Simulate a single match; a direct pass-through to `simulatePointImportance`.

    Returns whatever `simulatePointImportance` returns for the same arguments.
    """
    # NOTE(review): no call to this wrapper appears in the visible cells
    # (the parallel run uses `simulate_batch`) - confirm whether it is still needed.
    return simulatePointImportance(P1func, P2func, matchFormat)

# Run parallel simulations: split NUM_MATCHES as evenly as possible into one
# batch per CPU core.
# os.cpu_count() returns None when the count cannot be determined, so fall
# back to a single batch instead of failing on the division.
n_workers = os.cpu_count() or 1
batch_size, remainder = divmod(NUM_MATCHES, n_workers)
# the first `remainder` batches take one extra match so the sizes sum to NUM_MATCHES
batch_sizes = [batch_size + (1 if i < remainder else 0) for i in range(n_workers)]

def simulate_batch(P1func, P2func, matchFormat, num_matches):
    """
    Simulate `num_matches` matches one after another.

    Returns a list with one `simulatePointImportance` result per match.
    """
    return [
        simulatePointImportance(P1func, P2func, matchFormat)
        for _ in range(num_matches)
    ]

# One joblib task per batch; each task returns a list of per-match results
runner = Parallel(n_jobs=-1, verbose=1)
results = runner(
    delayed(simulate_batch)(P1func, P2func, matchFormat, bs) for bs in batch_sizes
)

# Flatten the per-batch lists into one list of (importance, winRecord) pairs
all_results = [matchResult for batch in results for matchResult in batch]

# Stack the per-match sequences into one array each
pointImportance = np.concatenate([matchImportance for matchImportance, _ in all_results])
winRecord = np.concatenate([matchRecord for _, matchRecord in all_results])

print(f"Completed {NUM_MATCHES} matches")
print(f"Total points: {len(pointImportance)}")

### Logistic Regression Analysis

We can use logistic regression to analyze the relationship between point 
importance and Player 1's actual win probability when serving (only points 
served by Player 1 are used). The red curve shows the fitted empirical win 
probability, while the dashed orange line shows the modeled probability 
function `P1func`.

In [None]:
from sklearn.linear_model import LogisticRegression

# Keep only the points on which Player 1 was serving.
# winRecord rows are (P1 won the point, P1 was serving); a boolean mask
# avoids Python-level loops over what can be a large number of points.
winRecordArr = np.asarray(winRecord)
p1ServedMask = winRecordArr[:, 1].astype(bool)
importance = np.asarray(pointImportance)[p1ServedMask]
serveRecord = winRecordArr[p1ServedMask, 0]   # True where P1 won the point

# Fit logistic regression: importance -> P(P1 wins the point on serve)
X = importance.reshape(-1, 1)
y = np.asarray(serveRecord)
model = LogisticRegression().fit(X, y)

print("Logistic Regression Results:")
print(f"  Coefficient: {model.coef_[0][0]:.4f}")
print(f"  Intercept:   {model.intercept_[0]:.4f}")
print(f"  Accuracy:    {model.score(X, y):.4f}")

# Visualize
fig, ax = plt.subplots(figsize=(10, 5))

# 1-D grid over the observed importance range; sklearn needs a column vector
x_plot = np.linspace(importance.min(), importance.max(), 100)
y_pred = model.predict_proba(x_plot.reshape(-1, 1))[:, 1]
# evaluate the model on scalars (P1func clamps with min/max, which expect scalars)
y_model = [P1func(x) for x in x_plot]

ax.plot(x_plot, y_pred, 'r-', linewidth=2, label='Fitted (logistic regression)')
ax.plot(x_plot, y_model, 'orange', linewidth=2, linestyle='--', label='Modeled P1func')

ax.set_xlabel('Point Importance', fontsize=10)
ax.set_ylabel('Win Probability When Serving', fontsize=10)
ax.set_title('Serve Win Probability vs Point Importance', fontsize=12)
ax.legend(fontsize=9)
ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()