In [None]:
import pandas as pd

# Batsman performance score against a set of bowlers, smoothed for small samples
def batsman_score(batsman, opposition_bowlers, stats, min_balls=20, boundary_value=0.5, beta=0.5, default_score=10):
    """Return the batsman's mean performance score against the given bowlers.

    For one batsman/bowler match-up the score is
    ``(Runs / balls + derived_boundaries / balls * boundary_value) * 100``.

    Args:
        batsman: Value matched against the ``'batsman'`` column of ``stats``.
        opposition_bowlers: Iterable of values matched against the
            ``'bowler'`` column; one score is produced per entry.
        stats: DataFrame with columns ``'batsman'``, ``'bowler'``,
            ``'balls'``, ``'Runs'`` and ``'derived_boundaries'``. Only the
            first row found for a (batsman, bowler) pair is used, so the
            frame is expected to hold one row per pair.
        min_balls: Minimum balls faced in a match-up for its raw rate to be
            used without smoothing.
        boundary_value: Weight applied to the boundaries-per-ball term.
        beta: Weight of the batsman's overall totals that is blended into
            a match-up with fewer than ``min_balls`` balls.
        default_score: Score used when no balls at all are recorded for
            the batsman.

    Returns:
        The mean of the per-bowler scores, or ``0`` when
        ``opposition_bowlers`` is empty.
    """
    def rate(runs, balls, boundaries):
        # Runs per ball plus a weighted boundaries-per-ball bonus, scaled by 100.
        return ((runs / balls) + (boundaries / balls) * boundary_value) * 100

    # Filter on the batsman once; each per-bowler lookup below only has to
    # scan this subset instead of the whole frame.
    overall_stats = stats[stats['batsman'] == batsman]
    if overall_stats.empty:
        n_o = r_o = b_o = 0
    else:
        n_o = overall_stats['balls'].sum()
        r_o = overall_stats['Runs'].sum()
        b_o = overall_stats['derived_boundaries'].sum()

    # Score used whenever a match-up offers nothing to divide by: the
    # batsman's overall rate if any balls are recorded, else the default.
    fallback = rate(r_o, n_o, b_o) if n_o > 0 else default_score

    scores = []
    for o in opposition_bowlers:
        pair_stats = overall_stats[overall_stats['bowler'] == o]
        if pair_stats.empty:
            scores.append(fallback)
            continue

        n = pair_stats['balls'].iloc[0]
        r = pair_stats['Runs'].iloc[0]
        b = pair_stats['derived_boundaries'].iloc[0]

        if n > 0 and n >= min_balls:
            e = rate(r, n, b)
        else:
            # Small sample: blend in a beta-weighted share of the overall totals.
            n_smooth = n + beta * n_o
            if n_smooth > 0:
                e = rate(r + beta * r_o, n_smooth, b + beta * b_o)
            else:
                # Zero denominator (e.g. zero-ball row, or beta == 0): dividing
                # would yield NaN/inf and corrupt every downstream total.
                e = fallback
        scores.append(e)

    return sum(scores) / len(scores) if scores else 0

# Aggregate batting strength of one team against the other team's bowling
def total_batting_strength(team_A, team_B, stats):
    """Sum the batsman scores of team_A's batters against team_B's bowlers.

    Each team is an iterable of dicts with ``'name'`` and ``'role'`` keys.
    Players of team_A whose role is 'batsman', 'wicket-keeper' or
    'all-rounder' bat; players of team_B whose role is 'bowler' or
    'all-rounder' bowl. ``stats`` is passed through to ``batsman_score``.
    """
    batting_roles = ('batsman', 'wicket-keeper', 'all-rounder')
    bowling_roles = ('bowler', 'all-rounder')

    lineup = []
    for member in team_A:
        if member['role'] in batting_roles:
            lineup.append(member['name'])

    attack = []
    for member in team_B:
        if member['role'] in bowling_roles:
            attack.append(member['name'])

    return sum(batsman_score(name, attack, stats) for name in lineup)

# Win probability for team_A from the Pythagorean expectation formula
def predict_win_probability(team_A, team_B, stats, k=4.5):
    """Estimate the probability that team_A beats team_B.

    Both teams' strengths come from ``total_batting_strength``; the result
    is ``A**k / (A**k + B**k)``. When a strength is exactly zero the ratio
    is short-circuited: 0.5 if both are zero, 0 if only team_A's is zero,
    1 if only team_B's is zero.
    """
    strength_a = total_batting_strength(team_A, team_B, stats)
    strength_b = total_batting_strength(team_B, team_A, stats)

    a_is_zero = strength_a == 0
    b_is_zero = strength_b == 0

    if a_is_zero and b_is_zero:
        return 0.5  # Neither side has any strength: call it even
    if a_is_zero:
        return 0  # Only team B has any strength
    if b_is_zero:
        return 1  # Only team A has any strength

    powered_a = strength_a ** k
    powered_b = strength_b ** k
    return powered_a / (powered_a + powered_b)

# Demo run: build per-pair stats from a CSV and print both teams' win probabilities
if __name__ == "__main__":
    def _summarise_pair(g):
        # Collapse all rows of one (bowler, batsman) group into a single record.
        # Presumably each input row is one delivery -- confirm against the dataset.
        scoreless = g['Runs'].eq(0)
        not_out = g['is_wicket'].eq(False)
        return pd.Series({
            'Runs': g['Runs'].sum(),
            'is_wicket': g['is_wicket'].sum(),
            'balls': g['ball'].count(),
            'derived_dot_balls': (scoreless & not_out).sum(),
            'derived_boundaries': g['Runs'].ge(4).sum()
        })

    # Load the raw data and aggregate it per (bowler, batsman) pair
    df = pd.read_csv('final.csv')  # Replace with your dataset path
    grouped = df.groupby(['bowler', 'batsman'])
    stats = grouped.apply(_summarise_pair).reset_index()

    # Example squads as (name, role) pairs, expanded into the dict form
    # that total_batting_strength reads
    team_A = [
        {'name': name, 'role': role}
        for name, role in (
            ('Rohit Sharma', 'batsman'),
            ('Virat Kohli', 'batsman'),
            ('Hardik Pandya', 'all-rounder'),
            ('Jasprit Bumrah', 'bowler'),
            ('MS Dhoni', 'wicket-keeper'),
        )
    ]
    team_B = [
        {'name': name, 'role': role}
        for name, role in (
            ('David Warner', 'batsman'),
            ('Steve Smith', 'batsman'),
            ('Glenn Maxwell', 'all-rounder'),
            ('Pat Cummins', 'bowler'),
            ('Josh Hazlewood', 'bowler'),
        )
    ]

    win_prob_A = predict_win_probability(team_A, team_B, stats)
    for line in (
        f"Win probability for Team A: {win_prob_A:.2f}",
        f"Win probability for Team B: {1 - win_prob_A:.2f}",
    ):
        print(line)