In [256]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sb
import json

In [257]:
matches = pd.read_csv(r"C:\Users\prasa\Videos\IPL Project\matches.csv")
deliveries = pd.read_csv(r"C:\Users\prasa\Videos\IPL Project\deliveries\deliveries.csv")


In [258]:
# Convert the Match date from object to date format
matches['date'] = pd.to_datetime(matches.date)

In [259]:
# Filtering the matches .... Matches after mid 2024 season 2024-04-05
matches_2024 = matches[matches['date']>'2024-04-25']
matches_2024.sample(4)


Unnamed: 0,id,season,city,date,match_type,player_of_match,venue,team1,team2,toss_winner,toss_decision,winner,result,result_margin,target_runs,target_overs,super_over,method,umpire1,umpire2
1092,1426310,2024,Ahmedabad,2024-05-22,Eliminator,R Ashwin,"Narendra Modi Stadium, Ahmedabad",Royal Challengers Bengaluru,Rajasthan Royals,Rajasthan Royals,field,Rajasthan Royals,wickets,4.0,173.0,20.0,N,,KN Ananthapadmanabhan,MV Saidharshan Kumar
1068,1426283,2024,Ahmedabad,2024-04-28,League,WG Jacks,"Narendra Modi Stadium, Ahmedabad",Gujarat Titans,Royal Challengers Bengaluru,Royal Challengers Bengaluru,field,Royal Challengers Bengaluru,wickets,9.0,201.0,20.0,N,,Nitin Menon,VK Sharma
1074,1426289,2024,Mumbai,2024-05-03,League,VR Iyer,"Wankhede Stadium, Mumbai",Kolkata Knight Riders,Mumbai Indians,Mumbai Indians,field,Kolkata Knight Riders,runs,24.0,170.0,20.0,N,,J Madanagopal,Tapan Sharma
1067,1426282,2024,Lucknow,2024-04-27,League,SV Samson,Bharat Ratna Shri Atal Bihari Vajpayee Ekana C...,Lucknow Super Giants,Rajasthan Royals,Rajasthan Royals,field,Rajasthan Royals,wickets,7.0,197.0,20.0,N,,KN Ananthapadmanabhan,MA Gough


In [260]:
# Considering the players who played in 2024 IPL season (mid season onwards...)
del_2024 = deliveries.copy()
del_2024 = del_2024.merge(matches_2024, left_on='match_id', right_on = "id")

In [261]:
# Players 
with open('players.json', 'r') as f:
    players_classified = json.load(f)

In [262]:
players_2024 = list(set(del_2024['batter']).union(set(del_2024['bowler'])))
print(len(players_2024))

171


## Considerations
Considering the data till mid IPL 2024 season.... 

`del before` : Match data (including deliveries) till IPL 2024 mid season

`del_recent` : Matches played in after mid IPL 2024... Matches where we will test our code

In [263]:
matches_before = matches[matches['date']<='2024-04-25']
del_before = deliveries.merge(matches_before, left_on='match_id', right_on = "id")
del_before.sample(4)

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,...,toss_decision,winner,result,result_margin,target_runs,target_overs,super_over,method,umpire1,umpire2
23103,392223,2,Deccan Chargers,Delhi Daredevils,7,4,DB Ravi Teja,R Bhatia,AC Gilchrist,1,...,field,Delhi Daredevils,runs,12.0,174.0,20.0,N,,DJ Harper,SL Shastri
236001,1359516,1,Rajasthan Royals,Mumbai Indians,0,4,YBK Jaiswal,C Green,JC Buttler,6,...,bat,Mumbai Indians,wickets,6.0,213.0,20.0,N,,Vinod Seshan,VK Sharma
241736,1359540,1,Punjab Kings,Rajasthan Royals,16,6,SM Curran,A Zampa,M Shahrukh Khan,1,...,field,Rajasthan Royals,wickets,4.0,188.0,20.0,N,,A Nand Kishore,RJ Tucker
219094,1304092,2,Sunrisers Hyderabad,Chennai Super Kings,0,2,Abhishek Sharma,Mukesh Choudhary,KS Williamson,0,...,field,Chennai Super Kings,runs,13.0,203.0,20.0,N,,AK Chaudhary,KN Ananthapadmanabhan


In [264]:
matches_first_half = matches[(matches['date']>'2024-01-01') &(matches['date']<='2024-04-25')]
del_recent = matches_first_half.merge(deliveries,left_on='id',right_on='match_id',how='inner')

## Handling the duplicate venue names

In [265]:
# Mapping dictionary to standardize duplicate venues
venue_standardization = {
    # Eden Gardens
    'Eden Gardens, Kolkata': 'Eden Gardens',
    
    # Wankhede Stadium
    'Wankhede Stadium, Mumbai': 'Wankhede Stadium',
    
    # M Chinnaswamy Stadium
    'M Chinnaswamy Stadium, Bengaluru': 'M Chinnaswamy Stadium',
    'M.Chinnaswamy Stadium': 'M Chinnaswamy Stadium',
    
    # Arun Jaitley / Feroz Shah Kotla
    'Feroz Shah Kotla': 'Arun Jaitley Stadium',
    'Arun Jaitley Stadium, Delhi': 'Arun Jaitley Stadium',
    
    # MA Chidambaram
    'MA Chidambaram Stadium, Chepauk': 'MA Chidambaram Stadium',
    'MA Chidambaram Stadium, Chepauk, Chennai': 'MA Chidambaram Stadium',
    
    # Rajiv Gandhi
    'Rajiv Gandhi International Stadium, Uppal': 'Rajiv Gandhi International Stadium',
    'Rajiv Gandhi International Stadium, Uppal, Hyderabad': 'Rajiv Gandhi International Stadium',
    
    # DY Patil
    'Dr DY Patil Sports Academy, Mumbai': 'Dr DY Patil Sports Academy',
    
    # Brabourne
    'Brabourne Stadium, Mumbai': 'Brabourne Stadium',
    
    # MCA Stadium
    'Maharashtra Cricket Association Stadium, Pune': 'Maharashtra Cricket Association Stadium',
    
    # Punjab IS Bindra
    'Punjab Cricket Association Stadium, Mohali': 'Punjab Cricket Association IS Bindra Stadium',
    'Punjab Cricket Association IS Bindra Stadium, Mohali, Chandigarh': 'Punjab Cricket Association IS Bindra Stadium',
    'Punjab Cricket Association IS Bindra Stadium, Mohali': 'Punjab Cricket Association IS Bindra Stadium',
    
    # ACA-VDCA
    'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium, Visakhapatnam': 'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium',
    
    # Sheikh Zayed
    'Zayed Cricket Stadium, Abu Dhabi': 'Sheikh Zayed Stadium'
}

# Apply the mapping to standardize venues
del_before['venue'] = del_before['venue'].replace(venue_standardization)

# Optional: Strip extra whitespace (if any)
del_before['venue'] = del_before['venue'].str.strip()

# Check final unique venues and counts
print(del_before['venue'].value_counts())


Wankhede Stadium                                                         27765
Eden Gardens                                                             21171
M Chinnaswamy Stadium                                                    21120
Arun Jaitley Stadium                                                     20509
MA Chidambaram Stadium                                                   19545
Rajiv Gandhi International Stadium                                       17665
Punjab Cricket Association IS Bindra Stadium                             14476
Dubai International Cricket Stadium                                      11229
Sawai Mansingh Stadium                                                   11150
Dr DY Patil Sports Academy                                                8898
Sheikh Zayed Stadium                                                      8799
Maharashtra Cricket Association Stadium                                   8414
Sharjah Cricket Stadium                             

## Function that returns the Player Historic and Recent Stats

In [266]:
def player_performance(player,form,historical_data=del_before):
    bowler = player
    if form =="recent":
        del_before = del_recent
    else :
        del_before = historical_data
    # ================================Bowling Stats==========================================
    # Balls bowled
    player_df = del_before[(del_before['batter']== bowler)|(del_before['bowler']==bowler)|(del_before['fielder']==bowler)]
    matches_played = player_df['match_id'].unique()
    balls_bowled = player_df[player_df['bowler']==bowler].groupby(['match_id'])['is_wicket'].count()
    balls_bowled  = balls_bowled.reindex(matches_played,fill_value=0).values

    # Dot balls
    dot_balls = player_df[(player_df['batsman_runs']==0)&(player_df['bowler']==bowler)].groupby(['match_id'])['total_runs'].count()
    dot_balls = dot_balls.reindex(matches_played,fill_value=0).values


    # Wickts not Runouts
    wickets = player_df[(player_df['bowler']==bowler)&(player_df['dismissal_kind']!='run out')&(player_df['is_wicket']==1)].groupby('match_id')['is_wicket'].sum()
    wickets = wickets.reindex(matches_played,fill_value=0).values

    # LBW/Bowled
    lbw = player_df[(player_df['bowler']==bowler)&((player_df['dismissal_kind']=='lbw')|(player_df['dismissal_kind']=='bowled'))].groupby('match_id')['is_wicket'].sum()
    lbw = lbw.reindex(matches_played,fill_value=0).values

    # Maiden overs
    dot_balls_ = player_df[(player_df['bowler']==bowler)&(player_df['total_runs']==0)].groupby(['match_id','over'])['total_runs'].count().reset_index()
    maiden = dot_balls_[dot_balls_['total_runs']==6].groupby(['match_id'])['over'].count()
    maiden_overs = maiden.reindex(matches_played,fill_value=0).values

    # Runs given 
    runs_given =player_df[player_df['bowler']==bowler].groupby('match_id')['batsman_runs'].sum()
    runs_given = runs_given.reindex(matches_played,fill_value=0).values

    # Economy 
    economy = runs_given*6/balls_bowled


    # 3 Wkts Haul
    wkts3 = player_df[(player_df['bowler']==bowler) & 
              (player_df['is_wicket']==1) & 
              (player_df['dismissal_kind']!='run out')].groupby(['match_id'])['is_wicket'].sum().reset_index()
    wkts3=wkts3[wkts3['is_wicket']==3].groupby('match_id')['is_wicket'].count()
    wkts3 = wkts3.reindex(matches_played,fill_value=0).values

    # 4 Wkts Haul 
    wkts4 = player_df[(player_df['bowler']==bowler) & 
              (player_df['is_wicket']==1) & 
              (player_df['dismissal_kind']!='run out')].groupby(['match_id'])['is_wicket'].sum().reset_index()
    wkts4=wkts4[wkts4['is_wicket']==4].groupby('match_id')['is_wicket'].count()
    wkts4 = wkts4.reindex(matches_played,fill_value=0).values


    # 5 Wkts Haul
    wkts5 = player_df[(player_df['bowler']==bowler) & 
              (player_df['is_wicket']==1) & 
              (player_df['dismissal_kind']!='run out')].groupby(['match_id'])['is_wicket'].sum().reset_index()
    wkts5=wkts5[wkts5['is_wicket']==5].groupby('match_id')['is_wicket'].count()
    wkts5 = wkts5.reindex(matches_played,fill_value=0).values



    # ========================= Batting Stats============================
    # Runs Scored 
    runs_scored = player_df[player_df['batter'] == bowler].groupby('match_id')['batsman_runs'].sum()
    runs_scored = runs_scored.reindex(matches_played,fill_value=0).values

    # Fours 
    fours = player_df[(player_df['batter']==bowler)&(player_df['batsman_runs']==4)].groupby('match_id')['total_runs'].count()
    fours = fours.reindex(matches_played,fill_value=0).values

    # Sixers 
    sixers = player_df[(player_df['batter']==bowler)&(player_df['batsman_runs']==6)].groupby('match_id')['total_runs'].count()
    sixers = sixers.reindex(matches_played,fill_value=0).values

    # Balls Faced 
    balls_faced = player_df[player_df['batter']==bowler].groupby('match_id')['ball'].count()
    balls_faced = balls_faced.reindex(matches_played,fill_value=0).values

    # Strike Rate 
    strike_rate = runs_scored*100/balls_faced

    # Ducks 
    player_runs = player_df[(player_df['batter']==bowler)].groupby(['match_id'])['batsman_runs'].sum()
    ducks = player_runs[player_runs==0].reset_index().groupby('match_id')['batsman_runs'].count()
    ducks = ducks.reindex(matches_played,fill_value=0).values


    # ========================== Fielding Points =====================

    # Catches 
    catch = del_before[(del_before['dismissal_kind']== 'caught') & 
                       (del_before['fielder']==bowler)].groupby('match_id')['is_wicket'].sum()

    catch = catch.reindex(matches_played,fill_value=0).values

    # 3+ Catches 
    catch3 = del_before[(del_before['dismissal_kind']== 'caught') & 
                       (del_before['fielder']==bowler)].groupby('match_id')['is_wicket'].sum()

    catch3 = catch3[catch3>=3]
    catch3= catch3.reindex(matches_played,fill_value=0).values


    # Stumps 
    stumps = del_before[(del_before['dismissal_kind']=='stumped')&(del_before['fielder']==bowler)]
    stumps = stumps.groupby('match_id')['is_wicket'].sum()
    stumps = stumps.reindex(matches_played,fill_value=0).values

    # Runouts 
    runouts = del_before[(del_before['dismissal_kind']=='run out')&(del_before['fielder']==bowler)]
    runouts = runouts.groupby('match_id')['is_wicket'].sum()
    runouts = runouts.reindex(matches_played,fill_value=0).values
    
    # returning the dictinary 
    player_performance = {
    "Name": bowler,
    "Balls_bowled":balls_bowled,
    "Dot_balls_bowled":dot_balls,
    "Wickets_not_runouts":wickets,
    "LBW":lbw,
    "Maiden_overs":maiden_overs,
    "Runs_given": runs_given,
    "Economy":economy,
    "Wkts3":wkts3,
    "Wkts4":wkts4,
    "Wkts5":wkts5,
    "Runs_scored":runs_scored,
    "Fours":fours,
    "Sixers":sixers,
    "Balls_faced":balls_faced,
    "Strike_rate":strike_rate,
    "Ducks":ducks,
    "Catches":catch,
    "Cathes3":catch3,
    "Stumps":stumps,
    "Runouts":runouts
    
    }
    
    return player_performance


In [214]:
# player_performance("K Gowtham",'')

## Stadium vs Player

In [267]:
def player_vs_stadium(player1,stadium):
    # Matches Played in stadium
    matches = del_before[(del_before['venue']==stadium)&
                         ((del_before['batter']==player1)| (del_before['bowler']==player1))]['match_id'].nunique()

    # Runs Scored 
    runs = del_before[(del_before['venue']==stadium)&
                         (del_before['batter']==player1)]['batsman_runs'].sum()

    # Avg Runs per match
    avg = runs/matches

    # Balls Faced
    balls_faced = del_before[(del_before['venue']==stadium)&
                         (del_before['batter']==player1)]['batsman_runs'].count()

    # Strike rate 
    if runs==0:
        strike_rate = 0
    else:
        strike_rate = runs*100/balls_faced

    # Fours 
    fours_scored = del_before[(del_before['venue']==stadium)&
               (del_before['batsman_runs']==4)&
               (del_before['batter']==player1)
              ]['batsman_runs'].count()

    # Sixers
    sixers_scored = del_before[(del_before['venue']==stadium)&
               (del_before['batsman_runs']==6)&
               (del_before['batter']==player1)
              ]['batsman_runs'].count()

    # Fours per match 
    if fours_scored==0:
        fours_per_match = 0
    else:
        fours_per_match = fours_scored/matches

    # Sixes per match 
    if sixers_scored==0:
        sixers_per_match = 0
    else:
        sixers_per_match = sixers_scored/matches

    # Outs
    outs = del_before[(del_before['venue']==stadium)&
               (del_before['batter']==player1)&
               (del_before['is_wicket']==1)
              ]['is_wicket'].count()

    # not outs
    not_outs = matches - outs


    # ================================ As a bowler ===========================================
    # Wickets
    wickets = del_before[(del_before['bowler']==player1)&(del_before['venue']==stadium)&
                         (del_before['is_wicket']==1)]['is_wicket'].count()


    # Avg Wickets
    if wickets ==0:
        avg_wickets =0
    else:
        avg_wickets = wickets/matches


    # runs given 
    runs_given = del_before[(del_before['bowler']==player1)&(del_before['venue']==stadium)]['batsman_runs'].sum()

    # Balls bowled 
    balls_bowled = del_before[(del_before['bowler']==player1)&(del_before['venue']==stadium)]['batsman_runs'].count()


    # Balls bowled per Match
    if balls_bowled ==0:
        balls_bowled_per_match = 0
    else:
        balls_bowled_per_match = balls_bowled/matches

    # Avg economy 
    if runs_given==0:
        economy = 0
    else:
        economy = runs_given*6/balls_bowled
        
        
    # Creating dictionary 
    return_dict = {
        "Name":player1,
        "Matches":matches,
        "Runs_scored":runs,
        "Avg_runs":avg,
        "Balls_faced":balls_faced,
        "Strike_rate":strike_rate,
        "Fours":fours_scored,
        "Sixers":sixers_scored,
        "Fours_per_match":fours_per_match,
        "Sixers_per_match": sixers_per_match,
        "Outs":outs,
        "Not-outs":not_outs,
        "Wickets":wickets,
        "Avg_Wickets":avg_wickets,
        "Balls_bowled":balls_bowled,
        "Runs_given":runs_given,
        "Balls_bowled_per_match":balls_bowled_per_match,
        "Economy":economy
    }
        
    return return_dict   



In [268]:
#player_vs_stadium("JJ Bumrah",'MA Chidambaram Stadium')

## Player vs Player Stats

In [269]:
def playerVsPlayer(player1,player2):
    # Matches 
    matches = del_before[(del_before['batter']==player1)&(del_before['bowler']==player2)]['match_id'].nunique()
    
    # Runs
    runs_scored = del_before[(del_before['batter']==player1)&(del_before['bowler']==player2)]['batsman_runs'].sum()
    
    # Balls 
    balls_faced = del_before[(del_before['batter']==player1)&(del_before['bowler']==player2)]['batsman_runs'].count()
    
    # Strike Rate
    if runs_scored == 0:
        strike_rate = 0
    else:
        strike_rate = runs_scored*100/balls_faced
        
    # Avg 
    if runs_scored==0:
        avg_runs = 0
    else:
        avg_runs = runs_scored/matches

    
    # Fours 
    fours = del_before[(del_before['batter']==player1)&(del_before['bowler']==player2)&
           (del_before['batsman_runs']==4)]['batsman_runs'].count()
    
    # sixers 
    sixers = del_before[(del_before['batter']==player1)&(del_before['bowler']==player2)&
               (del_before['batsman_runs']==6)]['batsman_runs'].count()
    
    # Economy 
    if runs_scored==0:
        economy = 0
    else:
        economy = runs_scored*6/balls_faced
        
    # Outs
    outs = del_before[(del_before['batter']==player1)&(del_before['bowler']==player2)&
           (del_before['is_wicket']==1)]['is_wicket'].count()
        
    player_vs_player = {
        "Batter":player1,
        "Bowler":player2,
        "Matches":matches,
        "Runs_scored":runs_scored,
        "Balls_faced":balls_faced,
        "Strike_rate": strike_rate,
        "Avg_runs":avg_runs,
        "Fours":fours,
        "Sixers":sixers,
        "Economy":economy,
        "Outs":outs
    }
    
    return player_vs_player
    
    


    

## Main Function

In [309]:
def simulate_player_dream11_scores(players, form="historical", n_simulations=40):
    weight = 0.2 if form == "historical" else 0.4
    player_points = {}

    for player in players:
        perf = player_performance(player, form=form)

        # Simulate values
        def sim_avg(values):
            arr = np.array(values)
            if len(arr) == 0:
                return 0.0
            samples = np.random.choice(arr, size=n_simulations, replace=True)
            return round(safe_mean(samples), 2)

        # Batting
        runs = sim_avg(perf["Runs_scored"])
        fours = sim_avg(perf["Fours"])
        sixers = sim_avg(perf["Sixers"])
        balls_faced = sim_avg(perf["Balls_faced"])
        ducks = sim_avg(perf["Ducks"])

        # Bowling
        wickets = sim_avg(perf["Wickets_not_runouts"])
        maiden_overs = sim_avg(perf["Maiden_overs"])
        dot_balls = sim_avg(perf["Dot_balls_bowled"])
        wkts3 = sim_avg(perf["Wkts3"])
        wkts4 = sim_avg(perf["Wkts4"])
        wkts5 = sim_avg(perf["Wkts5"])
        economy = safe_mean(perf["Economy"])  # already a rate
        balls_bowled = sim_avg(perf["Balls_bowled"])
        runs_given = sim_avg(perf["Runs_given"])

        # Fielding
        catches = sim_avg(perf["Catches"])
        catches3 = sim_avg(perf["Cathes3"])
        stumps = sim_avg(perf["Stumps"])
        runouts = sim_avg(perf["Runouts"])

        # ===============================
        # 🏏 Dream11 Fantasy Points Logic
        # ===============================

        total_points = 0

        # ✅ Batting
        total_points += runs * 1
        total_points += fours * 1
        total_points += sixers * 2
        total_points -= ducks * 2  # optional duck penalty

        # Strike rate penalty
        if balls_faced >= 10:
            sr = (runs * 100) / balls_faced
            if 50 <= sr < 60:
                total_points -= 4
            elif 60 <= sr < 70:
                total_points -= 2
            elif sr < 50:
                total_points -= 6

        # ✅ Bowling
        total_points += wickets * 25
        total_points += maiden_overs * 12
        total_points += dot_balls * 1

        # Economy bonus (if ≥2 overs)
        overs_bowled = balls_bowled / 6
        if overs_bowled >= 2:
            eco = runs_given / overs_bowled
            if 5 <= eco < 6:
                total_points += 2
            elif 4 <= eco < 5:
                total_points += 4
            elif eco < 4:
                total_points += 6
            elif 9 <= eco < 10:
                total_points -= 2
            elif 10 <= eco < 11:
                total_points -= 4
            elif eco >= 11:
                total_points -= 6

        total_points += wkts3 * 4
        total_points += wkts4 * 8
        total_points += wkts5 * 16

        # ✅ Fielding
        total_points += catches * 8
        total_points += catches3 * 4  # 3+ catch bonus
        total_points += stumps * 12
        total_points += runouts * 6

        # Multiply by form weight
        player_points[player] = round(total_points * weight, 2)

    return player_points


In [310]:
#simulate_player_dream11_scores(players, form='recent')

In [311]:
def simulate_stadium_adjusted_dream11_scores(players, stadium, weight=0.15):
    player_points = {}

    for player in players:
        stats = player_vs_stadium(player, stadium)

        matches = max(1, stats["Matches"])  # Prevent divide-by-zero

        # Batting Metrics (per match)
        runs = stats["Runs_scored"] / matches
        fours = stats["Fours"] / matches
        sixers = stats["Sixers"] / matches
        strike_rate = stats["Strike_rate"]

        # Fielding
        outs = stats["Outs"] / matches

        # Bowling Metrics (per match)
        wickets = stats["Wickets"] / matches
        balls_bowled = stats["Balls_bowled"] / matches
        runs_given = stats["Runs_given"] / matches
        economy = stats["Economy"]

        # ===============================
        # 🏏 Dream11 Fantasy Points Logic
        # ===============================

        total_points = 0

        # ✅ Batting
        total_points += runs * 1
        total_points += fours * 1
        total_points += sixers * 2

        # Strike rate adjustment if faced enough balls
        if balls_bowled >= 10:
            sr = strike_rate
            if 50 <= sr < 60:
                total_points -= 4
            elif 60 <= sr < 70:
                total_points -= 2
            elif sr < 50:
                total_points -= 6

        # ✅ Bowling
        total_points += wickets * 25

        if balls_bowled >= 12:  # ≥2 overs
            if 5 <= economy < 6:
                total_points += 2
            elif 4 <= economy < 5:
                total_points += 4
            elif economy < 4:
                total_points += 6
            elif 9 <= economy < 10:
                total_points -= 2
            elif 10 <= economy < 11:
                total_points -= 4
            elif economy >= 11:
                total_points -= 6

        # ✅ Fielding (outs interpreted here as dismissals)
        total_points += outs * 6  # Conservative fielding score

        # Scale with stadium weight
        player_points[player] = round(total_points * weight, 2)

    return player_points


In [312]:
#simulate_stadium_adjusted_dream11_scores(players,"M Chinnaswamy Stadium")

In [313]:
import pandas as pd
from itertools import permutations

def process_player_vs_player_metrics(players):
    """
    Takes a list of players and computes match-level head-to-head metrics
    between all unique batter-bowler pairs.
    
    Returns:
        pd.DataFrame: Player-vs-player match-level metrics
    """
    results = []

    for batter, bowler in permutations(players, 2):  # All ordered pairs, excluding self-matchups
        metrics = playerVsPlayer(batter, bowler)
        
        if metrics['Matches'] == 0:
            continue  # Skip if there's no match history
        
        matches = metrics['Matches']
        
        # Normalize total stats to per-match level where applicable
        result_row = {
            "Batter": batter,
            "Bowler": bowler,
            "Matches": matches,
            "Runs_scored_per_match": round(metrics["Runs_scored"] / matches, 2),
            "Balls_faced_per_match": round(metrics["Balls_faced"] / matches, 2),
            "Strike_rate": round(metrics["Strike_rate"], 2),
            "Avg_runs": round(metrics["Avg_runs"], 2),
            "Fours_per_match": round(metrics["Fours"] / matches, 2),
            "Sixers_per_match": round(metrics["Sixers"] / matches, 2),
            "Economy": round(metrics["Economy"], 2),
            "Outs_per_match": round(metrics["Outs"] / matches, 2)
        }

        results.append(result_row)

    return pd.DataFrame(results)


In [314]:
def simulate_pvp_adjusted_dream11_scores(players, weight=0.15):
    """
    For every batter-bowler pair, calculate weighted Dream11 points per match
    and aggregate for each batter over all bowlers.
    
    Returns:
        dict: {player_name: total_weighted_points}
    """
    from collections import defaultdict
    player_points = defaultdict(float)

    for batter in players:
        for bowler in players:
            if batter == bowler:
                continue

            stats = playerVsPlayer(batter, bowler)
            matches = max(1, stats["Matches"])  # Avoid div by zero

            # Per match metrics
            runs = stats["Runs_scored"] / matches
            fours = stats["Fours"] / matches
            sixers = stats["Sixers"] / matches
            strike_rate = stats["Strike_rate"]
            balls_faced = stats["Balls_faced"] / matches
            wickets = 0  # batter doesn't bowl here, so zero
            economy = stats["Economy"]
            outs = stats["Outs"] / matches

            # Dream11 points calculation per match
            points = 0
            # Batting
            points += runs * 1
            points += fours * 1
            points += sixers * 2

            if balls_faced >= 10:
                sr = strike_rate
                if 50 <= sr < 60:
                    points -= 4
                elif 60 <= sr < 70:
                    points -= 2
                elif sr < 50:
                    points -= 6

            # Bowling - zero here for batter-vs-bowler since batter is batting
            # Fielding
            points += outs * 6  # dismissals

            # Multiply by weight
            weighted_points = points * weight

            # Aggregate points for this batter over all bowlers
            player_points[batter] += weighted_points

    # Round the points for neatness
    player_points = {k: round(v, 2) for k, v in player_points.items()}

    return player_points


In [315]:
#simulate_pvp_adjusted_dream11_scores(players)

In [332]:
def classify_player_by_stats(player_stats):
    total_matches = len(player_stats.get("Runs_scored", []))
    if total_matches == 0:
        return "Batsman"

    balls_faced = np.array(player_stats.get("Balls_faced", []))
    runs_scored = np.array(player_stats.get("Runs_scored", []))
    balls_bowled = np.array(player_stats.get("Balls_bowled", []))
    stumps = np.array(player_stats.get("Stumps", []))

    batting_matches = np.sum(balls_faced > 0)
    bowling_matches = np.sum(balls_bowled > 0)
    overs_bowled_per_match = balls_bowled / 6
    avg_overs_bowled = np.mean(overs_bowled_per_match[overs_bowled_per_match > 0]) if bowling_matches > 0 else 0

    total_runs = np.sum(runs_scored)
    total_balls_faced = np.sum(balls_faced)
    strike_rate = (total_runs * 100 / total_balls_faced) if total_balls_faced > 0 else 0

    if np.sum(stumps) >= 1:
        return "WicketKeeper"
    if batting_matches / total_matches >= 0.5 and bowling_matches / total_matches >= 0.5 and strike_rate >= 100:
        return "AllRounder"
    if batting_matches / total_matches >= 0.6 and strike_rate >= 110:
        return "Batsman"
    if bowling_matches / total_matches >= 0.6 and avg_overs_bowled >= 2:
        return "Bowler"

    return "Batsman"


def select_best_11_with_classification(players, stadium):
    # Step 1: Get score dictionaries from each module
    hist_scores = simulate_player_dream11_scores(players, form='historical')
    recent_scores = simulate_player_dream11_scores(players, form='recent')
    stadium_scores = simulate_stadium_adjusted_dream11_scores(players, stadium)
    pvp_scores = simulate_pvp_adjusted_dream11_scores(players)

    # Step 2: Combine all weighted scores
    combined_scores = {}
    for p in players:
        combined_scores[p] = (
            0.25 * hist_scores.get(p, 0) +
            0.4 * recent_scores.get(p, 0) +
            0.15 * stadium_scores.get(p, 0) +
            0.20 * pvp_scores.get(p, 0)
        )

    # Step 3: Classify players using statistical logic
    classification = {}
    for p in players:
        stats = player_performance(p, form="historical")
        classification[p] = classify_player_by_stats(stats)

    # Step 4: Divide into categories
    wk = [p for p in players if classification[p] == 'WicketKeeper']
    bat = [p for p in players if classification[p] == 'Batsman']
    bowl = [p for p in players if classification[p] == 'Bowler']
    ar = [p for p in players if classification[p] == 'AllRounder']

    # Step 5: Sort each by score
    wk = sorted(wk, key=lambda x: combined_scores.get(x, 0), reverse=True)
    bat = sorted(bat, key=lambda x: combined_scores.get(x, 0), reverse=True)
    bowl = sorted(bowl, key=lambda x: combined_scores.get(x, 0), reverse=True)
    ar = sorted(ar, key=lambda x: combined_scores.get(x, 0), reverse=True)

    # Step 6: Initial selections
    selected_wk = wk[:1]
    selected_bat = bat[:4]
    selected_bowl = bowl[:4]
    selected_ar = ar[:2]

    # Step 6.1: If WKs have high scores and only 1 WK picked, allow one more as batsman
    if len(wk) > 1 and len(selected_bat) >= 4:
        # Include second WK as Batsman if his score is better than the 4th batsman
        if combined_scores[wk[1]] > combined_scores[selected_bat[-1]]:
            selected_bat[-1] = wk[1]  # Replace last batsman
            if wk[1] not in selected_wk:
                selected_wk.append(wk[1])  # Add to WK list too

    # Step 6.2: Ensure at least 2 allrounders
    if len(selected_ar) < 2:
        extras = bat[4:] + bowl[4:]
        extras = sorted(extras, key=lambda x: combined_scores.get(x, 0), reverse=True)
        for extra in extras:
            if extra not in selected_ar and len(selected_ar) < 2:
                selected_ar.append(extra)

    # Step 7: Combine and finalize team
    combined_selected = list(set(selected_wk + selected_bat + selected_bowl + selected_ar))
    combined_selected = sorted(combined_selected, key=lambda x: combined_scores.get(x, 0), reverse=True)[:11]

    # Step 8: Pick Captain and Vice Captain
    captain = combined_selected[0]
    vice_captain = combined_selected[1]

    # Step 9: Assign roles from actual 11
    final_roles = {
        "Captain": captain,
        "Vice_Captain": vice_captain,
        "WK": [p for p in combined_selected if classification[p] == 'WicketKeeper'],
        "Batsman": [p for p in combined_selected if classification[p] == 'Batsman' or (p in wk and p not in selected_wk)],
        "Bowler": [p for p in combined_selected if classification[p] == 'Bowler'],
        "Allrounders": [p for p in combined_selected if classification[p] == 'AllRounder']
    }

    return final_roles


In [334]:
select_best_11_with_classification(players,stadium_name)

  economy = runs_given*6/balls_bowled
  strike_rate = runs_scored*100/balls_faced
  economy = runs_given*6/balls_bowled
  strike_rate = runs_scored*100/balls_faced
  strike_rate = runs_scored*100/balls_faced
  economy = runs_given*6/balls_bowled
  strike_rate = runs_scored*100/balls_faced
  strike_rate = runs_scored*100/balls_faced
  economy = runs_given*6/balls_bowled
  strike_rate = runs_scored*100/balls_faced
  economy = runs_given*6/balls_bowled
  strike_rate = runs_scored*100/balls_faced
  economy = runs_given*6/balls_bowled
  strike_rate = runs_scored*100/balls_faced
  economy = runs_given*6/balls_bowled
  economy = runs_given*6/balls_bowled
  economy = runs_given*6/balls_bowled
  economy = runs_given*6/balls_bowled
  strike_rate = runs_scored*100/balls_faced
  strike_rate = runs_scored*100/balls_faced
  economy = runs_given*6/balls_bowled
  strike_rate = runs_scored*100/balls_faced
  economy = runs_given*6/balls_bowled
  strike_rate = runs_scored*100/balls_faced
  economy = runs

  avg = runs/matches
  economy = runs_given*6/balls_bowled
  strike_rate = runs_scored*100/balls_faced
  economy = runs_given*6/balls_bowled
  strike_rate = runs_scored*100/balls_faced
  strike_rate = runs_scored*100/balls_faced
  economy = runs_given*6/balls_bowled
  strike_rate = runs_scored*100/balls_faced
  strike_rate = runs_scored*100/balls_faced
  economy = runs_given*6/balls_bowled
  strike_rate = runs_scored*100/balls_faced
  economy = runs_given*6/balls_bowled
  strike_rate = runs_scored*100/balls_faced
  economy = runs_given*6/balls_bowled
  strike_rate = runs_scored*100/balls_faced
  economy = runs_given*6/balls_bowled
  economy = runs_given*6/balls_bowled
  economy = runs_given*6/balls_bowled
  economy = runs_given*6/balls_bowled
  strike_rate = runs_scored*100/balls_faced
  strike_rate = runs_scored*100/balls_faced
  economy = runs_given*6/balls_bowled
  strike_rate = runs_scored*100/balls_faced
  economy = runs_given*6/balls_bowled
  strike_rate = runs_scored*100/balls_f

{'Captain': 'SP Narine',
 'Vice_Captain': 'AD Russell',
 'WK': ['JM Bairstow'],
 'Batsman': ['PD Salt', 'SS Iyer', 'RK Singh', 'Shashank Singh'],
 'Bowler': ['HV Patel', 'K Rabada', 'Harshit Rana', 'CV Varun'],
 'Allrounders': ['SP Narine', 'AD Russell']}

In [318]:
players

['Avesh Khan',
 'JC Buttler',
 'Sandeep Sharma',
 'KL Rahul',
 'SV Samson',
 'A Mishra',
 'Dhruv Jurel',
 'DJ Hooda',
 'N Pooran',
 'TA Boult',
 'R Ashwin',
 'Q de Kock',
 'A Badoni',
 'YS Chahal',
 'Mohsin Khan',
 'KH Pandya',
 'MJ Henry',
 'YBK Jaiswal',
 'Ravi Bishnoi',
 'Yash Thakur',
 'MP Stoinis',
 'R Parag']

### Rough for validation 

In [419]:
def get_batting_stats(deliveries_df, match_id):
    # Runs scored
    runs = deliveries_df[deliveries_df['match_id'] == match_id] \
        .groupby('batter')['batsman_runs'].sum().rename("Runs")

    # Balls faced
    balls = deliveries_df[deliveries_df['match_id'] == match_id] \
        .groupby('batter')['batsman_runs'].count().rename("Balls")

    # Sixes
    sixes = deliveries_df[
        (deliveries_df['match_id'] == match_id) &
        (deliveries_df['batsman_runs'] == 6)
    ].groupby('batter')['batsman_runs'].count().rename("Sixes")

    # Fours
    fours = deliveries_df[
        (deliveries_df['match_id'] == match_id) &
        (deliveries_df['batsman_runs'] == 4)
    ].groupby('batter')['batsman_runs'].count().rename("Fours")

    # Merge all stats
    stats_df = pd.concat([runs, balls, sixes, fours], axis=1).fillna(0)

    # Convert to integers
    stats_df[['Runs', 'Balls', 'Sixes', 'Fours']] = stats_df[['Runs', 'Balls', 'Sixes', 'Fours']].astype(int)

    # Strike rate = (Runs / Balls) * 100
    stats_df['Strike Rate'] = stats_df.apply(
        lambda row: round((row['Runs'] / row['Balls']) * 100, 2) if row['Balls'] > 0 else 0.0, axis=1
    )

    return stats_df.reset_index()

x = get_batting_stats(del_2024,1426280)

In [420]:
x.sort_values(by='Runs',ascending=False)

Unnamed: 0,batter,Runs,Balls,Sixes,Fours,Strike Rate
1,JM Bairstow,108,48,9,8,225.0
3,PD Salt,75,39,6,6,192.31
7,SP Narine,71,36,4,9,197.22
9,Shashank Singh,68,29,8,2,234.48
2,P Simran Singh,54,21,5,4,257.14
10,VR Iyer,39,24,2,3,162.5
8,SS Iyer,28,10,3,1,280.0
5,RR Rossouw,26,18,2,1,144.44
0,AD Russell,24,15,2,2,160.0
6,Ramandeep Singh,6,3,1,0,200.0


In [421]:
def get_bowling_stats(deliveries_df, match_id):
    match_df = deliveries_df[deliveries_df['match_id'] == match_id]

    # Wickets (excluding runouts and retired hurt)
    valid_wickets = match_df[
        match_df['dismissal_kind'].isin(['bowled', 'caught', 'lbw', 'stumped', 'caught and bowled', 'hit wicket'])
    ]
    wickets = valid_wickets.groupby('bowler')['is_wicket'].count().rename("Wickets")

    # Balls bowled
    balls_bowled = match_df.groupby('bowler')['is_wicket'].count().rename("Balls")

    # Dot balls
    dots = match_df[match_df['batsman_runs'] == 0] \
        .groupby('bowler')['is_wicket'].count().rename("Dot Balls")

    # Runs conceded (exclude extras not credited to bowler)
    runs_conceded = match_df.groupby('bowler')['total_runs'].sum().rename("Runs Conceded")

    # Economy = (Runs Conceded) / (Balls / 6)
    stats_df = pd.concat([wickets, balls_bowled, dots, runs_conceded], axis=1).fillna(0)
    stats_df[['Wickets', 'Balls', 'Dot Balls', 'Runs Conceded']] = stats_df[['Wickets', 'Balls', 'Dot Balls', 'Runs Conceded']].astype(int)
    
    stats_df['Economy'] = stats_df.apply(
        lambda row: round((row['Runs Conceded'] / (row['Balls'] / 6)), 2) if row['Balls'] > 0 else 0.0,
        axis=1
    )

    # Sort by wickets taken descending
    stats_df = stats_df.sort_values(by='Wickets', ascending=False)

    return stats_df.reset_index()
get_bowling_stats(del_2024,1426280)

Unnamed: 0,bowler,Wickets,Balls,Dot Balls,Runs Conceded,Economy
0,Arshdeep Singh,2,29,15,45,9.31
1,HV Patel,1,20,8,49,14.7
2,RD Chahar,1,24,9,34,8.5
3,SM Curran,1,24,6,60,15.0
4,SP Narine,1,24,12,24,6.0
5,AD Russell,0,14,4,36,15.43
6,AS Roy,0,12,2,36,18.0
7,CV Varun,0,18,2,46,15.33
8,Harpreet Brar,0,12,3,21,10.5
9,Harshit Rana,0,25,6,62,14.88


In [416]:
players = list(set(del_2024[del_2024['match_id'] ==1426305]['batter'].unique()).union(set(del_2024[del_2024['match_id'] ==1426305]['bowler'].unique())))

In [417]:
for i in players:
    print(i,end=",")

N Thushara,HH Pandya,PP Chawla,KL Rahul,A Kamboj,N Wadhera,RG Sharma,Naman Dhir,DJ Hooda,R Shepherd,Ishan Kishan,N Pooran,D Padikkal,D Brevis,Arshad Khan,A Badoni,SA Yadav,Mohsin Khan,KH Pandya,MJ Henry,Ravi Bishnoi,Arjun Tendulkar,Naveen-ul-Haq,MP Stoinis,