In [95]:
import json
import csv
import os
from collections import defaultdict
import pandas as pd
def compute_batting_fp(runs, fours, sixes, balls, is_duck):
    """
    T20 Fantasy Cricket Batting Points:
      - Run: +1 per run ✅
      - Boundary Bonus: +4 per four hit ✅
      - Six Bonus: +6 per six hit ✅
      - 25 Run Bonus: +4 if runs >= 25 (and below 50)
      - Half-Century Bonus: +8 if runs >= 50 (and below 75)
      - 75 Run Bonus: +12 if runs >= 75 (and below 100)
      - Century Bonus: +16 if runs >= 100 (no additional bonus applied)
      - Duck penalty: if player faced >=1 ball and scored 0, -2. ✅
    """
    
    fp = runs + (fours * 4) + (sixes * 6)
    if runs >= 100:
        fp += 16
    elif runs >= 75:
        fp += 12
    elif runs >= 50:
        fp += 8
    elif runs >= 25:
        fp += 4
    if is_duck and balls > 0 and runs == 0:
        fp -= 2
    #strike rate
    #!!! struke rate bounus/ panelty are only aplicable for the strictly non-bowlers (strict - bowlers are exempt from st.rate criteria)  
    
    return fp


def compute_bowling_fp(wickets, dot_balls, maidens, lbw, bowled, legal_balls, runs_conceded, overs):
    """
    T20 Fantasy Cricket Bowling Points:
      - Wicket (excluding run out): +25 per wicket.✅
      - Bonus for LBW, Bowled: +8 each. ✅
      - Wicket haul bonus: if wickets>=3: +4; >=4: +8; >=5: +12. ✅
      - Dot Ball: +1 per legal dot ball. ✅
      - Maiden Over: +12 per maiden over. ✅
      - Economy bonus (if legal_balls >= 12):
          * economy < 5: +6 ✅
          * 5 <= economy <= 5.99: +4 ✅
          * 6 <= economy <= 7: +2 ✅
          * 10 <= economy <= 11: -2 ✅
          * 11.01 <= economy <= 12: -4 ✅
          * economy > 12: -6 ✅
    """
    fp = wickets * 25
    fp += (lbw + bowled) * 8
    if wickets >= 5:
        fp += 12
    elif wickets >= 4:
        fp += 8
    elif wickets >= 3:
        fp += 4
    fp += dot_balls
    fp += maidens * 12
    if legal_balls >= 12 and overs > 1:
        economy = runs_conceded / overs
        if economy < 5:
            fp += 6
        elif 5 <= economy <= 5.99:
            fp += 4
        elif 6 <= economy <= 7:
            fp += 2
        elif 10 <= economy <= 11:
            fp -= 2
        elif 11.01 <= economy <= 12:
            fp -= 4
        elif economy > 12:
            fp -= 6
    return fp


def compute_fielding_fp(catches, stumps, direct_runouts, indirect_runouts):
    """
    T20 Fantasy Cricket Fielding Points:
      - Catch: +8 per catch. ✅
      - Additionally, if catches >= 3, add one bonus of +4 (only once). ✅
      - Stumping: +12 per stumping. ✅
      - Run out (Direct hit): +12 per direct run out. ✅
      - Run out (Indirect): +6 per indirect run out. ✅
    """
    fp = catches * 8
    if catches >= 3:
        fp += 4
    fp += stumps * 12
    fp += direct_runouts * 12
    fp += indirect_runouts * 6
    return fp

def process_match(json_file):
    # Reset global aggregators for each match processing
    batting_stats = defaultdict(lambda: {"runs": 0, "balls": 0, "fours": 0, "sixes": 0,"is_dismissed":False})
    bowling_stats = defaultdict(lambda: {
        "legal_balls": 0, "dot_balls": 0, "maidens": 0, "runs_conceded": 0,
        "fours_conceded": 0, "sixes_conceded": 0, "wickets": 0,
        "lbw": 0, "bowled": 0,"wides": 0, "noballs": 0, "overs_bowled": set(),
        'overs': 0
    })
    fielding_stats = defaultdict(lambda: {"catches": 0, "stumps": 0, "direct_runouts": 0, "indirect_runouts": 0})
    
    # dict to capture the batting entry order
    batting_entry = {}

    with open(json_file, 'r') as f:
        match = json.load(f)
    info = match.get("info",{})
    if info.get('gender','male') == "female": 
        return None
    balls_per_over = match.get("info", {}).get("balls_per_over", 6) # just 6
    
    for inning in match.get("innings", []):
        entry_counter = 0
        for over_index, over in enumerate(inning.get("overs", [])):
            legal_balls_in_over = 0
            over_runs = 0
            current_bowler = None
            for delivery in over.get("deliveries", []):
                batter = delivery.get("batter")
                non_striker = delivery.get("non_striker")
                if batter and batter not in batting_entry:
                    entry_counter += 1
                    batting_entry[batter] = entry_counter
                if non_striker and non_striker not in batting_entry:
                    entry_counter += 1
                    batting_entry[non_striker] = entry_counter
                bowler = delivery.get("bowler")
                extras = delivery.get("extras", {})
                extras_int = {k: int(v) for k, v in extras.items()}
                runs = delivery.get("runs", {})
                batter_runs = runs.get("batter", 0)
                total_runs = runs.get("total", 0)
                
                # Update batting stats (ignoring wides and no-balls for ball count)
                is_wide = 'wides' in extras
                is_noball = 'noballs' in extras
                if not is_wide and not is_noball:
                    batting_stats[batter]["balls"] += 1 # check for valid batter conceded balls 
                batting_stats[batter]["runs"] += batter_runs
                # bounderies 
                if batter_runs == 4:
                    batting_stats[batter]["fours"] += 1
                if batter_runs == 6:
                    batting_stats[batter]["sixes"] += 1
                
                # batting correct features implementation 
                
                # Update bowling stats
                if current_bowler is None:
                    current_bowler = bowler
                if not is_wide and not is_noball:
                    bowling_stats[bowler]["legal_balls"] += 1
                    legal_balls_in_over += 1
                    
                if is_wide:
                    bowling_stats[bowler]["wides"] += extras_int.get("wides", 0)
                if is_noball:
                    bowling_stats[bowler]["noballs"] += extras_int.get("noballs", 0)
                
                #runs conceded
                bowling_stats[bowler]["runs_conceded"] += (batter_runs +
                                                           extras_int.get("wides", 0) +
                                                           extras_int.get("noballs", 0) +
                                                           extras_int.get("penalty", 0))
                #correctly implemented as per T_20 the rules 
                
                if not is_wide and not is_noball and total_runs == 0: # incorrect check if not correct
                    bowling_stats[bowler]["dot_balls"] += 1
                    
                if batter_runs == 4:
                    bowling_stats[bowler]["fours_conceded"] += 1
                if batter_runs == 6:
                    bowling_stats[bowler]["sixes_conceded"] += 1
                
                # if not is_wide and not is_noball:
                over_runs += total_runs #ALL runs including extras are covered by over_runs
                
                #Wicket info
                if 'wickets' in delivery:
                    for wicket_info in delivery['wickets']: #wicket is a list handle it in that way
                        kind = wicket_info.get("kind","").lower()
                        batsman = wicket_info.get("player_out","")
                        if kind not in ["run out","retired hurt",'retired out','obstructing the field']:
                            batting_stats[batsman]['is_dismissed'] = True                            
                            bowling_stats[bowler]["wickets"] += 1
                            if kind == "lbw":
                                bowling_stats[bowler]["lbw"] += 1
                            elif kind == "bowled":
                                bowling_stats[bowler]["bowled"] += 1
                            elif kind in ['caught','caught and bowled']:
                                if kind == 'caught and bowled':
                                    fielding_stats[bowler]["catches"] += 1   # count of bowler catch is incremented
                                    # bowling_stats[bowler]["caught_bowled"] += 1   # should we count it here or in the fielding cuz it's a caught
                                else:
                                    for fielder in wicket_info.get("fielders",[]):
                                        fname = fielder.get("name")
                                        if fname:
                                            fielding_stats[fname]["catches"] += 1
                            elif kind == 'stumped':
                                for fielder in wicket_info.get("fielders",[]):
                                    fname = fielder.get("name")
                                    if fname:
                                        fielding_stats[fname]["stumps"] += 1
                        elif kind == "run out":
                            fielders = [f.get('name') for f in wicket_info.get('fielders',[]) if f.get('name')]
                            if len(fielders) == 1:
                                fielding_stats[fielders[0]]["direct_runouts"] += 1 
                            elif len(fielders) >= 2:
                                for f in fielders[-2:]:
                                    fielding_stats[f]["indirect_runouts"] += 1 #last 2 player will be rewarded 
                                
                            # wickets are taken care of some rare wickets like - retired hurt, retired out or obstructing the fields are ignored (wait last one should be credited to the fielder shouldn't)
            #end of over : check for madien over (insort no inc in team-total for that over)
            if legal_balls_in_over == balls_per_over and over_runs == 0 and current_bowler:
                 bowling_stats[current_bowler]["maidens"] += 1
            # now it's fine and correctly implemented 
            if current_bowler:
                bowling_stats[current_bowler]["overs_bowled"].add(over_index + 1) # using 1indexed over numbers
                
    # # Compute overs using the actual balls_per_over value
    for bowler, stats in bowling_stats.items():
        stats["overs"] = round(stats["legal_balls"] / balls_per_over,3) if stats["legal_balls"] else 0.0  
    return match,batting_stats,bowling_stats,fielding_stats,batting_entry


def build_player_feature_map(json_file):
    # Reset global stats by processing the match freshly
    if process_match(json_file) is None:
        return None
    match, batting_stats, bowling_stats, fielding_stats,batting_entry = process_match(json_file)
    
    # Extract batting order from info.players (order reflects batting order)
    batting_order = {}
    players_info = match.get("info", {}).get("players", {})
    for team,players in players_info.items():
        idx = 1
        for player, order in batting_entry.items():
            if player in players:
                batting_order[player] = order
                idx += 1          
        for player in players:
            if player not in batting_order:
                batting_order[player] = order
                order += 1 
    registry = match.get("info", {}).get("registry", {}).get("people", {})
    team_rosters = match.get("info",{}).get('players',{})
    all_players = set()
    for team,players in team_rosters.items():
        if isinstance(players,list):
            for player in players:
                all_players.add((player,team))
    # make sure correct use of 11 lineup 
    final_stats = {}
    for player,team in all_players:
        #Batting stats 
        bat = batting_stats.get(player,{"runs": 0, "balls": 0, "fours": 0, "sixes": 0,'is_dismissed':False})
        runs = bat["runs"]
        balls_faced = bat["balls"] #make use ball_faced are logically-correct 
        fours = bat["fours"]
        sixes = bat["sixes"]
        is_dismissed = bat['is_dismissed'] #do we need it here
        batting_fp = compute_batting_fp(runs,fours,sixes,balls_faced,is_dismissed) # TODO include the strike_rate point 
        strike_rate = round((runs/balls_faced * 100),2) if balls_faced > 0 else 0.0 # why don't encapsulate it's fp in batting _fp
        
        #Bowling stats
        bowl = bowling_stats.get(player,{
            "legal_balls": 0, "dot_balls": 0, "maidens": 0, "runs_conceded": 0,
            "fours_conceded": 0, "sixes_conceded": 0, "wickets": 0, "overs": 0.0,
            "lbw": 0, "bowled": 0, "wides": 0, "noballs": 0, "overs_bowled": set(),
            #"hit_wicket": 0, "caught_bowled": 0,
        })
        overs = bowl["overs"]
        economy_rate = round(bowl['runs_conceded'] / overs,2) if overs > 0 else 0.0 # check for correct logic
        
        bowling_fp = compute_bowling_fp(
            bowl["wickets"], bowl["dot_balls"], bowl["maidens"],
            bowl["lbw"], bowl["bowled"], bowl["legal_balls"], bowl["runs_conceded"], overs
        )
        # make sure correct 
        
        #fielding stats
        field = fielding_stats.get(player,{"catches": 0, "stumps": 0, "direct_runouts": 0, "indirect_runouts": 0})
        fielding_fp = compute_fielding_fp(field["catches"], field["stumps"], field["direct_runouts"], field["indirect_runouts"])
        
        #strike rate ponits for batters (only if player did not bowl and faced atleast 10 balls )
        strike_rate_points = 0
        if balls_faced >= 10: # it should only apply to a non-blowler striclty (!isbowler)
            sr = (runs / balls_faced) * 100
            if sr > 170:
                strike_rate_points = 6
            elif 150.01 <= sr <= 170:
                strike_rate_points = 4
            elif 130 <= sr < 150:
                strike_rate_points = 2 
            elif 60 <= sr <= 70:
                strike_rate_points = -2
            elif 50 <= sr <= 59.99:
                strike_rate_points = -4
            elif sr < 50:
                strike_rate_points = -6
        strike_rate_fp = strike_rate_points
        batting_fp += strike_rate_fp # we have to deal it later for allrounders since they are eligible for st.rate 
         
        final_stats[player] = {
            "Team":team,
            "name": player,
            "player_id": registry.get(player, ""),
            "batting_position": batting_order.get(player, 0),
            # Batting features
            "runs": runs,
            "balls": balls_faced,
            "fours": fours,
            "sixes": sixes,
            "strike_rate": strike_rate,
            # Bowling features
            "overs_bowled": sorted(list(bowl.get("overs_bowled", set()))),
            "overs" : bowl.get("overs",0.0),
            "total_balls": bowl.get("legal_balls", 0),
            "dots": bowl.get("dot_balls", 0),
            "maidens": bowl.get("maidens", 0),
            "conceded": bowl.get("runs_conceded", 0),
            "fours_conceded": bowl.get("fours_conceded", 0),
            "sixes_conceded": bowl.get("sixes_conceded", 0),
            "wickets": bowl.get("wickets", 0),
            "LBW": bowl.get("lbw", 0),
            "Bowled": bowl.get("bowled", 0),
            "noballs": bowl.get("noballs", 0),
            "wides": bowl.get("wides", 0),
            "economy_rate": economy_rate,
            # Fielding features
            "catches": field.get("catches", 0),
            "stumping": field.get("stumps", 0),
            "direct_hit": field.get("direct_runouts", 0),
            "indirect_hit": field.get("indirect_runouts", 0),
            # fantasy points
            "strike_rate_fp":strike_rate_fp,
            "batting_fp": batting_fp,
            "bowling_fp": bowling_fp,
            "fielding_fp": fielding_fp,
            "total_fp": batting_fp + bowling_fp + fielding_fp + 4           
        }
    return final_stats

def write_features_to_csv(feature_map,output_csv):
    if not feature_map:
        print("No data to write.")
        return
    columns = list(next(iter(feature_map.values())).keys())
    sorted_features = sorted(feature_map.values(), key=lambda x: (x["Team"], x["batting_position"]))
    with open(output_csv,'w',newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=columns)
        writer.writeheader()
        for features in sorted_features:
            writer.writerow(features)
    print(f"Player feature map written to {output_csv}")

def process_all_matches(global_folder, output_folder):
    # if output folder already exists we assume that data has been already processed no need for further processing 
    if os.path.exists(output_folder):
        print("Matches are already processed!...")
        return 
    os.makedirs(output_folder)
    #Traverse all subfolders and files in the global folder
    for subdir in os.listdir(global_folder):
        subdir_path = os.path.join(global_folder,subdir)
        if os.path.isdir(subdir_path):
            for file in os.listdir(subdir_path):
                if file.endswith(".json"):
                    json_file_path = os.path.join(subdir_path, file)
                    match_id = os.path.splitext(file)[0]
                    print(f"Processing match: {match_id}")
                    try:
                        player_features = build_player_feature_map(json_file_path)
                        output_csv = os.path.join(output_folder, f"{match_id}.csv")
                        write_features_to_csv(player_features, output_csv)
                    except Exception as e:
                        print(f"Error processing {json_file_path}: {e}")                   

if __name__ == "__main__":
    
    global_folder = r"C:\Users\kumar\IPL_Fantasy_Score_Prediction\tests"
    output_folder = r"C:\Users\kumar\IPL_Fantasy_Score_Prediction\tests\ipl_2025_MatchData"  
    process_all_matches(global_folder, output_folder)
    


Processing match: 1473438
Player feature map written to C:\Users\kumar\IPL_Fantasy_Score_Prediction\tests\ipl_2025_MatchData\1473438.csv
Processing match: 1473439
Player feature map written to C:\Users\kumar\IPL_Fantasy_Score_Prediction\tests\ipl_2025_MatchData\1473439.csv
Processing match: 1473440
Player feature map written to C:\Users\kumar\IPL_Fantasy_Score_Prediction\tests\ipl_2025_MatchData\1473440.csv
Processing match: 1473441
Player feature map written to C:\Users\kumar\IPL_Fantasy_Score_Prediction\tests\ipl_2025_MatchData\1473441.csv
Processing match: 1473442
Player feature map written to C:\Users\kumar\IPL_Fantasy_Score_Prediction\tests\ipl_2025_MatchData\1473442.csv
Processing match: 1473443
Player feature map written to C:\Users\kumar\IPL_Fantasy_Score_Prediction\tests\ipl_2025_MatchData\1473443.csv
Processing match: 1473444
Player feature map written to C:\Users\kumar\IPL_Fantasy_Score_Prediction\tests\ipl_2025_MatchData\1473444.csv


In [96]:
import numpy as np
import pandas as pd

In [97]:
match01_player = pd.read_csv('SquadPlayerNames_IndianT20League - Match_1.csv')
match01_player.sort_values(by=['Team','lineupOrder'],inplace=True)
match01_player = match01_player[match01_player['IsPlaying'] != 'NOT_PLAYING']
match01_player

Unnamed: 0,Credits,Player Type,Player Name,Team,IsPlaying,lineupOrder
15,8.0,WK,Quinton de Kock,KKR,PLAYING,1
25,8.0,BAT,Venkatesh Iyer,KKR,PLAYING,2
27,7.5,BAT,Ajinkya Rahane,KKR,PLAYING,3
24,7.5,BAT,Rinku Singh,KKR,PLAYING,4
22,6.5,BAT,Angkrish Raghuvanshi,KKR,PLAYING,5
10,9.0,ALL,Sunil Narine,KKR,PLAYING,6
9,8.5,ALL,Andre Russell,KKR,PLAYING,7
7,7.5,ALL,Ramandeep Singh,KKR,PLAYING,8
40,6.5,BOWL,Spencer Johnson,KKR,PLAYING,9
36,8.0,BOWL,Harshit Rana,KKR,PLAYING,10


In [98]:
team_map = {
    'KKR': 'Kolkata Knight Riders',
    'RCB': 'Royal Challengers Bengaluru'
}

# Define a mapping from the player names in match01_player to the corresponding names in df_stats.
player_map = {
    'Quinton de Kock': 'Q de Kock',#
    'Venkatesh Iyer': 'VR Iyer',#
    'Ajinkya Rahane': 'AM Rahane',#
    'Angkrish Raghuvanshi': 'A Raghuvanshi',#
    'Sunil Narine': 'SP Narine',#
    'Rinku Singh' :'RK Singh',#
    'Andre Russell': 'AD Russell',#
    'Ramandeep Singh': 'Ramandeep Singh',#  # same in both dataframes
    'Spencer Johnson': 'SH Johnson',#
    'Harshit Rana': 'Harshit Rana',#
    'Varun Chakravarthy': 'CV Varun',#
    'Vaibhav Arora': 'VG Arora',#
    # RCB mappings
    'Virat Kohli': 'V Kohli',#
    'Philip Salt': 'PD Salt',#
    'Devdutt Padikkal' : 'D Padikkal',#
    'Rajat Patidar' : 'RM Patidar',#
    'Suyash Sharma' : 'Suyash Sharma',
    'Liam Livingstone': 'LS Livingstone',
    'Jitesh Sharma': 'JM Sharma',
    'Tim David': 'TH David',
    'Krunal Pandya': 'KH Pandya',
    'Rasikh Salam': 'Rasikh Salam',
    'Josh Hazlewood' : 'JR Hazlewood',
    'Yash Dayal' : 'Yash Dayal'
}
match01_player['Team'] = match01_player['Team'].map(team_map)
match01_player['Player Name'] = match01_player['Player Name'].map(player_map)
match01_player = match01_player.dropna(axis=0)
match01_player


Unnamed: 0,Credits,Player Type,Player Name,Team,IsPlaying,lineupOrder
15,8.0,WK,Q de Kock,Kolkata Knight Riders,PLAYING,1
25,8.0,BAT,VR Iyer,Kolkata Knight Riders,PLAYING,2
27,7.5,BAT,AM Rahane,Kolkata Knight Riders,PLAYING,3
24,7.5,BAT,RK Singh,Kolkata Knight Riders,PLAYING,4
22,6.5,BAT,A Raghuvanshi,Kolkata Knight Riders,PLAYING,5
10,9.0,ALL,SP Narine,Kolkata Knight Riders,PLAYING,6
9,8.5,ALL,AD Russell,Kolkata Knight Riders,PLAYING,7
7,7.5,ALL,Ramandeep Singh,Kolkata Knight Riders,PLAYING,8
40,6.5,BOWL,SH Johnson,Kolkata Knight Riders,PLAYING,9
36,8.0,BOWL,Harshit Rana,Kolkata Knight Riders,PLAYING,10


In [99]:
match01 = pd.read_csv(r"C:\Users\kumar\IPL_Fantasy_Score_Prediction\tests\ipl_2025_MatchData\1473438.csv")
match01

Unnamed: 0,Team,name,player_id,batting_position,runs,balls,fours,sixes,strike_rate,overs_bowled,...,economy_rate,catches,stumping,direct_hit,indirect_hit,strike_rate_fp,batting_fp,bowling_fp,fielding_fp,total_fp
0,Kolkata Knight Riders,Q de Kock,372455c4,1,4,5,1,0,80.0,[],...,0.0,0,0,0,0,0,8,0,0,12
1,Kolkata Knight Riders,SP Narine,9d430b40,2,44,26,5,3,169.23,"[8, 10, 12, 14]",...,6.75,0,0,0,0,4,90,31,0,125
2,Kolkata Knight Riders,AM Rahane,29e95537,3,56,31,6,4,180.65,[],...,0.0,0,0,0,0,6,118,0,0,122
3,Kolkata Knight Riders,VR Iyer,a24be938,4,6,7,1,0,85.71,[],...,0.0,0,0,0,0,0,10,0,0,14
4,Kolkata Knight Riders,A Raghuvanshi,d7017798,5,30,22,2,1,136.36,[],...,0.0,0,0,0,0,2,50,0,0,54
5,Kolkata Knight Riders,VG Arora,7c3b3b78,5,0,0,0,0,0.0,"[1, 3, 16]",...,14.0,0,0,0,0,0,0,23,0,27
6,Kolkata Knight Riders,RK Singh,0a509d6b,6,12,10,1,0,120.0,[],...,0.0,1,0,0,0,0,16,0,8,28
7,Kolkata Knight Riders,CV Varun,5b7ab5a9,6,0,0,0,0,0.0,"[4, 7, 9, 11]",...,10.75,0,0,0,0,0,0,30,0,34
8,Kolkata Knight Riders,AD Russell,bbd41817,7,4,3,1,0,133.33,[],...,0.0,0,0,0,0,0,8,0,0,12
9,Kolkata Knight Riders,Ramandeep Singh,be24ead0,8,6,9,0,0,66.67,[],...,0.0,1,0,0,0,0,6,0,8,18


In [100]:
# match01_player = match01_player.merge(match01[['name','total_fp']],left_on='Player Name',right_on='name',how='left')
# match01_player

In [101]:
match01_player = match01_player.join(
    match01.set_index('name')['total_fp'],
    on='Player Name'
)
match01_player.reset_index()



Unnamed: 0,index,Credits,Player Type,Player Name,Team,IsPlaying,lineupOrder,total_fp
0,15,8.0,WK,Q de Kock,Kolkata Knight Riders,PLAYING,1,12
1,25,8.0,BAT,VR Iyer,Kolkata Knight Riders,PLAYING,2,14
2,27,7.5,BAT,AM Rahane,Kolkata Knight Riders,PLAYING,3,122
3,24,7.5,BAT,RK Singh,Kolkata Knight Riders,PLAYING,4,28
4,22,6.5,BAT,A Raghuvanshi,Kolkata Knight Riders,PLAYING,5,54
5,10,9.0,ALL,SP Narine,Kolkata Knight Riders,PLAYING,6,125
6,9,8.5,ALL,AD Russell,Kolkata Knight Riders,PLAYING,7,12
7,7,7.5,ALL,Ramandeep Singh,Kolkata Knight Riders,PLAYING,8,18
8,40,6.5,BOWL,SH Johnson,Kolkata Knight Riders,PLAYING,9,13
9,36,8.0,BOWL,Harshit Rana,Kolkata Knight Riders,PLAYING,10,15


In [102]:
match01_player.reset_index(inplace=True)
match01_player

Unnamed: 0,index,Credits,Player Type,Player Name,Team,IsPlaying,lineupOrder,total_fp
0,15,8.0,WK,Q de Kock,Kolkata Knight Riders,PLAYING,1,12
1,25,8.0,BAT,VR Iyer,Kolkata Knight Riders,PLAYING,2,14
2,27,7.5,BAT,AM Rahane,Kolkata Knight Riders,PLAYING,3,122
3,24,7.5,BAT,RK Singh,Kolkata Knight Riders,PLAYING,4,28
4,22,6.5,BAT,A Raghuvanshi,Kolkata Knight Riders,PLAYING,5,54
5,10,9.0,ALL,SP Narine,Kolkata Knight Riders,PLAYING,6,125
6,9,8.5,ALL,AD Russell,Kolkata Knight Riders,PLAYING,7,12
7,7,7.5,ALL,Ramandeep Singh,Kolkata Knight Riders,PLAYING,8,18
8,40,6.5,BOWL,SH Johnson,Kolkata Knight Riders,PLAYING,9,13
9,36,8.0,BOWL,Harshit Rana,Kolkata Knight Riders,PLAYING,10,15


1-04-25


In [2]:
import pandas as pd 
import pulp 

df = pd.read_csv('SquadPlayerNames_IndianT20League - Match_1.csv')
df = df[df['IsPlaying'] != 'NOT_PLAYING']
df.sort_values(['Team','lineupOrder'],inplace=True)
df


Unnamed: 0,Credits,Player Type,Player Name,Team,IsPlaying,lineupOrder
15,8.0,WK,Quinton de Kock,KKR,PLAYING,1
25,8.0,BAT,Venkatesh Iyer,KKR,PLAYING,2
27,7.5,BAT,Ajinkya Rahane,KKR,PLAYING,3
24,7.5,BAT,Rinku Singh,KKR,PLAYING,4
22,6.5,BAT,Angkrish Raghuvanshi,KKR,PLAYING,5
10,9.0,ALL,Sunil Narine,KKR,PLAYING,6
9,8.5,ALL,Andre Russell,KKR,PLAYING,7
7,7.5,ALL,Ramandeep Singh,KKR,PLAYING,8
40,6.5,BOWL,Spencer Johnson,KKR,PLAYING,9
36,8.0,BOWL,Harshit Rana,KKR,PLAYING,10


In [None]:
team_map = {
    'KKR': 'Kolkata Knight Riders',
    'RCB': 'Royal Challengers Bengaluru'
}

# Define a mapping from the player names in match01_player to the corresponding names in df_stats.
player_map = {
    'Quinton de Kock': 'Q de Kock',#
    'Venkatesh Iyer': 'VR Iyer',#
    'Ajinkya Rahane': 'AM Rahane',#
    'Angkrish Raghuvanshi': 'A Raghuvanshi',#
    'Sunil Narine': 'SP Narine',#
    'Rinku Singh' :'RK Singh',#
    'Andre Russell': 'AD Russell',#
    'Ramandeep Singh': 'Ramandeep Singh',#  # same in both dataframes
    'Spencer Johnson': 'SH Johnson',#
    'Harshit Rana': 'Harshit Rana',#
    'Varun Chakravarthy': 'CV Varun',#
    'Vaibhav Arora': 'VG Arora',#
    # RCB mappings
    'Virat Kohli': 'V Kohli',#
    'Philip Salt': 'PD Salt',#
    'Devdutt Padikkal' : 'D Padikkal',#
    'Rajat Patidar' : 'RM Patidar',#
    'Suyash Sharma' : 'Suyash Sharma',
    'Liam Livingstone': 'LS Livingstone',
    'Jitesh Sharma': 'JM Sharma',
    'Tim David': 'TH David',
    'Krunal Pandya': 'KH Pandya',
    'Rasikh Salam': 'Rasikh Salam',
    'Josh Hazlewood' : 'JR Hazlewood',
    'Yash Dayal' : 'Yash Dayal'
}
df['Team'] = df['Team'].map(team_map)
df['Player Name'] = df['Player Name'].map(player_map)
df


Unnamed: 0,Credits,Player Type,Player Name,Team,IsPlaying,lineupOrder
15,8.0,WK,Q de Kock,Kolkata Knight Riders,PLAYING,1
25,8.0,BAT,VR Iyer,Kolkata Knight Riders,PLAYING,2
27,7.5,BAT,AM Rahane,Kolkata Knight Riders,PLAYING,3
24,7.5,BAT,RK Singh,Kolkata Knight Riders,PLAYING,4
22,6.5,BAT,A Raghuvanshi,Kolkata Knight Riders,PLAYING,5
10,9.0,ALL,SP Narine,Kolkata Knight Riders,PLAYING,6
9,8.5,ALL,AD Russell,Kolkata Knight Riders,PLAYING,7
7,7.5,ALL,Ramandeep Singh,Kolkata Knight Riders,PLAYING,8
40,6.5,BOWL,SH Johnson,Kolkata Knight Riders,PLAYING,9
36,8.0,BOWL,Harshit Rana,Kolkata Knight Riders,PLAYING,10


In [6]:
match_1 = pd.read_csv(r'ipl_2025_MatchData\1473438.csv')
match_1

Unnamed: 0,Team,name,player_id,batting_position,runs,balls,fours,sixes,strike_rate,overs_bowled,...,economy_rate,catches,stumping,direct_hit,indirect_hit,strike_rate_fp,batting_fp,bowling_fp,fielding_fp,total_fp
0,Kolkata Knight Riders,Q de Kock,372455c4,1,4,5,1,0,80.0,[],...,0.0,0,0,0,0,0,8,0,0,12
1,Kolkata Knight Riders,SP Narine,9d430b40,2,44,26,5,3,169.23,"[8, 10, 12, 14]",...,6.75,0,0,0,0,4,90,31,0,125
2,Kolkata Knight Riders,AM Rahane,29e95537,3,56,31,6,4,180.65,[],...,0.0,0,0,0,0,6,118,0,0,122
3,Kolkata Knight Riders,VR Iyer,a24be938,4,6,7,1,0,85.71,[],...,0.0,0,0,0,0,0,10,0,0,14
4,Kolkata Knight Riders,A Raghuvanshi,d7017798,5,30,22,2,1,136.36,[],...,0.0,0,0,0,0,2,50,0,0,54
5,Kolkata Knight Riders,VG Arora,7c3b3b78,5,0,0,0,0,0.0,"[1, 3, 16]",...,14.0,0,0,0,0,0,0,23,0,27
6,Kolkata Knight Riders,RK Singh,0a509d6b,6,12,10,1,0,120.0,[],...,0.0,1,0,0,0,0,16,0,8,28
7,Kolkata Knight Riders,CV Varun,5b7ab5a9,6,0,0,0,0,0.0,"[4, 7, 9, 11]",...,10.75,0,0,0,0,0,0,30,0,34
8,Kolkata Knight Riders,AD Russell,bbd41817,7,4,3,1,0,133.33,[],...,0.0,0,0,0,0,0,8,0,0,12
9,Kolkata Knight Riders,Ramandeep Singh,be24ead0,8,6,9,0,0,66.67,[],...,0.0,1,0,0,0,0,6,0,8,18


In [19]:
# match01_player = match01_player.join(
#     match01.set_index('name')['total_fp'],
#     on='Player Name'
# )
# match01_player.reset_index()

df = df.join(
    match_1.set_index('name')['total_fp'],
    on = 'Player Name'
)


ValueError: columns overlap but no suffix specified: Index(['total_fp'], dtype='object')

In [None]:
# df = df.reset_index()

In [20]:
df

Unnamed: 0,index,Credits,Player Type,Player Name,Team,IsPlaying,lineupOrder,total_fp
0,15,8.0,WK,Q de Kock,Kolkata Knight Riders,PLAYING,1,12.0
1,25,8.0,BAT,VR Iyer,Kolkata Knight Riders,PLAYING,2,14.0
2,27,7.5,BAT,AM Rahane,Kolkata Knight Riders,PLAYING,3,122.0
3,24,7.5,BAT,RK Singh,Kolkata Knight Riders,PLAYING,4,28.0
4,22,6.5,BAT,A Raghuvanshi,Kolkata Knight Riders,PLAYING,5,54.0
5,10,9.0,ALL,SP Narine,Kolkata Knight Riders,PLAYING,6,125.0
6,9,8.5,ALL,AD Russell,Kolkata Knight Riders,PLAYING,7,12.0
7,7,7.5,ALL,Ramandeep Singh,Kolkata Knight Riders,PLAYING,8,18.0
8,40,6.5,BOWL,SH Johnson,Kolkata Knight Riders,PLAYING,9,13.0
9,36,8.0,BOWL,Harshit Rana,Kolkata Knight Riders,PLAYING,10,15.0


In [None]:
df.dropna(inplace=True)
df

Unnamed: 0,index,Credits,Player Type,Player Name,Team,IsPlaying,lineupOrder,total_fp
0,15,8.0,WK,Q de Kock,Kolkata Knight Riders,PLAYING,1,12.0
1,25,8.0,BAT,VR Iyer,Kolkata Knight Riders,PLAYING,2,14.0
2,27,7.5,BAT,AM Rahane,Kolkata Knight Riders,PLAYING,3,122.0
3,24,7.5,BAT,RK Singh,Kolkata Knight Riders,PLAYING,4,28.0
4,22,6.5,BAT,A Raghuvanshi,Kolkata Knight Riders,PLAYING,5,54.0
5,10,9.0,ALL,SP Narine,Kolkata Knight Riders,PLAYING,6,125.0
6,9,8.5,ALL,AD Russell,Kolkata Knight Riders,PLAYING,7,12.0
7,7,7.5,ALL,Ramandeep Singh,Kolkata Knight Riders,PLAYING,8,18.0
8,40,6.5,BOWL,SH Johnson,Kolkata Knight Riders,PLAYING,9,13.0
9,36,8.0,BOWL,Harshit Rana,Kolkata Knight Riders,PLAYING,10,15.0


In [28]:
def PredictTeam(df: pd.DataFrame):
    """ 
    The optimization maximizes the total fanstasy points while satisfying the following Contraints:-
    -Exactly 11 pplayers are selected.
    -The total credits of selected players does not exceed 100
    -For each player type (role) the selection is between the specified minimum and maximum limits.
    -No more than the maximum allowed players are selected from any single real-life team.
    
    :params df: processed DataFrame containing Players details including expected fanstasy score
    : return : list of ids of 11 players (maybe including their names.)
    """
    #Dream 11 constraints 
    Total_Players = 11
    MAX_TOTAL_CREDITS = 100
    MIN_PER_ROLE = 1
    MAX_PER_ROLE = 4
    MAX_PER_TEAM = 7  # Adjust this based on your requirements
    roles = ['BAT','BOWL','WK','ALL']
    prob = pulp.LpProblem(name='OPtimal_Team',sense=pulp.LpMaximize)
    decision_var = {i : pulp.LpVariable(f'x_{i}',cat='Binary') for i in df.index}
    prob += pulp.lpSum(df.loc[i,'total_fp']* decision_var[i] for i in df.index), 'TotalFantasyPoints'
    prob += pulp.lpSum(decision_var[i] for i in df.index) == Total_Players, 'TotalPlayers'
    prob += pulp.lpSum(df.loc[i,'Credits']*decision_var[i] for i in df.index) <= MAX_TOTAL_CREDITS ,'CreditLimit'
    for role in roles:
        role_indices = df.index[df['Player Type'] == role].tolist()
        if role_indices:
            prob += pulp.lpSum(decision_var[i] for i in role_indices) >= MIN_PER_ROLE, f'MIN_{role}'
            prob += pulp.lpSum(decision_var[i] for i in role_indices) <= MAX_PER_ROLE, f'MAX_{role}'
    for team in df['Team'].unique():
        team_indices = df.index[df['Team'] == team].tolist()
        prob += pulp.lpSum(decision_var[i] for i in team_indices) <= MAX_PER_TEAM, f'TeamLimt_{team}'
    
    #solve the optimization problem 
    solution_status = prob.solve()
    # print(prob)
    print("Optimization Status:", pulp.LpStatus[solution_status])
    selected_indices = [i for i in df.index if pulp.value(decision_var[i] == 1)]
    selected_team_df = df.loc[selected_indices]
    return selected_team_df



In [32]:
optimal_team_df = PredictTeam(df)
optimal_team_df.sort_values(by=['total_fp'],ascending= False)
# print("\nSelected Dream11 Team:")
# print(optimal_team_df[['Player Name', 'Player Type', 'Credits', 'Team', 'total_fp', 'lineupOrder']])
total_fp_selected = optimal_team_df['total_fp'].sum()
total_credits_selected = optimal_team_df['Credits'].sum()
print(f"\nTotal Fantasy Points: {total_fp_selected}")
print(f"Total Credits Used: {total_credits_selected}")

Optimization Status: Optimal

Total Fantasy Points: 274.0
Total Credits Used: 97.5


In [33]:
optimal_team_df

Unnamed: 0,index,Credits,Player Type,Player Name,Team,IsPlaying,lineupOrder,total_fp
0,15,8.0,WK,Q de Kock,Kolkata Knight Riders,PLAYING,1,12.0
1,25,8.0,BAT,VR Iyer,Kolkata Knight Riders,PLAYING,2,14.0
3,24,7.5,BAT,RK Singh,Kolkata Knight Riders,PLAYING,4,28.0
6,9,8.5,ALL,AD Russell,Kolkata Knight Riders,PLAYING,7,12.0
7,7,7.5,ALL,Ramandeep Singh,Kolkata Knight Riders,PLAYING,8,18.0
8,40,6.5,BOWL,SH Johnson,Kolkata Knight Riders,PLAYING,9,13.0
9,36,8.0,BOWL,Harshit Rana,Kolkata Knight Riders,PLAYING,10,15.0
13,38,6.5,BOWL,VG Arora,Kolkata Knight Riders,X_FACTOR_SUBSTITUTE,14,27.0
19,4,8.0,ALL,LS Livingstone,Royal Challengers Bengaluru,PLAYING,4,37.0
20,11,7.5,WK,JM Sharma,Royal Challengers Bengaluru,PLAYING,5,40.0


In [25]:
optimal_team = PredictTeam(df)
 

OPtimal_Team:
MAXIMIZE
12.0*x_0 + 14.0*x_1 + 34.0*x_10 + 27.0*x_13 + 109.0*x_16 + 122.0*x_17 + 74.0*x_18 + 37.0*x_19 + 122.0*x_2 + 40.0*x_20 + 4.0*x_21 + 107.0*x_22 + 41.0*x_23 + 41.0*x_24 + 73.0*x_25 + 36.0*x_26 + 18.0*x_27 + 28.0*x_3 + 54.0*x_4 + 125.0*x_5 + 12.0*x_6 + 18.0*x_7 + 13.0*x_8 + 15.0*x_9 + 0.0
SUBJECT TO
TotalPlayers: x_0 + x_1 + x_10 + x_13 + x_16 + x_17 + x_18 + x_19 + x_2 + x_20
 + x_21 + x_22 + x_23 + x_24 + x_25 + x_26 + x_27 + x_3 + x_4 + x_5 + x_6
 + x_7 + x_8 + x_9 = 11

CreditLimit: 8 x_0 + 8 x_1 + 9 x_10 + 6.5 x_13 + 9 x_16 + 8.5 x_17 + 8 x_18
 + 8 x_19 + 7.5 x_2 + 7.5 x_20 + 7.5 x_21 + 7.5 x_22 + 6.5 x_23 + 6 x_24
 + 8.5 x_25 + 7 x_26 + 7 x_27 + 7.5 x_3 + 6.5 x_4 + 9 x_5 + 8.5 x_6 + 7.5 x_7
 + 6.5 x_8 + 8 x_9 <= 100

MIN_BAT: x_1 + x_16 + x_18 + x_2 + x_21 + x_27 + x_3 + x_4 >= 1

MAX_BAT: x_1 + x_16 + x_18 + x_2 + x_21 + x_27 + x_3 + x_4 <= 4

MIN_BOWL: x_10 + x_13 + x_23 + x_24 + x_25 + x_26 + x_8 + x_9 >= 1

MAX_BOWL: x_10 + x_13 + x_23 + x_24 + x_25 + x_26 

In [26]:
optimal_team

Unnamed: 0,index,Credits,Player Type,Player Name,Team,IsPlaying,lineupOrder,total_fp
0,15,8.0,WK,Q de Kock,Kolkata Knight Riders,PLAYING,1,12.0
1,25,8.0,BAT,VR Iyer,Kolkata Knight Riders,PLAYING,2,14.0
3,24,7.5,BAT,RK Singh,Kolkata Knight Riders,PLAYING,4,28.0
6,9,8.5,ALL,AD Russell,Kolkata Knight Riders,PLAYING,7,12.0
7,7,7.5,ALL,Ramandeep Singh,Kolkata Knight Riders,PLAYING,8,18.0
8,40,6.5,BOWL,SH Johnson,Kolkata Knight Riders,PLAYING,9,13.0
9,36,8.0,BOWL,Harshit Rana,Kolkata Knight Riders,PLAYING,10,15.0
13,38,6.5,BOWL,VG Arora,Kolkata Knight Riders,X_FACTOR_SUBSTITUTE,14,27.0
19,4,8.0,ALL,LS Livingstone,Royal Challengers Bengaluru,PLAYING,4,37.0
20,11,7.5,WK,JM Sharma,Royal Challengers Bengaluru,PLAYING,5,40.0
