In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ast
import stats
import random

In [323]:
second_innings_run_rates_df = pd.read_csv('Datasets/dataset.csv')
ball_by_ball_2008_to_2022_df = pd.read_csv('Datasets/IPL_Ball_by_Ball_2008_2022.csv')
second_innings_2023_df = pd.read_csv('Datasets/csv2023.csv')
matches_df = pd.read_csv('Datasets/IPL_Matches_2008_2022.csv')
deliveries = pd.read_csv('Datasets/deliveries.csv')

In [324]:
# Define a function to get the extra type and runs
def get_extras(row):
    for extra_type in ['wides', 'noballs', 'byes', 'legbyes']:
        if pd.notna(row[extra_type]):
            return pd.Series([extra_type, row[extra_type]])
    return pd.Series([np.nan, np.nan])

# Apply the function to each row
deliveries[['extra_type', 'extras_run']] = deliveries.apply(get_extras, axis=1)

# Split the 'ball' column into 'overs' and 'ballnumber'
deliveries['overs'], deliveries['ballnumber'] = np.divmod(deliveries['ball'], 1)

# Convert 'ballnumber' to actual ball number by multiplying by 10 and converting to integer
deliveries['ballnumber'] = (deliveries['ballnumber'] * 10).astype(int)

deliveries['isWicketDelivery'] = ~deliveries['wicket_type'].isna().astype(int)

deliveries.drop(columns=['wides', 'noballs', 'byes', 'legbyes', 'season', 'start_date', 'venue', 'other_wicket_type', 'other_player_dismissed', 'ball', 'penalty'], inplace=True)

# Rename the columns
deliveries.rename(columns={'match_id': 'ID', 'player_dismissed': 'player_out', 'runs_off_bat': 'batsman_run', 'striker': 'batter', 'batting_team' : 'BattingTeam'}, inplace=True)
#deliveries = deliveries[['ID', 'innings', 'overs', 'ballnumber', 'batter', 'bowler',
 #      'non-striker', 'extra_type', 'batsman_run', 'extras_run', 'total_run',
  #     'non_boundary', 'isWicketDelivery', 'player_out', 'kind', 'BattingTeam']]

In [325]:
# Stack the two DataFrames
merged_df = pd.concat([ball_by_ball_2008_to_2022_df, deliveries], ignore_index=True)
merged_df

Unnamed: 0,ID,innings,overs,ballnumber,batter,bowler,non-striker,extra_type,batsman_run,extras_run,...,non_boundary,isWicketDelivery,player_out,kind,fielders_involved,BattingTeam,bowling_team,non_striker,extras,wicket_type
0,1312200,1,0.0,1,YBK Jaiswal,Mohammed Shami,JC Buttler,,0,0.0,...,0.0,0,,,,Rajasthan Royals,,,,
1,1312200,1,0.0,2,YBK Jaiswal,Mohammed Shami,JC Buttler,legbyes,0,1.0,...,0.0,0,,,,Rajasthan Royals,,,,
2,1312200,1,0.0,3,JC Buttler,Mohammed Shami,YBK Jaiswal,,1,0.0,...,0.0,0,,,,Rajasthan Royals,,,,
3,1312200,1,0.0,4,YBK Jaiswal,Mohammed Shami,JC Buttler,,0,0.0,...,0.0,0,,,,Rajasthan Royals,,,,
4,1312200,1,0.0,5,YBK Jaiswal,Mohammed Shami,JC Buttler,,0,0.0,...,0.0,0,,,,Rajasthan Royals,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
243812,1370353,2,14.0,1,S Dube,MM Sharma,,,1,,...,,-2,,,,Chennai Super Kings,Gujarat Titans,RA Jadeja,0.0,
243813,1370353,2,14.0,3,RA Jadeja,MM Sharma,,,1,,...,,-2,,,,Chennai Super Kings,Gujarat Titans,S Dube,0.0,
243814,1370353,2,14.0,4,S Dube,MM Sharma,,,1,,...,,-2,,,,Chennai Super Kings,Gujarat Titans,RA Jadeja,0.0,
243815,1370353,2,14.0,5,RA Jadeja,MM Sharma,,,6,,...,,-2,,,,Chennai Super Kings,Gujarat Titans,S Dube,0.0,


In [326]:
total_runs = merged_df.groupby(['ID','innings']).sum()['total_run'].add(1).reset_index()
match_and_tot_runs_df = matches_df.merge(total_runs[['ID','total_run']],left_on='ID',right_on='ID')

In [327]:
# Data pre-processing
match_and_tot_runs_df['Team1'] = match_and_tot_runs_df['Team1'].replace('Delhi Daredevils','Delhi Capitals')
match_and_tot_runs_df['Team2'] = match_and_tot_runs_df['Team2'].replace('Delhi Daredevils','Delhi Capitals')

match_and_tot_runs_df['Team1'] = match_and_tot_runs_df['Team1'].replace('Deccan Chargers','Sunrisers Hyderabad')
match_and_tot_runs_df['Team2'] = match_and_tot_runs_df['Team2'].replace('Deccan Chargers','Sunrisers Hyderabad')

match_and_tot_runs_df['Team1'] = match_and_tot_runs_df['Team1'].replace('Kings XI Punjab','Punjab Kings')
match_and_tot_runs_df['Team2'] = match_and_tot_runs_df['Team2'].replace('Kings XI Punjab','Punjab Kings')


ball_and_match_data_df = match_and_tot_runs_df.merge(ball_by_ball_2008_to_2022_df,on='ID')
ball_and_match_data_df = ball_and_match_data_df.rename(columns={'total_run_y' : 'total_runs_ball', 'total_run_x' : 'innings_total', 'kind' : 'wicket_type'})
ball_and_match_data_df['Team1Players'] = ball_and_match_data_df['Team1Players'].apply(ast.literal_eval)
ball_and_match_data_df['Team2Players'] = ball_and_match_data_df['Team2Players'].apply(ast.literal_eval)
#ball_and_match_data_df['Team1Players'] = ball_and_match_data_df['Team1Players'].apply(set)
#ball_and_match_data_df['Team2Players'] = ball_and_match_data_df['Team2Players'].apply(set)

ball_and_match_data_df.columns

Index(['ID', 'City', 'Date', 'Season', 'MatchNumber', 'Team1', 'Team2',
       'Venue', 'TossWinner', 'TossDecision', 'SuperOver', 'WinningTeam',
       'WonBy', 'Margin', 'method', 'Player_of_Match', 'Team1Players',
       'Team2Players', 'Umpire1', 'Umpire2', 'innings_total', 'innings',
       'overs', 'ballnumber', 'batter', 'bowler', 'non-striker', 'extra_type',
       'batsman_run', 'extras_run', 'total_runs_ball', 'non_boundary',
       'isWicketDelivery', 'player_out', 'wicket_type', 'fielders_involved',
       'BattingTeam'],
      dtype='object')

In [328]:
class BattingStatistics:
    def __init__(self, dataframe):
        self.dataframe = dataframe

    def batting_average(self, player_name, venue=None):
        if venue:
            player_data = self.dataframe[(self.dataframe['batter'] == player_name) & (self.dataframe['City'] == venue)]
        else:
            player_data = self.dataframe[self.dataframe['batter'] == player_name]
        runs_scored = player_data['batsman_run'].sum()
        total_outs = player_data['isWicketDelivery'].sum()
        if total_outs == 0:
            return "N/A"  # Avoid division by zero error
        else:
            return runs_scored / total_outs

    def batting_strike_rate(self, player_name, venue=None):
        if venue:
            player_data = self.dataframe[(self.dataframe['batter'] == player_name) & (self.dataframe['City'] == venue)]
        else:
            player_data = self.dataframe[self.dataframe['batter'] == player_name]
        balls_faced = player_data['batter'].count() - player_data[player_data['extra_type'].notna()].shape[0]
        runs_scored = player_data['batsman_run'].sum()
        if balls_faced == 0:
            return "N/A"  # Avoid division by zero error
        else:
            return (runs_scored / balls_faced) * 100
    
    def average_balls_faced(self, player_name, venue=None):
        if venue:
            player_data = self.dataframe[(self.dataframe['batter'] == player_name) & (self.dataframe['City'] == venue)]
        else:
            player_data = self.dataframe[self.dataframe['batter'] == player_name]
        
        total_balls_faced = player_data['ID'].count() - player_data[player_data['extra_type'] =='wides'].shape[0] - player_data[player_data['extra_type'] =='noballs'].shape[0]

        if venue:
            matches_with_batter = self.dataframe[
                (self.dataframe['Team1Players'].apply(lambda x: player_name in x if isinstance(x, list) else False)) & (self.dataframe['City'] == venue) |
                (self.dataframe['Team2Players'].apply(lambda x: player_name in x if isinstance(x, list) else False)) & (self.dataframe['City'] == venue)
            ]
        else:
            matches_with_batter = self.dataframe[
                (self.dataframe['Team1Players'].apply(lambda x: player_name in x if isinstance(x, list) else False)) |
                (self.dataframe['Team2Players'].apply(lambda x: player_name in x if isinstance(x, list) else False))
        ]        
        total_matches_played = len(matches_with_batter['ID'].unique())
        
        if total_matches_played == 0:
            return 0
        
        average_balls_faced = total_balls_faced / (2*total_matches_played)
        return average_balls_faced
        
    

    def player_venue_batting_stats(self, player_name, venue):
        avg = self.batting_average(player_name, venue)
        strike_rate = self.batting_strike_rate(player_name, venue)
        return avg, strike_rate


# Example usage:
# Initialize the class with the dataframe
batting_stats = BattingStatistics(ball_and_match_data_df)

# Compute batting average for a specific player
avg = batting_stats.batting_average('MS Dhoni')

# Compute batting strike rate for a specific player
strike_rate = batting_stats.batting_strike_rate('MS Dhoni')

avg_balls = batting_stats.average_balls_faced('V Kohli', 'Bangalore')

print(avg_balls)

23.666666666666668


In [329]:
class BowlingStatistics:
    def __init__(self, dataframe):
        self.dataframe = dataframe

    def bowling_average(self, player_name, venue=None):
        if venue:
            player_data = self.dataframe[(self.dataframe['bowler'] == player_name) & (self.dataframe['City'] == venue)]
        else:
            player_data = self.dataframe[self.dataframe['bowler'] == player_name]
        runs_conceded = player_data['total_runs_ball'].sum()
        total_wickets = player_data['isWicketDelivery'].sum()
        if total_wickets == 0:
            return "N/A"  # Avoid division by zero error
        else:
            return runs_conceded / total_wickets

    def economy_rate(self, player_name, venue=None):
        if venue:
            player_data = self.dataframe[(self.dataframe['bowler'] == player_name) & (self.dataframe['City'] == venue)]
        else:
            player_data = self.dataframe[self.dataframe['bowler'] == player_name]
        balls_bowled = player_data['bowler'].count() - player_data[(player_data['extra_type'] == 'wides') | player_data['extra_type'] == 'noballs'].shape[0]
        overs_bowled = balls_bowled/6
        runs_conceded = player_data['total_runs_ball'].sum() - player_data['extras_run'].sum()
        if balls_bowled == 0:
            return "N/A"  # Avoid division by zero error
        else:
            return (runs_conceded / overs_bowled)

    def bowling_strike_rate(self, player_name, venue=None):
        if venue:
            player_data = self.dataframe[(self.dataframe['bowler'] == player_name) & (self.dataframe['City'] == venue)]
        else:
            player_data = self.dataframe[self.dataframe['bowler'] == player_name]
        total_wickets = player_data['isWicketDelivery'].sum()
        balls_bowled = player_data['bowler'].count() - player_data[(player_data['extra_type'] == 'wides') | player_data['extra_type'] == 'noballs'].shape[0]
        if total_wickets == 0:
            return "N/A"  # Avoid division by zero error
        else:
            return (balls_bowled / total_wickets)


    def wides_and_no_balls_per_over(self, bowler_name):
        bowler_data = self.dataframe[self.dataframe['bowler'] == bowler_name]
        total_wides_no_balls = bowler_data[bowler_data['extra_type']=='wides'].shape[0] + bowler_data[bowler_data['extra_type']=='noballs'].shape[0]
        total_balls_bowled = bowler_data['bowler'].count()  - bowler_data[(bowler_data['extra_type'] == 'wides') | bowler_data['extra_type'] == 'noballs'].shape[0]
        total_overs_bowled = total_balls_bowled/6

        if total_overs_bowled == 0:
            return "N/A"  # Avoid division by zero error
        else:
            return total_wides_no_balls / total_overs_bowled

    def byes_and_leg_byes_per_over(self, bowler_name):
        bowler_data = self.dataframe[self.dataframe['bowler'] == bowler_name]
        total_byes_leg_byes = bowler_data[bowler_data['extra_type']=='byes'].shape[0] + bowler_data[bowler_data['extra_type']=='legbyes'].shape[0]
        total_overs_bowled = (bowler_data['bowler'].shape[0] - bowler_data[bowler_data['extra_type'] == 'wides'].shape[0] - bowler_data[bowler_data['extra_type'] =='noballs'].shape[0])/6

        if total_overs_bowled == 0:
            return "N/A"  # Avoid division by zero error
        else:
            return total_byes_leg_byes / total_overs_bowled


    def bowling_probability_score(self, bowler_name):
        bowler_data = self.dataframe[self.dataframe['bowler'] == bowler_name]
        total_matches = bowler_data['ID'].nunique()
        total_overs_bowled = (bowler_data['bowler'].shape[0] - bowler_data[bowler_data['extra_type'] == 'wides'].shape[0] - bowler_data[bowler_data['extra_type'] =='noballs'].shape[0])/6

        if total_matches == 0:
            return 0
        else:
            return total_overs_bowled / total_matches

    def classify_bowlers(self):
        bowlers = self.dataframe['bowler'].unique()
        bowling_scores = [self.bowling_probability_score(bowler) for bowler in bowlers]
        quantiles = np.quantile(bowling_scores, [0.8, 0.6])

        classifications = {}
        for bowler, score in zip(bowlers, bowling_scores):
            if score >= quantiles[0]:
                classifications[bowler] = 'Frontline'
            elif score >= quantiles[1]:
                classifications[bowler] = 'Reliable'
            else:
                classifications[bowler] = 'Part-time'

        return classifications
    
    def get_bowling_classification(self, bowler_name):
        classifications = self.classify_bowlers()
        return classifications[bowler_name]
    

    def probability_bowling_overs(self, bowler_name):
        classifications = self.classify_bowlers()

        if bowler_name not in classifications:
            return {}

        classification = classifications[bowler_name]

        # Assign baseline probabilities based on bowler classification
        if classification == 'Frontline':
            probabilities = {4: 0.75, 3: 0.2, 2: 0.05, 1: 0}
        elif classification == 'Reliable':
            probabilities = {4: 0.5, 3: 0.25, 2: 0.2, 1: 0}
        else:
            # Less probability for part-time bowlers to bowl 3 or 4 overs
            probabilities = {4: 0.05, 3: 0.15, 2: 0.4, 1: 0.4}

        return probabilities

    def probability_bowling_overs2(self, bowler_name, innings_type):
        bowler_data = self.dataframe[self.dataframe['bowler'] == bowler_name]

        if innings_type == '1st':
            total_overs_available = 20 - bowler_data['overs'].sum()
        elif innings_type == '2nd':
            total_overs_available = max(20 - bowler_data['overs'].sum(), 0)
        else:
            return "Invalid innings type"

        if total_overs_available <= 0:
            return 0

        max_overs_to_bowl = min(total_overs_available, 4)  # A bowler can bowl a maximum of 4 overs in T20
        probability_distribution = [1 / max_overs_to_bowl] * max_overs_to_bowl

        return probability_distribution


    def player_venue_bowling_stats(self, player_name, venue):
        avg = self.bowling_average(player_name, venue)
        economy = self.economy_rate(player_name, venue)
        strike_rate = self.bowling_strike_rate(player_name, venue)
        return avg, economy, strike_rate

# Example usage:
# Initialize the class with the dataframe

bowling_stats = BowlingStatistics(ball_and_match_data_df)

classifytest = bowling_stats.classify_bowlers()
classifytest['V Kohli']



'Part-time'

In [330]:
class FieldingStatistics:
    def __init__(self, dataframe):
        self.dataframe = dataframe

    def avg_catches_per_match(self, fielder_name):
        matches_with_fielder = self.dataframe[
            (self.dataframe['fielders_involved'].apply(lambda x: isinstance(x, list) and any(fielder_name.strip().lower() == p.strip().lower() for p in x if isinstance(p, str)))) |
            (self.dataframe['fielders_involved'].str.lower().str.contains(fielder_name.strip().lower(), na=False))
        ]
        total_catches = matches_with_fielder[(matches_with_fielder['wicket_type']== 'caught') | (matches_with_fielder['wicket_type'] == 'caught and bowled') & (matches_with_fielder['fielders_involved'] == fielder_name)].shape[0]
        total_matches = len(matches_with_fielder)
        if total_matches == 0:
            return "N/A"  # Avoid division by zero error
        else:
            return total_catches / total_matches

    def avg_stumpings_per_match(self, wicketkeeper_name):
        matches_with_keeper = self.dataframe[
            (self.dataframe['fielders_involved'].apply(lambda x: isinstance(x, list) and any(wicketkeeper_name.strip().lower() == p.strip().lower() for p in x if isinstance(p, str)))) |
            (self.dataframe['fielders_involved'].str.lower().str.contains(wicketkeeper_name.strip().lower(), na=False))
        ]
        total_stumpings = matches_with_keeper['wicket_type'].apply(lambda x: 1 if 'stumped' in str(x).lower() else 0).sum()
        total_matches = len(matches_with_keeper)
        if total_matches == 0:
            return "N/A"  # Avoid division by zero error
        else:
            return total_stumpings / total_matches

    def avg_run_outs_per_match(self, fielder_name):
        matches_with_fielder = self.dataframe[
            (self.dataframe['fielders_involved'].apply(lambda x: isinstance(x, list) and any(fielder_name.strip().lower() == p.strip().lower() for p in x if isinstance(p, str)))) |
            (self.dataframe['fielders_involved'].str.lower().str.contains(fielder_name.strip().lower(), na=False))
        ]
        total_run_outs = matches_with_fielder['wicket_type'].apply(lambda x: 1 if 'run out' in str(x).lower() else 0).sum()
        total_matches = len(matches_with_fielder)
        if total_matches == 0:
            return np.nan  # Avoid division by zero error
        else:
            return total_run_outs / total_matches


# Example usage:
# Initialize the class with the dataframe
fielding_stats = FieldingStatistics(ball_and_match_data_df)

# Compute average catches per match for a specific fielder
avg_catches = fielding_stats.avg_catches_per_match('V Kohli')

# Compute average run outs per match for a specific fielder
avg_run_outs = fielding_stats.avg_run_outs_per_match('RA Jadeja')

# Compute average stumpings per match for a specific wicketkeeper
avg_stumpings = fielding_stats.avg_stumpings_per_match('MS Bisla')

print(avg_catches, avg_run_outs, avg_stumpings)


0.8727272727272727 0.19811320754716982 0.22580645161290322


In [465]:
ball_and_match_data_df.columns

Index(['ID', 'City', 'Date', 'Season', 'MatchNumber', 'Team1', 'Team2',
       'Venue', 'TossWinner', 'TossDecision', 'SuperOver', 'WinningTeam',
       'WonBy', 'Margin', 'method', 'Player_of_Match', 'Team1Players',
       'Team2Players', 'Umpire1', 'Umpire2', 'innings_total', 'innings',
       'overs', 'ballnumber', 'batter', 'bowler', 'non-striker', 'extra_type',
       'batsman_run', 'extras_run', 'total_runs_ball', 'non_boundary',
       'isWicketDelivery', 'player_out', 'wicket_type', 'fielders_involved',
       'BattingTeam'],
      dtype='object')

In [485]:
class VenueStatistics:
    def __init__(self, df, venue, season_start, season_end):
        self.df = df
        self.venue = venue
        self.season_start = season_start
        self.season_end = season_end

    def average_score_at_venue(self):
        # Filter the dataframe for the specific venue and season range
        venue_df = self.df[(self.df['City'] == self.venue) & 
                           (self.df['Season'] >= self.season_start) & 
                           (self.df['Season'] <= self.season_end)]

        # Calculate the average score at the venue
        avg_score = venue_df['innings_total'].mean()

        return avg_score
    def innings_score_stdev(self):
        # Filter the dataframe for the specific venue and season range
        venue_df = self.df[(self.df['City'] == self.venue) & 
                           (self.df['Season'] >= self.season_start) & 
                           (self.df['Season'] <= self.season_end)]

        # Calculate the standard deviation of scores at the venue
        score_stdev = venue_df['innings_total'].std()

        return score_stdev

    def average_number_of_wickets(self):
        # Filter the dataframe for the specific venue and season range
        venue_df = self.df[(self.df['City'] == self.venue) & 
                           (self.df['Season'] >= self.season_start) & 
                           (self.df['Season'] <= self.season_end)]

        # Calculate the average number of wickets fallen at the venue
        avg_wickets = venue_df['isWicketDelivery'].sum() / venue_df['match_id'].nunique()

        return avg_wickets

In [487]:
venue_stats = VenueStatistics(ball_and_match_data_df, 'Bangalore')
avg_score = venue_stats.average_score_at_venue()
avg_wickets = venue_stats.average_number_of_wickets()
print(avg_score, avg_wickets)

158.10492682603802 11.592307692307692


In [None]:
import numpy as np

def assign_runs(teams, avg_score):
    batting_stats = BattingStatistics(ball_and_match_data_df)
    # Calculate the total batting average of all players
    total_batting_avg = batting_stats.batting_average

    # Calculate the proportion of each player's batting average to the total
    players_df['proportion'] = players_df['batting_average'] / total_batting_avg

    # Assign runs to each player based on their proportion of the total batting average
    players_df['runs'] = np.floor(players_df['proportion'] * avg_score)

    return players_df

def assign_wickets(players_df, avg_wickets):
    # Calculate the total bowling average of all players
    total_bowling_avg = players_df['bowling_average'].sum()

    # Calculate the proportion of each player's bowling average to the total
    players_df['proportion'] = players_df['bowling_average'] / total_bowling_avg

    # Assign wickets to each player based on their proportion of the total bowling average
    players_df['wickets'] = np.floor(players_df['proportion'] * avg_wickets)

    return players_df

In [365]:
import random

def assign_overs_innings1(playing_11, bowling_stats):
    # Shuffle the players
    random.shuffle(playing_11)


    num_players = len(playing_11)
    overs_assigned = np.zeros(num_players, dtype=int)
    remaining_overs = 20
    overs_dict = {player: [] for player in playing_11}  # Dictionary to store overs assigned to each bowler

    # Calculate the bowling probability score for each player
    scores = np.array([bowling_stats.bowling_probability_score(player) for player in playing_11])

    # Pre-calculate bowler classifications
    bowler_classifications = bowling_stats.classify_bowlers()

    last_bowler_idx = None

    while remaining_overs > 0:
        # Only consider players who haven't bowled 4 overs yet and didn't bowl the last over
        can_bowl = (overs_assigned < 4) & (np.arange(num_players) != last_bowler_idx)

        # If no players can bowl, break the loop
        if not np.any(can_bowl):
            break

        # Normalize the scores for the players who can bowl
        current_scores = scores.copy()
        current_scores[~can_bowl] = 0
        current_scores /= current_scores.sum()

        # Choose a player to bowl the next over based on their scores
        chosen_player_idx = np.random.choice(np.arange(num_players), p=current_scores)
        chosen_player = playing_11[chosen_player_idx]

        # Assign the over to the chosen player
        overs_assigned[chosen_player_idx] += 1
        remaining_overs -= 1

        # Classify the chosen player as a frontline, support, or part-time bowler
        bowler_classification = bowler_classifications[chosen_player]

        # Store the over number for the chosen player
        overs_dict[chosen_player].append(20 - remaining_overs)

        # Update the last bowler index
        last_bowler_idx = chosen_player_idx

    return overs_dict

playing_11 = ['PK Garg', 'Abhishek Sharma', 'RA Tripathi', 'AK Markram', 'N Pooran', 'Washington Sundar', 'R Shepherd', 'J Suchith', 'B Kumar', 'Umran Malik', 'Fazalhaq Farooqi']
bowling_stats = BowlingStatistics(ball_and_match_data_df)
overs_assigned = assign_overs_innings1(playing_11, bowling_stats)
print(overs_assigned)

{'AK Markram': [7, 19], 'R Shepherd': [1, 3, 16], 'B Kumar': [4, 12], 'Abhishek Sharma': [5], 'RA Tripathi': [], 'J Suchith': [2, 11, 15], 'Washington Sundar': [6, 8, 10, 18], 'N Pooran': [], 'Fazalhaq Farooqi': [9, 14, 17], 'PK Garg': [], 'Umran Malik': [13, 20]}


In [None]:
import matplotlib.pyplot as plt

# Initialize a dictionary to store the number of overs bowled by each player
overs_bowled = {player: [] for player in playing_11}

# Simulate 500 games
for _ in range(500):
    overs_assigned = assign_overs_innings1(playing_11, bowling_stats)
    for player, overs in overs_assigned.items():
        overs_bowled[player].append(overs)

# Plot the statistics
for player, overs in overs_bowled.items():
    plt.hist(overs, bins=range(6), alpha=0.5, label=player)

plt.xlabel('Number of overs bowled')
plt.ylabel('Frequency')
plt.legend(loc='upper right')
plt.show()

In [None]:
#Final working innings simulator

import random

def simulate_innings(batting_scores, bowling_scores, batting_order, bowling_order):
  """
  Simulates the fall of wickets in a 120-ball cricket innings, tracking balls faced.

  Args:
      batting_scores: A dictionary mapping batsmen names to their batting scores (pre-defined).
      bowling_scores: A dictionary mapping bowlers names to their bowling averages (pre-defined).
      batting_order: A list containing the batting order.

  Returns:
      A tuple containing:
          - wickets_fallen: A list of dismissed batsmen.
          - wicket_takers: A list of bowlers who took wickets (by name).
          - fielders: A list of fielders involved in dismissals (assuming not available for all).
          - overs_at_wicket_fall: A list of overs completed when each wicket fell.
          - balls_faced: A dictionary mapping batsmen to the number of balls faced.
          - bowler_overs: A dictionary tracking the number of overs bowled by each bowler.
          - all_out: A boolean indicating whether the team was all out.
  """

  # Extract bowling order from bowling order dictionary
  original_dict = bowling_order

  # Initialize an empty dictionary for the reverse dictionary
  bowler_overs_dict = {}

  # Iterate over the original dictionary
  for key, values in original_dict.items():
    # Iterate over the list of values
    for value in values:
      # Add the value as a key in the reverse dictionary with the original key as its value
      bowler_overs_dict[value] = key

  # Initialize an empty dictionary for the new dictionary
  bowler_overs_dict_mod = {}

  # Iterate over the original dictionary
  for key, value in bowler_overs_dict.items():
    # Subtract 1 from each key and add it to the new dictionary
    bowler_overs_dict_mod[key - 1] = value

  # Innings details
  wickets_fallen = []
  wicket_takers = []
  fielders = []
  overs = 0
  balls = 0
  balls_faced = {batsman: 0 for batsman in batting_order}
  overs_at_wicket_fall = []
  current_bowler = bowler_overs_dict_mod[overs]  # Keeps track of the bowler currently bowling
  bowlers_used = []  # Keeps track of bowlers who have bowled in the current over
  bowler_overs = {bowler: 0 for bowler in bowling_scores.keys()}  # Tracks overs bowled by each bowler
  dismissed_batsmen = []  # Track dismissed batsmen separately
  all_out = False  # Flag to track all-out

  batting_order_tmp = batting_order.copy()
  # Start with the opening batsman
  batsman_on_strike = batting_order[0]
  batsman_off_strike = batting_order[1]


  while balls < 120 and not all_out:  # Stop simulation on all-out

    # Random noise for batting scores (increased threshold)
    batting_score = batting_scores[batsman_on_strike] + random.gauss(0,30)
    bowling_score = bowling_scores[current_bowler]

    # Check for wicket
    if (batting_score <= bowling_score):  # Check dismissal only if not already out and overs < 20
      wicket_cond=True
      balls_faced[batsman_on_strike] += 1
      balls += 1
      overs = balls // 6 + (balls % 6) / 10  # Calculate overs bowled (e.g., 3.4 overs)
      overs_tracker = balls // 6
      [wickets_fallen.append(batsman_on_strike)]
      wicket_takers.append(current_bowler)  # Assuming bowler score refers to the bowler who bowled him (may need adjustment)
      fielders.append("N/A")  # Assuming fielder information not available for all dismissals
      overs_at_wicket_fall.append(overs)

      dismissed_batsmen.append(batsman_on_strike)  # Track dismissed batsmen separately
      if len(dismissed_batsmen) == 10:  # Check for all-out
        all_out = True
      else:
        # Remove the dismissed batsman from batting order (if it still exists)
        if batsman_on_strike in batting_order:
          batting_order.remove(batsman_on_strike)
          batsman_on_strike = batting_order[0]
        else:

          continue
    else:
      wicket_cond=False
      balls_faced[batsman_on_strike] += 1
      balls += 1
      overs = balls // 6 + (balls % 6) / 10  # Calculate overs bowled (e.g., 3.4 overs)
      overs_tracker = balls // 6
      if random.uniform(0,1) < 0.5:
        temp = batsman_on_strike
        batsman_on_strike = batsman_off_strike
        batsman_off_strike = temp
  

    # Switch strike after over completion (unless wicket taken on last ball)
    if balls % 6 == 0 and balls != 120:
      overs_tracker = balls // 6
      temp = batsman_on_strike
      batsman_on_strike = batsman_off_strike
      batsman_off_strike = temp

      # Change bowlers after each over (excluding the last over)
      bowlers_used = []
      current_bowler = bowler_overs_dict_mod[overs_tracker]  # Reset bowler for new over

    # Enforce bowler change after bowling 6 balls (unless last over or bowler can bowl another over)
    if current_bowler is not None and balls % 6 == 0 and balls != 120 and bowler_overs[current_bowler]:
      bowlers_used.append(current_bowler)
      bowler_overs[current_bowler] += 1  # Increment overs bowled by current bowler
    else:
      current_bowler = bowler_overs_dict_mod[overs_tracker]  # Exclude used bowlers in current over
      bowler_overs[current_bowler] = 0  # Reset overs for new bowler

  # Innings result
  if all_out:
    result = "all out"
  else:
    result = "not all out"

  return wickets_fallen, wicket_takers, fielders, overs_at_wicket_fall, balls_faced, all_out, result



# Example usage (more even scores for a full playing 11)
batting_team = ['RA Sharma', 'AB de Villiers', 'V Kohli', 'AK Markram', 'N Pooran', 'Washington Sundar', 'MS Dhoni', 'RA Jadeja', 'B Kumar', 'JJ Bumrah', 'Amit Mishra']
bowling_team = ['RV Uthappa', 'DA Warner', 'SK Raina', 'Yuvraj Singh', 'PP Shaw', 'RR Pant', 'AR Patel', 'R Powell', 'DS Kulkarni', 'Kuldeep Yadav', 'SN Thakur']

batting_scores = {batsman: fall_of_wickets._calculate_batting_score(batsman, fall_of_wickets.venue) for batsman in batting_team}
bowling_scores = {bowler: fall_of_wickets._calculate_bowling_score(bowler, fall_of_wickets.venue) for bowler in bowling_team}
batting_order = list(batting_scores.keys())

bowling_order = assign_overs_innings1(bowling_team, bowling_stats)

wickets, bowlers, fielders, overs_at_fall, balls_faced_by_batsman, did_all_out_occur, innings_result = simulate_innings(batting_scores, bowling_scores, batting_order, bowling_order)
bowler_overs = bowling_order

print("Wickets Fallen:", wickets)
print("Wicket Takers:", bowlers)  # Now contains bowling scores (could be bowler names if you modify the logic)
print("Overs at Wicket Fall:", overs_at_fall)
print("Balls Faced by Batsmen:", balls_faced_by_batsman)
print("Bowler Overs:", bowler_overs)
print("All Out:", did_all_out_occur)
print("Innings Result:", innings_result)

In [453]:
class FallofWickets1stinnings:
  """
  Simulates the fall of wickets in a cricket innings. (Modified to use innings_simulator)
  """

  def __init__(self, dataframe, batting_team, bowling_team, venue, bowling_order):
    self.dataframe = dataframe
    self.batting_stats = BattingStatistics(dataframe)
    self.bowling_stats = BowlingStatistics(dataframe)
    self.fielding_stats = FieldingStatistics(dataframe)
    self.batting_team = batting_team
    self.bowling_team = bowling_team
    self.venue = venue

  def predict_1st_innings(self):
    """
    Simulates the 1st innings using innings_simulator.
    """

    batting_order = self.batting_team.copy()  # Copy the batting order
    bowling_order = self.bowling_team.copy()  # Copy the bowling order

    # Calculate batting and bowling scores based on player statistics and venue
    batting_scores = {batsman: self._calculate_batting_score(batsman, self.venue) for batsman in batting_order}
    bowling_scores = {bowler: self._calculate_bowling_score(bowler, self.venue) for bowler in bowling_order}

    # Simulate the innings using innings_simulator
    wickets_fallen, wicket_takers, fielders, overs_at_wicket_fall, balls_faced_by_batsman, bowler_overs, did_all_out_occur, innings_result = simulate_innings(
        batting_scores, bowling_scores, batting_order, bowling_order)



    return wickets_fallen, fielders, overs_at_wicket_fall, wicket_takers, balls_faced_by_batsman, bowler_overs, did_all_out_occur, innings_result

  def _calculate_batting_score(self, batsman, venue):
    """
    Calculates a batting score based on player statistics and venue (replace with your logic).
    """
    batsman_avg, batsman_strike_rate, batsman_avg_balls_faced = self.batting_stats.batting_average(batsman, venue), self.batting_stats.batting_strike_rate(batsman, venue), self.batting_stats.average_balls_faced(batsman, venue)
    batsman_avg = 10 if batsman_avg == 'N/A' else batsman_avg  # Assign 0 if average not available
    batsman_strike_rate = 100 if batsman_strike_rate == 'N/A' else batsman_strike_rate  # Assign 100 if strike rate not available
    batsman_avg_balls_faced = 9 if batsman_avg_balls_faced == 'N/A' else batsman_avg_balls_faced  # Assign 20 if balls faced not available
    batting_score = (batsman_avg + batsman_strike_rate + batsman_avg_balls_faced)  # Add more factors as needed
    return batting_score

  def _calculate_bowling_score(self, bowler, venue):
    """
    Calculates a bowling score based on player statistics and venue (replace with your logic).
    """
    bowler_avg, bowler_economy, bowler_strike_rate = self.bowling_stats.bowling_average(bowler, venue), self.bowling_stats.economy_rate(bowler, venue), self.bowling_stats.bowling_strike_rate(bowler, venue)
    bowler_avg = np.random.normal(40, ) if bowler_avg == 'N/A' else bowler_avg  # Assign 40 if average not available
    bowler_economy = 9.5 if bowler_economy == 'N/A' else bowler_economy  # Assign 9.5 if economy rate not available
    bowler_strike_rate = 30 if bowler_strike_rate == 'N/A' else bowler_strike_rate  # Assign 30 if strike rate not available
    bowling_score = (1200 / (bowler_strike_rate + bowler_avg + bowler_economy))  # Adjust formula as needed
    return bowling_score

  # Existing methods with minor tweaks (if necessary)
  def predict_dismissal_type(self, bowler, batsman):
    interactions = self.dataframe[(self.dataframe['batter'] == batsman) & (self.dataframe['bowler'] == bowler) & (self.dataframe['isWicketDelivery'] == 1)]
    if interactions.empty:
        # If there are no historical interactions between the batsman and bowler, use default probabilities
        default_probabilities = {'caught': 0.6, 'bowled': 0.17, 'run-out': 0.1, 'stumped': 0.03, 'caught_and_bowled': 0.03, 'lbw': 0.07}
        dismissal_type = self.pick_most_probable_dismissal(default_probabilities)
        fielder_involved = np.nan
        return dismissal_type, fielder_involved

    dismissal_counts = interactions['wicket_type'].value_counts(normalize=True)
    dismissal_probabilities = dismissal_counts.to_dict()

    # Define default probabilities
    default_probabilities = {'caught': 0.6, 'bowled': 0.17, 'run-out': 0.1, 'stumped': 0.03, 'caught_and_bowled': 0.03, 'lbw': 0.07}

    # Check if any of the dismissal types have 'N/A' probabilities
    if any(value == 'N/A' for value in dismissal_probabilities.values()):
        # If any dismissal type has 'N/A' probability, return default probabilities
        dismissal_type = self.pick_most_probable_dismissal(default_probabilities)
        fielder_involved = self.assign_fielder_involved(dismissal_type, bowler)  # Corrected parameter from batsman to bowler
        return dismissal_type, fielder_involved

    # Handle 'N/A' cases
    for dismissal_type in default_probabilities:
        if dismissal_type not in dismissal_probabilities:
            dismissal_probabilities[dismissal_type] = default_probabilities[dismissal_type]

    dismissal_type = self.pick_most_probable_dismissal(dismissal_probabilities)
    fielder_involved = self.assign_fielder_involved(dismissal_type, bowler)  # Corrected parameter from batsman to bowler
        
    return dismissal_type, fielder_involved    
    

  def pick_most_probable_dismissal(self, dismissal_probabilities):
    sorted_probabilities = sorted(dismissal_probabilities.items(), key=lambda x: x[1], reverse=True)
    return sorted_probabilities[0][0]

  def assign_fielder_involved(self, dismissal_type, bowler):  # Corrected parameter from batsman to bowler
    if dismissal_type in ['bowled', 'lbw', 'stumped']:
      return np.nan

    if dismissal_type == 'run-out':
        # Get the average run-outs per match for each player
      avg_run_outs = {player: self.fielding_stats.avg_run_outs_per_match(player) for player in self.bowling_team}
      # Sort the players by average run-outs in descending order and get the top 4
      fielders = sorted(avg_run_outs, key=avg_run_outs.get, reverse=True)[:4]
    else:
      # Get the average catches per match for each player
      avg_catches = {player: self.fielding_stats.avg_catches_per_match(player) for player in self.bowling_team}
      # Sort the players by average catches in descending order and get the top 4
      fielders = sorted(avg_catches, key=avg_catches.get, reverse=True)[:4]

      # Probabilistically determine the fielder involved
    fielder_involved = random.choice(fielders)

    return fielder_involved