In [10]:
from google.colab import drive
# Mount Google Drive at /content/drive; the later cells read their CSV files
# from paths underneath this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **Top 20 Performers & Top 20 Performers by Venue**


---


## **Terminology used:**



> **Total Runs:** The total number of runs scored by a player. In cricket, a run is the basic unit of scoring, representing a single point.

> **Balls Faced:** The total number of deliveries (pitches) a batsman has faced. It indicates the amount of time a player has been at the crease, facing bowlers.

>**Times Out:** The number of times a batsman has been dismissed or got out. In cricket, a player's innings ends when they are out.

>**Most Common Dismissal:** The most frequent manner in which a batsman gets out (e.g., caught, bowled, lbw). It shows a batsman's vulnerability or common mode of dismissal.

>**Batting Average:** A statistical measure representing a player's average number of runs scored per dismissal. It's calculated by dividing the total runs by the number of times out. A higher average indicates better performance.

>**Strike Rate:** A measure of how quickly a batsman scores, expressed as runs per 100 balls faced. It indicates the aggressiveness or efficiency of a batsman.

>**Not Out:** A term indicating that a batsman has not been dismissed by the end of their team's innings.

>**Retired Hurt:** When a batsman leaves the field due to injury without being dismissed and does not return to bat in that innings.

>**Absent Hurt:** Used when a player is unable to bat due to an injury sustained either before the match or during the match but not on the field.

>**Dismissal Type:** The method by which a batsman is dismissed (e.g., caught, bowled, lbw, run out). It provides insight into how players most commonly lose their wicket.



In [11]:
import pandas as pd
import plotly.express as px
import numpy as np

df = pd.read_csv('/content/drive/My Drive/IPL 2008 to 2023/all_season_batting_card2.csv')
# A row counts as "out" for the batting average only when a dismissal type is
# recorded and it is none of 'not out', 'retired hurt' or 'absent hurt'.
df['is_out'] = ~(
    df['dismissal_type'].isna()
    | df['dismissal_type'].isin(['not out', 'retired hurt', 'absent hurt'])
)

# Custom function to determine the most common dismissal type
def most_common_dismissal(x):
    """Return the most frequent dismissal type in ``x``.

    Parameters
    ----------
    x : pandas.Series
        Dismissal-type values for one group; missing values are allowed.

    Returns
    -------
    The first value reported by ``Series.mode()``, or ``'not out'`` when
    ``x`` is empty or contains only missing values.
    """
    # Series.mode() drops missing values by default, so a group made up only
    # of NaN/None produces an empty result; indexing that with .iloc[0] would
    # raise IndexError and abort the whole groupby aggregation.
    modes = x.mode()
    if modes.empty:
        return 'not out'
    return modes.iloc[0]

def _summarize_batting(batting_df, group_keys):
    """Aggregate batting rows into one summary row per group.

    Parameters
    ----------
    batting_df : pandas.DataFrame
        Frame with the 'runs', 'ballsFaced', 'is_out' and 'dismissal_type'
        columns plus every column listed in ``group_keys``.
    group_keys : list of str
        Columns to group by.

    Returns
    -------
    pandas.DataFrame
        The group keys followed by total_runs, total_balls_faced, times_out,
        most_common_dismissal, batting_average and strike_rate.
    """
    summary = batting_df.groupby(group_keys).agg(
        total_runs=('runs', 'sum'),
        total_balls_faced=('ballsFaced', 'sum'),
        times_out=('is_out', 'sum'),
        most_common_dismissal=('dismissal_type', most_common_dismissal),
    ).reset_index()

    # Runs per dismissal. A group that was never out divides by zero and gives
    # +/-inf, which is mapped to 0. The cleaned series is assigned back to the
    # column: calling .replace(..., inplace=True) on summary['batting_average']
    # acts on an intermediate object, which pandas warns about and which does
    # not update the frame once Copy-on-Write is active.
    summary['batting_average'] = (
        summary['total_runs'] / summary['times_out']
    ).replace([np.inf, -np.inf], 0)

    # Runs per 100 balls faced.
    summary['strike_rate'] = (summary['total_runs'] / summary['total_balls_faced']) * 100
    return summary


# Ranking order and axis/hover labels shared by both charts below.
_ranking_columns = ['total_runs', 'strike_rate', 'batting_average']
_batting_labels = {'total_runs': 'Total Runs', 'fullName': 'Player Name', 'strike_rate': 'Strike Rate', 'batting_average': 'Batting Average', 'most_common_dismissal': 'Most Common Dismissal'}

# --- Top 20 (venue, player) combinations --------------------------------
agg_data = _summarize_batting(df, ['venue', 'fullName'])
top_performers_by_venue = agg_data.sort_values(by=_ranking_columns, ascending=False).head(20)

# Bar Plot: total runs per venue, one coloured segment per player
fig_bar_bat_avg = px.bar(top_performers_by_venue, x='venue', y='total_runs', color='fullName',
                         title="Total Runs by Top 20 Performers Across Venues",
                         hover_data=['fullName', 'total_runs', 'strike_rate', 'batting_average', 'most_common_dismissal'],
                         labels=_batting_labels)

fig_bar_bat_avg.show()

# --- Top 20 players across all venues ------------------------------------
agg_data2 = _summarize_batting(df, ['fullName'])
top_performers_overall = agg_data2.sort_values(by=_ranking_columns, ascending=False).head(20)

# Scatter Plot: Strike Rate vs. Batting Average, marker size = total runs
fig_scatter_bat_avg = px.scatter(top_performers_overall, x='batting_average', y='strike_rate', color='fullName',
                                 size='total_runs', hover_name='fullName',
                                 hover_data=['total_runs', 'strike_rate', 'batting_average', 'most_common_dismissal'],
                                 title="Strike Rate vs. Batting Average with Total Runs",
                                 labels=_batting_labels)

fig_scatter_bat_avg.show()


# **Strike Rate by Phase and Innings**
---
## **Terminology Used**

> **Innings:** In cricket, an innings is the period of play during which one team takes its turn to bat. Multiple innings can occur in a single match depending on the format.

>**Striker:** The batsman who is currently facing the bowler's delivery.

>**Phase:** A division of an innings in limited-overs cricket (like T20 or One Day Internationals) based on overs, commonly referred to as Powerplay (initial overs with field restrictions), Middle (middle overs), and Death (final overs).

>**Powerplay (Overs 1-6):** The initial set of overs in a limited-overs match where only two fielders are allowed outside the 30-yard circle, aimed at encouraging aggressive batting.

>**Middle Overs (Overs 7-15):** The phase of the game that comes after the powerplay and before the death overs, usually characterized by consolidation and building the innings.

>**Death Overs (Overs 16-20):** The final overs of an innings in limited-overs cricket where teams often aim to score rapidly.

>**Super Over:** A tiebreaker method used in limited-overs cricket matches where each team bats for one additional over to decide the winner in case of a tie.

>**Runs Off Bat:** Runs scored directly by the batsman hitting the ball, excluding extras like wides or no-balls.

>**Balls Faced:** The number of legal deliveries faced by a batsman.

>**Strike Rate:** A measure of how quickly a batsman scores, calculated as runs scored per 100 balls faced. It's an important metric in assessing a player's aggressiveness and efficiency.

>**Total Runs:** The sum of runs scored by a batsman or a team.

>**Balls Faced:** Refers to the total number of deliveries a batsman has faced.

In [13]:
import pandas as pd
import plotly.express as px
from ipywidgets import widgets, VBox
from IPython.display import display, clear_output
from google.colab import output
# Enable Colab's custom widget manager before the dropdown is created.
output.enable_custom_widget_manager()

# Load the dataset
df = pd.read_csv('/content/drive/My Drive/IPL 2008 to 2023/match_data2.csv')

# Innings numbered 1, 3 or 5 are labelled 'First'; every other value is 'Second'.
df['innings_type'] = df['innings'].map(
    lambda innings_no: 'First' if innings_no in [1, 3, 5] else 'Second'
)

# Selector entries: 'All Players' first, then each distinct striker in sorted order.
players = ['All Players'] + sorted(df['striker'].drop_duplicates().tolist())
player_dropdown = widgets.Dropdown(
    description='Player:',
    options=players,
    value='All Players',  # Default value
)

def update_chart(player):
    """Redraw the strike-rate-by-phase bar chart for ``player``.

    ``player`` is a striker name, or 'All Players' to use every row of ``df``.
    The current cell output is cleared, the dropdown is displayed again and the
    new figure is shown after it.
    """
    rows = df if player == 'All Players' else df[df['striker'] == player]

    # Total runs and balls faced for each (phase, innings type) pair.
    phase_summary = (
        rows.groupby(['Phase', 'innings_type'])
        .agg(total_runs=('runs_off_bat', 'sum'), total_balls_faced=('balls_faced', 'sum'))
        .reset_index()
    )

    # Strike rate: runs per 100 balls faced.
    phase_summary['strike_rate'] = (phase_summary['total_runs'] / phase_summary['total_balls_faced']) * 100

    chart = px.bar(
        phase_summary,
        x='Phase',
        y='strike_rate',
        color='innings_type',
        text='strike_rate',
        barmode='group',
        title=f"Strike Rate by Phase and Innings for {player}",
        labels={'strike_rate': 'Strike Rate', 'Phase': 'Phase', 'innings_type': 'Innings Type'},
        category_orders={"Phase": ["Powerplay", "Middle", "Death", "Super Over"], "innings_type": ["First", "Second"]},
    )
    chart.update_traces(texttemplate='%{text:.2f}', textposition='inside')
    chart.update_layout(
        xaxis_title="Phase",
        yaxis_title="Strike Rate",
        plot_bgcolor="white",
        legend_title="Innings Type",
    )

    # Replace whatever the cell currently shows with the dropdown plus the new chart.
    clear_output(wait=True)
    display(player_dropdown)
    chart.show()


# Initial render for the default selection.
update_chart('All Players')


def on_player_change(change):
    """Observer callback: redraw the chart for the newly selected player."""
    update_chart(change['new'])


# Re-render whenever the dropdown's value changes.
player_dropdown.observe(on_player_change, names='value')


Dropdown(description='Player:', options=('All Players', 'A Ashish Reddy', 'A Badoni', 'A Chandila', 'A Chopra'…

# **Dot Ball Percentage By Phase**
---
## **Terminology Used**

> **Dot Ball:** A delivery bowled by a bowler that the batsman does not score any run off. It's called a "dot" because it's marked with a dot in the scorebook.

>**Dot Ball Percentage:** A statistic that measures the percentage of dot balls faced by a batsman or delivered by a bowler, indicating how often no runs are scored off a delivery.

In [None]:
import pandas as pd
import plotly.express as px
from ipywidgets import widgets, VBox
from IPython.display import display, clear_output

# Load the dataset
df = pd.read_csv('/content/drive/My Drive/IPL 2008 to 2023/match_data2.csv')


# Boolean flag: True where the row's 'dots' value equals 1.
df['dot_ball'] = df['dots'].eq(1)

# Selector entries: 'All Players' first, then each distinct striker in sorted order.
players = ['All Players'] + sorted(df['striker'].drop_duplicates().tolist())
player_dropdown = widgets.Dropdown(
    description='Player:',
    options=players,
    value='All Players',  # Default value
)

def calculate_dot_percentage(filtered_df):
    """Return dot-ball totals and the dot-ball percentage for each phase.

    Parameters
    ----------
    filtered_df : pandas.DataFrame
        Rows providing the 'Phase', 'dot_ball' and 'balls_faced' columns.

    Returns
    -------
    pandas.DataFrame
        One row per phase with the columns 'Phase', 'total_dots',
        'total_balls_faced' and 'dot_percentage', where dot_percentage is
        total_dots / total_balls_faced * 100.
    """
    return (
        filtered_df.groupby('Phase', as_index=False)
        .agg(total_dots=('dot_ball', 'sum'), total_balls_faced=('balls_faced', 'sum'))
        .assign(
            dot_percentage=lambda stats: (stats['total_dots'] / stats['total_balls_faced']) * 100
        )
    )

# Function to update the chart based on the selected player
def update_chart(change):
    """Redraw the dot-ball donut chart for the player named in ``change['new']``.

    Parameters
    ----------
    change : mapping
        Must provide a 'new' entry holding either a striker name or
        'All Players'. The dropdown's change notification and a plain dict
        such as ``{'new': 'All Players'}`` both work.
    """
    selected_player = change['new']

    # Boolean filtering already yields a new frame and nothing below mutates
    # its input, so the full dataset is not copied on every selection.
    if selected_player == 'All Players':
        filtered_df = df
    else:
        filtered_df = df[df['striker'] == selected_player]

    dot_percentage_by_phase = calculate_dot_percentage(filtered_df)

    # Create a new Donut Chart based on the dot percentage by phase
    fig = px.pie(dot_percentage_by_phase, names='Phase', values='dot_percentage',
                 title=f'Dot Ball Percentage by Phase for {selected_player}', hole=0.4)
    # A pie normalises its values, so the built-in 'percent' text would print
    # each phase's share of the summed percentages instead of the dot-ball
    # percentage the title promises. Print the underlying value itself.
    fig.update_traces(texttemplate='%{label}<br>%{value:.1f}%')
    fig.update_layout(legend_title_text='Phase')

    # Clear the previous output and display the updated chart
    clear_output(wait=True)
    display(player_dropdown)
    fig.show()

# Redraw the chart whenever the dropdown's value changes.
player_dropdown.observe(update_chart, names='value')

# Show the selector. update_chart clears the output and displays the dropdown
# again together with the chart, so this first display is replaced right away.
display(player_dropdown)

update_chart({'new': 'All Players'})  # Initial render; update_chart only reads change['new'], so a plain dict is enough


Dropdown(description='Player:', options=('All Players', 'A Ashish Reddy', 'A Badoni', 'A Chandila', 'A Chopra'…

# **Dismissals By Bowling Style**
---
## **Terminology Used**

> **Dismissal Type:** The method by which a batsman is declared out, such as "caught", "bowled", "lbw" (leg before wicket), etc. Different types reflect the various ways a batsman can be dismissed according to cricket laws.

>**Bowling Style:** Refers to the method or technique used by a bowler to deliver the ball. Common styles include "fast", "spin", "leg-spin", "off-spin", etc.

>**Number of Outs:** Refers to the count of dismissals attributed to a bowler or the number of times a batsman has been dismissed in a specific manner.

Support for third party widgets will remain active for the duration of the session. To disable support:

In [165]:
import pandas as pd
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display, clear_output
from google.colab import output
# Enable Colab's custom widget manager before any widget is displayed.
output.enable_custom_widget_manager()

# Load the batting-card dataset; draw_figure and the player dropdown below read from it.
df = pd.read_csv('/content/drive/My Drive/IPL 2008 to 2023/all_season_batting_card2.csv')

def draw_figure(player):
    """Show a bar chart of dismissal counts per bowling style.

    ``player`` is a value of the 'fullName' column, or 'All Players' to count
    dismissals across every row of ``df``.
    """
    # These outcomes are left out because they do not count as wickets for bowlers.
    keep = ~df['dismissal_type'].isin(['not out', 'absent hurt', 'run out', 'retired hurt', 'retired out', 'retired not out', 'obstructing the field'])
    if player != 'All Players':
        keep = (df['fullName'] == player) & keep
    filtered_df = df[keep]

    # Count recorded dismissals per bowling style, largest first.
    outs_by_bowling_style = (
        filtered_df.groupby('Bowling Style')['dismissal_type']
        .count()
        .reset_index()
        .rename(columns={'dismissal_type': 'Number of Outs'})
        .sort_values(by='Number of Outs', ascending=False)
    )

    chart = px.bar(
        outs_by_bowling_style,
        x='Bowling Style',
        y='Number of Outs',
        title="Dismissals by Bowling Style",
        labels={'Number of Outs': 'Number of Outs', 'Bowling Style': 'Bowling Style'},
        color='Number of Outs',
        color_continuous_scale=px.colors.sequential.Viridis,
    )
    chart.update_layout(xaxis_title="Bowling Style", yaxis_title="Number of Outs", plot_bgcolor="white")

    chart.show()

# Player selector: 'All Players' first, then each distinct full name in sorted order.
players = ['All Players'] + sorted(df['fullName'].drop_duplicates().tolist())
player_dropdown = widgets.Dropdown(
    description='Player:',
    options=players,
    value='All Players',
)


def on_player_change(change):
    """Observer callback: clear the output, re-show the dropdown, redraw the chart."""
    clear_output(wait=True)
    display(player_dropdown)
    draw_figure(change['new'])


# Redraw whenever the dropdown's value changes.
player_dropdown.observe(on_player_change, names='value')


# First render: the selector followed by the chart for "All Players".
display(player_dropdown)
draw_figure('All Players')


Dropdown(description='Player:', options=('All Players', 'AB de Villiers', 'Aakash Chopra', 'Aaron Finch', 'Abd…

# **Player Performance Against Each Team**
---
## **Terminology Used**

>**Wicket Type:** Refers to the method by which a batsman is dismissed or gets out. Common types include caught, bowled, leg before wicket (LBW), and run out.

>**Bowling Team:** The team that is currently bowling and fielding, as opposed to the team that is batting.

In [14]:
import pandas as pd
import plotly.express as px
from ipywidgets import Dropdown, Output, VBox
from IPython.display import display, clear_output
from google.colab import output
# np.nan is used below and inside display_treemap. Import numpy in this cell so
# it does not depend on an import left behind by an earlier cell.
import numpy as np

output.enable_custom_widget_manager()
# Load the dataset
df = pd.read_csv('/content/drive/My Drive/IPL 2008 to 2023/match_data2.csv')

# Calculate additional metrics
# A row counts as a dismissal when a wicket type is recorded and it is none of
# 'not out', 'retired hurt' or 'absent hurt'.
df['is_dismissed'] = df['wicket_type'].notnull() & ~(df['wicket_type'].isin(['not out', 'retired hurt', 'absent hurt']))
agg_data = df.groupby(['striker', 'bowling_team'], as_index=False).agg(
    TotalRuns=('runs_off_bat', 'sum'),
    BallsFaced=('balls_faced', 'sum'),
    dismissals=('is_dismissed', 'sum')
)
# Zero dismissals become NaN first, so the average is missing rather than infinite.
agg_data['BattingAverage'] = agg_data['TotalRuns'] / agg_data['dismissals'].replace({0: np.nan})
agg_data['StrikeRate'] = (agg_data['TotalRuns'] / agg_data['BallsFaced']) * 100


batsmen = ['All Players'] + sorted(agg_data['striker'].unique())
batsman_dropdown = Dropdown(options=batsmen, description='Batsman:', value='All Players')

# Readable dropdown label -> column name in agg_data
metric_names = {
    'Total Runs': 'TotalRuns',
    'Strike Rate': 'StrikeRate',
    'Batting Average': 'BattingAverage',
    'Balls Faced': 'BallsFaced'
}

# Dropdown for selecting the value metric for the treemap, using readable names
value_metric_dropdown = Dropdown(options=list(metric_names.keys()), description='Value Metric:', value='Total Runs')

# Output widget that display_treemap renders into
plot_placeholder = Output()

def display_treemap(batsman, value_metric):
    """Render a treemap of one metric per bowling team inside ``plot_placeholder``.

    Parameters
    ----------
    batsman : str
        A value of the 'striker' column of ``agg_data``, or 'All Players' to
        pool every striker per bowling team.
    value_metric : str
        A key of ``metric_names``; the mapped column sets both the tile size
        and the tile colour.
    """
    with plot_placeholder:
        clear_output(wait=True)

        metric_key = metric_names[value_metric]

        if batsman == 'All Players':
            # Pool the raw totals first, then recompute the ratios from them.
            filtered_data = agg_data.groupby('bowling_team', as_index=False).agg(
                TotalRuns=('TotalRuns', 'sum'),
                BallsFaced=('BallsFaced', 'sum'),
                dismissals=('dismissals', 'sum')
            )
            filtered_data['BattingAverage'] = filtered_data['TotalRuns'] / filtered_data['dismissals'].replace({0: np.nan})
            filtered_data['StrikeRate'] = (filtered_data['TotalRuns'] / filtered_data['BallsFaced']) * 100
        else:
            filtered_data = agg_data[agg_data['striker'] == batsman]

        # The metric is the tile area, so a row whose metric is missing, zero
        # or infinite has no drawable tile (BattingAverage is NaN whenever the
        # dismissal count is zero). Drop those rows before plotting.
        # NOTE(review): zero-sum `values` combined with `color` has been seen to
        # make plotly express fail with "Weights sum to zero" - confirm against
        # the installed plotly version.
        metric_values = filtered_data[metric_key]
        plottable = filtered_data[metric_values.gt(0) & metric_values.ne(np.inf)]
        if plottable.empty:
            print(f"No positive '{value_metric}' values to plot for {batsman}.")
            return

        # Creating a Treemap
        fig = px.treemap(plottable, path=['bowling_team'], values=metric_key,
                         color=metric_key, hover_data=['BattingAverage', 'StrikeRate', 'TotalRuns', 'BallsFaced'],
                         color_continuous_scale='RdYlGn', title=f"{batsman}'s Performance Against Each Team")
        fig.update_layout(margin=dict(t=50, l=25, r=25, b=25), height=600, width=1400)
        fig.show()

def on_batsman_change(change):
    """Observer callback: redraw for the new batsman, keeping the current metric."""
    display_treemap(change['new'], value_metric_dropdown.value)


def on_value_metric_change(change):
    """Observer callback: redraw for the new metric, keeping the current batsman."""
    display_treemap(batsman_dropdown.value, change['new'])


# Each dropdown triggers a redraw when its value changes.
batsman_dropdown.observe(on_batsman_change, names='value')
value_metric_dropdown.observe(on_value_metric_change, names='value')

# Lay out both selectors above the output area that holds the treemap.
display(VBox(children=[batsman_dropdown, value_metric_dropdown, plot_placeholder]))

# Initialize with "All Players" and "Total Runs"
display_treemap('All Players', 'Total Runs')


VBox(children=(Dropdown(description='Batsman:', options=('All Players', 'A Ashish Reddy', 'A Badoni', 'A Chand…

# **Performance Home and Away**
---
## **Terminology Used**

>**Home/Away:** "Home" refers to matches played at a team's home venue, while "Away" indicates matches played at the opponent's venue.

>**Fours:** Shots that result in four runs, when the ball reaches the boundary after touching the ground.

>**Sixes:** Shots that result in six runs, typically when the ball is hit over the boundary without touching the ground.

In [15]:
import pandas as pd
import plotly.graph_objects as go
from ipywidgets import Dropdown, VBox, Output
from IPython.display import display, clear_output
from plotly.subplots import make_subplots
from google.colab import output
# np.nan is used below and inside plot_performance. Import numpy in this cell so
# it does not depend on an import left behind by an earlier cell.
import numpy as np

output.enable_custom_widget_manager()
# Load the Dataset
df = pd.read_csv('/content/drive/My Drive/IPL 2008 to 2023/all_season_batting_card2.csv')

# Data preparation
# A row counts as "out" when a dismissal type is recorded and it is none of
# 'not out', 'retired hurt' or 'absent hurt'.
df['is_out'] = df['dismissal_type'].notnull() & ~df['dismissal_type'].isin(['not out', 'retired hurt', 'absent hurt'])
# home == 1 is labelled 'Home'; every other value is labelled 'Away'.
df['match_setting'] = df['home'].apply(lambda x: 'Home' if x == 1 else 'Away')

# Aggregating performance metrics
performance_agg = df.groupby(['fullName', 'match_setting'], as_index=False).agg(
    total_runs=('runs', 'sum'),
    total_balls_faced=('ballsFaced', 'sum'),
    times_out=('is_out', 'sum'),
    fours=('fours', 'sum'),
    sixes=('sixes', 'sum')
)

# Calculating batting average and strike rate.
# Zero denominators are turned into NaN before dividing: a plain division
# yields inf for a never-dismissed batter, which fillna(0) does not catch and
# which would then be passed to the bar chart. With NaN the fillna below maps
# these cases to 0.
performance_agg['batting_average'] = performance_agg['total_runs'] / performance_agg['times_out'].replace(0, np.nan)
performance_agg['strike_rate'] = (performance_agg['total_runs'] / performance_agg['total_balls_faced'].replace(0, np.nan)) * 100
performance_agg = performance_agg.fillna(0)  # Handle NaN values


players = ['All Players'] + sorted(df['fullName'].unique())
player_dropdown = Dropdown(options=players, description='Player:', value='All Players')

# Output widget that plot_performance renders into
plot_output = Output()

def plot_performance(player_name):
    """Draw home-vs-away bar charts of five batting metrics into ``plot_output``.

    ``player_name`` is a value of the 'fullName' column, or 'All Players' to
    pool every player's totals per match setting before computing the ratios.
    """
    if player_name != 'All Players':
        player_data = performance_agg[performance_agg['fullName'] == player_name]
    else:
        # Pool the raw totals, then recompute average and strike rate from them.
        player_data = performance_agg.groupby('match_setting', as_index=False).agg(
            total_runs=('total_runs', 'sum'),
            total_balls_faced=('total_balls_faced', 'sum'),
            times_out=('times_out', 'sum'),
            fours=('fours', 'sum'),
            sixes=('sixes', 'sum'),
        )
        player_data['batting_average'] = player_data['total_runs'] / player_data['times_out'].replace({0: np.nan})
        player_data['strike_rate'] = (player_data['total_runs'] / player_data['total_balls_faced']) * 100

    # 3x2 grid; the bottom row is a single panel spanning both columns.
    fig = make_subplots(
        rows=3,
        cols=2,
        subplot_titles=('Runs', 'Batting Average', 'Strike Rate', 'Sixes', 'Fours', ''),
        specs=[[{}, {}], [{}, {}], [{"colspan": 2}, None]],
        horizontal_spacing=0.1,
        vertical_spacing=0.1,
    )

    # (metric column, (row, col) of its panel, bar colour)
    panels = [
        ('total_runs', (1, 1), 'lightskyblue'),
        ('batting_average', (1, 2), 'mediumseagreen'),
        ('strike_rate', (2, 1), 'tomato'),
        ('sixes', (2, 2), 'gold'),
        ('fours', (3, 1), 'lightpink'),
    ]
    for metric, (panel_row, panel_col), bar_color in panels:
        bars = go.Bar(
            x=player_data['match_setting'],
            y=player_data[metric],
            name=metric.capitalize(),
            marker_color=bar_color,
        )
        fig.add_trace(bars, row=panel_row, col=panel_col)

    fig.update_layout(
        height=800,
        width=800,
        title_text="Performance Home and Away: {}".format(player_name),
        showlegend=False,
    )
    fig.update_traces(marker_line_color='black', marker_line_width=1.5)

    # Replace the previous figure inside the output widget.
    with plot_output:
        clear_output(wait=True)
        fig.show()

# Redraw the figure whenever a different player is selected in the dropdown.
player_dropdown.observe(lambda change: plot_performance(change.new), names='value')

# Display the dropdown above the output widget that plot_performance renders into
display(VBox([player_dropdown, plot_output]))

# Initial plot for the default selection
plot_performance('All Players')


VBox(children=(Dropdown(description='Player:', options=('All Players', 'AB de Villiers', 'Aakash Chopra', 'Aar…