In [1]:
import pandas as pd
import numpy as np
from bayes_opt import BayesianOptimization
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

# Set dark mode
plt.rcParams.update(
    {
        "font.size": 12,
        "figure.figsize": (5, 5),
        "figure.dpi": 300,
        "font.family": "Courier New, monospace",
        "axes.facecolor": "#282828",
        "axes.edgecolor": "#282828",
    }
)


from src.plotting import plot_elo_ratings
from src.elo import calculate_elo_ratings
from src.constants import TEAM_COLORS, DEFUNCT_TEAMS

In [2]:
matches = pd.read_csv("./data/IPL_Matches_2008_2022.csv")
matches["Date"] = pd.to_datetime(matches["Date"])
matches = matches.sort_values(by="Date", ignore_index=True)

# Deccan Chargers were sold and renamed to Sunrisers Hyderabad
# Delhi Daredevils were renamed to Delhi Capitals
# Rising Pune Supergiants were renamed to Rising Pune Supergiant
# Kings XI Punjab were renamed to Punjab Kings
team_renames = {
    "Deccan Chargers": "Sunrisers Hyderabad",
    "Delhi Daredevils": "Delhi Capitals",
    "Rising Pune Supergiants": "Rising Pune Supergiant",
    "Kings XI Punjab": "Punjab Kings",
}
team_name_cols = ["Team1", "Team2", "TossWinner", "WinningTeam"]
for col in team_name_cols:
    for old_name, new_name in team_renames.items():
        matches[col] = matches[col].replace(old_name, new_name)
matches["LosingTeam"] = matches.apply(
    lambda row: row["Team2"] if row["Team1"] == row["WinningTeam"] else row["Team1"],
    axis=1,
)

In [3]:
matches.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 950 entries, 0 to 949
Data columns (total 21 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   ID               950 non-null    int64         
 1   City             899 non-null    object        
 2   Date             950 non-null    datetime64[ns]
 3   Season           950 non-null    object        
 4   MatchNumber      950 non-null    object        
 5   Team1            950 non-null    object        
 6   Team2            950 non-null    object        
 7   Venue            950 non-null    object        
 8   TossWinner       950 non-null    object        
 9   TossDecision     950 non-null    object        
 10  SuperOver        946 non-null    object        
 11  WinningTeam      946 non-null    object        
 12  WonBy            950 non-null    object        
 13  Margin           932 non-null    float64       
 14  method           19 non-null     object   

In [4]:
all_teams = matches["Team1"].unique()
all_teams

array(['Royal Challengers Bangalore', 'Punjab Kings', 'Delhi Capitals',
       'Mumbai Indians', 'Kolkata Knight Riders', 'Rajasthan Royals',
       'Sunrisers Hyderabad', 'Chennai Super Kings',
       'Kochi Tuskers Kerala', 'Pune Warriors', 'Gujarat Lions',
       'Rising Pune Supergiant', 'Lucknow Super Giants', 'Gujarat Titans'],
      dtype=object)

In [5]:
# Find all seasons where a team played in the IPL
team_seasons = {}
for team in all_teams:
    team_seasons[team] = (
        matches.loc[(matches.Team1 == team) | (matches.Team2 == team)]
        .Season.unique()
        .tolist()
    )

In [6]:
teams_in_season = matches.groupby("Season").agg({"Team1": set}).Team1.apply(list)
dates_in_season = matches.groupby("Season").agg({"Date": set}).Date.apply(list)

## Optimize K-, home advantage, and toss advantage factors

In [7]:
def black_box_function(K, home_advantage, toss_advantage):
    elo_df = calculate_elo_ratings(
        matches,
        K=K,
        home_advantage=home_advantage,
        toss_advantage=toss_advantage,
    )
    return -(
        (elo_df["Team1_expected"] - elo_df["Result"]) ** 2
        + (elo_df["Team2_expected"] - (1 - elo_df["Result"])) ** 2
    ).mean()

In [8]:
pbounds = {"K": (0, 100), "home_advantage": (0, 100), "toss_advantage": (0, 100)}

In [9]:
optimizer = BayesianOptimization(
    f=black_box_function,
    pbounds=pbounds,
    verbose=1,
)

In [10]:
optimizer.maximize(
    init_points=10,
    n_iter=200,
)

|   iter    |  target   |     K     | home_a... | toss_a... |
-------------------------------------------------------------
| [95m12       [0m | [95m-0.5     [0m | [95m0.0      [0m | [95m4.29e-15 [0m | [95m0.0      [0m |
| [95m15       [0m | [95m-0.4996  [0m | [95m0.0      [0m | [95m13.08    [0m | [95m19.51    [0m |
| [95m16       [0m | [95m-0.499   [0m | [95m12.51    [0m | [95m10.87    [0m | [95m0.0      [0m |
| [95m18       [0m | [95m-0.4976  [0m | [95m8.016    [0m | [95m6.426    [0m | [95m10.51    [0m |
| [95m26       [0m | [95m-0.4976  [0m | [95m7.107    [0m | [95m8.497    [0m | [95m11.66    [0m |
| [95m27       [0m | [95m-0.4976  [0m | [95m6.666    [0m | [95m6.046    [0m | [95m12.47    [0m |
| [95m28       [0m | [95m-0.4975  [0m | [95m7.021    [0m | [95m6.291    [0m | [95m12.21    [0m |
| [95m29       [0m | [95m-0.4975  [0m | [95m6.819    [0m | [95m6.26     [0m | [95m12.17    [0m |
| [95m44       [0m

In [11]:
print(optimizer.max["params"])

{'K': 6.872074934631704, 'home_advantage': 5.4615835376601, 'toss_advantage': 11.168179637635987}


## Use the K and home advantage factors


In [12]:
elo_df = calculate_elo_ratings(
    matches,
    **optimizer.max["params"],
)

elo_df.head(2)

Unnamed: 0,ID,City,Date,Season,MatchNumber,Team1,Team2,Venue,TossWinner,TossDecision,...,Umpire1,Umpire2,LosingTeam,Team1_elo_before,Team2_elo_before,Team1_expected,Team2_expected,Result,Team1_elo_after,Team2_elo_after
0,335982,Bangalore,2008-04-18,2007/08,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,...,Asad Rauf,RE Koertzen,Royal Challengers Bangalore,1500.0,1500.0,0.523914,0.476086,0,1496.399624,1503.600376
1,335983,Chandigarh,2008-04-19,2007/08,2,Punjab Kings,Chennai Super Kings,"Punjab Cricket Association Stadium, Mohali",Chennai Super Kings,bat,...,MR Benson,SL Shastri,Punjab Kings,1500.0,1500.0,0.491788,0.508212,0,1496.620394,1503.379606


In [13]:
team_elo = elo_df.melt(
    id_vars=[
        "ID",
        "Season",
        "Date",
        "Team1",
        "Team2",
    ],
    value_vars=["Team1_elo_after", "Team2_elo_after"],
    var_name="Team",
    value_name="Elo",
)
team_elo["Team"] = team_elo.apply(lambda row: row[row["Team"].split("_")[0]], axis=1)
team_elo = team_elo.sort_values(by=["Date", "ID"], ignore_index=True)
team_elo = team_elo.drop(columns=["Team1", "Team2"])
team_elo.head(2)

Unnamed: 0,ID,Season,Date,Team,Elo
0,335982,2007/08,2008-04-18,Royal Challengers Bangalore,1496.399624
1,335982,2007/08,2008-04-18,Kolkata Knight Riders,1503.600376


In [14]:
fig = plot_elo_ratings(team_elo, False)
fig.update_layout()
fig.show()

### Lowest all time elo


In [15]:
lowest_elo = elo_df[["Team1_elo_after", "Team2_elo_after"]].min().min()
lowest_elo_match = elo_df.loc[
    (elo_df.Team1_elo_after == lowest_elo) | (elo_df.Team2_elo_after == lowest_elo)
].iloc[0]
lowest_elo_team = (
    lowest_elo_match.Team1
    if lowest_elo_match.Team1_elo_after == lowest_elo
    else lowest_elo_match.Team2
)
other_team = (
    lowest_elo_match.Team1
    if lowest_elo_match.Team1_elo_after != lowest_elo
    else lowest_elo_match.Team2
)
print(
    f"On {lowest_elo_match.Date.date()}, {lowest_elo_team} lost to {other_team} by {lowest_elo_match.Margin:.0f} {lowest_elo_match.WonBy.lower()} to reach an all time ELO low of {lowest_elo:.2f}."
)

On 2013-05-11, Pune Warriors lost to Mumbai Indians by 5 wickets to reach an all time ELO low of 1448.27.


### Highest all time elo


In [16]:
highest_elo = elo_df[["Team1_elo_after", "Team2_elo_after"]].max().max()
highest_elo_match = elo_df.loc[
    (elo_df.Team1_elo_after == highest_elo) | (elo_df.Team2_elo_after == highest_elo)
].iloc[0]
highest_elo_team = (
    highest_elo_match.Team1
    if highest_elo_match.Team1_elo_after == highest_elo
    else highest_elo_match.Team2
)
other_team = (
    highest_elo_match.Team1
    if highest_elo_match.Team1_elo_after != highest_elo
    else highest_elo_match.Team2
)
print(
    f"On {highest_elo_match.Date.date()}, {highest_elo_team} beat {other_team} by {highest_elo_match.Margin:.0f} {highest_elo_match.WonBy.lower()} to reach an all time ELO high of {highest_elo:.2f}."
)

On 2014-05-13, Chennai Super Kings beat Rajasthan Royals by 5 wickets to reach an all time ELO high of 1538.62.


### Biggest upset and win of all time


In [17]:
# Elo difference before the match
elo_df["Elo_difference_before"] = abs(
    elo_df["Team1_elo_before"] - elo_df["Team2_elo_before"]
)
# Which team had the lower Elo rating before the match?
elo_df["Team_w_lower_before_elo"] = elo_df.apply(
    lambda row: row[
        "Team1" if row["Team1_elo_before"] <= row["Team2_elo_before"] else "Team2"
    ],
    axis=1,
)
elo_df["Team_w_higher_before_elo"] = elo_df.apply(
    lambda row: row[
        "Team2" if row["Team1_elo_before"] <= row["Team2_elo_before"] else "Team1"
    ],
    axis=1,
)

### Biggest wins by team

For a team, the biggest win is the game with the biggest Elo difference before the match, with the team having the lower Elo rating before the match


In [18]:
biggest_win = {}
for key, grp in elo_df.groupby("WinningTeam"):
    # find all games where the winning team had the lower Elo rating before the match
    games_when_team_was_lower_elo = grp.loc[
        grp["Team_w_lower_before_elo"] == key
    ].copy()
    games_when_team_was_lower_elo["Opposition"] = games_when_team_was_lower_elo.apply(
        lambda row: row[
            "Team2" if row["Team1"] == key else "Team1"
        ],  # find the name of the other team
        axis=1,
    )
    games_when_team_was_lower_elo["TeamElo"] = games_when_team_was_lower_elo.apply(
        lambda row: row[
            "Team1_elo_before" if row["Team1"] == key else "Team2_elo_before"
        ],  # find the name of the other team
        axis=1,
    )
    games_when_team_was_lower_elo[
        "OppositionElo"
    ] = games_when_team_was_lower_elo.apply(
        lambda row: row[
            "Team2_elo_before" if row["Team1"] == key else "Team1_elo_before"
        ],  # find the name of the other team
        axis=1,
    )
    biggest_win[key] = games_when_team_was_lower_elo.loc[
        games_when_team_was_lower_elo.Elo_difference_before.idxmax(),
        [
            "ID",
            "Date",
            "MatchNumber",
            "Opposition",
            "Margin",
            "WonBy",
            "TeamElo",
            "OppositionElo",
            "Elo_difference_before",
        ],
    ]
biggest_win = pd.DataFrame(biggest_win).T
biggest_win.index.name = "Team"
biggest_win = biggest_win.sort_values(by="Elo_difference_before", ascending=False)

biggest_win

Unnamed: 0_level_0,ID,Date,MatchNumber,Opposition,Margin,WonBy,TeamElo,OppositionElo,Elo_difference_before
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Delhi Capitals,829801,2015-05-12 00:00:00,49,Chennai Super Kings,6.0,Wickets,1459.052426,1530.203914,71.151487
Pune Warriors,598061,2013-05-15 00:00:00,65,Kolkata Knight Riders,7.0,Runs,1448.27353,1506.064071,57.790541
Royal Challengers Bangalore,1178414,2019-04-21 00:00:00,39,Chennai Super Kings,1.0,Runs,1473.987874,1531.354122,57.366249
Punjab Kings,1216517,2020-10-18 00:00:00,36,Mumbai Indians,,SuperOver,1477.546071,1529.908138,52.362067
Kolkata Knight Riders,392233,2009-05-20 00:00:00,53,Rajasthan Royals,4.0,Wickets,1472.466544,1519.698422,47.231878
Sunrisers Hyderabad,548376,2012-05-20 00:00:00,71,Royal Challengers Bangalore,9.0,Runs,1468.74474,1514.405691,45.660951
Rajasthan Royals,1254089,2021-10-02 00:00:00,47,Chennai Super Kings,7.0,Wickets,1484.114743,1527.525145,43.410402
Rising Pune Supergiant,1082618,2017-04-24 00:00:00,28,Mumbai Indians,3.0,Runs,1492.108473,1525.208714,33.100241
Mumbai Indians,1304097,2022-05-06 00:00:00,51,Gujarat Titans,5.0,Runs,1488.943827,1519.050204,30.106378
Chennai Super Kings,392202,2009-04-30 00:00:00,22,Rajasthan Royals,38.0,Runs,1497.209234,1521.033585,23.824351


### Biggest upset for team

For a team, the biggest upset is the game with the highest Elo difference before the game, where they had the higher Elo rating before the match, but lost.


In [19]:
biggest_upset = {}
for key, grp in elo_df.groupby("LosingTeam"):
    # find all games where the winning team had the lower Elo rating before the match
    games_when_team_was_higher_elo = grp.loc[
        grp["Team_w_lower_before_elo"] != key
    ].copy()
    games_when_team_was_higher_elo["Opposition"] = games_when_team_was_higher_elo.apply(
        lambda row: row[
            "Team2" if row["Team1"] == key else "Team1"
        ],  # find the name of the other team
        axis=1,
    )
    games_when_team_was_higher_elo["TeamElo"] = games_when_team_was_higher_elo.apply(
        lambda row: row[
            "Team1_elo_before" if row["Team1"] == key else "Team2_elo_before"
        ],  # find the name of the other team
        axis=1,
    )
    games_when_team_was_higher_elo[
        "OppositionElo"
    ] = games_when_team_was_higher_elo.apply(
        lambda row: row[
            "Team2_elo_before" if row["Team1"] == key else "Team1_elo_before"
        ],  # find the name of the other team
        axis=1,
    )
    biggest_upset[key] = games_when_team_was_higher_elo.loc[
        games_when_team_was_higher_elo.Elo_difference_before.idxmax(),
        [
            "ID",
            "Date",
            "MatchNumber",
            "Opposition",
            "Margin",
            "WonBy",
            "TeamElo",
            "OppositionElo",
            "Elo_difference_before",
        ],
    ]
biggest_upset = pd.DataFrame(biggest_upset).T
biggest_upset.index.name = "Team"
biggest_upset = biggest_upset.sort_values(by="Elo_difference_before", ascending=False)

biggest_upset

Unnamed: 0_level_0,ID,Date,MatchNumber,Opposition,Margin,WonBy,TeamElo,OppositionElo,Elo_difference_before
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Chennai Super Kings,829801,2015-05-12 00:00:00,49,Delhi Capitals,6.0,Wickets,1530.203914,1459.052426,71.151487
Kolkata Knight Riders,598061,2013-05-15 00:00:00,65,Pune Warriors,7.0,Runs,1506.064071,1448.27353,57.790541
Mumbai Indians,1216517,2020-10-18 00:00:00,36,Punjab Kings,,SuperOver,1529.908138,1477.546071,52.362067
Punjab Kings,829725,2015-04-15 00:00:00,10,Delhi Capitals,5.0,Wickets,1514.257337,1461.918268,52.339069
Rajasthan Royals,392233,2009-05-20 00:00:00,53,Kolkata Knight Riders,4.0,Wickets,1519.698422,1472.466544,47.231878
Sunrisers Hyderabad,1136611,2018-05-17 00:00:00,51,Royal Challengers Bangalore,14.0,Runs,1528.317922,1482.255103,46.062819
Royal Challengers Bangalore,548376,2012-05-20 00:00:00,71,Sunrisers Hyderabad,9.0,Runs,1514.405691,1468.74474,45.660951
Delhi Capitals,392237,2009-05-22 00:00:00,Semi Final,Sunrisers Hyderabad,6.0,Wickets,1518.151368,1479.747231,38.404137
Gujarat Lions,980955,2016-05-01 00:00:00,28,Punjab Kings,23.0,Runs,1516.678015,1478.6214,38.056615
Gujarat Titans,1304094,2022-05-03 00:00:00,48,Punjab Kings,8.0,Wickets,1522.988017,1488.515295,34.472722


### Biggest upset win of all time

In [20]:
biggest_win_match = biggest_win.iloc[0]

print(
    f"On {biggest_win_match.Date.date()}, {biggest_win_match.name} ({biggest_win_match.TeamElo:.2f}) upset {biggest_win_match.Opposition} ({biggest_win_match.OppositionElo}) by {biggest_win_match.Margin:.0f} {biggest_win_match.WonBy.lower()}.\nThe Elo difference before the match was {biggest_win_match.Elo_difference_before:.2f}."
)

On 2015-05-12, Delhi Capitals (1459.05) upset Chennai Super Kings (1530.203913576534) by 6 wickets.
The Elo difference before the match was 71.15.


### Consistency


In [21]:
team_elo_grp = team_elo.groupby("Team")

team_order = (
    team_elo_grp.agg({"Elo": ["min", "max"]})
    .sort_values([("Elo", "max"), ("Elo", "min")], ascending=[True, True])
    .index
)

In [22]:
std = team_elo_grp.Elo.std().sort_values(ascending=False)
std.loc[~std.index.isin(DEFUNCT_TEAMS)]

Team
Delhi Capitals                 16.220447
Sunrisers Hyderabad            13.212207
Punjab Kings                   11.395303
Royal Challengers Bangalore    10.999483
Kolkata Knight Riders          10.970314
Chennai Super Kings            10.670784
Mumbai Indians                 10.574587
Rajasthan Royals               10.319405
Gujarat Titans                  6.480536
Lucknow Super Giants            5.437192
Name: Elo, dtype: float64

In [23]:
fig = px.violin(
    team_elo,
    y="Elo",
    x="Team",
    box=True,
    points="outliers",
    color="Team",
    color_discrete_map=TEAM_COLORS,
    category_orders={"Team": team_order},
)
fig.update_layout(
    title="IPL Elo Ratings",
    yaxis_title="Team",
    xaxis_title="Elo Rating",
    showlegend=False,
    font=dict(family="Courier New, monospace", size=12, color="#7f7f7f"),
    margin=dict(l=0, r=0, t=50, b=0),
    width=1000,
    height=600,
    hovermode="x unified",
    template="plotly_dark",
)
fig.show()

In [24]:
fig = go.Figure()

team_grp_dict = {}
for team, grp in team_elo_grp:
    team_grp_dict[team] = grp

for team in team_order:
    fig.add_trace(
        go.Violin(x=team_grp_dict[team].Elo, line_color=TEAM_COLORS[team], name=team)
    )

fig.update_traces(
    orientation="h",
    side="positive",
    width=3,
    points=False,
)
fig.update_layout(xaxis_showgrid=False, xaxis_zeroline=False)

fig.update_layout(
    title="IPL Elo Ratings",
    yaxis_title="Team",
    xaxis_title="Elo Rating",
    showlegend=False,
    font=dict(family="Courier New, monospace", size=12, color="#7f7f7f"),
    margin=dict(l=0, r=0, t=50, b=0),
    width=1000,
    height=600,
    hovermode="x unified",
    template="plotly_dark",
)
fig.show()

### Export data for race charts


In [25]:
race_chart_df = team_elo.pivot_table(
    index="Team",
    columns="Date",
    values="Elo",
).ffill(axis=1)

# Set the Elo rating for teams that didn't play in a season to NaN
for team, seasons in team_seasons.items():
    race_chart_df.loc[
        team, ~race_chart_df.columns.isin(dates_in_season.loc[team_seasons[team]].sum())
    ] = np.nan

race_chart_w_colors_df = race_chart_df.copy()
race_chart_w_colors_df["colors"] = race_chart_w_colors_df.index.map(
    TEAM_COLORS
).tolist()

In [27]:
race_chart_w_colors_df.to_csv("./results/team_elo.csv", index=True)
race_chart_w_colors_df.T.to_csv("./results/team_elo_transposed.csv", index=True)

In [28]:
from src.plotting import get_dt_breaks

data = []
for col in race_chart_df.T.columns:
    data.append(
        go.Scatter(
            x=race_chart_df.T.index,
            y=race_chart_df.T[col],
            mode="lines",
            name=col,
            line=dict(color=TEAM_COLORS[col]),
        )
    )
fig = go.Figure(data=data)
fig.update_xaxes(
    rangebreaks=[
        dict(
            values=get_dt_breaks(
                team_elo=race_chart_df.T.reset_index(),
            )
        )
    ]
)
fig.update_layout(
    title="IPL Elo Ratings",
    yaxis_title="Elo Rating",
    xaxis_title="Date",
    font=dict(family="Courier New, monospace", size=12, color="#7f7f7f"),
    margin=dict(l=0, r=0, t=50, b=0),
    width=1000,
    height=600,
    hovermode="x unified",
    template="plotly_dark",
    yaxis=dict(range=[race_chart_df.min().min(), race_chart_df.max().max()]),
)
fig.show()

### RANK

In [45]:
rank = race_chart_df[race_chart_df.columns.difference(["colors"])].rank(
    axis=0, ascending=False
)
rank.to_csv("results/rank.csv")
rank

Date,2008-04-18,2008-04-19,2008-04-20,2008-04-21,2008-04-22,2008-04-23,2008-04-24,2008-04-25,2008-04-26,2008-04-27,...,2022-05-17,2022-05-18,2022-05-19,2022-05-20,2022-05-21,2022-05-22,2022-05-24,2022-05-25,2022-05-27,2022-05-29
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Chennai Super Kings,,3.0,3.0,3.0,3.0,3.0,3.0,3.0,1.0,1.0,...,7.0,6.0,6.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0
Delhi Capitals,,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,4.0,...,2.0,3.0,3.0,3.0,3.0,3.0,3.0,4.0,3.0,3.0
Gujarat Lions,,,,,,,,,,,...,,,,,,,,,,
Gujarat Titans,,,,,,,,,,,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
Kochi Tuskers Kerala,,,,,,,,,,,...,,,,,,,,,,
Kolkata Knight Riders,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,3.0,...,5.0,7.0,7.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0
Lucknow Super Giants,,,,,,,,,,,...,3.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,2.0,2.0
Mumbai Indians,,,8.0,7.0,6.0,8.0,7.0,8.0,8.0,8.0,...,9.0,9.0,9.0,9.0,8.0,8.0,8.0,8.0,8.0,8.0
Pune Warriors,,,,,,,,,,,...,,,,,,,,,,
Punjab Kings,,4.0,5.0,8.0,7.0,6.0,6.0,6.0,6.0,5.0,...,8.0,8.0,8.0,8.0,9.0,9.0,9.0,9.0,9.0,9.0


### Days at rank 1

In [30]:
days_at_rank_1 = rank.apply(lambda w: w.loc[w == 1].sum(), axis=1).sort_values(
    ascending=False
)
days_at_rank_1

Team
Mumbai Indians                 242.0
Chennai Super Kings            234.0
Rajasthan Royals                67.0
Gujarat Titans                  43.0
Delhi Capitals                  41.0
Sunrisers Hyderabad             39.0
Kolkata Knight Riders           31.0
Lucknow Super Giants             3.0
Gujarat Lions                    2.0
Punjab Kings                     2.0
Royal Challengers Bangalore      2.0
Kochi Tuskers Kerala             0.0
Pune Warriors                    0.0
Rising Pune Supergiant           0.0
dtype: float64

In [31]:
fig = px.bar(
    days_at_rank_1,
    color=days_at_rank_1.index,
    color_discrete_map=TEAM_COLORS,
    labels={"value": "Number of Days at Rank 1"},
)

fig.update_layout(
    title="IPL Days at Rank 1",
    # yaxis_title="Number of Days at Rank 1",
    xaxis_title="Team",
    font=dict(family="Courier New, monospace", size=12, color="#7f7f7f"),
    margin=dict(l=0, r=0, t=50, b=0),
    width=1000,
    height=600,
    showlegend=False,
    template="plotly_dark",
)
fig.show()

### Team with Rank 1 over time. 

In [32]:
# for each col, find the team that was at rank 1

rank_1_teams = {}
for col in rank.columns:
    rank_1_teams[col] = rank.loc[rank[col] == 1].index[0]
rank_1_teams = pd.DataFrame.from_dict(
    rank_1_teams, orient="index", columns=["Team"]
).reset_index(names=["Date"])
rank_1_teams["Elo"] = rank_1_teams.apply(
    lambda row: race_chart_df.loc[row.Team, row.Date], axis=1
)
rank_1_teams = rank_1_teams.set_index("Date").drop_duplicates(
    subset=["Team", "Elo"], keep="first"
)

In [33]:
fig = go.Figure(
    go.Scatter(
        x=rank_1_teams.index,
        y=rank_1_teams.Elo,
        mode="markers+lines",
        marker=dict(color=rank_1_teams.Team.map(TEAM_COLORS)),
        text=rank_1_teams.Team.tolist(),
        hovertemplate="%{x}<br>" + "%{text} (%{y:.2f})",
        name="Highest Elo Rating",
    )
)
fig.update_xaxes(
    rangebreaks=[
        dict(
            values=get_dt_breaks(
                team_elo=rank_1_teams.reset_index(),
            )
        )
    ]
)

fig.update_layout(
    title="IPL Highest Elo Ratings",
    yaxis_title="Elo Rating",
    xaxis_title="Date",
    font=dict(family="Courier New, monospace", size=12, color="#7f7f7f"),
    margin=dict(l=0, r=0, t=50, b=0),
    width=1000,
    height=600,
    # hovermode="x unified",
    template="plotly_dark",
    yaxis=dict(range=[rank_1_teams.Elo.min() - 5, rank_1_teams.Elo.max() + 5]),
)

### Home advantage

In [34]:
home_line = {}
home_line["Overall"] = (
    matches.loc[matches.Team1 == matches.WinningTeam].shape[0] / matches.shape[0]
)
for season, grp in matches.groupby("Season"):
    home_line[season] = grp.loc[grp.Team1 == grp.WinningTeam].shape[0] / grp.shape[0]
home_line = pd.DataFrame.from_dict(home_line, orient="index", columns=["HomeWinPct"])
home_line

Unnamed: 0,HomeWinPct
Overall,0.505263
2007/08,0.517241
2009,0.54386
2009/10,0.55
2011,0.534247
2012,0.445946
2013,0.710526
2014,0.466667
2015,0.525424
2016,0.466667


In [35]:
fig = go.Figure(
    go.Scatter(
        x=home_line.iloc[1:].index,
        y=home_line.iloc[1:].HomeWinPct,
        mode="lines",
        marker=dict(color="#1f77b4"),
        text=home_line.HomeWinPct.tolist(),
        hovertemplate="%{x}<br>" + "%{text:.2f}",
        showlegend=False,
        name="Seasonal Home Win %",
    )
)
fig.add_trace(
    go.Scatter(
        x=home_line.iloc[1:].index,
        y=[home_line.iloc[0].HomeWinPct for _ in range(home_line.iloc[1:].shape[0])],
        mode="lines",
        # text=home_line.HomeWinPct.tolist(),
        # hovertemplate="%{x}<br>" + "%{text:.2f}",
        name="Overall Home Win %",
    )
)
fig.update_layout(
    title="IPL Home Win Percentage",
    xaxis_title="Season",
    font=dict(family="Courier New, monospace", size=12, color="#7f7f7f"),
    margin=dict(l=0, r=0, t=50, b=0),
    width=600,
    height=600,
    hovermode="x unified",
    template="plotly_dark",
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
    ),
)
fig.show()

In [36]:
toss_line = {}
toss_line["Overall"] = (
    matches.loc[matches.Team1 == matches.TossWinner].shape[0] / matches.shape[0]
)
for season, grp in matches.groupby("Season"):
    toss_line[season] = grp.loc[grp.Team1 == grp.TossWinner].shape[0] / grp.shape[0]
toss_line = pd.DataFrame.from_dict(toss_line, orient="index", columns=["TossWinPct"])
toss_line

Unnamed: 0,TossWinPct
Overall,0.405263
2007/08,0.413793
2009,0.54386
2009/10,0.566667
2011,0.465753
2012,0.486486
2013,0.447368
2014,0.5
2015,0.457627
2016,0.366667


In [37]:
fig = go.Figure(
    go.Scatter(
        x=toss_line.iloc[1:].index,
        y=toss_line.iloc[1:].TossWinPct,
        mode="lines",
        marker=dict(color="#1f77b4"),
        text=toss_line.TossWinPct.tolist(),
        hovertemplate="%{x}<br>" + "%{text:.2f}",
        showlegend=False,
        name="Seasonal Home Win %",
    )
)
fig.add_trace(
    go.Scatter(
        x=toss_line.iloc[1:].index,
        y=[toss_line.iloc[0].TossWinPct for _ in range(toss_line.iloc[1:].shape[0])],
        mode="lines",
        # text=toss_line.TossWinPct.tolist(),
        # hovertemplate="%{x}<br>" + "%{text:.2f}",
        name="Overall",
    )
)
fig.update_layout(
    title="IPL Win Percentage when Winning the Toss",
    xaxis_title="Season",
    font=dict(family="Courier New, monospace", size=12, color="#7f7f7f"),
    margin=dict(l=0, r=0, t=50, b=0),
    width=600,
    height=600,
    hovermode="x unified",
    template="plotly_dark",
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
    ),
)
fig.show()

### HWP by team

In [38]:
for team, grp in matches.groupby("Team1"):
    print(team, (grp.loc[grp.WinningTeam == team].shape[0] / grp.shape[0] - 0.5) * 100)

Chennai Super Kings 8.558558558558559
Delhi Capitals -3.508771929824561
Gujarat Lions -18.75
Gujarat Titans 7.14285714285714
Kochi Tuskers Kerala -7.142857142857145
Kolkata Knight Riders 3.3333333333333326
Lucknow Super Giants 37.5
Mumbai Indians 8.03571428571429
Pune Warriors -23.91304347826087
Punjab Kings -4.545454545454547
Rajasthan Royals 5.0561797752809
Rising Pune Supergiant 0.0
Royal Challengers Bangalore -1.5873015873015872
Sunrisers Hyderabad -4.629629629629628


In [39]:
for team, grp in matches.groupby("TossWinner"):
    print(team, (grp.loc[grp.WinningTeam == team].shape[0] / grp.shape[0] - 0.5) * 100)

Chennai Super Kings 12.385321100917434
Delhi Capitals -1.7241379310344807
Gujarat Lions 16.666666666666664
Gujarat Titans 19.999999999999996
Kochi Tuskers Kerala 0.0
Kolkata Knight Riders 6.140350877192979
Lucknow Super Giants 7.14285714285714
Mumbai Indians 6.910569105691056
Pune Warriors -35.0
Punjab Kings -7.446808510638298
Rajasthan Royals -0.5050505050505028
Rising Pune Supergiant 11.538461538461542
Royal Challengers Bangalore 0.4761904761904745
Sunrisers Hyderabad -4.700854700854701


### Longest streak at rank 1

In [40]:
# get the longest streak of days at rank 1
