In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.offline as pyo

In [2]:
''' 
Interactive visualization in the form of HTML files
Drawback -> cannot work with live data source for that use "DASH"
'''

' \nInteractive visualization in the form of HTML files\nDrawback -> cannot work with live data source for that use "DASH"\n'

In [3]:
match = pd.read_csv("./matches.csv")
delivery = pd.read_csv("./deliveries.csv")

ipl = delivery.merge(match, left_on='match_id', right_on='id')

In [4]:
ipl.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,...,toss_decision,winner,result,result_margin,target_runs,target_overs,super_over,method,umpire1,umpire2
0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,1,SC Ganguly,P Kumar,BB McCullum,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
1,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,2,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
2,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,3,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
3,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,4,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
4,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,5,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen


In [5]:
# ---------------- Scatter plot
# batsman avg (x-axis) and batsman strike rate(y-axis) top 50 batsman of all time 

ipl.groupby('batter')['batsman_runs'].sum().sort_values(ascending=False).head(50).reset_index()
top50 = ipl.groupby('batter')['batsman_runs'].sum().sort_values(ascending=False).head(50).index.to_list()
new_ipl = ipl[ipl['batter'].isin(top50)]

In [6]:
# Calculate strike rate 

runs = new_ipl.groupby("batter")['batsman_runs'].sum()
balls = new_ipl.groupby("batter")['batsman_runs'].count()
sr = ((runs/balls) * 100).reset_index()
sr.head()

Unnamed: 0,batter,batsman_runs
0,AB de Villiers,148.580442
1,AD Russell,164.224422
2,AJ Finch,123.349057
3,AM Rahane,120.32141
4,AT Rayudu,124.584527


In [7]:
# calculating avg

out = ipl[ipl['player_dismissed'].isin(top50)]
no_of_outs = out['player_dismissed'].value_counts()
avg = (runs/no_of_outs).reset_index()
avg.head()
avg.rename(columns={
    'index': 'batter',
    0: 'avg' 
}, inplace=True)

In [8]:
avg = avg.merge(sr, on = 'batter')

In [9]:
avg.head()

Unnamed: 0,batter,avg,batsman_runs
0,AB de Villiers,39.853846,148.580442
1,AD Russell,28.930233,164.224422
2,AJ Finch,24.904762,123.349057
3,AM Rahane,30.142857,120.32141
4,AT Rayudu,28.051613,124.584527


In [10]:
# draw scatter plot

trace = go.Scatter(x = avg['avg'], y = avg['batsman_runs'], mode='markers', text=avg['batter'],
                    marker={'color': '#00a65a'}
                    )
data = [trace]
layout = go.Layout(title='Batsman runs vs Strike rate', xaxis={'title': 'Batsman runs'}, yaxis={'title': 'Strike rate'})
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename = 'myfile.html')

'myfile.html'

In [11]:
# ----------------------- Line chart 

# year by year batsman performance

single = ipl[ipl['batter'] == 'V Kohli']
performance = single.groupby('season')['batsman_runs'].sum()
performance = performance.reset_index()


single1 = ipl[ipl['batter'] == 'MS Dhoni']
performance1 = single1.groupby('season')['batsman_runs'].sum()
performance1 = performance1.reset_index()

In [12]:

trace = go.Scatter(x = performance['season'], y = performance['batsman_runs'], mode='lines', name ='V Kohli')
trace1 = go.Scatter(x = performance1['season'], y = performance1['batsman_runs'], mode='lines', name ='MS Dohni')
data = [trace, trace1]
layout = go.Layout(title = 'Year by Year Performance',
                   xaxis = {'title': 'Season'},
                   yaxis = {'title': 'Total runs'}
                   )
                   
fig1 = go.Figure(data=data, layout=layout)

pyo.plot(fig1)

'temp-plot.html'

In [13]:
# Multiple line plot

def batsman_com(*name):
    data = []
    for i in name:
        single = ipl[ipl['batter'] == i]
        performance = single.groupby('season')['batsman_runs'].sum().reset_index()
        trace = go.Scatter(x = performance['season'], y = performance['batsman_runs'], mode='lines', name = i)
        data.append(trace)

    layout = go.Layout(title='Batsman run comparator',
                        xaxis={'title': 'Season'},
                        yaxis={'title': 'Runs'}
                        )
    fig = go.Figure(data=data, layout=layout)
    pyo.plot(fig, filename = 'year_by_year.html')

In [14]:
batsman_com('V Kohli', 'MS Dhoni', 'RG Sharma', 'DA Warner')

In [15]:
# ------------------------ Bar plot

top10 = ipl.groupby('batter')['batsman_runs'].sum().sort_values(ascending=False).head(10).index.to_list()
top10_df = ipl[ipl['batter'].isin(top10)]

In [16]:
top10_score = top10_df.groupby('batter')['batsman_runs'].sum().reset_index()

In [17]:
top10_score

Unnamed: 0,batter,batsman_runs
0,AB de Villiers,5181
1,CH Gayle,4997
2,DA Warner,6567
3,KD Karthik,4843
4,MS Dhoni,5243
5,RG Sharma,6630
6,RV Uthappa,4954
7,S Dhawan,6769
8,SK Raina,5536
9,V Kohli,8014


In [18]:
trace = go.Bar(x = top10_score['batter'], y = top10_score['batsman_runs'])
data = [trace]
layout = go.Layout(title = 'Top 10 IPL batsman', xaxis={'title': 'Batsman name'},
                   yaxis={'title': 'Batsman runs'})
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

In [19]:
''' 
Type of bar chart
    - Nested bar chart
    - Stacked bar chart
    - Overlayed bar chart
'''

' \nType of bar chart\n    - Nested bar chart\n    - Stacked bar chart\n    - Overlayed bar chart\n'

In [20]:
iw = top10_df.groupby(['batter', 'inning'])['batsman_runs'].sum().reset_index()
mask1 = iw['inning'] == 1
mask2 = iw['inning'] == 2

one = iw[mask1]
two = iw[mask2]

one.rename(columns={
    'batsman_runs': '1st innings' 
}, inplace=True)

two.rename(columns={
    'batsman_runs': '2nd innings' 
}, inplace=True)

final = one.merge(two, on = 'batter')[['batter', '1st innings', '2nd innings']]



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [21]:
final

Unnamed: 0,batter,1st innings,2nd innings
0,AB de Villiers,3163,1999
1,CH Gayle,2873,2092
2,DA Warner,3280,3285
3,KD Karthik,2743,2099
4,MS Dhoni,3065,2178
5,RG Sharma,3600,3028
6,RV Uthappa,2120,2832
7,S Dhawan,3926,2843
8,SK Raina,3194,2334
9,V Kohli,4400,3604


In [22]:
trace1 = go.Bar(x = final['batter'], y = final['1st innings'], name = '1st innings')
trace2 = go.Bar(x = final['batter'], y = final['2nd innings'], name = '2nd innings')

data = [trace1, trace2]
# layout = go.Layout(title='Inning wise scores', xaxis={'title': 'Batsman'}, yaxis={'title': 'Runs'}, barmode='overlay')
layout = go.Layout(title='Inning wise scores', xaxis={'title': 'Batsman'}, yaxis={'title': 'Runs'}, barmode='stack')
# layout = go.Layout(title='Inning wise scores', xaxis={'title': 'Batsman'}, yaxis={'title': 'Runs'}, barmode='group')
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

In [23]:
# ---------------------- Bubble plot

new_ipl = new_ipl[new_ipl['batsman_runs'] == 6]
six = new_ipl.groupby('batter')['batsman_runs'].count().reset_index()
x = avg.merge(six, on = 'batter')

In [24]:
trace = go.Scatter(x = x['avg'], y = x['batsman_runs_x'] , mode = 'markers', marker = {'size': x['batsman_runs_y']})

trace = go.Scatter(
    x=x['avg'],
    y=x['batsman_runs_x'],
    mode='markers',
    marker={'size': x['batsman_runs_y']},
    name='Bubble Chart'  # A single string for the entire trace
)

data = [trace]
layout = go.Layout(title='Bubble chart', xaxis={'title': 'Average'}, yaxis={'title': 'Strike Rate'})
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

In [25]:
match_agg = delivery.groupby('match_id')['total_runs'].sum().reset_index()
season_wise = match_agg.merge(match, left_on='match_id', right_on='id')[['match_id', 'total_runs', 'season']]

In [26]:
trace = go.Box(x = season_wise['total_runs'], name = 'All season', marker={
    'color': 'Brown'
})
data = [trace]
layout = go.Layout(title = 'Total score analysis', xaxis={'title': 'Total score'})
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

In [27]:
trace1 = go.Box(x = season_wise[season_wise['season'] == '2024']['total_runs'], name = '2024', marker={'color': 'Brown'})
trace2 = go.Box(x = season_wise[season_wise['season'] == '2007/08']['total_runs'], name = '2007/08', marker={'color': 'Chocolate'})

data = [trace1, trace2]
layout = go.Layout(title = 'Total score analysis', xaxis={'title': 'Total score'})
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

In [28]:
# ---------------------- Displot (Combination of 3 graph -> hist, kde, rug)

import plotly.figure_factory as ff

hist_data = [avg['avg'], avg['batsman_runs']]
group_labels = ['Average', 'Strike rate']
fig = ff.create_distplot(hist_data, group_labels, bin_size = [10, 20])
pyo.plot(fig) 

'temp-plot.html'

In [29]:
# -------------------- Histogram

x = delivery.groupby('batter')['batsman_runs'].count() > 150 
x = x[x].index.tolist()

new = delivery[delivery['batter'].isin(x)]

runs = new.groupby('batter')['batsman_runs'].sum()
balls = new.groupby('batter')['batsman_runs'].count()

sr = ((runs / balls) * 100).reset_index()

In [30]:
# trace = go.Histogram(x = sr['batsman_runs'], name = 'Strike rate variation', xbins={'size': 2})
trace = go.Histogram(x = sr['batsman_runs'], name = 'Strike rate variation', xbins={'size': 5, 'start': 50, 'end': 100})
data = [trace]
layout = go.Layout(title = 'Strike rate analysis', xaxis={'title': 'Strike rates'})
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

In [31]:
# ------------------------- Heatmap

six = delivery[delivery['batsman_runs'] == 6]
six = six.groupby(['batting_team', 'over'])['batsman_runs'].count().reset_index()

In [32]:
trace = go.Heatmap(x = six['batting_team'], y = six['over'], z = six['batsman_runs'])
data = [trace]
layout = go.Layout(title = 'Six heatmap')
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

In [33]:
# side by side heatmap

dots = delivery[delivery['batsman_runs'] == 0]
dots = dots.groupby(['batting_team', 'over'])['batsman_runs'].count().reset_index()

from plotly import tools

trace1 = go.Heatmap(x = six['batting_team'], y = six['over'], z = six['batsman_runs'].values.tolist())

trace2 = go.Heatmap(x = dots['batting_team'], y = dots['over'], z = dots['batsman_runs'].values.tolist())

fig = tools.make_subplots(rows = 1, cols = 2, subplot_titles = ["6's", "0's"], shared_xaxes=True)

fig.append_trace(trace1, 1, 1)
fig.append_trace(trace2, 1, 2)

pyo.plot(fig)


plotly.tools.make_subplots is deprecated, please use plotly.subplots.make_subplots instead



'temp-plot.html'