In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

In [33]:
# Read the comma-separated batting records into the DataFrame used by
# every later cell.
data = pd.read_csv(filepath_or_buffer="kohli.txt")


In [34]:
# Plain-text preview of the first five rows.
print(data.iloc[:5])

   Runs  BF  4s  6s     SR  Pos Dismissal  Inns   Opposition         Ground  \
0    12  22   1   0  54.54  2.0       lbw     1  v Sri Lanka       Dambulla   
1    37  67   6   0  55.22  2.0    caught     2  v Sri Lanka       Dambulla   
2    25  38   4   0  65.78  1.0   run out     1  v Sri Lanka  Colombo (RPS)   
3    54  66   7   0  81.81  1.0    bowled     1  v Sri Lanka  Colombo (RPS)   
4    31  46   3   1  67.39  1.0       lbw     2  v Sri Lanka  Colombo (RPS)   

  Start Date  
0  18-Aug-08  
1  20-Aug-08  
2  24-Aug-08  
3  27-Aug-08  
4  29-Aug-08  


In [35]:
# Count missing values per column (isna is the canonical name for isnull).
print(data.isna().sum())

Runs          0
BF            0
4s            0
6s            0
SR            0
Pos           0
Dismissal     0
Inns          0
Opposition    0
Ground        0
Start Date    0
dtype: int64


In [36]:
# Sum of the "Runs" column across every row in the dataset; the bare
# expression on the last line is shown as the cell's output.
print("total runs: ")
data.loc[:, "Runs"].sum()

total runs: 


6184

In [37]:
# Arithmetic mean of "Runs" over all rows. Every row counts in the
# denominator, including rows whose Dismissal is "not out", so this is a
# per-row mean rather than a dismissal-based batting average.
print("Average: ")
data.loc[:, "Runs"].mean()

Average: 


46.84848484848485

In [38]:
# Plot runs in row order; the DataFrame index serves as the x axis.
matches = data.index
figure = px.line(
    data,
    x=matches,
    y="Runs",
    title="Runs Scored Between 2008 and 2017",
)
figure.show()

In [39]:
# Keep only the rows with a score of 100 or more; `centuries` is reused by a
# later cell. Bars are grouped by the "Inns" value and shaded by the score.
centuries = data.query("Runs >= 100")
figure = px.bar(
    centuries,
    x="Inns",
    y="Runs",
    color="Runs",
    title="Centuries By Virat Kohli in First Innings Vs. Second Innings",
)
figure.show()

In [40]:
# Frequency of each dismissal type, drawn as a pie with one sector per type.
dismissal = data["Dismissal"].value_counts()
label = dismissal.index
counts = dismissal.values
colors = ['gold', 'lightgreen', "pink", "blue", "skyblue", "cyan", "orange"]

# Trace styling is supplied up front instead of through update_traces.
fig = go.Figure(
    data=[
        go.Pie(
            labels=label,
            values=counts,
            hoverinfo='label+percent',
            textinfo='value',
            textfont=dict(size=30),
            marker=dict(colors=colors, line=dict(color='black', width=3)),
        )
    ]
)
fig.update_layout(title_text='Dismissals of Virat Kohli')
fig.show()

In [41]:
# Total runs scored at each batting position.
#
# Aggregate before plotting so there is exactly one label/value pair per
# position. go.Pie does sum the values of duplicated labels, but
# `marker.colors` is matched to the input rows by index; passing the raw
# rows meant only positions that first appear within the first seven rows
# received a colour from the custom palette.
runs_by_position = data.groupby("Pos")["Runs"].sum()
label = runs_by_position.index
counts = runs_by_position.values
colors = ['gold','lightgreen', "pink", "blue", "skyblue", "cyan", "orange"]

fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Runs at Different Batting Positions')
fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=30,
                  marker=dict(colors=colors, line=dict(color='black', width=3)))
fig.show()

In [42]:
# Replace the numeric batting position with a readable label, then chart how
# many rows fall under each label.
#
# The label strings are kept exactly as they were (including the lowercase
# "batting At 6") so that re-running reproduces the recorded outputs.
position_labels = {
    1.0: "Batting At 1",
    2.0: "Batting At 2",
    3.0: "Batting At 3",
    4.0: "Batting At 4",
    5.0: "Batting At 5",
    6.0: "batting At 6",
    7.0: "Batting At 7",
}

# Guard the in-place overwrite: Series.map turns every value that is not a
# key of the mapping into NaN, so running this cell a second time (when the
# column already holds the text labels) would wipe the column out.
if pd.api.types.is_numeric_dtype(data["Pos"]):
    data["Pos"] = data["Pos"].map(position_labels)

Pos = data["Pos"].value_counts()
label = Pos.index
counts = Pos.values
colors = ['gold','lightgreen', "pink", "blue", "skyblue", "cyan", "orange"]

fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Number of Matches At Different Batting Positions')
fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=30,
                  marker=dict(colors=colors, line=dict(color='black', width=3)))
fig.show()

In [44]:
# One bar segment per row, grouped by opposition and shaded by the score.
figure = px.bar(
    data,
    x="Opposition",
    y="Runs",
    color="Runs",
    title="Total Runs Against Teams",
)
figure.show()

In [48]:
# Same layout as the chart above, restricted to the `centuries` subset that
# an earlier cell built with the "Runs >= 100" filter.
figure = px.bar(
    centuries,
    x="Opposition",
    y="Runs",
    color="Runs",
    title="Most Centuries Against Teams",
)
figure.show()

In [50]:
# Rows whose strike rate is at least 100, selected with a boolean mask.
strike_rate = data.loc[data.eval("SR >= 100")]
print(strike_rate)

     Runs   BF  4s  6s      SR           Pos   Dismissal  Inns  \
5       2    2   0   0  100.00  Batting At 7     not out     1   
8      27   19   4   0  142.10  Batting At 7      bowled     1   
12    102   95  11   0  107.36  Batting At 3     not out     2   
22    105  104  10   0  100.96  Batting At 3      caught     1   
32    100   83   8   2  120.48  Batting At 4     not out     1   
45      9    9   0   0  100.00  Batting At 4      caught     1   
47    107   93   9   1  115.05  Batting At 4  hit wicket     1   
49    112   98  16   0  114.28  Batting At 4     not out     2   
50     35   30   5   0  116.66  Batting At 4         lbw     2   
56     23   11   3   0  209.09  batting At 6     not out     1   
66    128  119  12   1  107.56  Batting At 3     not out     2   
76     43   34   4   1  126.47  Batting At 3      caught     1   
78    102   83  13   2  122.89  Batting At 3      caught     1   
79    115  108  13   1  106.48  Batting At 3      caught     2   
83    100 

In [51]:
# Rows whose strike rate is at most 100 (a strike rate of exactly 100 is
# included here as well as in the ">= 100" selection).
strike_rate = data.loc[data.eval("SR <= 100")]
print(strike_rate)

     Runs  BF  4s  6s     SR           Pos Dismissal  Inns     Opposition  \
0      12  22   1   0  54.54  Batting At 2       lbw     1    v Sri Lanka   
1      37  67   6   0  55.22  Batting At 2    caught     2    v Sri Lanka   
2      25  38   4   0  65.78  Batting At 1   run out     1    v Sri Lanka   
3      54  66   7   0  81.81  Batting At 1    bowled     1    v Sri Lanka   
4      31  46   3   1  67.39  Batting At 1       lbw     2    v Sri Lanka   
..    ...  ..  ..  ..    ...           ...       ...   ...            ...   
123     8  11   1   0  72.72  Batting At 3    caught     2    v Australia   
125     9  13   1   0  69.23  Batting At 3    caught     2  v New Zealand   
127    45  51   2   1  88.23  Batting At 3    caught     2  v New Zealand   
128    65  76   2   1  85.52  Batting At 3    caught     1  v New Zealand   
131    55  63   8   0  87.30  Batting At 3    caught     2      v England   

            Ground Start Date  
0         Dambulla  18-Aug-08  
1         D

In [52]:
# Rows whose strike rate is at least 120, selected with a boolean mask.
strike_rate = data.loc[data.eval("SR >= 120")]
print(strike_rate)

     Runs  BF  4s  6s      SR           Pos Dismissal  Inns     Opposition  \
8      27  19   4   0  142.10  Batting At 7    bowled     1    v Sri Lanka   
32    100  83   8   2  120.48  Batting At 4   not out     1   v Bangladesh   
56     23  11   3   0  209.09  batting At 6   not out     1  v West Indies   
76     43  34   4   1  126.47  Batting At 3    caught     1      v England   
78    102  83  13   2  122.89  Batting At 3    caught     1  v West Indies   
83    100  52   8   7  192.30  Batting At 3   not out     2    v Australia   
85    115  66  18   1  174.24  Batting At 3   not out     2    v Australia   
93     78  65   7   2  120.00  Batting At 3    caught     2  v New Zealand   
130     8   5   2   0  160.00  Batting At 3    caught     1      v England   

            Ground Start Date  
8           Rajkot  15-Dec-09  
32           Dhaka  19-Feb-11  
56          Indore   8-Dec-11  
76      Birmingham  23-Jun-13  
78   Port of Spain   5-Jul-13  
83          Jaipur  16-Oct-

In [53]:
# Rows whose strike rate is at least 150. `strike_rate` keeps this value for
# the bar chart in the following cell.
strike_rate = data.loc[data.eval("SR >= 150")]
print(strike_rate)

     Runs  BF  4s  6s      SR           Pos Dismissal  Inns     Opposition  \
56     23  11   3   0  209.09  batting At 6   not out     1  v West Indies   
83    100  52   8   7  192.30  Batting At 3   not out     2    v Australia   
85    115  66  18   1  174.24  Batting At 3   not out     2    v Australia   
130     8   5   2   0  160.00  Batting At 3    caught     1      v England   

      Ground Start Date  
56    Indore   8-Dec-11  
83    Jaipur  16-Oct-13  
85    Nagpur  30-Oct-13  
130  Cuttack  19-Jan-17  


In [54]:
# Bar chart of whatever `strike_rate` currently holds (the most recent
# filter cell that was run), grouped by the "Inns" value and shaded by SR.
figure = px.bar(
    strike_rate,
    x="Inns",
    y="SR",
    color="SR",
    title="Virat Kohli's High Strike Rates in First Innings Vs. Second Innings",
)
figure.show()

In [55]:
# Runs against fours hit; marker size follows the strike rate and an
# ordinary-least-squares trendline is overlaid.
figure = px.scatter(
    data,
    x="Runs",
    y="4s",
    size="SR",
    trendline="ols",
    title="Relationship Between Runs Scored and Fours",
)
figure.show()

In [57]:
# Runs against sixes hit; marker size follows the strike rate and an
# ordinary-least-squares trendline is overlaid.
figure = px.scatter(
    data,
    x="Runs",
    y="6s",
    size="SR",
    trendline="ols",
    title="Relationship Between Runs Scored and Sixes",
)
figure.show()

In [59]:
# Runs against strike rate; SR drives both the y axis and the marker size,
# with an ordinary-least-squares trendline overlaid.
figure = px.scatter(
    data,
    x="Runs",
    y="SR",
    size="SR",
    trendline="ols",
    title="Relationship Between Runs Scored and SR",
)
figure.show()

In [62]:
# Runs against the "BF" column; marker size follows the strike rate and an
# ordinary-least-squares trendline is overlaid.
figure = px.scatter(
    data,
    x="Runs",
    y="BF",
    size="SR",
    trendline="ols",
    title="Relationship Between Runs Scored and Ball Faced",
)
figure.show()