# Data visualisation

I am not that much of a writing person as I believe the visualisation should be enough to tell the story, so don't expect a lot of notes here.

Enough of talk, let's go straight to action! 

# Importing the libraries

In [None]:
!pip install seaborn==0.11.0


In [None]:

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
print(sns.__version__)
import plotly.offline as pyo
import plotly.graph_objs as go
import plotly.figure_factory as ff
import plotly.subplots as subplots
import cufflinks as cf
cf.go_offline(connected=True)

pd.options.display.max_columns = None
pyo.init_notebook_mode(connected=True)

import os

# Lazily build the full path of every file found under the Kaggle input
# directory, then print them one per line.
input_files = (
    os.path.join(folder, file_name)
    for folder, _subdirs, files in os.walk('/kaggle/input')
    for file_name in files
)
for path in input_files:
    print(path)

        

# Loading the datasets

In [None]:
# Locations of the two IPL CSV files inside the Kaggle input directory.
matches_csv = '/kaggle/input/ipl-complete-dataset-20082020/IPL Matches 2008-2020.csv'
balls_csv = '/kaggle/input/ipl-complete-dataset-20082020/IPL Ball-by-Ball 2008-2020.csv'

# Load the match-level and the ball-by-ball tables into DataFrames.
matches = pd.read_csv(matches_csv)
balls = pd.read_csv(balls_csv)

# First look at the ball by ball data

In [None]:
# Preview the first five rows of the ball-by-ball table.
balls.head(n=5)

# First look at the matches data

In [None]:
# Preview the first five rows of the matches table.
matches.head(n=5)

# Merging the two datasets

In [None]:
# Attach the match-level columns to every delivery. A right join on `id`
# keeps every row of `balls`, even when `matches` has no row with that id.
data = matches.merge(balls, on='id', how='right')
data.head(n=5)

In [None]:
# Print (rows, columns) for the two source tables and the merged table,
# in that order.
for frame in (matches, balls, data):
    print(frame.shape)

In [None]:
# Summarise the merged frame (columns, non-null counts, dtypes).
data.info()

# Extracting year from the date

In [None]:
# Parse the match date into a real datetime column, then derive the
# calendar year from it through the `.dt` accessor.
parsed_dates = pd.to_datetime(data['date'])
data['date'] = parsed_dates
data['year'] = data['date'].dt.year

In [None]:
# Preview the merged table, now including the derived `year` column.
data.head(n=5)

# Runs scored over the years

In [None]:
# Total runs scored in each year, as a two-column frame: year, total_runs.
#
# Select `total_runs` *before* summing. Summing the whole merged frame would
# also try to aggregate every other column, including the datetime `date`
# column, which pandas >= 2.0 rejects with a TypeError (non-numeric columns
# are no longer silently dropped). It is also needlessly slow.
runs_by_years = data.groupby('year', as_index=False)['total_runs'].sum()

In [None]:
# Line chart of the total runs scored in each year.
fig, ax = plt.subplots(figsize=(15, 9))
sns.lineplot(data=runs_by_years, x='year', y='total_runs', ax=ax)
ax.set_title('Runs scored over the years')
plt.show()

# Preferred toss decision

In [None]:
# Bar chart counting how often each `toss_decision` value occurs in `matches`.
fig, ax = plt.subplots(figsize=(15, 9))
sns.countplot(data=matches, x='toss_decision', ax=ax)
ax.set_title('Preferred toss decision')
plt.show()

# Totals runs and wickets by over

In [None]:
# Total runs and total wickets for each over number, as a three-column
# frame: over, total_runs, is_wicket.
#
# Only the two needed columns are summed. Summing the whole `balls` frame
# would also aggregate unrelated columns (e.g. text columns such as `bowler`),
# which is wasteful and can fail on newer pandas versions that no longer
# drop non-numeric columns silently.
runs_and_wickets_by_over = (
    balls.groupby('over', as_index=False)[['total_runs', 'is_wicket']].sum()
)

In [None]:
# Scatter plot of total runs per over; marker size encodes the wicket count.
fig, ax = plt.subplots(figsize=(15, 9))
sns.scatterplot(
    data=runs_and_wickets_by_over,
    x='over',
    y='total_runs',
    size='is_wicket',
    ax=ax,
)
ax.set_title('Totals runs and wickets by over')
plt.show()

# Runs distribution over wise

In [None]:
# `balls` was already loaded from this same CSV in the "Loading the datasets"
# cell and has not been modified since, so the file is not read a second time.
# Keep just the two columns relevant to the over-wise run distribution.
runs_overs = balls[['total_runs', 'over']]
runs_overs

In [None]:
# Box plot of the runs scored per delivery, grouped by over number.
fig, ax = plt.subplots(figsize=(15, 9))
sns.boxplot(data=balls, x='over', y='total_runs', ax=ax)
ax.set_title('Runs distribution over wise')
plt.show()

# Runs distribution match wise

In [None]:
# Total runs scored in each match, as a two-column frame: id, total_runs.
#
# `total_runs` is selected before summing so the other columns of `balls`
# (including text columns) are never aggregated. That is faster and avoids
# errors on pandas versions that no longer drop non-numeric columns.
runs_by_match = balls.groupby('id', as_index=False)['total_runs'].sum()
runs_by_match

In [None]:
# Re-check the installed seaborn version before the sns.histplot call in the
# next cell (presumably because histplot needs the pinned 0.11 release — confirm).
print(sns.__version__)

In [None]:
# Histogram of the total runs scored per match.
fig, ax = plt.subplots(figsize=(15, 9))
match_totals = runs_by_match['total_runs']
sns.histplot(match_totals, ax=ax)
ax.set_title('Runs distribution match wise')
plt.show()

In [None]:
# Rebuild the merged per-delivery table and its derived `year` column.
data = pd.merge(left=matches, right=balls, on='id', how='right')
data['date'] = pd.to_datetime(data['date'])
data['year'] = data['date'].dt.year

# Total runs per match, tagged with the year the match was played in:
# a three-column frame id, year, total_runs.
#
# `total_runs` is selected before summing. Summing the whole frame would also
# try to add up the datetime `date` column, which raises a TypeError on
# pandas >= 2.0, and would aggregate every other column for no benefit.
runs_by_match_and_year = (
    data.groupby(['id', 'year'], as_index=False)['total_runs'].sum()
)
runs_by_match_and_year

In [None]:
# One histogram of per-match run totals for each season, stacked vertically
# with shared axes so the seasons are directly comparable.
#
# The seasons are taken from the data instead of thirteen hard-coded,
# copy-pasted lines, so the figure keeps working if seasons are added or
# missing.
years = sorted(int(year) for year in runs_by_match_and_year['year'].dropna().unique())

# 9 inches of height per panel (13 seasons x 9 gives the original 117).
# squeeze=False guarantees an array of axes even for a single season; it is
# flattened so `axes` stays one-dimensional, one entry per season.
f, axes = plt.subplots(
    len(years), 1,
    sharex='all', sharey='all',
    figsize=(15, 9 * len(years)),
    squeeze=False,
)
axes = axes.ravel()
f.suptitle("Runs distribution match wise")

for ax, year in zip(axes, years):
    season_totals = runs_by_match_and_year.loc[
        runs_by_match_and_year['year'] == year, 'total_runs'
    ]
    sns.histplot(x=season_totals, ax=ax).set_title(f'Year {year}')

plt.show()

# Runs balls wise distributions

In [None]:
# Bar chart counting how many deliveries produced each `total_runs` value.
fig, ax = plt.subplots(figsize=(15, 9))
sns.countplot(data=balls, x='total_runs', ax=ax)
ax.set_title('Runs balls wise distributions')
plt.show()

# Runs by ball of the over

In [None]:
# Total runs for every (ball, over) combination, as a three-column frame:
# ball, over, total_runs.
#
# `total_runs` is selected before summing so that no other column of `balls`
# is aggregated. That is faster and avoids errors on pandas versions that no
# longer drop non-numeric columns silently.
runs_by_ball_of_overs = (
    balls.groupby(['ball', 'over'], as_index=False)['total_runs'].sum()
)
runs_by_ball_of_overs

In [None]:
# Reshape to a grid: one row per `ball`, one column per `over`, cells holding
# the summed runs. This overwrites the long-form frame of the same name.
ball_over_totals = runs_by_ball_of_overs.set_index(['ball', 'over'])['total_runs']
runs_by_ball_of_overs = ball_over_totals.unstack('over')

In [None]:
# Heatmap of the ball-by-over grid of summed runs.
fig, ax = plt.subplots(figsize=(15, 9))
sns.heatmap(runs_by_ball_of_overs, ax=ax)
ax.set_title('Runs by ball of the over')
plt.show()

# How are the players being dismissed?

In [None]:
# Bar chart counting how often each `dismissal_kind` value occurs in `balls`.
fig, ax = plt.subplots(figsize=(15, 9))
sns.countplot(data=balls, x='dismissal_kind', ax=ax)
ax.set_title('How are the players being dismissed?')
plt.show()

# Which bowler led to most dismissals?

In [None]:
# Label deliveries without a dismissal explicitly. Assign the result back
# rather than calling fillna(inplace=True) on the column selection: that is
# chained assignment, which is deprecated and does not update `balls` under
# pandas copy-on-write.
balls['dismissal_kind'] = balls['dismissal_kind'].fillna('Not a wicket')

# Bowler of every delivery on which a dismissal happened, computed once and
# reused for both the plotted values and the bar ordering.
# NOTE(review): every dismissal kind is counted; if the data includes kinds
# that are not credited to the bowler (e.g. run outs), confirm that is intended.
dismissal_bowlers = balls.loc[balls['dismissal_kind'] != 'Not a wicket', 'bowler']

# Very tall figure: one horizontal bar per bowler, most dismissals first.
plt.figure(figsize=(15, 112))
sns.countplot(y=dismissal_bowlers, order=dismissal_bowlers.value_counts().index)
plt.title('Which bowler led to most dismissals?')
plt.show()