In [0]:
import numpy as np # numerical computing 
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt #visualization
import seaborn as sns #modern visualization
import boto3

In [0]:
# Copy matches.csv out of DBFS onto the driver's local /tmp directory;
# a later cell reads it from /tmp/matches.csv with pandas.
local_path = 'file:///tmp/matches.csv'
dbfs_path = 'dbfs:/FileStore/tables/matches.csv'
dbutils.fs.cp(dbfs_path, local_path)

In [0]:
# Copy deliveries.csv out of DBFS onto the driver's local /tmp directory.
# NOTE: this rebinds dbfs_path / local_path to the deliveries file.
local_path = 'file:///tmp/deliveries.csv'
dbfs_path = 'dbfs:/FileStore/tables/deliveries.csv'
dbutils.fs.cp(dbfs_path, local_path)

In [0]:
# Notebook-wide plot styling: seaborn's "darkgrid" theme and a default
# figure size of 14 x 8 inches for every figure created afterwards.
sns.set_style("darkgrid")
plt.rcParams.update({'figure.figsize': (14, 8)})

In [0]:
# Load the matches CSV from the driver-local /tmp path into a DataFrame.
# The index is pandas' default 0..n-1 range (no index_col is given).
matches = pd.read_csv('/tmp/matches.csv')

In [0]:
# Number of rows and columns in the data set, as a (rows, columns) tuple.
matches.shape

In [0]:
# Column-level overview: each column's name, non-null count and dtype,
# plus the frame's memory usage. info() prints this summary.
matches.info()

In [0]:
# Summary statistics (count, mean, std, min, quartiles, max).
# With no arguments, describe() covers the numeric columns only.
matches.describe()

In [0]:
# Let's look at the first two rows of the input dataset.
matches.head(2)

In [0]:
# How many matches were played?
# Count distinct match ids rather than taking the largest id: max() only
# equals the number of matches when ids run contiguously from 1 with no gaps.
matches['id'].nunique()

In [0]:
# How many IPL seasons have been played?
# nunique() returns the number of distinct seasons directly; unique() would
# only list the season values and leave the counting to the reader.
matches['season'].nunique()

In [0]:
# Which Team had won by maximum runs?
# idxmax() returns an index *label*, so look the row up with .loc (label-based)
# rather than .iloc (position-based). The two only coincide while the index is
# the default 0..n-1 range.
matches.loc[matches['win_by_runs'].idxmax()]

In [0]:
# Just show me the winning team from above query
# Single label-based lookup (row label from idxmax, column 'winner') instead of
# positional .iloc followed by a second chained ['winner'] lookup.
matches.loc[matches['win_by_runs'].idxmax(), 'winner']

In [0]:
# Which Team had won by (closest margin) minimum runs?
# Keep only matches actually decided by runs (margin >= 1); a margin of 0
# would otherwise always be the minimum.
run_margins = matches.loc[matches['win_by_runs'] >= 1, 'win_by_runs']
# idxmin() returns an index label (the first one if several matches tie),
# so it must be resolved with .loc, not the positional .iloc.
matches.loc[run_margins.idxmin(), 'winner']

In [0]:
# Which Team had won by minimum wickets?
# Keep only matches actually decided by wickets (margin >= 1); a margin of 0
# would otherwise always be the minimum.
wicket_margins = matches.loc[matches['win_by_wickets'] >= 1, 'win_by_wickets']
# idxmin() returns an index label (the first one if several matches tie),
# so it must be resolved with .loc, not the positional .iloc.
matches.loc[wicket_margins.idxmin()]

In [0]:
# Which season had most number of matches?
# One bar per season; the bar height is the number of rows (matches) in it.
sns.countplot(data=matches, x='season')
plt.show()

In [0]:
# Which is the most successful IPL Team
# Wins per team; value_counts() orders them from most to fewest wins.
data = matches['winner'].value_counts()
# Horizontal bars: team names on the y axis, win counts on the x axis.
sns.barplot(x=data, y=data.index, orient='h');

In [0]:
# Which is the Top player of the match Winners
# The ten players with the most player-of-the-match awards, most awards first.
top_players = matches['player_of_match'].value_counts().head(10)

fig, ax = plt.subplots()
# Draw on the axes created above explicitly instead of relying on pyplot's
# implicit "current axes".
sns.barplot(x=top_players.index, y=top_players, orient='v', ax=ax)
# Label after plotting: seaborn can set its own axis labels while drawing,
# which would replace a label assigned beforehand.
ax.set_ylabel("Count")
ax.set_title("Top player of the match Winners")
# No hard-coded y limit: a fixed upper bound of 20 would clip the bar of any
# player with more than 20 awards, so the axis is left to autoscale.
plt.show()

In [0]:
# Has Toss-winning helped in Match-winning?
# Boolean Series: True where the toss winner also won the match.
# NOTE: rows with a missing `winner` (if any) compare as False here.
ss = matches['toss_winner'] == matches['winner']
# A notebook only auto-displays a cell's last expression, so the counts must
# be printed explicitly or they are silently discarded.
print(ss.value_counts())
# Pass the Series by keyword: the first positional parameter of countplot is
# `data` in current seaborn, so a positional Series is not treated as `x`.
sns.countplot(x=ss);