In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
# List the entries of the dataset input directory so the available files are visible.
os.listdir('../input/dream11-ipl2020-live')

In [None]:
# Read the daily data CSV into `data` (used by the cells that follow)
# and preview its first five rows.
data = pd.read_csv('../input/dream11-ipl2020-live/IPL_2020_Daily_Data.csv')
data.head(5)

## Daily_Data Overview (Matchwise Player Performance data)

In [None]:
class data_analytics:
    """Print a quick structural overview of a pandas DataFrame.

    Creating an instance prints the frame's dimensions and how many columns
    have a non-object ("numerical") or object ("categorical") dtype.
    ``missing_values`` prints a missing-value report.
    """

    def __init__(self,data):
        """Store ``data`` on the instance and print its shape summary.

        Parameters
        ----------
        data : pandas.DataFrame
            Frame to summarise; kept as ``self.data``.
        """
        self.data = data
        print('1) Dimension of the dataset is      :',data.shape)
        print('2) Number of Columns in the dataset :',data.shape[1])
        print('3) Number of Rows in the dataset    :',data.shape[0])
        # Walk data.dtypes instead of data[col].dtypes: with duplicated column
        # labels data[col] is a DataFrame and the dtype comparison is ambiguous.
        column_dtypes = list(data.dtypes.items())
        numerical_features = [col for col, dtype in column_dtypes if dtype != 'O']
        print('4) Count of Numerical Features      :',len(numerical_features))
        cat_features = [col for col, dtype in column_dtypes if dtype == 'O']
        print('5) Count of Categorical Features    :',len(cat_features))

    def missing_values(self,data=None):
        """Print total, percentage and column-wise missing-value counts.

        Parameters
        ----------
        data : pandas.DataFrame, optional
            Frame to inspect. Defaults to the frame passed to ``__init__``,
            so callers no longer need to pass it a second time.

        Returns
        -------
        None
            The report is printed, not returned; do not wrap the call in
            ``print()``.
        """
        if data is None:
            data = self.data

        # Compute the per-column counts once and reuse them below.
        missing_per_column = data.isnull().sum()
        total_missing = missing_per_column.sum()
        total_cells = data.shape[0] * data.shape[1]
        # An empty frame has no cells; report 0.0 instead of dividing by zero.
        missing_pct = (total_missing / total_cells) * 100 if total_cells else 0.0

        print('6) Missing values Estimation        :')
        print('6.1) Total Missing Values in the dataset   :',total_missing)
        print('6.2) Percentage of Total Missing Values    :',missing_pct)
        print('6.3) Column-wise Missing Values Estimation :')
        for column, n_missing in missing_per_column.items():
            if n_missing > 0:
                print(' >> The Column ',column,' has '+ str(n_missing) + ' missing values')

In [None]:
# missing_values() prints its report itself and returns None, so wrapping the
# call in print() only appended a stray "None" line to the output.
analytics = data_analytics(data)
analytics.missing_values(data)

## Feature Analysis

1) Total Number of Matches Played So far

In [None]:
# Uses the largest Match_number as the number of matches played.
# NOTE(review): this equals the match count only if matches are numbered
# 1..N without gaps — confirm against the CSV.
a  = data['Match_number'].max()
print('Total Matches Played by the End of WEEK 1 :',a)

2) Total Count of UNIQUE Players in IPL 2020 So far! 

In [None]:
# Number of distinct values found in the Player column.
count_of_players = data['Player'].unique().size
print('Unique Count of Players :',count_of_players)

3) Total Number of Participating Teams

In [None]:
# Collect the distinct team names once, then derive the team count from them.
ipl_names = data['Team'].unique()
ipl_teams = len(ipl_names)
print(f'{ipl_teams} Teams are participating in Dream11 IPL 2020. The Teams are {ipl_names}')

# RUNS Scored and Batsman Performance - Feature Analysis

In [None]:
# Row counts per RH/LH label (missing values are excluded by value_counts).
a = data['RH/LH'].value_counts()
# Look the right-handed count up by its label. The previous `a[0]` relied on
# positional fallback for a label-indexed Series (deprecated in pandas 2.x)
# and silently assumed that right-handers are the most frequent category.
# NOTE(review): assumes the column uses the labels 'RH' and 'LH' — confirm
# against the CSV; a different spelling raises KeyError here.
rh_players = (a['RH'] / data['RH/LH'].count()) * 100
print(f'{round(rh_players,2)} % of the Players are Righted Handed Batsmen.')
# Everything that is not 'RH' is reported as left-handed.
lh_players = 100 - rh_players
print(f'{round(lh_players,2)} % of the Players are Left Handed Batsmen.')
print(a)
sns.set(style="darkgrid")
sns.countplot(x="RH/LH", data=data)

RH Denotes 'Right-handed Player' and 'LH' denotes 'Left Handed Player'.
* **The Analysis Clearly states that IPL 2020 is Highly Concentrated with Right Handed Players when compared to Left Handed Players.**

In [None]:
# Distinct values present in the Ground column.
data['Ground'].unique()

Distribution of Runs

In [None]:
# sns.distplot is deprecated in seaborn; histplot with a KDE overlay on a
# density scale is its documented replacement (histogram plus KDE curve).
sns.histplot(data['Runs'], kde=True, stat='density')

In [None]:
# Summary statistics for the Runs column.
data['Runs'].describe()

TOP 10 Individual Run Scorers (Week-1)

In [None]:
# Keep only the Player, Team and Runs columns, then show the ten rows with
# the highest Runs value.
batsmandata = data.loc[:, ['Player', 'Team', 'Runs']]
batsmandata.sort_values('Runs', ascending=False).iloc[:10]

In [None]:
# Scatter plot with Runs on the x-axis and the RH/LH label on the y-axis.
plt.scatter(data['Runs'],data['RH/LH'])

Right-handed players have scored more runs, and posted higher individual scores, than left-handed ones!

Runs Scored vs Role of Player w.r.t RH/LH feature

In [None]:
# Runs (y) for each Role (x), with points coloured by the RH/LH label.
sns.scatterplot(data=data, x="Role",y="Runs",hue="RH/LH")

# TOP 10 Batsman {Run Getters} So Far in IPL 2020

In [None]:
# Total runs per player over all rows, highest first. Chaining sort_values()
# avoids the in-place mutation, so the cell yields the same result on re-run.
batsman_runs = data.groupby('Player')['Runs'].sum().sort_values(ascending=False)
# x=/y= are ignored when plotting a Series, so they are dropped; the axes are
# labelled explicitly so the chart can be read on its own.
ax = batsman_runs.head(10).plot(kind='barh', colormap='Accent')
ax.set(xlabel='Runs', ylabel='Player', title='Top 10 run scorers');

In [None]:
# Box plot of Runs for each Role, split by the RH/LH label.
sns.catplot(x="Runs",y="Role",hue="RH/LH",kind="box",data=data)

**Insights from Box Plot**: (Includes exceptions, but below is a view of a sample)
* The Above Box Plot shows that 'Most of the Runs scored are from Wicket Keeper Batsmen' who are Right Handed.
* Most of the All-Rounders have not Performed Well, especially Right-Handed Allrounders
* Left-handed wicket keepers get a good start in batting but fail to convert it into a big score, which is exactly what right-handed wicket keepers are capable of!
* The Performance of WK Batsmen is Better when Compared to Normal Batsmen.

# Wickets Feature Analytics

In [None]:
# Frequency of each Wickets value: printed as a table, then drawn as a bar chart.
wicket_counts = data['Wickets'].value_counts()
print(wicket_counts)
wicket_counts.plot.bar(figsize=(5, 5))

Most of the players have captured at least one wicket, while 9 players have managed to pick up to 3 wickets of the opposition.

Wickets Distribution

In [None]:
# sns.distplot is deprecated in seaborn; histplot with a KDE overlay on a
# density scale is its documented replacement (histogram plus KDE curve).
sns.histplot(data['Wickets'], kde=True, stat='density')

# TOP 10 BOWLERS IN IPL2020 So Far!

In [None]:
# Keep only the Player, Team and Wickets columns, then show the ten rows with
# the highest Wickets value.
bowlerdata = data.loc[:, ['Player', 'Team', 'Wickets']]
bowlerdata.sort_values('Wickets', ascending=False).iloc[:10]

In [None]:
# Total wickets per player over all rows, highest first. Chaining
# sort_values() avoids the in-place mutation of the grouped Series.
bowlers_wickets = data.groupby('Player')['Wickets'].sum().sort_values(ascending=False)
# x=/y= are ignored when plotting a Series, so they are dropped; the axes are
# labelled explicitly so the chart can be read on its own.
ax = bowlers_wickets.head(10).plot(kind='barh')
ax.set(xlabel='Wickets', ylabel='Player', title='Top 10 wicket takers');

In [None]:
# Frequency of each Dismissal value: printed as a table, then drawn as a count plot.
dismissal_counts = data['Dismissal'].value_counts()
print(dismissal_counts)
sns.countplot(data=data, x='Dismissal')

Insights from CountPlot:
* Most Players are dismissed through Catches.
* The low number of stumpings indicates that batsmen make better use of the crease and play the ball from inside their crease rather than stepping out to play!

# DREAM11_FANTASY_POINTS_ANALYSIS

Distribution of Dream11 Fantasy Points

In [None]:
# sns.distplot is deprecated in seaborn; histplot with a KDE overlay on a
# density scale is its documented replacement (histogram plus KDE curve).
# Note: the column name really contains a space after the underscore.
sns.histplot(data['Dream11_ Points'], kde=True, stat='density')

# Top 10 Fantasy Picks { In Week-1 }

In [None]:
# Total Dream11 points per player over all rows, highest first. Chaining
# sort_values() avoids the in-place mutation of the grouped Series.
fantasy_points = data.groupby('Player')['Dream11_ Points'].sum().sort_values(ascending=False)
# x=/y= are ignored when plotting a Series, so they are dropped; the axes are
# labelled explicitly so the chart can be read on its own.
ax = fantasy_points.head(10).plot(kind='barh', colormap='winter_r')
ax.set(xlabel='Dream11 points', ylabel='Player', title='Top 10 fantasy picks');

In [None]:
# Dream11 points (y) for each Role (x), with points coloured by the RH/LH label.
sns.scatterplot(data=data,x="Role",y="Dream11_ Points",hue="RH/LH")

In [None]:
# Box plot of Dream11 points for each Role, split by the RH/LH label.
sns.catplot(x="Dream11_ Points",y="Role",hue="RH/LH",kind="box",data=data)

# 2) MatchResults.csv Exploration

In [None]:
# Load the match-results CSV and print the same structural overview as for
# the daily data.
df = pd.read_csv('../input/dream11-ipl2020-live/IPL2020_MatchResults.csv')
analytics = data_analytics(df)
# missing_values() prints its report and returns None, so it is called
# directly; wrapping it in print() only added a stray "None" line.
analytics.missing_values(df)

In [None]:
# Display the match-results frame.
# NOTE(review): this renders the whole frame; switch to df.head() if it grows large.
df

# How many Matches Played in Each Stadium ?

In [None]:
# Number of rows per GROUND value, most frequent first.
# NOTE(review): this is the match count per stadium only if each row of df is one match — confirm.
number_of_matches_stadium = df['GROUND'].value_counts()
print(number_of_matches_stadium)

# How many matches have Each Team WON ?

In [None]:
# Number of rows per value of the 'RESULT (won_by)' column, most frequent first.
number_of_wins = df['RESULT (won_by)'].value_counts()
print(number_of_wins)

# Player with Most Man of the Match

In [None]:
# value_counts() orders by frequency, so head(1) keeps the most frequent MOM value.
# NOTE(review): if several players share the top count, only one of them is shown.
max_mom = df['MOM'].value_counts().head(1)
print(max_mom)

# Which Team has WON most number of Tosses ?

In [None]:
# value_counts() orders by frequency, so head(1) keeps the most frequent TOSS value.
# NOTE(review): if several teams share the top count, only one of them is shown.
df['TOSS'].value_counts().head(1)

# Number of TIED matches so Far in Dream11 IPL 2020

In [None]:
# Count the rows where SUPER_OVER equals 1. Series.sum() is vectorized, whereas
# the builtin sum() walks the Series element by element in Python; int() keeps
# the displayed result a plain integer.
int((df['SUPER_OVER'] == 1).sum())

# Do TOSS-Winning Teams WIN Matches?

In [None]:
# Compare the toss winner with the match winner row by row; summing the boolean
# mask counts the rows where both columns name the same team.
# Unlike df['TOSS'][i] inside a range() loop, this does not rely on the frame
# having a default 0..n-1 index.
count = int((df['TOSS'] == df['RESULT (won_by)']).sum())
print(f'Out of {df.shape[0]} matches,TOSS Winning teams has managed to WIN only {count}')

# IF You find this Notebook Useful ! Kindly UPVOTE and Show Your SUPPORT !!