In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

%matplotlib inline

In [None]:
# Load the two raw tables used throughout the notebook:
#   players -> Excel workbook, matches -> CSV file.
players = pd.read_excel(io='../input/ipl-data-set/Players.xlsx')
matches = pd.read_csv(filepath_or_buffer='../input/ipl-data-set/matches.csv')

In [None]:
# Peek at the first two player records.
players.head(n=2)

In [None]:
# Print the column names, non-null counts and dtypes of the players table.
players.info()

## Now clean the data

#### Drop the column with the most null values (`umpire3`)

In [None]:
# Remove the `umpire3` column from the matches table in place.
# errors="ignore" keeps this cell re-runnable: without it, executing the cell
# a second time raises KeyError because the column has already been dropped.
matches.drop(columns="umpire3", errors="ignore", inplace=True)

#### Drop rows with NaN values

In [None]:
# Discard, in place, every match row that has at least one missing value.
matches.dropna(axis=0, how='any', inplace=True)

In [None]:
# Discard, in place, every player row that has at least one missing value.
players.dropna(axis=0, how='any', inplace=True)

#### Set the index to the `id` column

In [None]:
# Promote the `id` column to the row index (in place); the column itself is
# removed from the frame's columns.
matches.set_index(keys="id", inplace=True, drop=True)

In [None]:
# Strip the 'IPL-' text from each Season value and store the column as integers.
matches['Season'] = matches['Season'].str.replace('IPL-', '').astype('int')

In [None]:
# Remove the "_Hand" text from each Batting_Hand value.
players['Batting_Hand'] = players['Batting_Hand'].str.replace("_Hand", "")

#### Convert the date column to datetime format

In [None]:
# Display the raw date column before conversion.
matches['date']

In [None]:
# Replace the date column with its parsed datetime equivalent.
# NOTE(review): no explicit format/dayfirst is given, so parsing relies on
# pandas' inference — confirm against the raw date strings.
matches['date'] = pd.to_datetime(arg=matches['date'])

## Analysing the data

In [None]:
# Show the first five player rows after cleaning.
players.head(n=5)

#### Number of players per country

In [None]:
# Tally how many players appear for each Country value, then display the tally.
players_per_country = players['Country'].value_counts()
players_per_country

In [None]:
# Bar chart: number of players for each country.
plt.figure(figsize=(10, 5))
# NOTE(review): two consecutive set_style calls — the later "ticks" style
# presumably overrides "whitegrid"; confirm which look is intended.
sns.set_style(style="whitegrid")
sns.set_style(style="ticks", rc={"xtick.major.size": 8, "ytick.major.size": 8})

sns.countplot(data=players, x='Country', palette='dark')
plt.title('Countries vs Players')
plt.xticks(rotation=75)  # tilt the country labels
plt.grid()
plt.show()

In [None]:
# Bar chart: players per country, split by batting hand.
plt.figure(figsize=(10, 5))
# NOTE(review): two consecutive set_style calls — the later "ticks" style
# presumably overrides "whitegrid"; confirm which look is intended.
sns.set_style(style="whitegrid")
sns.set_style(style="ticks", rc={"xtick.major.size": 8, "ytick.major.size": 8})

sns.countplot(data=players, x='Country', hue='Batting_Hand', palette='coolwarm')
plt.xticks(rotation=75)  # tilt the country labels
plt.grid()
plt.show()

### Visualize right-hand vs left-hand batsmen

In [None]:
# Count the players for each Batting_Hand value.
players['Batting_Hand'].value_counts()

In [None]:
# Bar chart: number of players for each batting hand.
plt.figure(figsize=(10,5))
sns.set_style("whitegrid")

sns.countplot(x='Batting_Hand', data=players, palette='cubehelix')
plt.title('Right vs Left hand Batsmen')
# End on plt.show() like the other plotting cells; otherwise the Text object
# returned by plt.title() is echoed as the cell's output.
plt.show()

In [None]:
# Show the first five player rows.
players.head(n=5)

In [None]:
# Histogram of the players' DOB column, using 60 bins.
plt.figure(figsize=(10,6))
players.DOB.hist(bins=60, color='purple')
plt.title('Players born per year')
# End on plt.show() like the other plotting cells; otherwise the Text object
# returned by plt.title() is echoed as the cell's output.
plt.show()

In [None]:
# Swarm plot: DOB for each bowling skill, coloured by batting hand.
sns.catplot(
    data=players,
    x='Bowling_Skill',
    y='DOB',
    hue='Batting_Hand',
    kind='swarm',
    aspect=2,
)
plt.title('Bowling skills vs Player born')
plt.xticks(rotation=75)  # tilt the category labels
plt.show()

In [None]:
# Swarm plot: DOB for each batting hand, coloured by country.
sns.catplot(
    data=players,
    x='Batting_Hand',
    y='DOB',
    hue='Country',
    kind='swarm',
    aspect=2,
)
plt.title('Batting Skills vs Players born')
plt.xticks(rotation=75)  # tilt the category labels
plt.show()

In [None]:
# Peek at the first three match records.
matches.head(n=3)

#### Wins by each team per season

In [None]:
# For every Season, count how many times each team appears as `winner`.
# The result is a Series indexed by (Season, winner).
groupby_year = matches.groupby(by='Season')['winner'].value_counts()
groupby_year

In [None]:
# Bar chart of wins per team for a single season.
# The season is held in one variable so the data selection and the title
# cannot drift apart (the title previously said 2008 while 2019 was plotted).
season_to_plot = 2019

plt.figure(figsize=(10,6))
groupby_year.loc[season_to_plot].plot(kind='bar')
plt.xlabel(None)
plt.ylabel('Number of Matches')
plt.title('Wins by Teams in year {}'.format(season_to_plot), size=18)
plt.tight_layout()

#### Matches won by each team across all seasons

In [None]:
# Count the wins for each team over the whole matches table.
matches['winner'].value_counts()

In [None]:
# Bar chart of wins per team over the whole matches table.
plt.figure(figsize=(10, 6))
matches['winner'].value_counts().plot(color='orange', kind='bar')
plt.title('Wins by Teams in all Seasons', size=18)
plt.ylabel('Number of Matches')
plt.xlabel(None)
plt.tight_layout()