# IPL 2008–2024 Analysis Project

In [None]:
# 1. Imports & Data Loading
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Plot defaults: seaborn "darkgrid" theme first, then the default figure
# size (set afterwards so the theme call cannot override it).
sns.set(style="darkgrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

# Read both CSV tables (paths are relative to the working directory)
matches = pd.read_csv('matches.csv')
deliveries = pd.read_csv('deliveries.csv')

# Preview the leading rows of each table
for heading, frame in (("Matches dataset:", matches), ("Deliveries dataset:", deliveries)):
    print(heading)
    display(frame.head())

In [None]:
# 2. Data Cleaning
# Report the per-column count of missing values for each table; the heading
# and the counts are emitted on separate lines.
print("Missing values in matches:", matches.isna().sum(), sep="\n")
print("Missing values in deliveries:", deliveries.isna().sum(), sep="\n")

# Parse the date column into pandas datetimes so it can be sorted chronologically
matches['date'] = matches['date'].pipe(pd.to_datetime)

In [None]:
# 3. Match Analysis
# Head-to-Head
# Win counts per fixture. A fixture is keyed by the alphabetically ordered pair
# of team names, so "A v B" and "B v A" fall into the same row instead of being
# split depending on which side happened to be listed as team1.
# Result: index = (team1, team2) with team1 <= team2, columns = winner, values = wins.
swap_sides = matches['team1'].fillna('') > matches['team2'].fillna('')
head_to_head = (
    matches.assign(
        team1=matches['team1'].where(~swap_sides, matches['team2']),
        team2=matches['team2'].where(~swap_sides, matches['team1']),
    )
    .groupby(['team1', 'team2'])['winner']
    .value_counts()
    .unstack(fill_value=0)
)

# Recent Form
# Last five matches of every team, counting games where the team appears as
# either team1 or team2. Each match row is duplicated once per participant and
# tagged with a 'team' column; the most recent five rows per team are kept.
# A stable sort keeps same-day matches in their original file order.
team_matches = pd.concat(
    [matches.assign(team=matches[side]) for side in ('team1', 'team2')]
)
recent_form = (
    team_matches.sort_values(by='date', kind='stable')
    .groupby('team')
    .tail(5)
)

In [None]:
# 4. Team Analysis
# Number of matches won per team (value_counts orders by descending count)
team_wins = matches['winner'].value_counts()

# Horizontal bar chart: one bar per team, bar length = wins
sns.barplot(x=team_wins.to_numpy(), y=team_wins.index).set(
    title='Total Wins by Teams',
    xlabel='Wins',
    ylabel='Teams',
)
plt.show()

In [None]:
# 5. Player Analysis
# Top 10 run scorers: total batsman_runs per batsman, highest first.
# NOTE(review): some releases of the deliveries table may name this column
# 'batter' instead of 'batsman' — confirm against the CSV header.
top_batsmen = deliveries.groupby('batsman')['batsman_runs'].sum().sort_values(ascending=False).head(10)
sns.barplot(x=top_batsmen.values, y=top_batsmen.index)
plt.title('Top 10 Run Scorers')
plt.xlabel('Runs')
plt.ylabel('Batsmen')
plt.show()

# Top 10 wicket takers.
# Not every dismissal is credited to the bowler: run outs, retirements and
# obstructing the field are excluded so the tally matches bowling figures.
# Assumes dismissal_kind uses these lowercase labels — TODO confirm.
non_bowler_dismissals = ['run out', 'retired hurt', 'retired out', 'obstructing the field']
wickets = deliveries[
    deliveries['dismissal_kind'].notnull()
    & ~deliveries['dismissal_kind'].isin(non_bowler_dismissals)
]
top_bowlers = wickets['bowler'].value_counts().head(10)
sns.barplot(x=top_bowlers.values, y=top_bowlers.index)
plt.title('Top 10 Wicket Takers')
plt.xlabel('Wickets')
plt.ylabel('Bowlers')
plt.show()

In [None]:
# 6. Run Rate by Phase
# Label every delivery with the phase of the innings its over belongs to.
# NOTE(review): these cut-offs treat 'over' as 1-based (1-6 powerplay,
# 7-15 middle, 16+ death). If the column is 0-based the boundaries should be
# shifted down by one — confirm against the data.
conditions = [
    (deliveries['over'] <= 6),
    (deliveries['over'] > 6) & (deliveries['over'] <= 15),
    (deliveries['over'] > 15)
]
choices = ['Powerplay', 'Middle Overs', 'Death Overs']
# An explicit string default is required: np.select's implicit default is the
# integer 0, which cannot share a dtype with string choices (recent NumPy
# raises on this; older versions silently produced the label '0').
# Rows with a missing 'over' match no condition and fall into 'Unknown'.
deliveries['phase'] = np.select(conditions, choices, default='Unknown')

# mean(total_runs) is runs per delivery row; scale by six balls to express it
# as runs per over, which is what the chart's axis label states.
# This is an approximation: every row counts as a ball, including any
# deliveries that had to be re-bowled.
phase_runrate = deliveries.groupby('phase')['total_runs'].mean() * 6
sns.barplot(x=phase_runrate.index, y=phase_runrate.values)
plt.title('Average Run Rate by Phase')
plt.xlabel('Match Phase')
plt.ylabel('Avg Runs per Over')
plt.show()