# IPL Data Analysis Project

This notebook provides a comprehensive analysis of Indian Premier League (IPL) cricket data, including:
- Match statistics and team performance analysis
- Player performance metrics and insights
- Venue analysis and impact on match outcomes
- Toss decision analysis and Duckworth-Lewis (rain-affected match) usage
- Detailed analysis of Rohit Sharma's IPL career

The analysis covers multiple seasons of IPL data with detailed ball-by-ball information and match-level statistics.

In [None]:
# Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from typing import Tuple, Dict, List, Any
from datetime import datetime
from matplotlib.backends.backend_pdf import PdfPages
import warnings
# NOTE(review): with no category or module filter this hides every warning
# raised for the rest of the session, not just plotting noise.
warnings.filterwarnings('ignore')

# Set plotting style
# Grid background for seaborn/matplotlib figures; (12, 6) becomes the default
# size for any figure that does not pass its own figsize.
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)

print("Libraries imported successfully!")

## Data Loading

Load the IPL datasets (matches.csv and deliveries.csv) and perform initial data validation.

In [None]:
# Data Loading Functions
def load_data(data_dir: str = "data") -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Read the two IPL CSV files stored in ``data_dir``.

    Args:
        data_dir: Directory holding ``matches.csv`` and ``deliveries.csv``.

    Returns:
        A ``(matches, deliveries)`` pair of DataFrames, in that order.
    """
    frames = [
        pd.read_csv(os.path.join(data_dir, file_name))
        for file_name in ("matches.csv", "deliveries.csv")
    ]
    return frames[0], frames[1]

def get_data_info(matches: pd.DataFrame, deliveries: pd.DataFrame) -> dict:
    """Summarise the size and missing-value counts of both datasets.

    Returns:
        A dict with ``<name>_shape`` (the frame's ``shape`` tuple) and
        ``<name>_nulls`` (column name -> number of null cells) for the
        ``matches`` and ``deliveries`` frames.
    """
    frames = {'matches': matches, 'deliveries': deliveries}

    # Shapes first, then null counts, so the key order is stable.
    info = {f'{label}_shape': frame.shape for label, frame in frames.items()}
    for label, frame in frames.items():
        info[f'{label}_nulls'] = frame.isnull().sum().to_dict()
    return info

def get_sample_data(matches: pd.DataFrame, deliveries: pd.DataFrame, n: int = 5) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Return the leading ``n`` rows of each dataset as ``(matches, deliveries)``."""
    # Positional slicing is what DataFrame.head does under the hood.
    matches_preview = matches.iloc[:n]
    deliveries_preview = deliveries.iloc[:n]
    return matches_preview, deliveries_preview

# Load the data
# load_data() is called with its default directory ("data"), which is resolved
# relative to the current working directory.
print("Loading IPL datasets...")
matches, deliveries = load_data()

# Display data information
data_info = get_data_info(matches, deliveries)
print(f"\nMatches dataset shape: {data_info['matches_shape']}")
print(f"Deliveries dataset shape: {data_info['deliveries_shape']}")

# Display sample data
# NOTE(review): `display` is not imported in this file; presumably it is the
# IPython/Jupyter builtin, so this cell only runs inside a notebook kernel.
sample_matches, sample_deliveries = get_sample_data(matches, deliveries)
print("\nSample matches data:")
display(sample_matches)
print("\nSample deliveries data:")
display(sample_deliveries)

## Data Cleaning & Preprocessing

Perform any necessary data cleaning and preprocessing steps.

In [None]:
# Data Cleaning & Preprocessing
# This cell only reports on data quality; the single mutation it performs is
# the date conversion at the bottom.
print("Performing data cleaning and preprocessing...")

# Check for missing values
print("\nMissing values in matches:")
print(matches.isnull().sum())

print("\nMissing values in deliveries:")
print(deliveries.isnull().sum())

# Basic data types check
print("\nMatches data types:")
print(matches.dtypes)

print("\nDeliveries data types:")
print(deliveries.dtypes)

# Convert date columns if needed
# errors='coerce' turns unparseable dates into NaT instead of raising, so bad
# values surface as missing dates rather than as an exception here.
if 'date' in matches.columns:
    matches['date'] = pd.to_datetime(matches['date'], errors='coerce')

print("\nData preprocessing completed!")

## Exploratory Data Analysis

Perform comprehensive exploratory data analysis including match statistics, team performance, toss analysis, and player metrics.

In [None]:
# Exploratory Data Analysis Functions
def get_max_margin_victories(matches: pd.DataFrame) -> Dict[str, pd.Series]:
    """Find the matches won by the largest run margin and wicket margin.

    Returns:
        ``{'max_run_win': row, 'max_wicket_win': row}`` where each row is the
        full match record holding the maximum of the respective column.
    """
    margin_columns = (
        ('max_run_win', 'win_by_runs'),
        ('max_wicket_win', 'win_by_wickets'),
    )
    widest = {}
    for result_key, column in margin_columns:
        # idxmax yields the index label of the first row with the largest value.
        widest[result_key] = matches.loc[matches[column].idxmax()]
    return widest

def get_city_stats(matches: pd.DataFrame) -> pd.Series:
    """Count matches hosted by each city, most frequent city first."""
    host_cities = matches['city']
    return host_cities.value_counts()

def get_team_wins(matches: pd.DataFrame) -> pd.Series:
    """Count wins per team, most successful team first."""
    winners = matches['winner']
    return winners.value_counts()

def get_toss_analysis(matches: pd.DataFrame) -> Dict[str, Any]:
    """Analyze toss decisions and how often the toss winner wins the match.

    Args:
        matches: Match-level data with ``toss_winner``, ``winner`` and
            ``toss_decision`` columns.

    Returns:
        A dict with:
        - ``toss_winner_match_winner_percentage``: share (0-100) of all rows
          where ``toss_winner`` equals ``winner``; ``0.0`` for an empty frame.
        - ``toss_decision_distribution``: value counts of ``toss_decision``.
    """
    total_matches = len(matches)
    # Rows with a missing winner never compare equal, so they count against
    # the toss winner while still being part of the denominator.
    toss_and_match_wins = int((matches['toss_winner'] == matches['winner']).sum())

    # Guard the empty frame: the unguarded division raised ZeroDivisionError.
    percentage = (toss_and_match_wins / total_matches) * 100 if total_matches else 0.0

    toss_decision = matches['toss_decision'].value_counts()

    return {
        'toss_winner_match_winner_percentage': percentage,
        'toss_decision_distribution': toss_decision
    }

def get_top_players(matches: pd.DataFrame, deliveries: pd.DataFrame) -> Dict[str, pd.Series]:
    """Rank players by match awards and by sixes hit.

    Returns:
        ``top_man_of_match``: the ten most frequent ``player_of_match`` values.
        ``top_six_hitters``: the ten batsmen with the most deliveries on which
        ``batsman_runs`` equals 6.
    """
    award_counts = matches['player_of_match'].value_counts()

    # Select the batsman of every delivery that went for six.
    six_hitters = deliveries.loc[deliveries['batsman_runs'] == 6, 'batsman']

    rankings = {}
    rankings['top_man_of_match'] = award_counts.head(10)
    rankings['top_six_hitters'] = six_hitters.value_counts().head(10)
    return rankings

def get_dl_analysis(matches: pd.DataFrame) -> Dict[str, pd.Series]:
    """Summarise how often the Duckworth-Lewis flag is set.

    Returns:
        ``dl_applied_overall``: value counts of the ``dl_applied`` column.
        ``dl_per_season``: sum of ``dl_applied`` within each ``season``.
    """
    summary = {}
    summary['dl_applied_overall'] = matches['dl_applied'].value_counts()
    summary['dl_per_season'] = matches.groupby('season')['dl_applied'].sum()
    return summary

def get_defending_chasing_stats(matches: pd.DataFrame) -> Dict[str, pd.Series]:
    """Count wins by team and by venue, split into defended and chased wins.

    A match counts as "defending" when ``win_by_runs`` is positive and as
    "chasing" when ``win_by_wickets`` is positive.

    Returns:
        Four value-count Series keyed ``best_defending_teams``,
        ``best_chasing_teams``, ``best_venues_defending`` and
        ``best_venues_chasing``.
    """
    won_by_runs = matches.loc[matches['win_by_runs'] > 0]
    won_by_wickets = matches.loc[matches['win_by_wickets'] > 0]

    def tally(subset: pd.DataFrame, column: str) -> pd.Series:
        return subset[column].value_counts()

    return {
        'best_defending_teams': tally(won_by_runs, 'winner'),
        'best_chasing_teams': tally(won_by_wickets, 'winner'),
        'best_venues_defending': tally(won_by_runs, 'venue'),
        'best_venues_chasing': tally(won_by_wickets, 'venue'),
    }

def get_business_insights() -> List[str]:
    """Return the fixed list of headline takeaways shown in the report.

    The statements are static text; nothing is computed from the data here.
    """
    insights: List[str] = []
    insights.append("Cities with higher match frequency indicate strong engagement potential.")
    insights.append("Toss advantage exists but is not overwhelmingly decisive.")
    insights.append("Certain teams dominate defending and chasing conditions.")
    insights.append("Venue characteristics significantly impact match outcomes.")
    insights.append("High six-hitting players contribute strongly to audience engagement.")
    return insights

# Perform analyses
# Every result computed in this cell is kept in a module-level variable
# because the visualization and report cells further down read them.
print("Performing exploratory data analysis...")

# Maximum margin victories
max_victories = get_max_margin_victories(matches)
print("\nMaximum run margin victory:")
print(max_victories['max_run_win'][['winner', 'win_by_runs']])
print("\nMaximum wicket margin victory:")
print(max_victories['max_wicket_win'][['winner', 'win_by_wickets']])

# City statistics
city_stats = get_city_stats(matches)
print(f"\nTop 5 cities by matches:")
print(city_stats.head())

# Team wins
team_wins = get_team_wins(matches)
print(f"\nTop 5 teams by wins:")
print(team_wins.head())

# Toss analysis
toss_analysis = get_toss_analysis(matches)
print(f"\nToss winner won match percentage: {toss_analysis['toss_winner_match_winner_percentage']:.2f}%")
print("\nToss decision distribution:")
print(toss_analysis['toss_decision_distribution'])

# Top players
# get_top_players returns ten entries per ranking; only the first five are printed.
top_players = get_top_players(matches, deliveries)
print(f"\nTop 5 Man of Match award winners:")
print(top_players['top_man_of_match'].head())
print(f"\nTop 5 six hitters:")
print(top_players['top_six_hitters'].head())

# DL analysis
dl_analysis = get_dl_analysis(matches)
print(f"\nDL applied overall:")
print(dl_analysis['dl_applied_overall'])
print(f"\nDL applied per season:")
print(dl_analysis['dl_per_season'])

# Defending and chasing stats
defend_chase_stats = get_defending_chasing_stats(matches)
print(f"\nTop 5 defending teams:")
print(defend_chase_stats['best_defending_teams'].head())
print(f"\nTop 5 chasing teams:")
print(defend_chase_stats['best_chasing_teams'].head())

# Business insights
# These are fixed statements returned by get_business_insights(), numbered from 1.
insights = get_business_insights()
print("\nBusiness Insights:")
for i, insight in enumerate(insights, 1):
    print(f"{i}. {insight}")

print("\nExploratory data analysis completed!")

## Visualizations

Create comprehensive visualizations for all the analysis results.

In [None]:
# Visualization Functions
def plot_max_margin_victories(max_run_win: pd.Series, max_wicket_win: pd.Series) -> None:
    """Draw one single-bar chart each for the biggest run and wicket wins.

    Args:
        max_run_win: Match record providing ``winner`` and ``win_by_runs``.
        max_wicket_win: Match record providing ``winner`` and ``win_by_wickets``.
    """
    charts = (
        (max_run_win, 'win_by_runs', "Maximum Run Margin Victory", "Runs"),
        (max_wicket_win, 'win_by_wickets', "Maximum Wicket Margin Victory", "Wickets"),
    )
    for record, margin_column, title, y_label in charts:
        # Each chart gets its own figure and is shown before the next is built.
        plt.figure(figsize=(6, 4))
        plt.bar(record['winner'], record[margin_column])
        plt.title(title)
        plt.ylabel(y_label)
        plt.tight_layout()
        plt.show()

def plot_city_matches(city_matches: pd.Series, top_n: int = 20) -> None:
    """Bar chart of match counts for the first ``top_n`` entries of ``city_matches``."""
    leading_cities = city_matches.head(top_n)

    plt.figure(figsize=(12, 6))
    leading_cities.plot(kind='bar')
    plt.title(f"Top {top_n} Cities by Number of Matches")
    plt.ylabel("Matches")
    plt.xticks(rotation=90)  # vertical labels so long city names do not overlap
    plt.tight_layout()
    plt.show()

def plot_team_wins(team_wins: pd.Series) -> None:
    """Bar chart with one bar per team showing its number of wins."""
    chart_title, y_label = "Matches Won by Each Team", "Wins"

    plt.figure(figsize=(12, 6))
    team_wins.plot(kind='bar')
    plt.title(chart_title)
    plt.ylabel(y_label)
    plt.xticks(rotation=90)  # vertical labels so team names do not overlap
    plt.tight_layout()
    plt.show()

def plot_toss_decision(toss_decision: pd.Series) -> None:
    """Pie chart of the toss-decision counts, labelled with percentages."""
    percent_label = '%1.1f%%'

    plt.figure(figsize=(6, 6))
    toss_decision.plot(kind='pie', autopct=percent_label)
    plt.title("Toss Decision Distribution")
    plt.ylabel("")  # blank out the y-axis label pandas adds to pie charts
    plt.tight_layout()
    plt.show()

def plot_top_players(top_mom: pd.Series, top_six_hitters: pd.Series) -> None:
    """Draw two bar charts: match awards per player and sixes per player.

    Args:
        top_mom: Award counts indexed by player name.
        top_six_hitters: Six counts indexed by batsman name.
    """
    charts = (
        (top_mom, "Top 10 Players - Man of the Match Awards", "Awards"),
        (top_six_hitters, "Top 10 Players with Most Sixes", "Sixes"),
    )
    for counts, title, y_label in charts:
        plt.figure(figsize=(12, 6))
        counts.plot(kind='bar')
        plt.title(title)
        plt.ylabel(y_label)
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()

def plot_dl_analysis(dl_per_season: pd.Series) -> None:
    """Bar chart of the per-season Duckworth-Lewis totals in ``dl_per_season``."""
    chart_title, y_label = "D/L Applied Per Season", "Count"

    plt.figure(figsize=(12, 6))
    dl_per_season.plot(kind='bar')
    plt.title(chart_title)
    plt.ylabel(y_label)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

def plot_defending_chasing(best_defending: pd.Series, best_chasing: pd.Series,
                          venue_defend: pd.Series, venue_chase: pd.Series) -> None:
    """Draw four bar charts covering defended and chased wins.

    Each chart shows at most the first ten entries of its Series, in this
    order: defending teams, chasing teams, defending venues, chasing venues.
    """
    # (data, title, y-axis label, x tick rotation) for every chart, in draw order.
    charts = (
        (best_defending, "Top Defending Teams", "Wins", 45),
        (best_chasing, "Top Chasing Teams", "Wins", 45),
        (venue_defend, "Best Venues for Defending", "Matches", 90),
        (venue_chase, "Best Venues for Chasing", "Matches", 90),
    )
    for counts, title, y_label, tick_rotation in charts:
        plt.figure(figsize=(12, 6))
        counts.head(10).plot(kind='bar')
        plt.title(title)
        plt.ylabel(y_label)
        plt.xticks(rotation=tick_rotation)
        plt.tight_layout()
        plt.show()

# Create all visualizations
# Relies on the result variables produced by the exploratory-analysis cell
# (max_victories, city_stats, team_wins, toss_analysis, top_players,
# dl_analysis, defend_chase_stats); that cell must run first.
print("Creating visualizations...")

# Maximum margin victories
plot_max_margin_victories(max_victories['max_run_win'], max_victories['max_wicket_win'])

# City matches
# Uses the default top_n of plot_city_matches (20 cities).
plot_city_matches(city_stats)

# Team wins
plot_team_wins(team_wins)

# Toss decision
plot_toss_decision(toss_analysis['toss_decision_distribution'])

# Top players
plot_top_players(top_players['top_man_of_match'], top_players['top_six_hitters'])

# DL analysis
plot_dl_analysis(dl_analysis['dl_per_season'])

# Defending and chasing
plot_defending_chasing(
    defend_chase_stats['best_defending_teams'],
    defend_chase_stats['best_chasing_teams'],
    defend_chase_stats['best_venues_defending'],
    defend_chase_stats['best_venues_chasing']
)

print("All visualizations created successfully!")

## Rohit Sharma Analysis

Perform comprehensive analysis of Rohit Sharma's IPL career including performance metrics, consistency analysis, and career insights.

In [None]:
# Rohit Sharma Analysis Functions
def get_rohit_sharma_career_analysis(matches: pd.DataFrame, deliveries: pd.DataFrame) -> Dict:
    """Build a batting career summary for the player recorded as 'RG Sharma'.

    Args:
        matches: Match-level data (uses ``id``, ``season``, ``venue`` and
            ``player_of_match``).
        deliveries: Ball-by-ball data (uses ``match_id``, ``batsman``,
            ``batsman_runs``, ``wide_runs``, ``player_dismissed``,
            ``dismissal_kind`` and the columns read by the helper functions).

    Returns:
        A nested dict with the keys ``career_span``, ``basic_stats``,
        ``dismissal_analysis``, ``season_performance``, ``team_performance``,
        ``venue_performance``, ``innings_performance``, ``phase_performance``,
        ``captaincy``, ``milestones`` and ``consistency``.

    Notes:
        - Only deliveries where he is the striker are considered.
          NOTE(review): a dismissal recorded on a ball faced by his partner
          would therefore not be counted — confirm against the data.
        - The ``captaincy`` section is derived from player-of-the-match rows
          and ``_did_rohit_team_win``; no captain information is read here.
    """
    # Filter Rohit Sharma data
    rohit_deliveries = deliveries[deliveries['batsman'] == 'RG Sharma']

    # A match is his if he received the match award or faced at least one ball.
    # The id lookup is computed once instead of once per match row.
    played_match_ids = rohit_deliveries['match_id'].unique()
    won_award = matches['player_of_match'].map(lambda name: 'RG Sharma' in str(name)).astype(bool)
    batted = matches['id'].isin(played_match_ids)
    rohit_matches = matches[won_award | batted]

    # Career span
    career_seasons = sorted(rohit_matches['season'].unique())
    total_seasons = len(career_seasons)

    # Basic statistics
    total_matches = len(rohit_matches)
    total_runs = int(rohit_deliveries['batsman_runs'].sum())
    # Deliveries with wide runs are excluded from balls faced.
    total_balls = len(rohit_deliveries[rohit_deliveries['wide_runs'] == 0])
    strike_rate = (total_runs / total_balls * 100) if total_balls > 0 else 0

    # Dismissal analysis
    dismissals = rohit_deliveries[rohit_deliveries['player_dismissed'] == 'RG Sharma']
    dismissal_modes = dismissals['dismissal_kind'].value_counts()
    dismissal_count = len(dismissals)

    # Season-wise performance
    season_performance = _get_rohit_season_performance(rohit_deliveries, rohit_matches)

    # Team-wise performance
    team_performance = _get_rohit_team_performance(rohit_deliveries, rohit_matches)

    # Venue-wise performance
    venue_performance = _get_rohit_venue_performance(rohit_deliveries, rohit_matches)

    # Innings-wise performance
    innings_performance = _get_rohit_innings_performance(rohit_deliveries)

    # Powerplay and death overs performance
    phase_performance = _get_rohit_phase_performance(rohit_deliveries)

    # "Captaincy" analysis: rows where he is the player of the match.
    captaincy_matches = rohit_matches[rohit_matches['player_of_match'] == 'RG Sharma']
    mom_awards = len(captaincy_matches)
    # Plain iteration keeps this well-defined when there are no such rows.
    captaincy_wins = sum(
        1 for _, match_row in captaincy_matches.iterrows() if _did_rohit_team_win(match_row)
    )

    # High scores and milestones
    match_scores = _get_rohit_match_scores(rohit_deliveries)
    high_scores = sorted(match_scores, reverse=True)[:10]
    centuries = len([s for s in match_scores if s >= 100])
    half_centuries = len([s for s in match_scores if 50 <= s < 100])

    # Consistency analysis
    consistency_metrics = _calculate_rohit_consistency(match_scores)

    return {
        'career_span': {
            'seasons': career_seasons,
            'total_seasons': total_seasons,
            'total_matches': total_matches
        },
        'basic_stats': {
            'total_runs': total_runs,
            'total_balls': total_balls,
            'strike_rate': round(strike_rate, 2),
            'average': round(total_runs / dismissal_count, 2) if dismissal_count > 0 else 0
        },
        'dismissal_analysis': dismissal_modes.to_dict(),
        'season_performance': season_performance,
        'team_performance': team_performance,
        'venue_performance': venue_performance,
        'innings_performance': innings_performance,
        'phase_performance': phase_performance,
        'captaincy': {
            'mom_awards': mom_awards,
            'wins_as_captain': captaincy_wins,
            'win_rate_as_captain': round((captaincy_wins / mom_awards * 100), 2) if mom_awards > 0 else 0
        },
        'milestones': {
            'highest_scores': high_scores,
            'centuries': centuries,
            'half_centuries': half_centuries
        },
        'consistency': consistency_metrics
    }

def _did_rohit_play_in_match(match_id: int, rohit_deliveries: pd.DataFrame) -> bool:
    """Return True when ``match_id`` appears in the ``match_id`` column of ``rohit_deliveries``."""
    same_match = rohit_deliveries['match_id'].eq(match_id)
    return bool(same_match.any())

def _did_rohit_team_win(match_row: pd.Series) -> bool:
    """Check if Rohit's team won the match.

    NOTE(review): placeholder implementation — ``match_row`` is ignored and
    True is returned for every match, so any win count or win rate built on
    this helper simply equals the number of rows it was applied to.
    """
    return True  # Placeholder logic

def _get_rohit_season_performance(rohit_deliveries: pd.DataFrame, rohit_matches: pd.DataFrame) -> Dict:
    """Break the batting numbers down by season.

    Args:
        rohit_deliveries: Deliveries already filtered to the player.
        rohit_matches: Matches attributed to the player (``id``, ``season``).

    Returns:
        ``{season: {'runs', 'balls', 'dismissals', 'average', 'strike_rate',
        'matches'}}`` with seasons in ascending order. ``average`` and
        ``strike_rate`` are 0 when their denominator is 0.
    """
    per_season = {}

    for year in sorted(rohit_matches['season'].unique()):
        matches_in_year = rohit_matches.loc[rohit_matches['season'] == year]
        ids_in_year = matches_in_year['id'].tolist()
        balls_in_year = rohit_deliveries.loc[rohit_deliveries['match_id'].isin(ids_in_year)]

        run_total = int(balls_in_year['batsman_runs'].sum())
        # Deliveries with wide runs do not count as balls faced.
        balls_faced = int((balls_in_year['wide_runs'] == 0).sum())
        times_out = int((balls_in_year['player_dismissed'] == 'RG Sharma').sum())

        batting_average = round(run_total / times_out, 2) if times_out > 0 else 0
        scoring_rate = round((run_total / balls_faced * 100), 2) if balls_faced > 0 else 0

        per_season[year] = {
            'runs': run_total,
            'balls': balls_faced,
            'dismissals': times_out,
            'average': batting_average,
            'strike_rate': scoring_rate,
            'matches': len(matches_in_year),
        }

    return per_season

def _get_rohit_team_performance(rohit_deliveries: pd.DataFrame, rohit_matches: pd.DataFrame) -> Dict:
    """Break the batting numbers down by the team he batted for.

    Returns:
        ``{batting_team: {'runs', 'balls', 'dismissals', 'average',
        'strike_rate', 'matches'}}`` in order of first appearance in
        ``rohit_deliveries``. ``matches`` counts rows of ``rohit_matches``
        whose ``id`` has at least one delivery for that team.
    """
    per_team = {}

    for side in rohit_deliveries['batting_team'].unique():
        balls_for_side = rohit_deliveries.loc[rohit_deliveries['batting_team'] == side]
        ids_for_side = balls_for_side['match_id'].unique()
        match_count = len(rohit_matches.loc[rohit_matches['id'].isin(ids_for_side)])

        run_total = int(balls_for_side['batsman_runs'].sum())
        # Deliveries with wide runs do not count as balls faced.
        balls_faced = int((balls_for_side['wide_runs'] == 0).sum())
        times_out = int((balls_for_side['player_dismissed'] == 'RG Sharma').sum())

        entry = {'runs': run_total, 'balls': balls_faced, 'dismissals': times_out}
        entry['average'] = round(run_total / times_out, 2) if times_out > 0 else 0
        entry['strike_rate'] = round((run_total / balls_faced * 100), 2) if balls_faced > 0 else 0
        entry['matches'] = match_count
        per_team[side] = entry

    return per_team

def _get_rohit_venue_performance(rohit_deliveries: pd.DataFrame, rohit_matches: pd.DataFrame) -> Dict:
    """Break the batting numbers down by venue.

    Matches without a venue are ignored, and a venue only appears in the
    result when at least one of his deliveries was bowled there.

    Returns:
        ``{venue: {'runs', 'balls', 'dismissals', 'average', 'strike_rate',
        'matches'}}`` where ``matches`` is the number of his matches at that
        venue.
    """
    per_venue = {}
    located_matches = rohit_matches.dropna(subset=['venue'])

    for ground in located_matches['venue'].unique():
        ids_at_ground = located_matches.loc[located_matches['venue'] == ground, 'id'].tolist()
        balls_at_ground = rohit_deliveries.loc[rohit_deliveries['match_id'].isin(ids_at_ground)]

        # Skip venues where he never faced a delivery.
        if len(balls_at_ground) == 0:
            continue

        run_total = int(balls_at_ground['batsman_runs'].sum())
        balls_faced = int((balls_at_ground['wide_runs'] == 0).sum())
        times_out = int((balls_at_ground['player_dismissed'] == 'RG Sharma').sum())

        per_venue[ground] = {
            'runs': run_total,
            'balls': balls_faced,
            'dismissals': times_out,
            'average': round(run_total / times_out, 2) if times_out > 0 else 0,
            'strike_rate': round((run_total / balls_faced * 100), 2) if balls_faced > 0 else 0,
            'matches': len(ids_at_ground),
        }

    return per_venue

def _get_rohit_innings_performance(rohit_deliveries: pd.DataFrame) -> Dict:
    """Break the batting numbers down by first and second innings.

    Only ``inning`` values 1 and 2 are reported; any other value is ignored.

    Returns:
        ``{'inning_1': {...}, 'inning_2': {...}}`` with ``runs``, ``balls``,
        ``dismissals``, ``average`` and ``strike_rate`` for each.
    """

    def summarise(balls: pd.DataFrame) -> Dict:
        run_total = int(balls['batsman_runs'].sum())
        # Deliveries with wide runs do not count as balls faced.
        balls_faced = int((balls['wide_runs'] == 0).sum())
        times_out = int((balls['player_dismissed'] == 'RG Sharma').sum())
        return {
            'runs': run_total,
            'balls': balls_faced,
            'dismissals': times_out,
            'average': round(run_total / times_out, 2) if times_out > 0 else 0,
            'strike_rate': round((run_total / balls_faced * 100), 2) if balls_faced > 0 else 0,
        }

    return {
        f'inning_{number}': summarise(rohit_deliveries.loc[rohit_deliveries['inning'] == number])
        for number in (1, 2)
    }

def _get_rohit_phase_performance(rohit_deliveries: pd.DataFrame) -> Dict:
    """Get Rohit's performance in different phases of the game.

    Phases are decided by the ``over`` column: ``over <= 6`` is "powerplay",
    ``over <= 15`` is "middle" and everything else is "death".
    NOTE(review): these cut-offs presumably assume overs numbered from 1 —
    confirm against the dataset.

    Args:
        rohit_deliveries: Deliveries already filtered to the player; reads
            ``over``, ``wide_runs``, ``batsman_runs`` and ``player_dismissed``.

    Returns:
        ``{'powerplay' | 'middle' | 'death': {'runs', 'balls', 'wickets',
        'strike_rate', 'average'}}``. Runs and balls only include deliveries
        without wide runs; ``strike_rate`` / ``average`` are 0 when there are
        no balls / no wickets.
    """
    phase_stats = {
        'powerplay': {'runs': 0, 'balls': 0, 'wickets': 0},
        'middle': {'runs': 0, 'balls': 0, 'wickets': 0},
        'death': {'runs': 0, 'balls': 0, 'wickets': 0}
    }

    # Vectorized masks replace the row-by-row loop; an empty frame keeps zeros.
    if not rohit_deliveries.empty:
        over = rohit_deliveries['over']
        up_to_six = over <= 6
        up_to_fifteen = over <= 15
        # Written as negations so a missing over falls into "death", exactly
        # as the if/elif/else chain would place it.
        phase_masks = {
            'powerplay': up_to_six,
            'middle': ~up_to_six & up_to_fifteen,
            'death': ~up_to_fifteen,
        }
        not_wide = rohit_deliveries['wide_runs'] == 0
        dismissed = rohit_deliveries['player_dismissed'] == 'RG Sharma'

        for phase, in_phase in phase_masks.items():
            counted = in_phase & not_wide
            phase_stats[phase]['runs'] = int(rohit_deliveries.loc[counted, 'batsman_runs'].sum())
            phase_stats[phase]['balls'] = int(counted.sum())
            # Wickets are counted on every delivery, wide or not.
            phase_stats[phase]['wickets'] = int((in_phase & dismissed).sum())

    # Calculate strike rates and averages
    for totals in phase_stats.values():
        if totals['balls'] > 0:
            totals['strike_rate'] = round((totals['runs'] / totals['balls'] * 100), 2)
        else:
            totals['strike_rate'] = 0

        if totals['wickets'] > 0:
            totals['average'] = round((totals['runs'] / totals['wickets']), 2)
        else:
            totals['average'] = 0

    return phase_stats

def _get_rohit_match_scores(rohit_deliveries: pd.DataFrame) -> List[int]:
    """Total ``batsman_runs`` per match, in order of each match's first appearance."""
    match_ids = rohit_deliveries['match_id']
    runs = rohit_deliveries['batsman_runs']
    return [int(runs[match_ids == match_id].sum()) for match_id in match_ids.unique()]

def _calculate_rohit_consistency(match_scores: List[int]) -> Dict:
    """Calculate consistency metrics for a list of per-match scores.

    Args:
        match_scores: One score per match.

    Returns:
        An empty dict for an empty list. Otherwise a dict with
        ``mean_score``, ``median_score``, ``std_deviation`` (population
        standard deviation) and ``coefficient_of_variation`` (std / mean as a
        percentage, 0 when the mean is not positive), each rounded to two
        decimals, plus the counts ``scores_above_50`` (>= 50),
        ``scores_above_30`` (>= 30), ``ducks`` (== 0) and ``total_innings``.
        The statistics are built-in floats rather than NumPy scalars so the
        nested dict prints cleanly when written to the text report.
    """
    if not match_scores:
        return {}

    scores_array = np.asarray(match_scores)

    # Convert once to built-in floats; NumPy scalars repr as np.float64(...)
    # on NumPy 2 when a containing dict is formatted.
    mean_score = float(np.mean(scores_array))
    median_score = float(np.median(scores_array))
    std_deviation = float(np.std(scores_array))

    return {
        'mean_score': round(mean_score, 2),
        'median_score': round(median_score, 2),
        'std_deviation': round(std_deviation, 2),
        'coefficient_of_variation': round((std_deviation / mean_score * 100), 2) if mean_score > 0 else 0,
        'scores_above_50': sum(1 for s in match_scores if s >= 50),
        'scores_above_30': sum(1 for s in match_scores if s >= 30),
        'ducks': sum(1 for s in match_scores if s == 0),
        'total_innings': len(match_scores)
    }

# Perform Rohit Sharma analysis
print("Performing Rohit Sharma career analysis...")
rohit_analysis = get_rohit_sharma_career_analysis(matches, deliveries)

# Display results
# NOTE(review): indexing seasons[0] / seasons[-1] raises IndexError when the
# analysis finds no matches for the player (empty 'seasons' list).
print(f"\nRohit Sharma Career Summary:")
print(f"Career Span: {rohit_analysis['career_span']['seasons'][0]} - {rohit_analysis['career_span']['seasons'][-1]}")
print(f"Total Seasons: {rohit_analysis['career_span']['total_seasons']}")
print(f"Total Matches: {rohit_analysis['career_span']['total_matches']}")
print(f"Total Runs: {rohit_analysis['basic_stats']['total_runs']}")
print(f"Strike Rate: {rohit_analysis['basic_stats']['strike_rate']}")
print(f"Average: {rohit_analysis['basic_stats']['average']}")
print(f"Centuries: {rohit_analysis['milestones']['centuries']}")
print(f"Half-centuries: {rohit_analysis['milestones']['half_centuries']}")
# The award count is stored under the 'captaincy' section of the result.
print(f"Man of Match Awards: {rohit_analysis['captaincy']['mom_awards']}")

print(f"\nSeason-wise Performance (Last 5 seasons):")
# season_performance is built in ascending season order, so the last five
# keys are the five most recent seasons.
recent_seasons = list(rohit_analysis['season_performance'].keys())[-5:]
for season in recent_seasons:
    stats = rohit_analysis['season_performance'][season]
    print(f"{season}: {stats['runs']} runs @ {stats['strike_rate']} SR, {stats['matches']} matches")

print("\nRohit Sharma analysis completed!")

## Final Report & Insights

Generate comprehensive reports and final insights from all the analysis performed.

In [None]:
# Final Report Generation
def generate_comprehensive_report(analysis_results: Dict[str, Any], output_dir: str = "output") -> str:
    """Write the analysis results to a timestamped plain-text report.

    Args:
        analysis_results: Section name -> value. Each section is written as
            the upper-cased name, a dashed rule and the value's string form.
        output_dir: Target directory; created if it does not exist.

    Returns:
        Path of the report file that was written.
    """
    os.makedirs(output_dir, exist_ok=True)
    file_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    txt_path = os.path.join(output_dir, f"ipl_analysis_text_{file_stamp}.txt")

    with open(txt_path, 'w', encoding='utf-8') as report:
        # Header: title, rule, and a human-readable generation time.
        report.writelines([
            "IPL DATA ANALYSIS REPORT\n",
            "=" * 50 + "\n\n",
            f"Generated on: {datetime.now().strftime('%B %d, %Y %H:%M:%S')}\n\n",
        ])

        # One block per result, in the dict's own order.
        for section, content in analysis_results.items():
            report.writelines([
                f"{section.upper()}\n",
                "-" * 30 + "\n",
                f"{content}\n\n",
            ])

    return txt_path

# Prepare analysis results for report
# Relies on variables produced by the earlier cells (matches, deliveries,
# toss_analysis, max_victories, team_wins, top_players, insights,
# dl_analysis, rohit_analysis); those cells must run first.
print("Preparing analysis results for final report...")

# NOTE(review): 'total_teams' counts distinct team1 values only, so a team
# that only ever appears as team2 would be missed; unique() also counts a
# missing city or season as a value of its own — confirm against the data.
analysis_results = {
    'total_matches': len(matches),
    'total_deliveries': len(deliveries),
    'total_teams': len(matches['team1'].unique()),
    'total_cities': len(matches['city'].unique()),
    'seasons_covered': len(matches['season'].unique()),
    'matches_shape': matches.shape,
    'deliveries_shape': deliveries.shape,
    'toss_advantage_pct': f"{toss_analysis['toss_winner_match_winner_percentage']:.2f}",
    'max_run_margin_team': max_victories['max_run_win']['winner'],
    'max_run_margin': max_victories['max_run_win']['win_by_runs'],
    'max_wicket_margin_team': max_victories['max_wicket_win']['winner'],
    'max_wicket_margin': max_victories['max_wicket_win']['win_by_wickets'],
    # value_counts() results are sorted by count, so position 0 is the leader.
    'top_winning_team': team_wins.index[0],
    'top_team_wins': team_wins.iloc[0],
    'top_mom_player': top_players['top_man_of_match'].index[0],
    'top_mom_count': top_players['top_man_of_match'].iloc[0],
    'top_six_hitter': top_players['top_six_hitters'].index[0],
    'top_six_count': top_players['top_six_hitters'].iloc[0],
    'business_insights': insights,
    'preferred_toss_decision': toss_analysis['toss_decision_distribution'].index[0],
    # Count of matches whose dl_applied value is 1; 0 when that value never occurs.
    'dl_applied_count': dl_analysis['dl_applied_overall'].get(1, 0),
    'rohit_sharma_analysis': rohit_analysis
}

# Generate text report
# Written into the default "output" directory of generate_comprehensive_report.
txt_path = generate_comprehensive_report(analysis_results)

# Display final summary
print("\n" + "=" * 60)
print("IPL DATA ANALYSIS COMPLETED!")
print("=" * 60)
print(f"\nDataset Summary:")
print(f"• Total Matches: {len(matches)}")
print(f"• Total Deliveries: {len(deliveries)}")
print(f"• Total Teams: {len(matches['team1'].unique())}")
print(f"• Seasons Covered: {len(matches['season'].unique())}")
print(f"\nKey Findings:")
print(f"• Top Team: {team_wins.index[0]} ({team_wins.iloc[0]} wins)")
print(f"• Toss Advantage: {toss_analysis['toss_winner_match_winner_percentage']:.1f}%")
print(f"• Most Man of Match: {top_players['top_man_of_match'].index[0]} ({top_players['top_man_of_match'].iloc[0]} awards)")
print(f"\nRohit Sharma Highlights:")
print(f"• Total Runs: {rohit_analysis['basic_stats']['total_runs']:,}")
print(f"• Strike Rate: {rohit_analysis['basic_stats']['strike_rate']}")
print(f"• Centuries: {rohit_analysis['milestones']['centuries']}")
print(f"• Man of Match Awards: {rohit_analysis['captaincy']['mom_awards']}")
print(f"\nReport Generated: {txt_path}")
print("=" * 60)