# Cricket Fielding Analysis 

In [10]:
# Create a cricket fielding analysis from the IPL sample data workbook (IPL sample data.xlsx).

In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from matplotlib.ticker import MaxNLocator
import warnings


# Silence FutureWarning messages for the rest of the session.
warnings.filterwarnings("ignore", category=FutureWarning)

# Points applied per occurrence of each fielding action; errors carry
# negative weights. calculate_performance_score multiplies each action count
# by its weight here, and the key order of this dict is also the order in
# which the per-player metrics are laid out.
WEIGHTS = {
    'Clean Pick': 1,
    'Good Throw': 1,
    'Catch': 3,
    'Dropped Catch': -3,
    'Stumping': 3,
    'Run Out': 3,
    'Missed Runout': -2,
    'Direct Hit': 2
}

# Hex colours keyed by action name, looked up when drawing the stacked
# breakdown chart (actions without an entry fall back to grey there).
# NOTE(review): the 'Runs Saved' entry does not appear to be used by the
# plotting code visible in this file, which drops that column before
# stacking -- confirm before removing.
ACTION_COLORS = {
    'Clean Pick': '#1f77b4',
    'Good Throw': '#ff7f0e',
    'Catch': '#2ca02c',
    'Dropped Catch': '#d62728',
    'Stumping': '#9467bd',
    'Run Out': '#8c564b',
    'Missed Runout': '#e377c2',
    'Direct Hit': '#7f7f7f',
    'Runs Saved': '#17becf'
}


def map_pick(code):
    """Translate a raw pick code into its fielding-action label.

    Matching ignores surrounding whitespace and letter case. Missing values
    and codes that are not recognised both yield 'No Action'.
    """
    if pd.isna(code):
        return 'No Action'

    key = str(code).strip().upper()
    if key == 'Y':
        return 'Clean Pick'
    if key in ('N', 'F'):
        # Both codes describe a fumbled pick-up.
        return 'Fumble'
    if key == 'C':
        return 'Catch'
    if key == 'DC':
        return 'Dropped Catch'
    if key == 'S':
        return 'Stumping'
    return 'No Action'

def map_throw(code):
    """Translate a raw throw code into its fielding-action label.

    Matching ignores surrounding whitespace and letter case. Missing values
    and codes that are not recognised both yield 'No Throw'.
    """
    if not pd.isna(code):
        normalized = str(code).strip().upper()
        label = {
            'Y': 'Good Throw',
            'N': 'Bad Throw',
            'DH': 'Direct Hit',
            'RO': 'Run Out',
            'MR': 'Missed Runout',
        }.get(normalized)
        if label is not None:
            return label
    return 'No Throw'

def safe_float(value):
    """Convert a cell value to float, mapping anything unusable to 0.0.

    Args:
        value: Any scalar read from the sheet (number, string, None, NaN).

    Returns:
        The value as a float, or 0.0 when it is missing, empty, not
        parseable as a number, too large for a float, or parses to NaN.
    """
    if pd.isna(value) or value == '':
        return 0.0
    try:
        number = float(value)
    except (ValueError, TypeError, OverflowError):
        return 0.0
    # Text such as "nan" parses successfully but yields NaN, which would
    # silently poison every sum it is added to; treat it as missing too.
    return 0.0 if pd.isna(number) else number


def calculate_performance_score(player_data):
    """Tally one player's fielding actions and derive a weighted score.

    Each row contributes its mapped 'Pick' and 'Throw' actions (only those
    that carry a weight are counted) and its 'Runs' value. The score is the
    weighted sum of the action counts plus the accumulated runs.

    Returns a Series holding one count per weighted action, 'Runs Saved'
    and 'Performance Score'.
    """
    tally = dict.fromkeys(WEIGHTS, 0)
    tally['Runs Saved'] = 0.0

    for _, record in player_data.iterrows():
        observed = (map_pick(record['Pick']), map_throw(record['Throw']))
        for action in observed:
            # Unweighted outcomes (e.g. fumbles, no throw) are not counted.
            if action in WEIGHTS:
                tally[action] += 1
        tally['Runs Saved'] += safe_float(record.get('Runs', 0))

    weighted_total = 0
    for action, weight in WEIGHTS.items():
        weighted_total += tally[action] * weight

    tally['Performance Score'] = weighted_total + tally['Runs Saved']
    return pd.Series(tally)

def plot_performance_comparison(performance_matrix, raw_data):
    """Render the comparison charts and save them under 'analysis_figures/'.

    Args:
        performance_matrix: DataFrame indexed by player with one column per
            weighted action plus 'Runs Saved' and 'Performance Score'.
        raw_data: Ball-by-ball DataFrame; when it has a 'Position' column a
            player-by-position heatmap of row counts is drawn as well.

    Writes performance_score_comparison.png, performance_breakdown.png and,
    when positions are available, positional_effectiveness.png. Nothing is
    plotted when the matrix is empty or has no 'Performance Score' column.
    """
    os.makedirs('analysis_figures', exist_ok=True)

    if performance_matrix.empty or 'Performance Score' not in performance_matrix.columns:
        print("No performance data available to plot.")
        return

    sorted_df = performance_matrix.sort_values('Performance Score', ascending=False)

    # --- Chart 1: overall score per player -------------------------------
    plt.figure(figsize=(12, 8))
    ax = sns.barplot(x='Performance Score', y=sorted_df.index,
                     data=sorted_df.reset_index(),
                     palette='viridis', edgecolor='black',
                     hue=sorted_df.index, legend=False)

    for i, score in enumerate(sorted_df['Performance Score']):
        # Put the label just beyond the bar tip on whichever side the bar
        # extends, so negative scores are not printed on top of their bar.
        is_negative = score < 0
        ax.text(score - 0.2 if is_negative else score + 0.2, i, f"{score:.1f}",
                va='center', ha='right' if is_negative else 'left',
                fontsize=10, fontweight='bold')

    plt.title('Fielding Performance Score Comparison', fontsize=16, pad=20)
    plt.xlabel('Performance Score', fontsize=12)
    plt.ylabel('Player', fontsize=12)
    plt.tight_layout()
    plt.savefig('analysis_figures/performance_score_comparison.png', dpi=300, bbox_inches='tight')
    plt.close()

    # --- Chart 2: weighted contribution of each action -------------------
    plt.figure(figsize=(14, 8))
    breakdown_df = sorted_df.drop(['Runs Saved', 'Performance Score'], axis=1)

    # Some weights are negative. Stacking everything on one running baseline
    # would draw those segments back over earlier ones, so positive and
    # negative contributions grow outward from zero on separate baselines.
    positive_base = np.zeros(len(breakdown_df))
    negative_base = np.zeros(len(breakdown_df))

    for action in breakdown_df.columns:
        points = (breakdown_df[action] * WEIGHTS.get(action, 1)).to_numpy(dtype=float)
        left = np.where(points < 0, negative_base, positive_base)
        plt.barh(breakdown_df.index, points, left=left,
                 color=ACTION_COLORS.get(action, '#7f7f7f'),
                 edgecolor='white', label=action)
        positive_base = positive_base + np.clip(points, 0, None)
        negative_base = negative_base + np.clip(points, None, 0)

    plt.title('Fielding Performance Breakdown', fontsize=16, pad=20)
    plt.xlabel('Points Contribution', fontsize=12)
    plt.ylabel('Player', fontsize=12)
    plt.legend(loc='lower right')
    plt.gca().xaxis.set_major_locator(MaxNLocator(integer=True))
    plt.tight_layout()
    plt.savefig('analysis_figures/performance_breakdown.png', dpi=300, bbox_inches='tight')
    plt.close()

    # --- Chart 3: how many rows each player has at each position ----------
    if 'Position' in raw_data.columns:
        positional_counts = raw_data.groupby(['Player Name', 'Position']).size().unstack().fillna(0)
        plt.figure(figsize=(12, 8))
        sns.heatmap(positional_counts, annot=True, fmt='g', cmap='YlGnBu',
                    linewidths=0.5, cbar_kws={'label': 'Fielding Opportunities'})
        plt.title('Fielding Position Effectiveness', fontsize=16, pad=20)
        plt.xlabel('Fielding Position', fontsize=12)
        plt.ylabel('Player', fontsize=12)
        plt.tight_layout()
        plt.savefig('analysis_figures/positional_effectiveness.png', dpi=300, bbox_inches='tight')
        plt.close()


def _assign_fielding_columns(df):
    """Label the raw sheet columns with the fielding schema, or return None if too narrow."""
    expected_columns = [
        'Match No.', 'Innings', 'Teams', 'Player Name', 'BallCount',
        'Position', 'Pick', 'Throw', 'Runs', 'Overcount', 'Venue', 'Stadium'
    ]
    num_columns = len(df.columns)

    if num_columns == 13:
        # One extra leading column (presumably an unnamed index/blank column
        # -- verify against the workbook) is discarded before labelling.
        df = df.iloc[:, 1:].copy()
        df.columns = expected_columns
        return df
    if num_columns == 12:
        df.columns = expected_columns
        return df

    print(f"Unexpected number of columns: {num_columns}")
    if num_columns < 12:
        # The positional mapping below needs columns 1..11; with fewer there
        # is no way to locate 'Player Name', 'Pick', 'Throw' and 'Runs'.
        print("Cannot map the required fielding columns; aborting analysis.")
        return None

    # Wider sheets: assume the same layout as the 13-column case (data
    # starting at position 1) and leave the remaining columns generic.
    df.columns = [f'Col_{i}' for i in range(num_columns)]
    positional_names = expected_columns[:-1]  # 'Match No.' .. 'Venue'
    return df.rename(columns={
        f'Col_{i}': name for i, name in enumerate(positional_names, start=1)
    })


def process_fielding_data(file_path, sheet_name='Sheet1', skiprows=5, nrows=11):
    """Load the fielding sheet, score every player and write the reports.

    Args:
        file_path: Path of the Excel workbook to read.
        sheet_name: Worksheet holding the ball-by-ball fielding rows.
        skiprows: Number of leading rows to skip before the header row.
        nrows: Number of data rows to read.

    Returns:
        (cleaned ball-by-ball DataFrame, per-player performance matrix), or
        (None, None) when the workbook cannot be read, its columns cannot be
        mapped, or no row has a player name.

    Side effects: saves charts via plot_performance_comparison and writes
    'fielding_analysis_report.xlsx' (a write failure is reported, not raised).
    """
    try:
        df = pd.read_excel(file_path, sheet_name=sheet_name,
                           skiprows=skiprows, nrows=nrows)
    except Exception as e:
        print(f"Error reading Excel file: {e}")
        return None, None

    df = _assign_fielding_columns(df)
    if df is None:
        return None, None

    # .copy() so the column assignments below act on an independent frame
    # rather than on a filtered view of the original.
    df = df[df['Player Name'].notna() & (df['Player Name'] != '')].copy()
    df['Runs'] = df['Runs'].apply(safe_float)  # Convert all Runs to float
    df['BallCount'] = pd.to_numeric(df['BallCount'], errors='coerce')

    if df.empty:
        print("No fielding records with a player name were found.")
        return None, None

    performance_matrix = pd.DataFrame([
        calculate_performance_score(data).rename(player)
        for player, data in df.groupby('Player Name')
    ])

    plot_performance_comparison(performance_matrix, df)

    try:
        with pd.ExcelWriter('fielding_analysis_report.xlsx') as writer:
            df.to_excel(writer, sheet_name='Ball-by-Ball Data', index=False)
            performance_matrix.to_excel(writer, sheet_name='Performance Matrix')

            summary = performance_matrix.copy()
            # method='min' gives tied players the same whole-number rank
            # (1, 1, 3, ...) instead of an averaged rank that astype(int)
            # would truncate; na_option='bottom' keeps the cast safe.
            summary['Overall Rank'] = (
                summary['Performance Score']
                .rank(method='min', ascending=False, na_option='bottom')
                .astype(int)
            )
            summary.to_excel(writer, sheet_name='Summary')
    except Exception as e:
        print(f"Error saving results: {e}")

    return df, performance_matrix


def main():
    """Run the analysis on 'IPL sample data.xlsx' and print a console summary."""
    print("Starting Fielding Performance Analysis...")

    raw_data, performance_matrix = process_fielding_data('IPL sample data.xlsx')

    if raw_data is not None and performance_matrix is not None:

        print("\n=== Ball-by-Ball Fielding Data ===")
        print(raw_data[['Player Name', 'BallCount', 'Position', 'Pick', 'Throw', 'Runs']].head())

        print("\n=== Calculated Performance Metrics ===")
        print(performance_matrix)

        has_scores = (
            'Performance Score' in performance_matrix.columns
            and not performance_matrix['Performance Score'].isna().all()
        )
        if has_scores:
            scores = performance_matrix['Performance Score']
            top_player = scores.idxmax()
            top_score = scores.loc[top_player]

            print("\n=== Key Insights ===")
            print(f"1. Top Performer: {top_player} (Score: {top_score:.1f})")

            catches = performance_matrix['Catch']
            if catches.max() > 0:
                most_catches = catches.idxmax()
                print(f"2. Best Catcher: {most_catches} ({int(catches.loc[most_catches])} catches)")
            else:
                # idxmax would otherwise name the first player even though
                # nobody took a catch.
                print("2. Best Catcher: none (no catches recorded)")

            print(f"3. Total Runs Saved: {performance_matrix['Runs Saved'].sum():.1f}")
            print("4. Visualization reports saved to 'analysis_figures/' directory")
            print("5. Full analysis report saved to 'fielding_analysis_report.xlsx'")
        else:
            print("\n=== Key Insights ===")
            print("No valid performance scores calculated. Check input data.")

    print("\nAnalysis complete!")


if __name__ == "__main__":
    main()

Starting Fielding Performance Analysis...

=== Ball-by-Ball Fielding Data ===
   Player Name  BallCount       Position Pick Throw  Runs
0    Phil Salt        0.2  wicket keeper    Y     Y   0.0
1   Yash Dhull        0.3         covers    Y     Y   0.0
2   Axer Patel        0.4          point    Y     Y   0.0
4  Lalit yadav        0.6    cover point    Y     Y   0.0
5    Aman Khan        1.1       long off    Y     Y   0.0

=== Calculated Performance Metrics ===
               Clean Pick  Good Throw  Catch  Dropped Catch  Stumping  \
Aman Khan             1.0         1.0    0.0            0.0       0.0   
Axer Patel            1.0         1.0    0.0            0.0       0.0   
Kuldeep yadav         2.0         1.0    0.0            0.0       0.0   
Lalit yadav           2.0         1.0    0.0            0.0       0.0   
Phil Salt             1.0         1.0    0.0            0.0       0.0   
Yash Dhull            1.0         1.0    0.0            0.0       0.0   

               Run Out