In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 🏏 Cricket Fielding Performance Analysis\n",
    "## ShadowFox Data Science Internship - Main Analysis Notebook\n",
    "\n",
    "This notebook performs comprehensive fielding performance analysis using IPL data, calculating performance scores, generating insights, and creating strategic recommendations."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import required libraries\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import sys\n",
    "import os\n",
    "from scipy.stats import pearsonr\n",
    "\n",
    "# Add src to path for project modules\n",
    "sys.path.append('../src')\n",
    "\n",
    "# Import project modules\n",
    "from data_loader import FieldingDataLoader\n",
    "from performance_calculator import PerformanceCalculator\n",
    "from visualizations import FieldingVisualizer\n",
    "from analysis_tools import FieldingAnalyzer, analyze_fielding_performance\n",
    "\n",
    "# Setup visualization\n",
    "plt.style.use('seaborn-v0_8-whitegrid')\n",
    "sns.set_palette('viridis')\n",
    "%matplotlib inline\n",
    "\n",
    "print(\"✅ All libraries and modules imported successfully!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Data Preparation and Loading"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the analysis components; later cells rely on these exact names\n",
    "loader = FieldingDataLoader()\n",
    "calculator = PerformanceCalculator()\n",
    "visualizer = FieldingVisualizer()\n",
    "analyzer = FieldingAnalyzer()\n",
    "\n",
    "# Load the sample data and clean it; df_raw keeps the untouched original\n",
    "df_raw = loader.create_sample_dataset()\n",
    "df = loader.clean_fielding_data(df_raw)\n",
    "\n",
    "# Fail fast with a readable message: the header below reads row 0 of these\n",
    "# columns, which would otherwise surface as an opaque KeyError/IndexError\n",
    "required_columns = ['team', 'match_no', 'venue', 'player_name']\n",
    "missing_columns = [col for col in required_columns if col not in df.columns]\n",
    "if missing_columns:\n",
    "    raise ValueError(f\"Cleaned data is missing required columns: {missing_columns}\")\n",
    "if df.empty:\n",
    "    raise ValueError(\"Cleaned fielding dataset has no rows to analyse\")\n",
    "\n",
    "print(\"📊 DATASET LOADED SUCCESSFULLY\")\n",
    "print(\"=\" * 50)\n",
    "print(f\"Team: {df['team'].iloc[0]}\")\n",
    "print(f\"Players Analyzed: {len(df)}\")\n",
    "print(f\"Match: {df['match_no'].iloc[0]} at {df['venue'].iloc[0]}\")\n",
    "# astype(str) keeps the join from raising TypeError if a name is missing (NaN)\n",
    "print(f\"\\nPlayer List: {', '.join(df['player_name'].astype(str))}\")\n",
    "\n",
    "# Display the prepared data\n",
    "print(\"\\n📋 PREPARED DATA OVERVIEW:\")\n",
    "display(df.head())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Performance Score Calculation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Score every player using the project's PerformanceCalculator\n",
    "df_scored = calculator.calculate_all_scores(df)\n",
    "\n",
    "print(\"🧮 PERFORMANCE SCORES CALCULATED\")\n",
    "print(\"=\" * 50)\n",
    "# The formula is printed for reference only; the calculation itself lives in the calculator\n",
    "print(\"Performance Formula: PS = (CP×1) + (GT×1) + (C×3) + (DC×-3) + \")\n",
    "print(\"                     (ST×3) + (RO×3) + (MRO×-2) + (DH×2) + RS\")\n",
    "\n",
    "# Columns shown in the ranking table, best score first\n",
    "score_columns = [\n",
    "    'player_name',\n",
    "    'performance_score',\n",
    "    'positive_contributions',\n",
    "    'negative_contributions',\n",
    "    'net_contribution',\n",
    "    'player_role',\n",
    "]\n",
    "scores_display = df_scored[score_columns].sort_values('performance_score', ascending=False)\n",
    "\n",
    "print(\"\\n🎯 PERFORMANCE SCORES (Sorted):\")\n",
    "display(scores_display)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compare each computed score with the calculator's expected values\n",
    "validation_results = calculator.validate_calculations(df_scored)\n",
    "\n",
    "print(\"🔍 CALCULATION VALIDATION\")\n",
    "print(\"=\" * 50)\n",
    "print(\"Comparing calculated scores with expected values:\")\n",
    "display(validation_results)\n",
    "\n",
    "# The run counts as valid only when every row carries the PASS marker\n",
    "all_correct = validation_results['status'].eq('✅ PASS').all()\n",
    "if all_correct:\n",
    "    validation_status = '✅ ALL CALCULATIONS CORRECT'\n",
    "    verdict_message = \"🎉 Performance scores match expected values perfectly!\"\n",
    "else:\n",
    "    validation_status = '❌ VALIDATION FAILED'\n",
    "    verdict_message = \"⚠️  Some score calculations need review\"\n",
    "\n",
    "print(f\"\\nValidation Status: {validation_status}\")\n",
    "print(verdict_message)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Comprehensive Visualizations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3.1 Performance Scores Visualization\n",
    "print(\"📈 CREATING PERFORMANCE SCORES CHART...\")\n",
    "performance_fig = visualizer.plot_performance_scores(df_scored)\n",
    "print(\"✅ Performance scores visualization completed\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3.2 Positive vs Negative Contributions\n",
    "print(\"📊 ANALYZING CONTRIBUTIONS...\")\n",
    "contributions_fig = visualizer.plot_positive_negative_contributions(df_scored)\n",
    "print(\"✅ Contributions analysis completed\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3.3 Runs Saved Analysis\n",
    "print(\"💰 ANALYZING RUNS SAVED...\")\n",
    "runs_saved_fig = visualizer.plot_runs_saved_analysis(df_scored)\n",
    "print(\"✅ Runs saved analysis completed\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3.4 Correlation Analysis\n",
    "print(\"🔗 ANALYZING CORRELATIONS...\")\n",
    "correlation_fig = visualizer.create_correlation_heatmap(df_scored)\n",
    "print(\"✅ Correlation analysis completed\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3.5 Comprehensive Dashboard\n",
    "print(\"📊 CREATING COMPREHENSIVE DASHBOARD...\")\n",
    "dashboard_fig = visualizer.create_comprehensive_dashboard(df_scored)\n",
    "print(\"✅ Comprehensive dashboard created\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Advanced Performance Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4.1 Identify Top Performers\n",
    "# Single source of truth for the leaderboard size, so the heading can never\n",
    "# disagree with the number of rows requested from the analyzer\n",
    "TOP_N = 3\n",
    "\n",
    "print(\"🏆 IDENTIFYING TOP PERFORMERS...\")\n",
    "top_performers = analyzer.identify_top_performers(df_scored, TOP_N)\n",
    "\n",
    "print(f\"Top {TOP_N} Fielding Performers:\")\n",
    "# Ranks are 1-based for display\n",
    "for rank, (_, player) in enumerate(top_performers.iterrows(), start=1):\n",
    "    print(f\"{rank}. {player['player_name']} - {player['performance_score']} points ({player['player_role']})\")\n",
    "\n",
    "print(\"\\n📋 Top Performers Details:\")\n",
    "display(top_performers)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4.2 Detailed Correlation Analysis\n",
    "print(\"🔍 DETAILED CORRELATION ANALYSIS...\")\n",
    "correlations = analyzer.calculate_correlations(df_scored)\n",
    "\n",
    "print(\"Correlation with Performance Score:\")\n",
    "display(correlations)\n",
    "\n",
    "# Keep only the rows whose strength label is 'Very Strong' or 'Strong'\n",
    "is_strong = correlations['strength'].isin(['Very Strong', 'Strong'])\n",
    "strong_correlations = correlations.loc[is_strong]\n",
    "if not strong_correlations.empty:\n",
    "    print(\"\\n💪 STRONGEST CORRELATIONS:\")\n",
    "    for _, row in strong_correlations.iterrows():\n",
    "        print(f\"  • {row['metric']}: r = {row['correlation']} ({row['strength']})\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4.3 Player Role Analysis\n",
    "print(\"👥 ANALYSIS BY PLAYER ROLE...\")\n",
    "role_analysis = analyzer.analyze_by_player_role(df_scored)\n",
    "\n",
    "print(\"Performance by Player Role:\")\n",
    "display(role_analysis)\n",
    "\n",
    "# One bullet per role: head-count and mean score rounded to one decimal\n",
    "print(\"\\n🎯 ROLE-BASED INSIGHTS:\")\n",
    "for _, role_row in role_analysis.iterrows():\n",
    "    print(\n",
    "        f\"  • {role_row['player_role']}: {role_row['performance_score_count']} players, \"\n",
    "        f\"average score: {role_row['performance_score_mean']:.1f}\"\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4.4 Improvement Areas Identification\n",
    "print(\"⚠️ IDENTIFYING AREAS FOR IMPROVEMENT...\")\n",
    "improvement_areas = analyzer.identify_areas_improvement(df_scored)\n",
    "\n",
    "print(\"Areas for Improvement by Player:\")\n",
    "display(improvement_areas)\n",
    "\n",
    "# Call out only the players whose priority level is 'High'\n",
    "needs_urgent_work = improvement_areas['priority_level'].eq('High')\n",
    "high_priority = improvement_areas.loc[needs_urgent_work]\n",
    "if not high_priority.empty:\n",
    "    print(\"\\n🔴 HIGH PRIORITY IMPROVEMENTS:\")\n",
    "    for _, flagged in high_priority.iterrows():\n",
    "        player_label = flagged['player_name']\n",
    "        areas_text = ', '.join(flagged['improvement_areas'])\n",
    "        print(f\"  • {player_label}: {areas_text}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Strategic Insights and Recommendations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5.1 Generate Key Performance Insights\n",
    "print(\"💡 GENERATING KEY INSIGHTS...\")\n",
    "insights = analyzer.generate_performance_insights(df_scored)\n",
    "\n",
    "# Print the insights as a list numbered from 1\n",
    "print(\"Key Performance Insights:\")\n",
    "for number, text in enumerate(insights, start=1):\n",
    "    print(f\"{number}. {text}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5.2 Create Detailed Player Profiles\n",
    "print(\"👤 CREATING PLAYER PROFILES...\")\n",
    "player_profiles = analyzer.create_player_profiles(df_scored)\n",
    "\n",
    "print(\"Detailed Player Profiles:\")\n",
    "for profile in player_profiles:\n",
    "    print(f\"\\n🎯 {profile['player_name']} ({profile['player_role']})\")\n",
    "    print(f\"   Performance Score: {profile['performance_score']} ({profile['performance_rating']})\")\n",
    "\n",
    "    # Fall back to a generic line when no strengths are listed\n",
    "    strengths = profile['key_strengths']\n",
    "    strengths_text = ', '.join(strengths) if strengths else 'Consistent basic fielding'\n",
    "    print(f\"   Key Strengths: {strengths_text}\")\n",
    "\n",
    "    # The improvement line is omitted entirely when there is nothing to suggest\n",
    "    suggestions = profile['improvement_suggestions']\n",
    "    if suggestions:\n",
    "        print(f\"   Improvement Areas: {', '.join(suggestions)}\")\n",
    "\n",
    "    print(f\"   Key Metrics: {profile['key_metrics']}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5.3 Generate Strategic Recommendations\n",
    "print(\"🎯 GENERATING STRATEGIC RECOMMENDATIONS...\")\n",
    "recommendations = analyzer.generate_strategic_recommendations(df_scored)\n",
    "\n",
    "print(\"Strategic Recommendations by Priority:\")\n",
    "# Walk the tiers in fixed High -> Medium -> Low order, skipping empty tiers\n",
    "for priority in ('High', 'Medium', 'Low'):\n",
    "    tier = recommendations.loc[recommendations['priority'] == priority]\n",
    "    if tier.empty:\n",
    "        continue\n",
    "    print(f\"\\n{priority} Priority Recommendations:\")\n",
    "    for _, rec in tier.iterrows():\n",
    "        print(f\"  • {rec['recommendation']}\")\n",
    "        print(f\"    Type: {rec['type']} | Impact: {rec['expected_impact']}\")\n",
    "        print(f\"    Reason: {rec['rationale']}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Comprehensive Analysis Report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _print_section(title):\n",
    "    \"\"\"Print a blank line, the section title, then a 30-character dashed rule.\"\"\"\n",
    "    print(f\"\\n{title}\")\n",
    "    print(\"-\" * 30)\n",
    "\n",
    "\n",
    "# Run the combined analysis entry point on the scored data\n",
    "print(\"📄 GENERATING COMPREHENSIVE ANALYSIS REPORT...\")\n",
    "comprehensive_analysis = analyze_fielding_performance(df_scored)\n",
    "\n",
    "# Report banner\n",
    "banner_rule = \"=\" * 70\n",
    "print(\"\\n\" + banner_rule)\n",
    "print(\"📊 COMPREHENSIVE FIELDING ANALYSIS REPORT\")\n",
    "print(banner_rule)\n",
    "\n",
    "# Sections of the nested performance report used further down\n",
    "report = comprehensive_analysis['performance_report']\n",
    "summary = report['summary_metrics']\n",
    "distribution = report['performance_distribution']\n",
    "\n",
    "_print_section(\"🏆 TOP PERFORMERS\")\n",
    "for position, (_, player_data) in enumerate(comprehensive_analysis['top_performers'].iterrows(), start=1):\n",
    "    print(f\"{position}. {player_data['player_name']}: {player_data['performance_score']} points\")\n",
    "\n",
    "_print_section(\"💡 KEY INSIGHTS\")\n",
    "# Only the first five insights are shown in the executive summary\n",
    "for insight in comprehensive_analysis['insights'][:5]:\n",
    "    print(f\"• {insight}\")\n",
    "\n",
    "_print_section(\"🎯 STRATEGIC RECOMMENDATIONS\")\n",
    "all_recs = comprehensive_analysis['recommendations']\n",
    "high_priority_recs = all_recs[all_recs['priority'] == 'High']\n",
    "for _, rec in high_priority_recs.iterrows():\n",
    "    print(f\"• {rec['recommendation']}\")\n",
    "\n",
    "_print_section(\"🔗 MOST CORRELATED METRIC\")\n",
    "# Takes the first row as the headline metric (presumably sorted by strength upstream; not verified here)\n",
    "top_correlation = comprehensive_analysis['correlations'].iloc[0]\n",
    "print(f\"{top_correlation['metric']}: r = {top_correlation['correlation']} ({top_correlation['strength']})\")\n",
    "\n",
    "_print_section(\"📈 PERFORMANCE DISTRIBUTION\")\n",
    "print(f\"Excellent: {distribution['excellent_players']} players\")\n",
    "print(f\"Good: {distribution['good_players']} players\")\n",
    "print(f\"Needs Improvement: {distribution['needs_improvement_players']} players\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Results Export and Saving"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save all analysis results\n",
    "# json is needed only by this cell; importing it first makes the dependency\n",
    "# visible before any file is written\n",
    "import json\n",
    "\n",
    "print(\"💾 SAVING ANALYSIS RESULTS...\")\n",
    "\n",
    "# One definition of the export folder instead of repeating the literal per file\n",
    "OUTPUT_DIR = '../data/outputs'\n",
    "\n",
    "# Ensure directories exist\n",
    "os.makedirs(OUTPUT_DIR, exist_ok=True)\n",
    "os.makedirs('../results/reports', exist_ok=True)\n",
    "\n",
    "\n",
    "def _json_default(value):\n",
    "    \"\"\"Convert NumPy scalars/arrays to plain Python objects for json.dump.\n",
    "\n",
    "    json.dump calls this hook only for objects it cannot encode itself.\n",
    "    Any other unsupported type still raises TypeError, as it would without the hook.\n",
    "    \"\"\"\n",
    "    if isinstance(value, np.generic):\n",
    "        return value.item()\n",
    "    if isinstance(value, np.ndarray):\n",
    "        return value.tolist()\n",
    "    raise TypeError(f\"Object of type {type(value).__name__} is not JSON serializable\")\n",
    "\n",
    "\n",
    "# Save scored data with all metrics\n",
    "output_path = f'{OUTPUT_DIR}/fielding_analysis_results.csv'\n",
    "df_scored.to_csv(output_path, index=False)\n",
    "print(f\"✅ Analysis results saved to: {output_path}\")\n",
    "\n",
    "# Save comprehensive analysis\n",
    "# NOTE: despite the file name, only the top-performers table is written here\n",
    "analysis_path = f'{OUTPUT_DIR}/comprehensive_analysis.csv'\n",
    "comprehensive_analysis['top_performers'].to_csv(analysis_path, index=False)\n",
    "print(f\"✅ Comprehensive analysis saved to: {analysis_path}\")\n",
    "\n",
    "# Save recommendations\n",
    "recs_path = f'{OUTPUT_DIR}/strategic_recommendations.csv'\n",
    "comprehensive_analysis['recommendations'].to_csv(recs_path, index=False)\n",
    "print(f\"✅ Strategic recommendations saved to: {recs_path}\")\n",
    "\n",
    "# Save player profiles\n",
    "# NOTE(review): profile values are presumably read from DataFrame cells and may\n",
    "# be NumPy scalars (e.g. int64), which json cannot encode without the hook - confirm\n",
    "profiles_path = f'{OUTPUT_DIR}/player_profiles.json'\n",
    "with open(profiles_path, 'w', encoding='utf-8') as f:\n",
    "    json.dump(comprehensive_analysis['player_profiles'], f, indent=2, default=_json_default)\n",
    "print(f\"✅ Player profiles saved to: {profiles_path}\")\n",
    "\n",
    "# Save correlation analysis\n",
    "corr_path = f'{OUTPUT_DIR}/correlation_analysis.csv'\n",
    "comprehensive_analysis['correlations'].to_csv(corr_path, index=False)\n",
    "print(f\"✅ Correlation analysis saved to: {corr_path}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Summary and Conclusion"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 🎯 Key Findings Summary\n",
    "\n",
    "**Performance Rankings:**\n",
    "1. **Yash Dhull & Axar Patel** - 11 points (Excellent)\n",
    "2. **Rilee Russouw** - 10 points (Excellent) \n",
    "3. **Aman Khan & Kuldeep Yadav** - 9 points (Good)\n",
    "4. **Lalit Yadav** - 6 points (Good)\n",
    "5. **Phil Salt** - 2 points (Needs Improvement)\n",
    "\n",
    "**Team Performance:**\n",
    "- Average Score: 8.3 points\n",
    "- Net Runs Saved: +7 runs\n",
    "- Total Catches: 6 (75% success rate)\n",
    "- Total Run Outs: 2\n",
    "- Direct Hits: 3\n",
    "\n",
    "**Key Strengths:**\n",
    "- Strong catching performance (6 successful catches)\n",
    "- Good direct hitting capability (3 direct hits)\n",
    "- Consistent ground fielding (multiple clean picks)\n",
    "- Effective all-round fielding from multiple players\n",
    "\n",
    "**Areas for Improvement:**\n",
    "- 2 dropped catches affecting overall performance\n",
    "- 1 missed run out opportunity\n",
    "- 2 players conceded runs (-3 total)\n",
    "- Variable throwing accuracy\n",
    "\n",
    "### 📋 Strategic Recommendations\n",
    "\n",
    "**High Priority:**\n",
    "- Implement intensive catching practice sessions\n",
    "- Focus on ground fielding to prevent runs conceded\n",
    "- Improve throwing accuracy and decision making\n",
    "\n",
    "**Medium Priority:**\n",
    "- Wicket-keeper stumpings practice\n",
    "- Run out conversion training\n",
    "- Fielding efficiency improvement\n",
    "\n",
    "**Low Priority:**\n",
    "- Maintain current successful strategies\n",
    "- Continue basic fielding drills\n",
    "- Regular performance monitoring\n",
    "\n",
    "### 🔮 Future Analysis Directions\n",
    "- Expand analysis to multiple matches for trend identification\n",
    "- Incorporate opposition fielding comparison\n",
    "- Add positional analysis for field placement optimization\n",
    "- Develop real-time performance tracking system\n",
    "- Create predictive models for player development\n",
    "\n",
    ### ✅ Analysis Success Metrics\n",
    "- All performance scores validated against expected values\n",
    "- Comprehensive visualizations generated and saved\n",
    "- Strategic recommendations provided for each priority level\n",
    "- Player profiles created with individualized insights\n",
    "- All results exported for further use and reporting\n",
    "\n",
    **Next Step:** Proceed to `03_advanced_insights.ipynb` for statistical modeling and advanced analytics."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Final execution summary\n",
    "# Plain strings are used wherever nothing is interpolated; the output is unchanged\n",
    "closing_rule = \"=\" * 70\n",
    "\n",
    "print(\"\\n\" + closing_rule)\n",
    "print(\"🎉 FIELDING PERFORMANCE ANALYSIS COMPLETED!\")\n",
    "print(closing_rule)\n",
    "\n",
    "print(\"\\n📊 ANALYSIS SUMMARY:\")\n",
    "print(f\"  • Players analyzed: {len(df_scored)}\")\n",
    "print(\"  • Performance scores calculated and validated\")\n",
    "print(\"  • 5 comprehensive visualizations created\")\n",
    "print(f\"  • {len(comprehensive_analysis['insights'])} key insights generated\")\n",
    "print(f\"  • {len(comprehensive_analysis['recommendations'])} strategic recommendations\")\n",
    "print(\"  • All results saved to data/outputs/\")\n",
    "\n",
    "print(\"\\n🚀 NEXT STEPS:\")\n",
    "print(\"  • Review generated visualizations in results/visualizations/\")\n",
    "print(\"  • Implement high-priority recommendations\")\n",
    "print(\"  • Proceed to advanced analytics in next notebook\")\n",
    "print(\"\\n\" + closing_rule)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}