# Cricket Score Predictor - Interactive Analysis

This notebook provides an interactive environment for exploring cricket data and building predictive models.

## 1. Setup and Data Loading

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from data_loader import load_cricket_data
from data_preprocessor import CricketDataPreprocessor
from cricket_predictor import CricketScorePredictor

# Set plotting style
# Applies matplotlib's 'default' style sheet and seaborn's "husl" colour
# palette to the figures drawn in the cells below.
plt.style.use('default')
sns.set_palette("husl")
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

In [None]:
# Load cricket data
# load_cricket_data() comes from the project's data_loader module; its result
# is bound to `df`, which every later cell reads. The object must expose a
# `.shape` attribute (the later cells treat it as a pandas DataFrame).
print("Loading cricket dataset...")
df = load_cricket_data()
print(f"Dataset loaded with shape: {df.shape}")

## 2. Data Exploration

In [None]:
# Basic data exploration
print("Dataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- that would append a stray "None" line.
df.info()
print("\nFirst 5 rows:")
# Last expression of the cell: rendered by the notebook as a rich table.
df.head()

In [None]:
# Statistical summary
# describe() is the last expression in the cell, so the notebook renders the
# resulting summary table as the cell output.
print("Statistical Summary:")
df.describe()

In [None]:
# Count the missing entries in every column, then show only the columns that
# actually contain at least one missing value.
print("Missing Values:")
missing_values = df.isna().sum()
missing_values.loc[missing_values.gt(0)]

## 3. Data Visualization

In [None]:
# Score distribution: three side-by-side histograms on one figure
plt.figure(figsize=(15, 5))

# (column, bar colour, panel title, x-axis label) for each panel, listed in
# left-to-right order.
histogram_panels = [
    ('total', 'skyblue', 'Distribution of Total Scores', 'Total Score'),
    ('runs', 'lightgreen', 'Distribution of Current Runs', 'Current Runs'),
    ('overs', 'salmon', 'Distribution of Overs', 'Overs'),
]

for panel_index, (column, bar_color, panel_title, x_label) in enumerate(histogram_panels, start=1):
    # The panel is always created; it simply stays empty when the dataset
    # does not contain the column.
    plt.subplot(1, 3, panel_index)
    if column not in df.columns:
        continue
    plt.hist(df[column], bins=30, alpha=0.7, color=bar_color)
    plt.title(panel_title)
    plt.xlabel(x_label)
    plt.ylabel('Frequency')

plt.tight_layout()
plt.show()

In [None]:
# Correlation heatmap over the numeric columns only
numeric_cols = df.select_dtypes(include=[np.number]).columns
correlation_matrix = df.loc[:, numeric_cols].corr()

# Draw on an explicitly created axes instead of the implicit current one.
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0, ax=ax)
ax.set_title('Correlation Matrix of Numerical Features')
plt.show()

## 4. Data Preprocessing

In [None]:
# Initialize preprocessor and process data
# preprocess_data() returns a pair: the processed dataset and what this cell
# labels as the feature columns. Both are reused by the split and training
# cells further down.
preprocessor = CricketDataPreprocessor()
processed_data, feature_cols = preprocessor.preprocess_data(df)

# Report how many features were selected and which ones.
print(f"Features selected: {len(feature_cols)}")
print("Feature columns:", feature_cols)

In [None]:
# Prepare train-test split
# The split policy (ratio, shuffling, target column) is decided inside
# CricketDataPreprocessor.prepare_train_test_split and is not visible here.
# NOTE(review): confirm it fixes a random seed so that Restart & Run All
# reproduces the same partition.
X_train, X_test, y_train, y_test = preprocessor.prepare_train_test_split(
    processed_data, feature_cols
)

# Sanity-check the size of each partition.
print(f"Training set: {X_train.shape}")
print(f"Test set: {X_test.shape}")

## 5. Model Training and Evaluation

In [None]:
# Initialize and train models
# Training is delegated to CricketScorePredictor.train_models, which receives
# the training features, the training targets and the feature columns.
# `predictor` is the notebook-level object that the evaluation, plotting,
# live-prediction and saving cells all use afterwards.
predictor = CricketScorePredictor()
predictor.train_models(X_train, y_train, feature_cols)

In [None]:
# Evaluate models
# `results` holds whatever evaluate_models returns for the held-out test set;
# it is handed to predictor.plot_predictions in the following cell.
results = predictor.evaluate_models(X_test, y_test)

In [None]:
# Plot predictions comparison
# Passes the test targets together with the `results` object produced by the
# evaluation cell; the drawing itself happens inside the predictor.
predictor.plot_predictions(y_test, results)

In [None]:
# Plot feature importance
# plot_feature_importance() may return None (NOTE(review): presumably when the
# selected model exposes no importances -- confirm). Otherwise the result
# supports .head(), and its first ten rows are printed.
feature_importance = predictor.plot_feature_importance()
if feature_importance is not None:
    print("\nTop 10 Most Important Features:")
    print(feature_importance.head(10))

## 6. Live Prediction Examples

In [None]:
# Interactive prediction function
def predict_match_score(overs, runs, wickets, total_overs=20, model=None):
    """Print a live match report and return the predicted final score.

    Args:
        overs: Overs bowled so far. Used directly as a divisor for the
            current run rate, so it is treated as a plain decimal number.
        runs: Runs scored so far.
        wickets: Wickets lost so far.
        total_overs: Length of the innings in overs. Defaults to 20, the
            value that used to be hard-coded; pass e.g. 50 for a longer
            format. It only affects the printed "Required Run Rate" -- the
            model is still called with (overs, runs, wickets) alone.
        model: Object exposing ``predict_live_score(overs, runs, wickets)``.
            Defaults to the notebook-level ``predictor``.

    Returns:
        The value returned by ``predict_live_score``, or ``None`` if any step
        raised an exception (the error message is printed instead).
    """
    try:
        # Resolved inside the try block so that a missing global `predictor`
        # (training cell not run yet) is reported like any other error.
        live_model = predictor if model is None else model
        prediction = live_model.predict_live_score(overs, runs, wickets)

        # Guard against division by zero before the first over is bowled.
        current_rr = runs / overs if overs > 0 else 0

        # Rate needed over the remaining overs to reach the predicted total;
        # reported as 0 once the innings is complete.
        overs_left = total_overs - overs
        required_rr = (prediction - runs) / overs_left if overs_left > 0 else 0

        print(f"Match Situation: {overs} overs, {runs}/{wickets}")
        print(f"Current Run Rate: {current_rr:.2f}")
        print(f"Predicted Final Score: {prediction:.0f}")
        print(f"Required Run Rate: {required_rr:.2f}")
        print("-" * 40)

        return prediction
    except Exception as e:
        # Deliberate best-effort: keep the notebook running and surface the
        # problem as text rather than a traceback.
        print(f"Error: {e}")
        return None

# Example predictions
print("Live Prediction Examples:")
print("=" * 40)

# Each scenario is an (overs, runs, wickets) tuple, unpacked in that order
# into predict_match_score below.
scenarios = [
    (6.0, 55, 2),   # End of powerplay
    (10.0, 90, 3),  # Middle overs
    (15.0, 135, 5), # Death overs approach
    (18.0, 165, 7)  # Final overs
]

# The return values are discarded here; each call prints its own report.
for overs, runs, wickets in scenarios:
    predict_match_score(overs, runs, wickets)

## 7. Model Saving and Loading

In [None]:
# Save the best model
# NOTE(review): the .pkl extension suggests pickle serialisation -- if so,
# only load this file back from a trusted location. The path is relative, so
# presumably the file lands in the kernel's current working directory.
predictor.save_model('cricket_model_notebook.pkl')
# Reached only if save_model returned without raising.
print("Model saved successfully!")

## 8. Custom Predictions

Use the cell below to make your own predictions by changing the values:

In [None]:
# Customize these values for your prediction
custom_overs = 12.0    # overs bowled so far
custom_runs = 95       # runs scored so far
custom_wickets = 4     # wickets lost so far

# Make prediction
# custom_prediction receives predict_match_score's return value, which is
# None when the prediction step raised an error.
custom_prediction = predict_match_score(custom_overs, custom_runs, custom_wickets)

## 9. Summary

This notebook demonstrates:
- Loading and exploring cricket match data
- Data preprocessing and feature engineering
- Training multiple machine learning models
- Evaluating model performance
- Making live score predictions

The best-performing model can be used for real-time score predictions during cricket matches!