In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors

# Load the delivery-level dataset (one row per ball, judging by the columns
# used below: batsman, bowler, Runs, is_extra, is_wicket, legal_delivery).
df = pd.read_csv("final.csv")
data = df  # alias kept in case other notebook cells still refer to `data`

# Extract player roles
# Map every player name to a role. Rows are walked in file order and, within a
# row, the batsman is recorded before the bowler, so if a name ever appears
# with conflicting roles the last occurrence wins.
# Iterating the four columns with zip avoids building a Series per row, which
# is what DataFrame.iterrows() does.
player_roles = {}
for batsman_name, batsman_role, bowler_name, bowler_role in zip(
    df['batsman'], df['batsman_role'], df['bowler'], df['bowler_role']
):
    player_roles[batsman_name] = batsman_role
    player_roles[bowler_name] = bowler_role

# Compute batsman stats
# Only legal deliveries are considered. Two helper columns are derived up
# front so the group-by can use the built-in 'sum' instead of a Python lambda
# that re-indexes `df` once per batsman:
#   _bat_runs      - Runs on deliveries where is_extra is False, else 0
#   _boundary_runs - _bat_runs when it is exactly 4 or 6, else 0
# NOTE(review): dismissals are counted from `is_wicket` on balls this batsman
# faced; whether that matches "times this batsman was out" depends on how the
# dataset records run-outs - confirm.
batsman_stats = (
    df[df['legal_delivery'] == True]
    .assign(
        _bat_runs=lambda d: d['Runs'].where(d['is_extra'] == False, 0),
        _boundary_runs=lambda d: d['_bat_runs'].where(d['_bat_runs'].isin([4, 6]), 0),
    )
    .groupby('batsman')
    .agg(
        total_runs=('_bat_runs', 'sum'),
        total_balls=('Runs', 'count'),
        dismissals=('is_wicket', 'sum'),
        boundary_runs=('_boundary_runs', 'sum'),
    )
    .reset_index()
)

# BA: runs per dismissal; a batsman with no dismissals is divided by 1 instead of 0.
batsman_stats['BA'] = batsman_stats['total_runs'] / batsman_stats['dismissals'].replace(0, 1)
# SR: runs per 100 legal balls.
batsman_stats['SR'] = (batsman_stats['total_runs'] / batsman_stats['total_balls']) * 100
# BP: percentage of runs scored in 4s and 6s; 0/0 (no runs at all) becomes 0.
batsman_stats['BP'] = (batsman_stats['boundary_runs'] / batsman_stats['total_runs']) * 100
batsman_stats['BP'] = batsman_stats['BP'].fillna(0)

# Compute bowler stats
# NOTE(review): `Runs` is summed over every row for the bowler; whether that
# includes runs not normally charged to the bowler depends on the dataset - confirm.
bowler_stats = df.groupby('bowler').agg(
    total_runs_conceded=('Runs', 'sum'),
    total_wickets=('is_wicket', 'sum'),
    total_legal_deliveries=('legal_delivery', 'sum')
).reset_index()

# A bowler with no legal delivery has no defined economy (x / 0 -> inf or NaN),
# and a single non-finite value makes the scaler / nearest-neighbour fit reject
# the whole matrix. Drop those bowlers and renumber the rows so that row
# position and index label stay identical for the code that follows.
bowler_stats = bowler_stats[bowler_stats['total_legal_deliveries'] > 0].reset_index(drop=True)

# ECO: runs conceded per over (6 legal deliveries).
bowler_stats['ECO'] = bowler_stats['total_runs_conceded'] / (bowler_stats['total_legal_deliveries'] / 6)
# BAvg / BSR: runs and balls per wicket; wicketless bowlers are divided by 1 instead of 0.
bowler_stats['BAvg'] = bowler_stats['total_runs_conceded'] / bowler_stats['total_wickets'].replace(0, 1)
bowler_stats['BSR'] = bowler_stats['total_legal_deliveries'] / bowler_stats['total_wickets'].replace(0, 1)

# Normalize features
# Each role gets its own scaler, fitted on that role's three metrics and then
# applied to the same rows, so row i of the scaled array corresponds to row i
# of the stats table.
scaler_batsman = StandardScaler().fit(batsman_stats[['BA', 'SR', 'BP']])
batsman_scaled = scaler_batsman.transform(batsman_stats[['BA', 'SR', 'BP']])

scaler_bowler = StandardScaler().fit(bowler_stats[['ECO', 'BAvg', 'BSR']])
bowler_scaled = scaler_bowler.transform(bowler_stats[['ECO', 'BAvg', 'BSR']])

# For all-rounders
# Players tagged 'All-Rounders' are described by batting and bowling metrics
# together. The merge is an inner join on the player name, so only players
# present in both stats tables are kept.
all_rounders = [name for name in player_roles if player_roles[name] == 'All-Rounders']

all_rounder_batting = (
    batsman_stats.loc[batsman_stats['batsman'].isin(all_rounders)]
    .rename(columns={'batsman': 'player'})
)
all_rounder_bowling = (
    bowler_stats.loc[bowler_stats['bowler'].isin(all_rounders)]
    .rename(columns={'bowler': 'player'})
)

all_rounder_stats = all_rounder_batting[['player', 'BA', 'SR', 'BP']].merge(
    all_rounder_bowling[['player', 'ECO', 'BAvg', 'BSR']],
    on='player',
)

scaler_all_rounder = StandardScaler().fit(
    all_rounder_stats[['BA', 'SR', 'BP', 'ECO', 'BAvg', 'BSR']]
)
all_rounder_scaled = scaler_all_rounder.transform(
    all_rounder_stats[['BA', 'SR', 'BP', 'ECO', 'BAvg', 'BSR']]
)

# Create index mappings
# name -> row position in the matching stats table (and therefore in the
# matching scaled feature array).
batsman_index = dict(zip(batsman_stats['batsman'], range(len(batsman_stats))))
bowler_index = dict(zip(bowler_stats['bowler'], range(len(bowler_stats))))
all_rounder_index = dict(zip(all_rounder_stats['player'], range(len(all_rounder_stats))))

# Fit KNN models
# One Euclidean nearest-neighbour index per role, each built on that role's
# scaled features. fit() returns the estimator, so construction and fitting
# are chained.
knn_batsman = NearestNeighbors(n_neighbors=5, metric='euclidean').fit(batsman_scaled)

knn_bowler = NearestNeighbors(n_neighbors=5, metric='euclidean').fit(bowler_scaled)

knn_all_rounder = NearestNeighbors(n_neighbors=5, metric='euclidean').fit(all_rounder_scaled)

# Task 1: Player Recommendation
def recommend_similar_players(player_id, k=5):
    """
    Recommend up to k players whose stats are closest to the given player's.

    The player's role (looked up in ``player_roles``) decides which feature
    space is searched: batting metrics for 'Batters', bowling metrics for
    'Bowlers', and both combined for 'All-Rounders'.

    Args:
        player_id (str): Name of the player.
        k (int): Number of similar players to return.

    Returns:
        list: Names of the most similar players, nearest first. The queried
        player is never included. The list is shorter than k when the role
        group contains fewer than k other players.

    Raises:
        KeyError: If the player is unknown, or has no row in the stats table
            for their role.
        ValueError: If the player's role is not one of the three supported
            roles.
    """
    role = player_roles[player_id]
    if role == 'Batters':
        index, knn, scaled = batsman_index, knn_batsman, batsman_scaled
        stats, name_col = batsman_stats, 'batsman'
    elif role == 'Bowlers':
        index, knn, scaled = bowler_index, knn_bowler, bowler_scaled
        stats, name_col = bowler_stats, 'bowler'
    elif role == 'All-Rounders':
        index, knn, scaled = all_rounder_index, knn_all_rounder, all_rounder_scaled
        stats, name_col = all_rounder_stats, 'player'
    else:
        # Previously this fell through and failed with UnboundLocalError.
        raise ValueError(f"Unsupported role {role!r} for player {player_id!r}")

    idx = index[player_id]
    # Ask for one extra neighbour to make room for the player themself, but
    # never for more rows than the model was fitted on.
    n_query = min(k + 1, len(scaled))
    _, neighbours = knn.kneighbors([scaled[idx]], n_neighbors=n_query)

    # Remove the query player by position, not by assuming they come first:
    # with identical feature vectors the tie order is not guaranteed.
    others = [i for i in neighbours[0] if i != idx][:k]
    return stats[name_col].iloc[others].tolist()

# Task 2: Tactical Planning
def find_similar_bowlers_for_batsman(batsman, k=5):
    """
    Find k bowlers similar to those the batsman struggles against.

    Every bowler who bowled a delivery to this batsman that is flagged
    ``is_wicket`` is collected, their scaled feature vectors are averaged
    into one profile, and the k bowlers closest to that profile (Euclidean
    distance) are returned.

    The result is drawn from all bowlers, so it may include the bowlers who
    already dismissed the batsman.

    NOTE(review): this treats every ``is_wicket`` delivery as a dismissal
    caused by the bowler; confirm how the dataset flags run-outs.

    Args:
        batsman (str): Name of the batsman.
        k (int): Number of similar bowlers to return.

    Returns:
        list: Names of similar bowlers, closest first. Empty when the batsman
        has no recorded dismissals against any bowler in ``bowler_stats``,
        or when k is not positive.
    """
    dismissal_rows = (df['batsman'] == batsman) & (df['is_wicket'] == True)
    failure_bowlers = df.loc[dismissal_rows, 'bowler'].unique()

    # bowler_scaled is a plain NumPy array, so it must be indexed by row
    # position. Take positions from a boolean mask rather than relying on the
    # DataFrame's index labels happening to equal positions.
    is_failure_bowler = bowler_stats['bowler'].isin(failure_bowlers).to_numpy()
    failure_positions = np.flatnonzero(is_failure_bowler)
    if failure_positions.size == 0:
        return []

    failure_vector = bowler_scaled[failure_positions].mean(axis=0)
    distances = np.linalg.norm(bowler_scaled - failure_vector, axis=1)
    # max(k, 0): a negative k would otherwise slice from the end and return
    # almost every bowler.
    top_matches = np.argsort(distances)[:max(k, 0)]
    return bowler_stats['bowler'].iloc[top_matches].tolist()

# Example usage
if __name__ == "__main__":
    # Task 1 demo: the three closest stand-ins for one named player.
    player_to_replace = 'Ruturaj Gaikwad'
    similar_players = recommend_similar_players(player_id=player_to_replace, k=3)
    print(f"Players similar to {player_to_replace}: {similar_players}")

    # Task 2 demo: three bowlers resembling those who have dismissed a batsman.
    batsman = 'Devon Conway'
    similar_bowlers = find_similar_bowlers_for_batsman(batsman=batsman, k=3)
    print(f"Bowlers similar to those {batsman} struggles against: {similar_bowlers}")

Players similar to Ruturaj Gaikwad: ['Hashim Amla', 'David Warner', 'KL Rahul']
Bowlers similar to those Devon Conway struggles against: ['Keemo Paul', 'Mitchell McClenaghan', 'Lasith Malinga']
