In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Subtract, Lambda
from tensorflow.keras.optimizers import Adam
import tensorflow as tf


# Load and preprocess the dataset
def load_and_preprocess_data(file_path):
    data = pd.read_csv(file_path)
    data = data[data['legal_delivery'] == True]
    pair_stats = data.groupby(['bowler', 'batsman']).agg({
        'Runs': 'sum',
        'is_wicket': 'sum',
        'Ball': 'count'
    }).reset_index()
    pair_stats['economy_rate'] = (pair_stats['Runs'] / pair_stats['Ball']) * 6  # Runs per over
    pair_stats['wickets_per_ball'] = pair_stats['is_wicket'] / pair_stats['Ball']
    return pair_stats, data


# Generate pairwise training data
def generate_pairwise_data(pair_stats):
    pairs = []
    for batsman in pair_stats['batsman'].unique():
        batsman_data = pair_stats[pair_stats['batsman'] == batsman]
        if len(batsman_data) < 2:
            continue
        for i in range(len(batsman_data)):
            for j in range(i + 1, len(batsman_data)):
                bowler1 = batsman_data.iloc[i]
                bowler2 = batsman_data.iloc[j]
                # Define "better" as lower economy rate or higher wickets per ball
                if (bowler1['economy_rate'] < bowler2['economy_rate']) or \
                   (bowler1['economy_rate'] == bowler2['economy_rate'] and 
                    bowler1['wickets_per_ball'] > bowler2['wickets_per_ball']):
                    pairs.append((bowler1, bowler2, 1))  # bowler1 > bowler2
                else:
                    pairs.append((bowler2, bowler1, 1))  # bowler2 > bowler1
    return pairs

# Prepare features for the model
def prepare_features(pair_stats):
    features = pair_stats[['economy_rate', 'wickets_per_ball']].values
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(features)
    return scaled_features, scaler

# Build RankNet model
def build_ranknet(input_dim):
    input = Input(shape=(input_dim,))
    x = Dense(64, activation='relu')(input)
    x = Dense(32, activation='relu')(x)
    output = Dense(1)(x)
    model = Model(inputs=input, outputs=output)
    return model

# Create and compile the pairwise ranking model
def create_pairwise_model(input_dim):
    model_i = build_ranknet(input_dim)
    model_j = build_ranknet(input_dim)
    
    input_i = Input(shape=(input_dim,))
    input_j = Input(shape=(input_dim,))
    score_i = model_i(input_i)
    score_j = model_j(input_j)
    diff = Subtract()([score_i, score_j])
    prob = Lambda(lambda x: tf.nn.sigmoid(x))(diff)
    
    pairwise_model = Model(inputs=[input_i, input_j], outputs=prob)
    pairwise_model.compile(optimizer=Adam(learning_rate=0.001), 
                         loss='binary_crossentropy')
    return pairwise_model, model_i

# Train the model
def train_model(pairwise_model, pairs, features, bowler_indices):
    if not pairs:
        raise ValueError("No pairwise data available for training.")
    X_i = np.array([features[bowler_indices[pair[0]['bowler']]] for pair in pairs])
    X_j = np.array([features[bowler_indices[pair[1]['bowler']]] for pair in pairs])
    y = np.array([pair[2] for pair in pairs])
    pairwise_model.fit([X_i, X_j], y, epochs=10, batch_size=32, verbose=0)

# Predict best bowlers for a batsman
def predict_best_bowlers(model, scaler, pair_stats, batsman, available_bowlers):
    batsman_data = pair_stats[pair_stats['batsman'] == batsman]
    bowler_features = batsman_data[batsman_data['bowler'].isin(available_bowlers)]
    if bowler_features.empty:
        return []
    features = bowler_features[['economy_rate', 'wickets_per_ball']].values
    scaled_features = scaler.transform(features)
    scores = model.predict(scaled_features, verbose=0)
    bowler_scores = list(zip(bowler_features['bowler'], scores.flatten()))
    bowler_scores.sort(key=lambda x: x[1], reverse=True)  # Higher score = better
    return [bowler for bowler, _ in bowler_scores]

# Select the playing eleven
def select_playing_eleven(pair_stats, data, opposing_batsmen, available_players, model, scaler):
    bowler_scores = {}
    for batsman in opposing_batsmen:
        best_bowlers = predict_best_bowlers(model, scaler, pair_stats, batsman, 
                                          available_players['Bowlers'] + available_players['All-Rounders'])
        for bowler in best_bowlers[:3]:  # Top 3 bowlers per batsman
            bowler_scores[bowler] = bowler_scores.get(bowler, 0) + 1
    
    # Select top 4 bowlers
    sorted_bowlers = sorted(bowler_scores.items(), key=lambda x: x[1], reverse=True)
    selected_bowlers = [bowler for bowler, _ in sorted_bowlers[:4]]
    
    # Ensure role constraints
    remaining_slots = 11 - len(selected_bowlers)
    selected_batsmen = available_players['Batters'][:max(5, remaining_slots - 2)]  # Min 5 batsmen
    remaining_slots -= len(selected_batsmen)
    
    selected_wk = available_players['Wicket-Keepers'][:1]  # Exactly 1 wicket-keeper
    remaining_slots -= 1
    
    selected_ar = available_players['All-Rounders'][:min(2, remaining_slots)]  # 1-2 all-rounders
    remaining_slots -= len(selected_ar)
    
    # Adjust if needed
    if remaining_slots > 0:
        extra_batsmen = available_players['Batters'][len(selected_batsmen):len(selected_batsmen) + remaining_slots]
        selected_batsmen.extend(extra_batsmen)
    
    playing_eleven = selected_bowlers + selected_batsmen + selected_wk + selected_ar
    return playing_eleven[:11]

# Main execution
def main(file_path, opposing_batsmen, available_players):
    # Load and preprocess data
    pair_stats, data = load_and_preprocess_data(file_path)
    features, scaler = prepare_features(pair_stats)
    
    # Generate pairwise data
    pairs = generate_pairwise_data(pair_stats)
    bowler_indices = {row['bowler']: idx for idx, row in pair_stats.iterrows()}
    
    # Build and train model
    input_dim = features.shape[1]
    pairwise_model, ranking_model = create_pairwise_model(input_dim)
    train_model(pairwise_model, pairs, features, bowler_indices)
    
    # Select team
    playing_eleven = select_playing_eleven(pair_stats, data, opposing_batsmen, 
                                         available_players, ranking_model, scaler)
    return playing_eleven

# Example usage
if __name__ == "__main__":
    file_path = 'final.csv'
    opposing_batsmen = ['Devon Conway', 'Ruturaj Gaikwad', 'Moeen Ali']
    available_players = {
        'Bowlers': ['Mohammed Shami', 'Josh Little', 'Alzarri Joseph'],
        'Batters': ['Shubman Gill', 'Sai Sudharsan', 'Vijay Shankar', 'David Miller', 'Rahul Tewatia'],
        'All-Rounders': ['Hardik Pandya', 'Rashid Khan'],
        'Wicket-Keepers': ['Wriddhiman Saha']
    }
    team = main(file_path, opposing_batsmen, available_players)
    print("Recommended Playing Eleven:", team)

2025-04-21 20:24:34.460557: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Recommended Playing Eleven: ['Mohammed Shami', 'Hardik Pandya', 'Alzarri Joseph', 'Rashid Khan', 'Shubman Gill', 'Sai Sudharsan', 'Vijay Shankar', 'David Miller', 'Rahul Tewatia', 'Wriddhiman Saha', 'Hardik Pandya']
