In [8]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, LSTM, GRU, Input, Concatenate, Reshape, Embedding, Flatten

def train_and_predict_model(data):
    def aggregate_player_stats(group):
        return pd.Series({
            'Runs Scored': group['batsman_runs'].sum(),
            'Balls Faced': len(group),
            'Wickets Taken': group['is_wicket'].sum(),
            'Balls Bowled': len(group[group['bowler'] == group.name]),
            'Runs Conceded': group[group['bowler'] == group.name]['total_runs'].sum(),
            'Team': group['batting_team'].iloc[0] if group['batsman'].iloc[0] == group.name else group['bowling_team'].iloc[0],
        })

    player_stats = data.groupby(['id', 'batsman']).apply(aggregate_player_stats).reset_index()
    player_stats = player_stats.rename(columns={'batsman': 'Player Name'})

    player_stats['Overs Bowled'] = player_stats['Balls Bowled'] / 6
    player_stats['Batting Average'] = player_stats.groupby('Player Name')['Runs Scored'].transform('mean')
    player_stats['Bowling Average'] = player_stats['Runs Conceded'] / np.maximum(1, player_stats['Wickets Taken'])
    player_stats['Strike Rate'] = player_stats['Runs Scored'] / np.maximum(1, player_stats['Balls Faced']) * 100
    player_stats['Economy Rate'] = player_stats['Runs Conceded'] / np.maximum(1, player_stats['Overs Bowled'])

    # Create and fit encoders for both Team and Player Name
    team_encoder = LabelEncoder()
    player_encoder = LabelEncoder()

    player_stats['Team'] = team_encoder.fit_transform(player_stats['Team'])
    player_stats['Encoded Player Name'] = player_encoder.fit_transform(player_stats['Player Name'])

    features = ['Runs Scored', 'Balls Faced', 'Wickets Taken', 'Balls Bowled', 'Runs Conceded',
                'Overs Bowled', 'Batting Average', 'Bowling Average', 'Strike Rate', 'Economy Rate', 'Team', 'Encoded Player Name']

    X = player_stats[features]
    y = player_stats[['Runs Scored', 'Wickets Taken', 'Balls Faced', 'Balls Bowled', 
                      'Overs Bowled', 'Runs Conceded']]

    scaler_X = StandardScaler()
    scaler_y = StandardScaler()
    X_scaled = scaler_X.fit_transform(X)
    y_scaled = scaler_y.fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

    def create_model(input_shape):
        input_layer = Input(shape=(input_shape,))
        
        reshaped_input = Reshape((input_shape, 1))(input_layer)
        
        lstm = LSTM(64, return_sequences=True)(reshaped_input)
        lstm = LSTM(32)(lstm)
        
        gru = GRU(64, return_sequences=True)(reshaped_input)
        gru = GRU(32)(gru)
        
        concat = Concatenate()([lstm, gru])
        
        dense1 = Dense(64, activation='relu')(concat)
        dense2 = Dense(32, activation='relu')(dense1)
        
        output = Dense(6)(dense2)  
        
        model = Model(inputs=input_layer, outputs=output)
        model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
        
        return model

    # Create and train the model
    model = create_model(X_train.shape[1])
    model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

    X_player = player_stats['Encoded Player Name'].values
    X_team = player_stats['Team'].values
    model.fit([X_player, X_team], y, epochs=50, batch_size=32, validation_split=0.2)

    def calculate_fantasy_points(row):
        points = 0
        
        points += row['Runs Scored']
        points += row['Wickets Taken'] * 25
        
        if row['Runs Scored'] == 0 and row['Balls Faced'] > 0:
            points -= 2
        
        if row['Runs Scored'] >= 50:
            points += 8
        if row['Runs Scored'] >= 100:
            points += 16
        
        if row['Wickets Taken'] >= 4:
            points += 8
        if row['Wickets Taken'] >= 5:
            points += 16
        
        if row['Overs Bowled'] >= 2:
            economy_rate = row['Runs Conceded'] / row['Overs Bowled']
            if economy_rate < 6:
                points += 4
            elif economy_rate > 9:
                points -= 2
        
        return points

    X_all_scaled = scaler_X.transform(X)
    predicted_stats_scaled = model.predict(X_all_scaled)
    predicted_stats = scaler_y.inverse_transform(predicted_stats_scaled)

    predicted_df = pd.DataFrame(predicted_stats, columns=y.columns)
    predicted_df['Player Name'] = player_stats['Player Name']
    predicted_df['Team'] = player_stats['Team']

    for col in y.columns:
        if col in ['Runs Scored', 'Balls Faced', 'Balls Bowled', 'Runs Conceded']:
            predicted_df[col] = predicted_df[col].round().astype(int)
        elif col in ['Wickets Taken']:
            predicted_df[col] = predicted_df[col].round(1)
        elif col == 'Overs Bowled':
            predicted_df[col] = predicted_df[col].round(1)

    predicted_df['Predicted Fantasy Points'] = predicted_df.apply(calculate_fantasy_points, axis=1)

    def select_top_11(df):
        return df.sort_values(by='Predicted Fantasy Points', ascending=False).head(11)

    top_players = select_top_11(predicted_df)

    print("Top 11 Players based on Predicted Fantasy Points:")
    print(top_players[['Player Name', 'Team', 'Predicted Fantasy Points']])

    print("\nPredicted Stats for Top Players (per match):")
    print(top_players[['Player Name', 'Runs Scored', 'Wickets Taken', 'Balls Faced', 'Balls Bowled', 
                       'Overs Bowled', 'Runs Conceded']])

    return model, team_encoder, player_encoder, scaler_X, scaler_y


In [9]:
data = pd.read_csv('./ball_by_ball_data.csv')
model, team_encoder, player_encoder, scaler_X, scaler_y =train_and_predict_model(data)

  data = pd.read_csv('./ball_by_ball_data.csv')
  player_stats = data.groupby(['id', 'batsman']).apply(aggregate_player_stats).reset_index()


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/50


ValueError: in user code:

    File "c:\Users\shoya\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\shoya\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\shoya\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\shoya\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1150, in train_step
        y_pred = self(x, training=True)
    File "c:\Users\shoya\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\shoya\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\input_spec.py", line 219, in assert_input_compatibility
        raise ValueError(

    ValueError: Layer "model_1" expects 1 input(s), but it received 2 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(32,) dtype=int32>, <tf.Tensor 'IteratorGetNext:1' shape=(32,) dtype=int32>]


In [15]:
def predict_fantasy_points(players_list, model, player_encoder, team_encoder, scaler_X):
    input_df = pd.DataFrame(players_list, columns=['Player Name', 'Team'])
    
    # Encode player names and team names
    input_df['Encoded Player Name'] = player_encoder.transform(input_df['Player Name'])
    input_df['Encoded Team'] = team_encoder.transform(input_df['Team'])
    
    # Create dummy values for other features (you may want to adjust these)
    dummy_features = {
        'Runs Scored': 0, 'Balls Faced': 0, 'Wickets Taken': 0,
        'Balls Bowled': 0, 'Runs Conceded': 0, 'Overs Bowled': 0,
        'Batting Average': 0, 'Bowling Average': 0, 'Strike Rate': 0,
        'Economy Rate': 0
    }
    for feature, value in dummy_features.items():
        input_df[feature] = value
    
    # Prepare input features in the same order as during training
    features = ['Runs Scored', 'Balls Faced', 'Wickets Taken', 'Balls Bowled', 'Runs Conceded',
                'Overs Bowled', 'Batting Average', 'Bowling Average', 'Strike Rate', 'Economy Rate', 
                'Encoded Team', 'Encoded Player Name']
    X = input_df[features]
    
    # Scale the input features
    X_scaled = scaler_X.transform(X)
    
    # Make predictions
    predictions = model.predict(X_scaled)
    
    # Create a dataframe with predictions
    predicted_df = pd.DataFrame(predictions, columns=['Runs Scored', 'Wickets Taken', 'Balls Faced', 'Balls Bowled', 
                                                      'Overs Bowled', 'Runs Conceded'])
    predicted_df['Player Name'] = input_df['Player Name']
    predicted_df['Team'] = input_df['Team']
    
    # Round the predictions as needed
    for col in predicted_df.columns:
        if col in ['Runs Scored', 'Balls Faced', 'Balls Bowled', 'Runs Conceded']:
            predicted_df[col] = predicted_df[col].round().astype(int)
        elif col in ['Wickets Taken', 'Overs Bowled']:
            predicted_df[col] = predicted_df[col].round(1)
    
    predicted_df['Predicted Fantasy Points'] = predicted_df.apply(calculate_fantasy_points, axis=1)
    
    return predicted_df

def calculate_fantasy_points(row):
    points = 0
    
    points += row['Runs Scored']
    points += row['Wickets Taken'] * 25
    
    if row['Runs Scored'] == 0 and row['Balls Faced'] > 0:
        points -= 2
    
    if row['Runs Scored'] >= 50:
        points += 8
    if row['Runs Scored'] >= 100:
        points += 16
    
    if row['Wickets Taken'] >= 4:
        points += 8
    if row['Wickets Taken'] >= 5:
        points += 16
    
    if row['Overs Bowled'] >= 2:
        economy_rate = row['Runs Conceded'] / row['Overs Bowled']
        if economy_rate < 6:
            points += 4
        elif economy_rate > 9:
            points -= 2
    
    return points

def select_top_11(df):
    return df.sort_values(by='Predicted Fantasy Points', ascending=False).head(11)



In [16]:

players_list = [
    {"Player Name": "SC Ganguly", "Team": "Kolkata Knight Riders"},
    {"Player Name": "BB McCullum", "Team": "Kolkata Knight Riders"},
    {"Player Name": "RT Ponting", "Team": "Kolkata Knight Riders"},
    {"Player Name": "DJ Hussey", "Team": "Kolkata Knight Riders"},
    {"Player Name": "M Kartik", "Team": "Kolkata Knight Riders"},
    {"Player Name": "I Sharma", "Team": "Kolkata Knight Riders"},
    {"Player Name": "AB Agarkar", "Team": "Kolkata Knight Riders"},
    {"Player Name": "WP Saha", "Team": "Kolkata Knight Riders"},
    {"Player Name": "LR Shukla", "Team": "Kolkata Knight Riders"},
    {"Player Name": "M Morkel", "Team": "Kolkata Knight Riders"},
    {"Player Name": "AB Dinda", "Team": "Kolkata Knight Riders"},
    {"Player Name": "AB Dinda", "Team": "Kolkata Knight Riders"},
    {"Player Name": "WP Saha", "Team": "Kolkata Knight Riders"},
    {"Player Name": "SC Ganguly", "Team": "Kolkata Knight Riders"},
    {"Player Name": "LR Shukla", "Team": "Kolkata Knight Riders"},
    {"Player Name": "AB Agarkar", "Team": "Kolkata Knight Riders"},

    {"Player Name": "P Kumar", "Team": "Royal Challengers Bangalore"},
    {"Player Name": "Z Khan", "Team": "Royal Challengers Bangalore"},
    {"Player Name": "AA Noffke", "Team": "Royal Challengers Bangalore"},
    {"Player Name": "JH Kallis", "Team": "Royal Challengers Bangalore"},
    {"Player Name": "SB Joshi", "Team": "Royal Challengers Bangalore"},
    {"Player Name": "CL White", "Team": "Royal Challengers Bangalore"},
    {"Player Name": "B Akhil", "Team": "Royal Challengers Bangalore"},
    {"Player Name": "MV Boucher", "Team": "Royal Challengers Bangalore"},
    {"Player Name": "W Jaffer", "Team": "Royal Challengers Bangalore"},
    {"Player Name": "V Kohli", "Team": "Royal Challengers Bangalore"},
    {"Player Name": "A Kumble", "Team": "Royal Challengers Bangalore"},
    {"Player Name": "J Arunkumar", "Team": "Royal Challengers Bangalore"},
    {"Player Name": "P Kumar", "Team": "Royal Challengers Bangalore"},
    {"Player Name": "MV Boucher", "Team": "Royal Challengers Bangalore"},
    {"Player Name": "W Jaffer", "Team": "Royal Challengers Bangalore"},
    {"Player Name": "B Akhil", "Team": "Royal Challengers Bangalore"},
    {"Player Name": "A Kumble", "Team": "Royal Challengers Bangalore"},
]


# Predict fantasy points for all players
predicted_df = predict_fantasy_points(players_list, model, player_encoder, team_encoder, scaler_X)

# Select top 11 players
top_11_players = select_top_11(predicted_df)

print("Top 11 Players based on Predicted Fantasy Points:")
print(top_11_players[['Player Name', 'Team', 'Predicted Fantasy Points']])

print("\nPredicted Stats for Top Players:")
print(top_11_players[['Player Name', 'Runs Scored', 'Wickets Taken', 'Balls Faced', 'Balls Bowled', 
                      'Overs Bowled', 'Runs Conceded']])

# Print the players who were not selected
not_selected = set(predicted_df['Player Name']) - set(top_11_players['Player Name'])
print("\nPlayers not selected:")
print(list(not_selected))

ValueError: in user code:

    File "c:\Users\shoya\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 2440, in predict_function  *
        return step_function(self, iterator)
    File "c:\Users\shoya\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 2425, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\shoya\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 2413, in run_step  **
        outputs = model.predict_step(data)
    File "c:\Users\shoya\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 2381, in predict_step
        return self(x, training=False)
    File "c:\Users\shoya\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\shoya\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\input_spec.py", line 219, in assert_input_compatibility
        raise ValueError(

    ValueError: Layer "model_2" expects 1 input(s), but it received 2 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None,) dtype=int32>, <tf.Tensor 'IteratorGetNext:1' shape=(None,) dtype=int32>]
