In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, LSTM, GRU, Input, Concatenate, Reshape


data = pd.read_csv('data.csv')


label_enc = LabelEncoder()
data['Player Role'] = label_enc.fit_transform(data['Player Role'])
data['Team'] = label_enc.fit_transform(data['Team'])

features = ['Batting Average', 'Bowling Average', 'Strike Rate', 'Economy Rate', 
            'Centuries Scored', 'Half Centuries Scored', 'Ducks Scored', 
            'Wickets Taken Last Match', 'Runs Scored Last Match', 
            'Player Age', 'Player Experience', 'Player Role', 'Team']

X = data[features]
y = data[['Runs Scored', 'Wickets Taken', 'Balls Faced', 'Balls Bowled', 
          'Overs Bowled', 'Maidens Bowled', 'Runs Conceded']]


for col in y.columns:
    y[col] = y[col] / data['Player Experience']

scaler_X = StandardScaler()
scaler_y = StandardScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y[col] = y[col] / data['Player Experience']


In [2]:
def create_model(input_shape):
    input_layer = Input(shape=(input_shape,))
    
    reshaped_input = Reshape((input_shape, 1))(input_layer)
    
    lstm = LSTM(64, return_sequences=True)(reshaped_input)
    lstm = LSTM(32)(lstm)
    
    gru = GRU(64, return_sequences=True)(reshaped_input)
    gru = GRU(32)(gru)
    
    concat = Concatenate()([lstm, gru])
    
    dense1 = Dense(64, activation='relu')(concat)
    dense2 = Dense(32, activation='relu')(dense1)
    
    output = Dense(7)(dense2)  
    
    model = Model(inputs=input_layer, outputs=output)
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    
    return model


model = create_model(X_train.shape[1])
model.fit(X_train, y_train, epochs=50, batch_size=4, validation_data=(X_test, y_test))



Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x22abcdbeb50>

In [3]:
def calculate_fantasy_points(row, match_type):
    points = 0
    
    
    points += 4  
    points += row['Runs Scored']
    points += row['Wickets Taken'] * 25 if match_type != 'Test' else row['Wickets Taken'] * 16
    
    
    if row['Runs Scored'] == 0 and row['Balls Faced'] > 0:
        points -= 2 if match_type in ['T20', 'T10'] else 3 if match_type == 'ODI' else 4
    
    
    if row['Runs Scored'] >= 50:
        points += 8 if match_type == 'T20' else 4 if match_type in ['ODI', 'Test'] else 16
    if row['Runs Scored'] >= 100:
        points += 16 if match_type == 'T20' else 8
    
    if row['Maidens Bowled'] > 0:
        points += row['Maidens Bowled'] * (12 if match_type == 'T20' else 4 if match_type == 'ODI' else 16)
    
    if row['Wickets Taken'] >= 4:
        points += 8 if match_type == 'T20' else 4
    if row['Wickets Taken'] >= 5:
        points += 16 if match_type == 'T20' else 8
    
    
    if row['Overs Bowled'] >= 2:
        economy_rate = row['Runs Conceded'] / row['Overs Bowled']
        if economy_rate < 6:
            points += 4
        elif economy_rate > 9:
            points -= 2
    
    return points


X_all_scaled = scaler_X.transform(X)
predicted_stats_scaled = model.predict(X_all_scaled)
predicted_stats = scaler_y.inverse_transform(predicted_stats_scaled)



In [4]:

predicted_df = pd.DataFrame(predicted_stats, columns=y.columns)
predicted_df['Player Name'] = data['Player Name']
predicted_df['Team'] = data['Team']
predicted_df['Player Role'] = data['Player Role']

for col in y.columns:
    if col in ['Runs Scored', 'Balls Faced', 'Balls Bowled', 'Runs Conceded']:
        predicted_df[col] = predicted_df[col].round().astype(int)
    elif col in ['Wickets Taken', 'Maidens Bowled']:
        predicted_df[col] = predicted_df[col].round(1)
    elif col == 'Overs Bowled':
        predicted_df[col] = predicted_df[col].round(1)

In [5]:

match_type = 'ODI'  
predicted_df['Predicted Fantasy Points'] = predicted_df.apply(lambda row: calculate_fantasy_points(row, match_type), axis=1)

def select_top_11_with_roles(df):
    sorted_players = df.sort_values(by='Predicted Fantasy Points', ascending=False)
    
    team = []
    roles_covered = set()
    
    for _, player in sorted_players.iterrows():
        if len(team) >= 11 and len(roles_covered) == len(label_enc.classes_):
            break
        if player['Player Role'] not in roles_covered or len(team) < 11:
            team.append(player)
            roles_covered.add(player['Player Role'])
    
    while len(team) < 11:
        for _, player in sorted_players.iterrows():
            if player not in team:
                team.append(player)
                break
    
    return pd.DataFrame(team)

In [9]:
top_players = select_top_11_with_roles(predicted_df)

print("Top 11 Players based on Predicted Fantasy Points:")
print(top_players[['Player Name', 'Team', 'Predicted Fantasy Points', 'Player Role']])

print("\nPredicted Stats for Top Players (per match):")
print(top_players[['Player Name', 'Runs Scored', 'Wickets Taken', 'Balls Faced', 'Balls Bowled', 
                   'Overs Bowled', 'Maidens Bowled', 'Runs Conceded']])


role_distribution = top_players['Player Role'].value_counts()
print(role_distribution)


Top 11 Players based on Predicted Fantasy Points:
           Player Name  Team  Predicted Fantasy Points  Player Role
20        Shreyas Iyer     0                189.000000            2
16        Sanju Samson     0                186.500000            2
12     Pathum Nissanka     1                 71.500000            0
6    Washington Sundar     0                 60.800001            3
5         Shubman Gill     0                 60.000000            0
18     Janith Liyanage     1                 56.500000            2
0          Virat Kohli     0                 50.400000            0
1         Rohit Sharma     0                 50.000000            0
27       Kuldeep Yadav     0                 47.400000            2
2         Rishabh Pant     0                 43.400000            2
17    Charith Asalanka     1                 42.000000            2
14  Maheesh Theekshana     1                 24.900000            1

Predicted Stats for Top Players (per match):
           Player Na

In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, LSTM, GRU, Input, Concatenate, Reshape

# Load dataset
data = pd.read_csv('data.csv')

# Encode categorical data
label_enc = LabelEncoder()
data['Player Role'] = label_enc.fit_transform(data['Player Role'])
data['Team'] = label_enc.fit_transform(data['Team'])

# Define features and target variables
features = ['Batting Average', 'Bowling Average', 'Strike Rate', 'Economy Rate', 
            'Centuries Scored', 'Half Centuries Scored', 'Ducks Scored', 
            'Wickets Taken Last Match', 'Runs Scored Last Match', 
            'Player Age', 'Player Experience', 'Player Role', 'Team']

X = data[features]
y = data[['Runs Scored', 'Wickets Taken', 'Balls Faced', 'Balls Bowled', 
          'Overs Bowled', 'Maidens Bowled', 'Runs Conceded']]

# Normalize target variables
for col in y.columns:
    y[col] = y[col] / data['Player Experience']

# Standardize features and target variables
scaler_X = StandardScaler()
scaler_y = StandardScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

# Create the model
def create_model(input_shape):
    input_layer = Input(shape=(input_shape,))
    
    reshaped_input = Reshape((input_shape, 1))(input_layer)
    
    lstm = LSTM(64, return_sequences=True)(reshaped_input)
    lstm = LSTM(32)(lstm)
    
    gru = GRU(64, return_sequences=True)(reshaped_input)
    gru = GRU(32)(gru)
    
    concat = Concatenate()([lstm, gru])
    
    dense1 = Dense(64, activation='relu')(concat)
    dense2 = Dense(32, activation='relu')(dense1)
    
    output = Dense(7)(dense2)  
    
    model = Model(inputs=input_layer, outputs=output)
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    
    return model

model = create_model(X_train.shape[1])
model.fit(X_train, y_train, epochs=50, batch_size=4, validation_data=(X_test, y_test))

# Fantasy points calculation function
def calculate_fantasy_points(row, match_type):
    points = 0
    points += 4  # Appearance points
    points += row['Runs Scored']
    points += row['Wickets Taken'] * 25 if match_type != 'Test' else row['Wickets Taken'] * 16
    
    # Duck penalty
    if row['Runs Scored'] == 0 and row['Balls Faced'] > 0:
        points -= 2 if match_type in ['T20', 'T10'] else 3 if match_type == 'ODI' else 4
    
    # Batting bonuses
    if row['Runs Scored'] >= 50:
        points += 8 if match_type == 'T20' else 4 if match_type in ['ODI', 'Test'] else 16
    if row['Runs Scored'] >= 100:
        points += 16 if match_type == 'T20' else 8
    
    # Bowling bonuses
    if row['Maidens Bowled'] > 0:
        points += row['Maidens Bowled'] * (12 if match_type == 'T20' else 4 if match_type == 'ODI' else 16)
    if row['Wickets Taken'] >= 4:
        points += 8 if match_type == 'T20' else 4
    if row['Wickets Taken'] >= 5:
        points += 16 if match_type == 'T20' else 8
    
    # Economy rate bonuses/penalties
    if row['Overs Bowled'] >= 2:
        economy_rate = row['Runs Conceded'] / row['Overs Bowled']
        if economy_rate < 6:
            points += 4
        elif economy_rate > 9:
            points -= 2
    
    return points

# Function to select top 11 players from specific teams
def get_top_11_players_for_teams(data, team_1, team_2, match_type='ODI'):
    # Filter data for the specified teams
    team_data = data[data['Team'].isin([team_1, team_2])]
    
    # Prepare the data for prediction
    X_team = team_data[features]
    X_team_scaled = scaler_X.transform(X_team)
    
    # Predict stats for the filtered dataset
    predicted_stats_scaled = model.predict(X_team_scaled)
    predicted_stats = scaler_y.inverse_transform(predicted_stats_scaled)
    
    predicted_df = pd.DataFrame(predicted_stats, columns=y.columns)
    predicted_df['Player Name'] = team_data['Player Name'].values
    predicted_df['Team'] = team_data['Team'].values
    predicted_df['Player Role'] = team_data['Player Role'].values
    
    for col in y.columns:
        if col in ['Runs Scored', 'Balls Faced', 'Balls Bowled', 'Runs Conceded']:
            predicted_df[col] = predicted_df[col].round().astype(int)
        elif col in ['Wickets Taken', 'Maidens Bowled']:
            predicted_df[col] = predicted_df[col].round(1)
        elif col == 'Overs Bowled':
            predicted_df[col] = predicted_df[col].round(1)
    
    predicted_df['Predicted Fantasy Points'] = predicted_df.apply(lambda row: calculate_fantasy_points(row, match_type), axis=1)
    
    return select_top_11_with_roles(predicted_df)

# Top 11 player selection function with role consideration
def select_top_11_with_roles(df):
    sorted_players = df.sort_values(by='Predicted Fantasy Points', ascending=False)
    
    team = []
    roles_covered = set()
    
    for _, player in sorted_players.iterrows():
        if len(team) >= 11 and len(roles_covered) == len(label_enc.classes_):
            break
        if player['Player Role'] not in roles_covered or len(team) < 11:
            team.append(player)
            roles_covered.add(player['Player Role'])
    
    while len(team) < 11:
        for _, player in sorted_players.iterrows():
            if player not in team:
                team.append(player)
                break
    
    return pd.DataFrame(team)

# Example usage
team_1 = 'Sri Lanka'  # You need to pass the correct label encoded value or modify the code to use team names directly
team_2 = 'Australia'  # You need to pass the correct label encoded value or modify the code to use team names directly

# Filter data based on team names
team_1_encoded = label_enc.transform([team_1])[0]
team_2_encoded = label_enc.transform([team_2])[0]

top_players = get_top_11_players_for_teams(data, team_1_encoded, team_2_encoded)

print("Top 11 Players based on Predicted Fantasy Points:")
print(top_players[['Player Name', 'Team', 'Predicted Fantasy Points', 'Player Role']])

print("\nPredicted Stats for Top Players (per match):")
print(top_players[['Player Name', 'Runs Scored', 'Wickets Taken', 'Balls Faced', 'Balls Bowled', 
                   'Overs Bowled', 'Maidens Bowled', 'Runs Conceded']])

role_distribution = top_players['Player Role'].value_counts()
print(role_distribution)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y[col] = y[col] / data['Player Experience']


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Top 11 Players based on Predicted Fantasy Points:
        Player Name  Team  Predicted Fantasy Points  Player Role
18   Mitchell Starc     0                 82.799998            3
16      Pat Cummins     0                 82.799998            3
14      Steve Smith     0                 80.000000            0
29      Nathan Lyon     0                 79.799998            3
15     David Warner     0         