In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, LSTM, GRU, Input, Concatenate, Reshape


data = pd.read_csv('data.csv')


label_enc = LabelEncoder()
data['Player Role'] = label_enc.fit_transform(data['Player Role'])
data['Team'] = label_enc.fit_transform(data['Team'])

features = ['Batting Average', 'Bowling Average', 'Strike Rate', 'Economy Rate', 
            'Centuries Scored', 'Half Centuries Scored', 'Ducks Scored', 
            'Wickets Taken Last Match', 'Runs Scored Last Match', 
            'Player Age', 'Player Experience', 'Player Role', 'Team']

X = data[features]
y = data[['Runs Scored', 'Wickets Taken', 'Balls Faced', 'Balls Bowled', 
          'Overs Bowled', 'Maidens Bowled', 'Runs Conceded']]


for col in y.columns:
    y[col] = y[col] / data['Player Experience']

scaler_X = StandardScaler()
scaler_y = StandardScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y[col] = y[col] / data['Player Experience']


In [7]:
def create_model(input_shape):
    input_layer = Input(shape=(input_shape,))
    
    reshaped_input = Reshape((input_shape, 1))(input_layer)
    
    lstm = LSTM(64, return_sequences=True)(reshaped_input)
    lstm = LSTM(32)(lstm)
    
    gru = GRU(64, return_sequences=True)(reshaped_input)
    gru = GRU(32)(gru)
    
    concat = Concatenate()([lstm, gru])
    
    dense1 = Dense(64, activation='relu')(concat)
    dense2 = Dense(32, activation='relu')(dense1)
    
    output = Dense(7)(dense2)  
    
    model = Model(inputs=input_layer, outputs=output)
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    
    return model


model = create_model(X_train.shape[1])
model.fit(X_train, y_train, epochs=50, batch_size=4, validation_data=(X_test, y_test))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x1a0529b6050>

In [8]:
def calculate_fantasy_points(row, match_type):
    points = 0
    
    
    points += 4  
    points += row['Runs Scored']
    points += row['Wickets Taken'] * 25 if match_type != 'Test' else row['Wickets Taken'] * 16
    
    
    if row['Runs Scored'] == 0 and row['Balls Faced'] > 0:
        points -= 2 if match_type in ['T20', 'T10'] else 3 if match_type == 'ODI' else 4
    
    
    if row['Runs Scored'] >= 50:
        points += 8 if match_type == 'T20' else 4 if match_type in ['ODI', 'Test'] else 16
    if row['Runs Scored'] >= 100:
        points += 16 if match_type == 'T20' else 8
    
    if row['Maidens Bowled'] > 0:
        points += row['Maidens Bowled'] * (12 if match_type == 'T20' else 4 if match_type == 'ODI' else 16)
    
    if row['Wickets Taken'] >= 4:
        points += 8 if match_type == 'T20' else 4
    if row['Wickets Taken'] >= 5:
        points += 16 if match_type == 'T20' else 8
    
    
    if row['Overs Bowled'] >= 2:
        economy_rate = row['Runs Conceded'] / row['Overs Bowled']
        if economy_rate < 6:
            points += 4
        elif economy_rate > 9:
            points -= 2
    
    return points


X_all_scaled = scaler_X.transform(X)
predicted_stats_scaled = model.predict(X_all_scaled)
predicted_stats = scaler_y.inverse_transform(predicted_stats_scaled)



In [9]:

predicted_df = pd.DataFrame(predicted_stats, columns=y.columns)
predicted_df['Player Name'] = data['Player Name']
predicted_df['Team'] = data['Team']
predicted_df['Player Role'] = data['Player Role']

for col in y.columns:
    if col in ['Runs Scored', 'Balls Faced', 'Balls Bowled', 'Runs Conceded']:
        predicted_df[col] = predicted_df[col].round().astype(int)
    elif col in ['Wickets Taken', 'Maidens Bowled']:
        predicted_df[col] = predicted_df[col].round(1)
    elif col == 'Overs Bowled':
        predicted_df[col] = predicted_df[col].round(1)

In [10]:

match_type = 'ODI'  
predicted_df['Predicted Fantasy Points'] = predicted_df.apply(lambda row: calculate_fantasy_points(row, match_type), axis=1)

def select_top_11_with_roles(df):
    sorted_players = df.sort_values(by='Predicted Fantasy Points', ascending=False)
    
    team = []
    roles_covered = set()
    
    for _, player in sorted_players.iterrows():
        if len(team) >= 11 and len(roles_covered) == len(label_enc.classes_):
            break
        if player['Player Role'] not in roles_covered or len(team) < 11:
            team.append(player)
            roles_covered.add(player['Player Role'])
    
    while len(team) < 11:
        for _, player in sorted_players.iterrows():
            if player not in team:
                team.append(player)
                break
    
    return pd.DataFrame(team)

In [11]:
top_players = select_top_11_with_roles(predicted_df)

print("Top 11 Players based on Predicted Fantasy Points:")
print(top_players[['Player Name', 'Team', 'Predicted Fantasy Points', 'Player Role']])

print("\nPredicted Stats for Top Players (per match):")
print(top_players[['Player Name', 'Runs Scored', 'Wickets Taken', 'Balls Faced', 'Balls Bowled', 
                   'Overs Bowled', 'Maidens Bowled', 'Runs Conceded']])


role_distribution = top_players['Player Role'].value_counts()
print(role_distribution)


Top 11 Players based on Predicted Fantasy Points (with role validation):
           Player Name  Team  Predicted Fantasy Points  Player Role
20        Shreyas Iyer     0                185.000000            2
16        Sanju Samson     0                170.000000            2
18     Janith Liyanage     1                 71.500000            2
27       Kuldeep Yadav     0                 70.500001            2
6    Washington Sundar     0                 63.400001            3
12     Pathum Nissanka     1                 61.000000            0
5         Shubman Gill     0                 52.000000            0
0          Virat Kohli     0                 48.000000            0
1         Rohit Sharma     0                 48.000000            0
28      Mohammed Siraj     0                 47.500001            2
2         Rishabh Pant     0                 47.500000            2
14  Maheesh Theekshana     1                 22.900000            1

Predicted Stats for Top Players (per match