In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, LSTM, GRU, Input, Concatenate, Reshape
import joblib
from tensorflow.keras.models import save_model

# Load and preprocess data
data = pd.read_csv('data.csv')

# Encode categorical columns
label_enc = LabelEncoder()
data['Player Role'] = label_enc.fit_transform(data['Player Role'])
data['Team'] = label_enc.fit_transform(data['Team'])

features = ['Batting Average', 'Bowling Average', 'Strike Rate', 'Economy Rate', 
            'Centuries Scored', 'Half Centuries Scored', 'Ducks Scored', 
            'Wickets Taken Last Match', 'Runs Scored Last Match', 
            'Player Age', 'Player Experience', 'Player Role', 'Team']

X = data[features]
y = data[['Runs Scored', 'Wickets Taken', 'Balls Faced', 'Balls Bowled', 
          'Overs Bowled', 'Maidens Bowled', 'Runs Conceded']]

# Normalize y values
for col in y.columns:
    y[col] = y[col] / data['Player Experience']

scaler_X = StandardScaler()
scaler_y = StandardScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

# Create and train model
def create_model(input_shape):
    input_layer = Input(shape=(input_shape,))
    reshaped_input = Reshape((input_shape, 1))(input_layer)
    
    lstm = LSTM(64, return_sequences=True)(reshaped_input)
    lstm = LSTM(32)(lstm)
    
    gru = GRU(64, return_sequences=True)(reshaped_input)
    gru = GRU(32)(gru)
    
    concat = Concatenate()([lstm, gru])
    dense1 = Dense(64, activation='relu')(concat)
    dense2 = Dense(32, activation='relu')(dense1)
    output = Dense(7)(dense2)  
    
    model = Model(inputs=input_layer, outputs=output)
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return model

# Train the model
model = create_model(X_train.shape[1])
model.fit(X_train, y_train, epochs=50, batch_size=4, validation_data=(X_test, y_test))

# Save the model and scalers
model.save('trained_model.h5')
joblib.dump(scaler_X, 'scaler_X.pkl')
joblib.dump(scaler_y, 'scaler_y.pkl')
joblib.dump(label_enc, 'label_enc.pkl')





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y[col] = y[col] / data['Player Experience']




Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


  saving_api.save_model(


['label_enc.pkl']

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Dense, LSTM, GRU, Input, Concatenate, Reshape
import joblib
import os

# Load dataset
data = pd.read_csv('data.csv')

# Encode categorical data
label_enc = LabelEncoder()
data['Player Role'] = label_enc.fit_transform(data['Player Role'])
data['Team'] = label_enc.fit_transform(data['Team'])

# Define features and target variables
features = ['Batting Average', 'Bowling Average', 'Strike Rate', 'Economy Rate', 
            'Centuries Scored', 'Half Centuries Scored', 'Ducks Scored', 
            'Wickets Taken Last Match', 'Runs Scored Last Match', 
            'Player Age', 'Player Experience', 'Player Role', 'Team']

X = data[features]
y = data[['Runs Scored', 'Wickets Taken', 'Balls Faced', 'Balls Bowled', 
          'Overs Bowled', 'Maidens Bowled', 'Runs Conceded', 'Catch taken', 'Caught & Bowled', 'Stumping/Run Out (direct)', 'Run Out (Thrower/Catcher)']]

# Normalize target variables
for col in y.columns:
    y[col] = y[col] / data['Player Experience']

# Standardize features and target variables
scaler_X = StandardScaler()
scaler_y = StandardScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

# Create the model
def create_model(input_shape):
    input_layer = Input(shape=(input_shape,))
    
    reshaped_input = Reshape((input_shape, 1))(input_layer)
    
    lstm = LSTM(64, return_sequences=True)(reshaped_input)
    lstm = LSTM(32)(lstm)
    
    gru = GRU(64, return_sequences=True)(reshaped_input)
    gru = GRU(32)(gru)
    
    concat = Concatenate()([lstm, gru])
    
    dense1 = Dense(64, activation='relu')(concat)
    dense2 = Dense(32, activation='relu')(dense1)
    
    output = Dense(11)(dense2)  
    
    model = Model(inputs=input_layer, outputs=output)
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    
    return model
model = create_model(X_train.shape[1])
model.fit(X_train, y_train, epochs=50, batch_size=4, validation_data=(X_test, y_test))
model.save('trained_model.h5')
joblib.dump(scaler_X, 'scaler_X.joblib')
joblib.dump(scaler_y, 'scaler_y.joblib')
joblib.dump(label_enc, 'label_enc.joblib')




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y[col] = y[col] / data['Player Experience']




Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


  saving_api.save_model(


['label_enc.joblib']

In [1]:
######################################################################################################################

In [9]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, LSTM, GRU, Input, Concatenate, Reshape
import joblib
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2


# Load and preprocess the data
def load_and_preprocess_data(file_path):
    data = pd.read_csv(file_path)
    data = data.drop('file', axis=1)
    # Check for NaNs
    if data.isna().sum().sum() > 0:
        print("Data contains NaN values, filling them with 0.")
        data = data.fillna(0)  # Fill NaN values
    return data


def aggregate_player_stats(data):
    # Batter statistics
    batter_stats = data.groupby('batter').agg({
        'batter_runs': 'sum',
        'total_runs': 'sum',
        'wicket': 'sum',
        'team': lambda x: x.mode().iloc[0] if not x.mode().empty else 'Unknown'  # Most frequent team
    }).reset_index()
    batter_stats.columns = ['Player Name', 'Runs Scored', 'Total Runs', 'Outs', 'Team']
    batter_stats['Batting Average'] = batter_stats['Runs Scored'] / np.maximum(batter_stats['Outs'], 1)
    
    # Bowler statistics
    bowler_stats = data.groupby('bowler').agg({
        'total_runs': 'sum',
        'wicket': 'sum',
        'over': 'count',
        'team': lambda x: x.mode().iloc[0] if not x.mode().empty else 'Unknown'  # Most frequent team
    }).reset_index()
    bowler_stats.columns = ['Player Name', 'Runs Conceded', 'Wickets Taken', 'Balls Bowled', 'Team']
    bowler_stats['Bowling Average'] = bowler_stats['Runs Conceded'] / np.maximum(bowler_stats['Wickets Taken'], 1)
    bowler_stats['Economy Rate'] = (bowler_stats['Runs Conceded'] / bowler_stats['Balls Bowled']) * 6
    
    # Merge batter and bowler stats
    player_stats = pd.merge(batter_stats, bowler_stats, on=['Player Name', 'Team'], how='outer').fillna(0)
    
    # Calculate additional features
    player_stats['Strike Rate'] = (player_stats['Runs Scored'] / np.maximum(player_stats['Balls Bowled'], 1)) * 100
    player_stats['Player Experience'] = player_stats['Balls Bowled'] + player_stats['Total Runs']
    
    # Add a simple Player Role assignment (you might want to refine this)
    player_stats['Player Role'] = player_stats.apply(
        lambda row: 'Bowler' if row['Wickets Taken'] > 10 else 'Batsman' if row['Runs Scored'] > 500 else 'All-Rounder',
        axis=1
    )
    
    return player_stats
# Prepare features and target variables
def prepare_data(player_stats):
    features = ['Batting Average', 'Bowling Average', 'Strike Rate', 'Economy Rate', 'Player Experience']
    X = player_stats[features]
    y = player_stats[['Runs Scored', 'Wickets Taken', 'Balls Bowled', 'Runs Conceded']]
    
    # Normalize target variables
    for col in y.columns:
        y[col] = y[col] / player_stats['Player Experience']
    
    return X, y

# Create the model
def create_model(input_shape):
    input_layer = Input(shape=(input_shape,))
    
    reshaped_input = Reshape((1, input_shape))(input_layer)
    
    lstm = LSTM(64, return_sequences=True, kernel_regularizer=l2(0.001))(reshaped_input)
    lstm = LSTM(32, kernel_regularizer=l2(0.001))(lstm)
    
    gru = GRU(64, return_sequences=True, kernel_regularizer=l2(0.001))(reshaped_input)
    gru = GRU(32, kernel_regularizer=l2(0.001))(gru)
    
    concat = Concatenate()([lstm, gru])
    
    dense1 = Dense(64, activation='relu', kernel_regularizer=l2(0.001))(concat)
    dense2 = Dense(32, activation='relu', kernel_regularizer=l2(0.001))(dense1)
    
    output = Dense(4)(dense2)  # 4 output variables
    
    model = Model(inputs=input_layer, outputs=output)
    model.compile(optimizer=Adam(learning_rate=0.0001), loss='mean_squared_error', metrics=['mae'])
    
    return model


# Main function
def main():
    # Load and preprocess data
    data = load_and_preprocess_data('../combined_standardized_innings_output_of_all_types.csv')
    player_stats = aggregate_player_stats(data)
    player_stats.to_csv('aggregated_player_stats.csv', index=False)
    # Prepare features and target variables
    X, y = prepare_data(player_stats)
    
    # Scale the data
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()
    X_scaled = scaler_X.fit_transform(X)
    y_scaled = scaler_y.fit_transform(y)
    
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)
    # Check for NaN or infinite values in features and target variables
    print(np.isnan(X_train).sum())
    print(np.isnan(y_train).sum())
    print(np.isinf(X_train).sum())
    print(np.isinf(y_train).sum())

    # Replace NaNs or Infs with zeros or an appropriate value
    X_train = np.nan_to_num(X_train, nan=0.0, posinf=0.0, neginf=0.0)
    y_train = np.nan_to_num(y_train, nan=0.0, posinf=0.0, neginf=0.0)

    # Create and train the model
    model = create_model(X_train.shape[1])
    model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))
    # Perform a test prediction to check the output range
    test_pred = model.predict(X_train[:10])
    print(test_pred)

    # Save the model and scalers
    model.save('trained_model.h5')
    joblib.dump(scaler_X, 'scaler_X.joblib')
    joblib.dump(scaler_y, 'scaler_y.joblib')

if __name__ == "__main__":
    main()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y[col] = y[col] / player_stats['Player Experience']


0
1192
0
0
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
[[ 0.9433084  -0.59965163 -0.89823    -0.7873746 ]
 [ 0.18528947 -0.23960485 -0.40633178 -0.50696844]
 [ 0.8327592  -0.60947573 -0.89952636 -0.7773478 ]
 [ 0.96917945 -0.59585637 -0.8871594  -0.7829058 ]
 [ 1.0292917  -0.5996789  -0.8986306  -0.79144573]
 [-1.110804    1.0852158   1.1364818   0.5346854 ]
 [ 0.9943293  -0.5767232  -0.90740913 -0.7782223 ]
 [ 0.96315295 -0.59676737 -0.8924644  -0.

  saving_api.save_model(
