In [23]:
import torch
import torch.nn as nn
import warnings
from sklearn.preprocessing import StandardScaler
import joblib
warnings.filterwarnings("ignore")


class LargeNet(nn.Module):
    """Feed-forward regressor: five Linear -> BatchNorm1d -> ReLU stages plus a linear head.

    The hidden widths are 512, 256, 128, 64 and 32; the head maps the last
    hidden layer to a single output value. Sub-modules are registered as
    ``fc1``/``bn1``/``relu1`` ... ``fc5``/``bn5``/``relu5`` and ``fc6``.

    Args:
        input_size: Number of input features per sample.
    """

    def __init__(self, input_size):
        super().__init__()
        widths = (input_size, 512, 256, 128, 64, 32)
        # Register the stages one after another so that module order (and the
        # resulting state_dict key order) is fc1, bn1, relu1, fc2, ...
        for stage, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{stage}", nn.Linear(fan_in, fan_out))
            setattr(self, f"bn{stage}", nn.BatchNorm1d(fan_out))
            setattr(self, f"relu{stage}", nn.ReLU())

        self.fc6 = nn.Linear(widths[-1], 1)  # Output layer

    def forward(self, x):
        """Run ``x`` through the five hidden stages and return the raw head output."""
        hidden_stages = (
            (self.fc1, self.bn1, self.relu1),
            (self.fc2, self.bn2, self.relu2),
            (self.fc3, self.bn3, self.relu3),
            (self.fc4, self.bn4, self.relu4),
            (self.fc5, self.bn5, self.relu5),
        )
        hidden = x
        for linear, norm, activation in hidden_stages:
            hidden = activation(norm(linear(hidden)))

        # No activation on the output layer (regression output).
        return self.fc6(hidden)

def mae_loss(y_pred, y_true):
    """Return the mean absolute error between ``y_pred`` and ``y_true`` as a scalar tensor."""
    absolute_error = (y_pred - y_true).abs()
    return absolute_error.mean()

def rmse_loss(y_pred, y_true):
    """Return the root-mean-squared error between ``y_pred`` and ``y_true`` as a scalar tensor."""
    residual = y_pred - y_true
    mean_squared_error = residual.pow(2).mean()
    return mean_squared_error.sqrt()

def get_features(player_id, date, match_type):
    """Build the latest rolling-feature row for one player in one match format.

    Reads ``../data/processed/playerwise/<player_id>.csv``, keeps the matches
    played strictly before ``date`` whose normalised format equals
    ``match_type``, and computes for every match (using earlier matches only):

    * ``<col>_hcma_w<window>``: rolling mean of the sparse ("cat 1") columns
      over windows of 10, 30 and 50 matches;
    * ``<col>_ewma_w<window>_alpha<alpha>``: exponentially weighted mean of the
      dense ("cat 2") columns over windows of 3, 5 and 7 matches for alphas
      0.5, 0.7 and 0.9.

    The feature row of the last remaining match is written to
    ``<player_id>_features.csv`` in the current working directory and returned.

    Args:
        player_id: Identifier used as the CSV file stem.
        date: Cut-off date (anything ``pandas.to_datetime`` accepts); only
            matches before it are used.
        match_type: Match format, case-insensitive. ``it20``/``mdm``/``odm``
            are mapped to ``t20``/``test``/``odi``.

    Returns:
        pandas.Series: the feature values plus a ``player_role`` entry
        (``"all-rounder"``, ``"bowler"``, ``"batsman"`` or ``"new"``). When no
        match survives the filters every feature is NaN and the role is
        ``"new"``.

    Raises:
        FileNotFoundError: if the player's CSV does not exist.
        ValueError: if the CSV lacks a ``match_type`` or ``date`` column.
    """
    import os

    import numpy as np
    import pandas as pd

    # Constants
    cat_1_columns = [
        "boundaries", "sixes", "fifties", "hundreds", "ducks", "thirty_run_innings",
        "caught", "run out", "direct", "stumped", "3+catches", "wickets_taken",
        "3wickets_haul", "5wickets_haul", "maiden_overs", "wickets_lbw_bowled"
    ]
    cat_1_windows = [10, 30, 50]  # Sparse
    cat_2_columns = [
        "dot_balls", "total_runs", "balls_faced", "strike_rate", "runs_conceded",
        "balls_bowled", "economy_rate", "dots", "bowling_average"
    ]
    cat_2_windows = [3, 5, 7]  # Dense
    ewma_alphas = [0.5, 0.7, 0.9]
    format_mapping = {"it20": "t20", "mdm": "test", "odm": "odi"}

    # Read the player data
    input_folder = "../data/processed/playerwise/"
    file_path = os.path.join(input_folder, f"{player_id}.csv")
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Player data file not found: {file_path}")
    df = pd.read_csv(file_path)

    # Normalize 'match_type' to lowercase and map formats
    if 'match_type' not in df.columns:
        raise ValueError("match_type column not found in the data.")
    df["match_type"] = df["match_type"].str.lower()
    df["revised_format"] = df["match_type"].map(format_mapping).fillna(df["match_type"])

    # Keep only matches played before the requested date
    if 'date' not in df.columns:
        raise ValueError("Date column not found in the data; cannot filter by date.")
    df['date'] = pd.to_datetime(df['date'])
    date = pd.to_datetime(date)
    df = df[df['date'] < date]

    # Keep only the requested format, oldest match first. sort_values returns
    # a new frame, so nothing below mutates a view of the CSV frame.
    match_type_lower = match_type.lower()
    match_type_mapped = format_mapping.get(match_type_lower, match_type_lower)
    df = df[df['revised_format'] == match_type_mapped].sort_values('date')

    if df.empty:
        # No usable history: emit an all-NaN feature row tagged as a new player.
        feature_columns = []
        for window in cat_1_windows:
            for col in cat_1_columns:
                feature_columns.append(f"{col}_hcma_w{window}")
        for window in cat_2_windows:
            for col in cat_2_columns:
                for alpha in ewma_alphas:
                    feature_columns.append(f"{col}_ewma_w{window}_alpha{alpha}")
        features = pd.DataFrame(columns=['player_role'] + feature_columns)
        features.loc[0] = [np.nan] * (len(feature_columns) + 1)
        features['player_role'] = 'new'
        features.to_csv(f"{player_id}_features.csv", index=False)  # Save to CSV
        return features.iloc[0]

    # Determine player role from the share of matches with bowling/batting activity
    total_matches = len(df)

    def _active_share(column):
        # A column absent from the CSV counts as "never active", mirroring how
        # missing stat columns are tolerated in the feature computation below.
        if column not in df.columns:
            return 0.0
        return int((df[column] > 0).sum()) / total_matches

    is_bowler = _active_share("balls_bowled") >= 0.25
    is_batsman = _active_share("balls_faced") >= 0.25

    if is_bowler and is_batsman:
        player_role = "all-rounder"
    elif is_bowler:
        player_role = "bowler"
    elif is_batsman:
        player_role = "batsman"
    else:
        player_role = "new"

    # df now holds a single format, so the rolling statistics are computed
    # directly on each column (no per-format grouping needed). shift(1) makes
    # every row depend on earlier matches only.
    # NOTE(review): the row returned below belongs to the last historical
    # match, so because of shift(1) that match's own stats are not part of the
    # returned features - confirm this is the intended inference-time behaviour.

    # Cat-1 (Sparse: HCMA)
    cat1_features = {}
    for window in cat_1_windows:
        for col in cat_1_columns:
            col_name = f"{col}_hcma_w{window}"
            if col in df.columns:
                cat1_features[col_name] = (
                    df[col].shift(1).rolling(window=window, min_periods=1).mean()
                )
            else:
                cat1_features[col_name] = np.nan

    # Cat-2 (Dense: EWMA)
    cat2_features = {}
    for window in cat_2_windows:
        for col in cat_2_columns:
            for alpha in ewma_alphas:
                col_name = f"{col}_ewma_w{window}_alpha{alpha}"
                if col in df.columns:
                    cat2_features[col_name] = (
                        df[col].shift(1).rolling(window=window, min_periods=1).apply(
                            lambda x: x.ewm(alpha=alpha, adjust=False).mean().iloc[-1]
                            if len(x) > 0 else np.nan
                        )
                    )
                else:
                    cat2_features[col_name] = np.nan

    # An explicit index is required: when every entry of a dict is a scalar
    # (none of a category's columns exist in the CSV) the DataFrame constructor
    # otherwise raises "If using all scalar values, you must pass an index".
    results = pd.concat(
        [
            df,
            pd.DataFrame(cat1_features, index=df.index),
            pd.DataFrame(cat2_features, index=df.index),
        ],
        axis=1,
    )

    # Feature columns
    feature_columns = list(cat1_features.keys()) + list(cat2_features.keys())

    # Get the last row of features; copy so adding the role never writes
    # through to `results`.
    last_row = results.iloc[-1][feature_columns].copy()
    last_row['player_role'] = player_role

    # Save to CSV
    output_file = f"{player_id}_features.csv"
    last_row.to_frame().T.to_csv(output_file, index=False)
    print(f"Features saved to {output_file}")

    return last_row


def model_inference(last_row, match_type=None):
    """Scale one player's feature row and run it through the saved LargeNet model.

    Only the 30-match HCMA features and the window-5 / alpha-0.7 EWMA features
    are fed to the model. Features missing from ``last_row`` are filled with 0
    (a warning is printed) before scaling.

    Args:
        last_row (pandas.Series): Feature row, e.g. as returned by
            ``get_features``.
        match_type (str | None): Match format used to pick the fitted scaler
            file ``../data/interim/scaler/scaler_<match_type>.save``. When
            omitted, a module-level ``match_type`` variable is used, which is
            what earlier versions of this function relied on implicitly.

    Returns:
        torch.Tensor: Raw model output for the single input row.

    Raises:
        ValueError: if ``match_type`` is neither passed nor defined at module
            level.
    """
    import torch

    if match_type is None:
        # Backward compatibility for callers that still depend on the global.
        match_type = globals().get("match_type")
    if match_type is None:
        raise ValueError("match_type must be provided to select the fitted scaler.")

    # Convert last_row (Series) to DataFrame
    features_df = last_row.to_frame().T  # Transpose to get a DataFrame

    # Define the feature columns
    cat_1_columns = [
        "boundaries",
        "sixes",
        "fifties",
        "hundreds",
        "ducks",
        "thirty_run_innings",
        "caught",
        "run out",
        "direct",
        "stumped",
        "3+catches",
        "wickets_taken",
        "3wickets_haul",
        "5wickets_haul",
        "maiden_overs",
        "wickets_lbw_bowled",
    ]

    cat_2_columns = [
        "dot_balls",
        "total_runs",
        "balls_faced",
        "strike_rate",
        "runs_conceded",
        "balls_bowled",
        "economy_rate",
        "dots",
        "bowling_average",
    ]

    # Construct feature names as per training
    hcma_window = 30
    ewma_window = 5
    ewma_alpha = 0.7
    numerical_features = [f"{col}_hcma_w{hcma_window}" for col in cat_1_columns]
    numerical_features += [
        f"{col}_ewma_w{ewma_window}_alpha{ewma_alpha}" for col in cat_2_columns
    ]

    # Ensure that all necessary features are present
    missing_cols = set(numerical_features) - set(features_df.columns)
    if missing_cols:
        print(f"Warning: Missing columns in input data: {missing_cols}")
        # Handle missing columns (e.g., fill with zeros)
        for col in missing_cols:
            features_df[col] = 0  # Adjust as necessary for your use case

    scaler_fit = joblib.load(f"../data/interim/scaler/scaler_{match_type}.save")
    print(features_df[numerical_features])

    features_df[numerical_features] = scaler_fit.transform(
        features_df[numerical_features]
    )
    print("---------------------------------")
    print(features_df[numerical_features])
    print("---------------------------------")

    # Select and prepare the feature columns
    X = features_df[numerical_features].astype(float)

    # Convert to torch tensor
    X_tensor = torch.tensor(X.values, dtype=torch.float32)

    # Load the ANN model.
    # NOTE(review): the checkpoint path is fixed to the T20 model regardless of
    # match_type - confirm whether per-format checkpoints should be selected.
    model = LargeNet(input_size=X_tensor.shape[1])
    # The input tensor lives on the CPU, so map the checkpoint there as well;
    # this also lets a GPU-saved checkpoint load on a CPU-only machine.
    state_dict = torch.load(
        '../model_artifacts/Product_UI_t20_Model.pth', map_location="cpu"
    )
    model.load_state_dict(state_dict)
    model.eval()

    # Perform inference; the raw outputs are the predicted regression scores
    with torch.no_grad():
        predicted_scores = model(X_tensor)

    return predicted_scores


def _score_sort_key(entry):
    """Return a float sort key for a ``{"id", "score"}`` entry, ranking NaN scores lowest."""
    import math

    value = float(entry["score"])
    # NaN never compares consistently, which would leave the ordering undefined.
    return float("-inf") if math.isnan(value) else value


def predict(team1, team2, team_players1, team_players2, match_date, match_type):
    """
    Predicts the top 11 players based on the model's inference.

    Every player of both squads gets a feature row from ``get_features`` and a
    score from ``model_inference``; players are then ranked by score, highest
    first. Players whose score is NaN are ranked last.

    Args:
        team1 (str): Name of Team 1 (currently unused).
        team2 (str): Name of Team 2 (currently unused).
        team_players1 (list[dict]): List of players in Team 1; each dict needs an "id" key.
        team_players2 (list[dict]): List of players in Team 2; each dict needs an "id" key.
        match_date (str): Date of the match.
        match_type (str): Type of the match.

    Returns:
        list[dict]: Up to 11 ``{"id": ..., "score": ...}`` entries, where
        ``score`` is the tensor returned by ``model_inference``.
    """
    player_scores = []

    # Iterate through all players to compute their scores
    for player in team_players1 + team_players2:
        player_id = player["id"]
        features = get_features(player_id, match_date, match_type)
        # Pass the format explicitly so the matching scaler is used.
        score = model_inference(features, match_type)
        player_scores.append({"id": player_id, "score": score})

    # Sort players by score in descending order
    player_scores.sort(key=_score_sort_key, reverse=True)

    # Return the top 11 players with their scores
    return player_scores[:11]

# Example usage: rank a one-player squad for a T20 fixture.
# These stay plain module-level names; `match_type` in particular may be looked
# up as a global by code elsewhere in this file.
team1, team2 = "Bangladesh", "India"
team_players2 = []
team_players1 = [
    {"id": "0404d43c", "name": "Player A1", "alt_name": "", "image": ""},
]
match_date, match_type = "2024-10-09", "T20"

top_players = predict(
    team1=team1,
    team2=team2,
    team_players1=team_players1,
    team_players2=team_players2,
    match_date=match_date,
    match_type=match_type,
)
print(top_players)

Features saved to 0404d43c_features.csv
    boundaries_hcma_w30 sixes_hcma_w30 fifties_hcma_w30 hundreds_hcma_w30  \
172            2.133333       0.766667              0.1               0.0   

    ducks_hcma_w30 thirty_run_innings_hcma_w30 caught_hcma_w30  \
172       0.066667                    0.166667        0.733333   

    run out_hcma_w30 direct_hcma_w30 stumped_hcma_w30  ...  \
172         0.033333             0.0         0.166667  ...   

    wickets_lbw_bowled_hcma_w30 dot_balls_ewma_w5_alpha0.7  \
172                         0.0                     7.4477   

    total_runs_ewma_w5_alpha0.7 balls_faced_ewma_w5_alpha0.7  \
172                     13.1818                      13.1491   

    strike_rate_ewma_w5_alpha0.7 runs_conceded_ewma_w5_alpha0.7  \
172                   111.195769                            0.0   

    balls_bowled_ewma_w5_alpha0.7 economy_rate_ewma_w5_alpha0.7  \
172                           0.0                           0.0   

    dots_ewma_w5_alpha0