In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Load the data
df = pd.read_csv("./output/master_dataframe.csv", index_col=0)

# Feature engineering
df["player_runs"] = df["runs.batter"]
df["player_wickets"] = np.where(df["wicket.kind"].notna(), 1, 0)
df["player_fours"] = np.where(df["runs.batter"] == 4, 1, 0)
df["player_sixes"] = np.where(df["runs.batter"] == 6, 1, 0)

# One-hot encode the categorical variables
categorical_cols = ["team_1", "team_2", "venue","bowler","non_striker"]
df = pd.get_dummies(df, columns=categorical_cols)

# Group the data by player and calculate aggregated features
agg_func={
    col: "sum"
    for col in df.columns
    if col.startswith("team_") or col.startswith("venue_") or col in ["player_runs", "player_wickets", "player_fours", "player_sixes"]
}
agg_func["over"]="mean"

player_stats = (
    df.groupby("batter")[
        ["over","player_runs", "player_wickets", "player_fours", "player_sixes"]
        + [
            col
            for col in df.columns
            if col.startswith("team_") or col.startswith("venue_")
        ]
    ]
    .agg(agg_func)
    .reset_index()
)

In [3]:
player_stats.columns

Index(['batter', 'team_1_runs.total', 'team_2_runs.total', 'team_1_over',
       'team_2_over', 'team_1_runs.extras', 'team_2_runs.extras',
       'team_1_extras.legbyes', 'team_2_extras.legbyes', 'team_1_extras.wides',
       'team_2_extras.wides', 'team_1_extras.byes', 'team_2_extras.byes',
       'team_1_extras.noballs', 'team_2_extras.noballs', 'player_runs',
       'player_wickets', 'player_fours', 'player_sixes',
       'team_1_Chennai Super Kings', 'team_1_Deccan Chargers',
       'team_1_Delhi Capitals', 'team_1_Delhi Daredevils',
       'team_1_Gujarat Lions', 'team_1_Gujarat Titans',
       'team_1_Kings XI Punjab', 'team_1_Kochi Tuskers Kerala',
       'team_1_Kolkata Knight Riders', 'team_1_Lucknow Super Giants',
       'team_1_Mumbai Indians', 'team_1_Pune Warriors', 'team_1_Punjab Kings',
       'team_1_Rajasthan Royals', 'team_1_Rising Pune Supergiant',
       'team_1_Rising Pune Supergiants', 'team_1_Royal Challengers Bangalore',
       'team_1_Royal Challengers Bengalu

In [4]:
# Merge the aggregated player stats back into the main DataFrame
df = pd.merge(df, player_stats, on="batter", how="left",suffixes=["_original",""])

In [5]:
df.columns

Index(['date', 'match_number', 'innings', 'over_original', 'batter', 'stage',
       'runs.batter', 'runs.extras', 'runs.total', 'extras.legbyes',
       ...
       'venue_ShaheedVeerNarayanSinghInternationalStadium',
       'venue_SharjahCricketStadium', 'venue_SheikhZayedStadium',
       'venue_StGeorge'sPark', 'venue_SubrataRoySaharaStadium',
       'venue_SuperSportPark', 'venue_VidarbhaCricketAssociationStadium',
       'venue_WankhedeStadium', 'venue_ZayedCricketStadium', 'over'],
      dtype='object', length=1426)

In [32]:
"player_fours" in df.columns

True

In [33]:
df.fillna(0,inplace=True)

In [None]:
df['']

In [34]:
type(df['wicket.kind'][0])

int

In [None]:


# Define the target variable (fantasy points)
df["fantasy_points"] = (
    df["runs.batter"]
    + ((df["wicket.kind"]!=0) * -10)
    +(df['player_wickets'] * -1)
    + (df["player_fours"] * 4)
    + (df["player_sixes"] * 6)
)

# Split the data into training and testing sets
X = df[
    ["player_runs", "player_wickets", "player_fours", "player_sixes","over"]
    + [col for col in df.columns if col.startswith("team_") or col.startswith("venue_")]
]
y = df["fantasy_points"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train the models
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_y_pred = rf_model.predict(X_test)
rf_mse = mean_squared_error(y_test, rf_y_pred)
rf_r2 = r2_score(y_test, rf_y_pred)
print(f"Random Forest MSE: {rf_mse:.2f}")
print(f"Random Forest R-squared: {rf_r2:.2f}")

# gb_model = GradientBoostingRegressor(n_estimators=100, random_state=42)
# gb_model.fit(X_train, y_train)
# gb_y_pred = gb_model.predict(X_test)
# gb_mse = mean_squared_error(y_test, gb_y_pred)
# gb_r2 = r2_score(y_test, gb_y_pred)
# print(f"Gradient Boosting MSE: {gb_mse:.2f}")
# print(f"Gradient Boosting R-squared: {gb_r2:.2f}")

# nn_model = MLPRegressor(hidden_layer_sizes=(64, 32), random_state=42)
# nn_model.fit(X_train, y_train)
# nn_y_pred = nn_model.predict(X_test)
# nn_mse = mean_squared_error(y_test, nn_y_pred)
# nn_r2 = r2_score(y_test, nn_y_pred)
# print(f"Neural Network MSE: {nn_mse:.2f}")
# print(f"Neural Network R-squared: {nn_r2:.2f}")

# Cross-Validation
kf = KFold(n_splits=5, random_state=42, shuffle=True)
rf_scores = cross_val_score(rf_model, X, y, cv=kf, scoring="r2")
print(f"Random Forest R-squared (CV): {rf_scores.mean():.2f}")

# Player Insights Visualization
top_players = (
    df.groupby("batter")["fantasy_points"].mean().sort_values(ascending=False).head(20)
)
plt.figure(figsize=(12, 6))
top_players.plot(kind="bar")
plt.title("Top 20 Players by Average Fantasy Points")
plt.xlabel("Player")
plt.ylabel("Average Fantasy Points")
plt.show()

Random Forest MSE: 9.65
Random Forest R-squared: 1.00


In [8]:
df['wicket.kind'].value_counts()

wicket.kind
caught                   8053
bowled                   2204
run out                  1107
lbw                       798
caught and bowled         367
stumped                   358
retired hurt               15
hit wicket                 15
obstructing the field       3
retired out                 3
Name: count, dtype: int64

In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load the data
df = pd.read_csv("./output/master_dataframe.csv")

# Feature engineering
df["player_runs"] = df["runs.batter"]
df["player_catches"] = df["fielder"].notna().astype(int)
df["player_stumpings"] = np.where(df["wicket.kind"] == "stumped", 1, 0)
df["player_runouts"] = np.where(df["dismissal.type"] == "run out", 1, 0)

# Group the bowling data by bowler
bowler_stats = (
    df.groupby("bowler")[["wicket.kind"]]
    .apply(lambda x: x.value_counts())
    .unstack(fill_value=0)
    .reset_index()
)
bowler_stats.columns = [
    "bowler",
    "bowler_wickets",
    "bowler_maidens",
    "bowler_runs",
    "bowler_overs",
]
bowler_stats["bowler_economy"] = (
    bowler_stats["bowler_runs"] / bowler_stats["bowler_overs"]
)

# One-hot encode the categorical variables
categorical_cols = ["team_1", "team_2", "venue"]
df = pd.get_dummies(df, columns=categorical_cols)

# Merge the bowling stats back into the main DataFrame
df = pd.merge(df, bowler_stats, on="bowler", how="left").fillna(0)

# Define the target variable (fantasy points)
df["fantasy_points"] = (
    df["runs.batter"]
    + (df["player_wickets"] * 25)
    + (df["player_fours"] * 4)
    + (df["player_sixes"] * 6)
    + (df["player_catches"] * 10)
    + (df["player_stumpings"] * 15)
    + (df["player_runouts"] * 10)
    + (df["bowler_wickets"] * 20)
    - (df["bowler_runs"] / df["bowler_overs"] * 10)
)

# Split the data into training and testing sets
X = df[
    [
        "player_runs",
        "player_wickets",
        "player_fours",
        "player_sixes",
        "player_catches",
        "player_stumpings",
        "player_runouts",
        "bowler_wickets",
        "bowler_economy",
    ]
    + [col for col in df.columns if col.startswith("team_") or col.startswith("venue_")]
]
y = df["fantasy_points"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train the models
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

gb_model = GradientBoostingRegressor(n_estimators=100, random_state=42)
gb_model.fit(X_train, y_train)

nn_model = MLPRegressor(hidden_layer_sizes=(64, 32), random_state=42)
nn_model.fit(X_train, y_train)


def get_fantsy_players(team, opponent, venue):
    # Prepare the input data
    input_data = pd.DataFrame(
        {"team_1": [team], "team_2": [opponent], "venue": [venue]}
    )
    input_data = pd.get_dummies(input_data, columns=["team_1", "team_2", "venue"])
    input_data = pd.merge(
        input_data,
        df[
            [
                "batter",
                "bowler",
                "player_runs",
                "player_wickets",
                "player_fours",
                "player_sixes",
                "player_catches",
                "player_stumpings",
                "player_runouts",
                "bowler_wickets",
                "bowler_economy",
            ]
            + [
                col
                for col in df.columns
                if col.startswith("team_") or col.startswith("venue_")
            ]
        ],
        on=["batter", "bowler"],
        how="left",
    )
    input_data = input_data.fillna(0)

    # Use the trained models to predict fantasy points
    input_data["rf_predicted_points"] = rf_model.predict(input_data)
    input_data["gb_predicted_points"] = gb_model.predict(input_data)
    input_data["nn_predicted_points"] = nn_model.predict(input_data)

    # Combine the predictions and sort the players
    input_data["final_predicted_points"] = (
        input_data["rf_predicted_points"]
        + input_data["gb_predicted_points"]
        + input_data["nn_predicted_points"]
    ) / 3
    dream11_team = (
        input_data.sort_values("final_predicted_points", ascending=False)
        .head(11)["batter"]
        .tolist()
    )

    # Ensure a balanced team with proper batting order
    batsmen = [player for player in dream11_team if 'WK' not in df.loc[df['batter'] == player, 'position'].item() and 'BOWL' not in df.loc[df['batter'] == player, 'position'].item()]
    bowlers = [player for player in dream11_team if 'BOWL' in df.loc[df['batter'] == player, 'position'].item()]
    keepers = [player for player in dream11_team if 'WK' in df.loc[df['batter'] == player, 'position'].item()]
    all_rounders = [player for player in dream11_team if 'WK' not in df.loc[df['batter'] == player, 'position'].item() and 'BOWL' in df.loc[df['batter'] == player, 'position'].item()]

    # Arrange the players in the correct batting order
    balanced_team = batsmen[:4] + keepers + batsmen[4:] + bowlers + all_rounders
    return balanced_team

KeyError: 'fielder'

In [11]:
df.columns

Index(['Unnamed: 0', 'date', 'match_number', 'innings', 'over', 'batter',
       'bowler', 'stage', 'non_striker', 'runs.batter', 'runs.extras',
       'runs.total', 'extras.legbyes', 'extras.wides', 'extras.byes',
       'extras.noballs', 'wicket.kind', 'wicket.player_out', 'wicket.fielders',
       'season', 'city', 'team_1', 'team_2', 'team_1_runs.total',
       'team_2_runs.total', 'match_winner', 'player_of_match', 'team_1_over',
       'team_2_over', 'toss_winner', 'toss_decision', 'team_1_runs.extras',
       'team_2_runs.extras', 'team_1_extras.legbyes', 'team_2_extras.legbyes',
       'team_1_extras.wides', 'team_2_extras.wides', 'team_1_extras.byes',
       'team_2_extras.byes', 'team_1_extras.noballs', 'team_2_extras.noballs',
       'officials_match_referees', 'officials_reserve_umpires',
       'officials_tv_umpires', 'officials_umpires_1', 'officials_umpires_2',
       'outcome_by_wickets', 'venue', 'outcome_by_runs', 'event_stage',
       'outcome_method', 'match_extras'

In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load the data
df = pd.read_csv("./output/master_dataframe.csv", index_col=0)

# Group players by team
team_players = df.groupby(["team_1", "team_2", "date"])["batter"].unique().reset_index()
team_players["team_1_players"] = team_players["batter"].apply(
    lambda x: x if isinstance(x, list) else x
)
team_players["team_2_players"] = team_players["batter"].apply(
    lambda x: x if isinstance(x, list) else x
)
team_players = team_players.drop("batter", axis=1)

# Feature engineering
df["player_runs"] = df["runs.batter"]
df["player_wickets"] = np.where(df["wicket.kind"].notna(), 1, 0)
df["player_fours"] = np.where(df["runs.batter"] == 4, 1, 0)
df["player_sixes"] = np.where(df["runs.batter"] == 6, 1, 0)
df["player_catches"] = df["wicket.fielders"].str.split(",").str.len()
df["player_stumpings"] = np.where(df["wicket.kind"] == "stumped", 1, 0)
df["player_runouts"] = np.where(df["wicket.kind"] == "run out", 1, 0)
team1= df['team_1']
team2 = df['team_2']
venue = df['venue']

# Group the bowling data by bowler
bowler_stats = (
    df.groupby("bowler")[["wicket.kind", "runs.batter"]]
    .agg({"wicket.kind": "count", "runs.batter": "sum"})
    .reset_index()
)
bowler_stats.columns = [
    "bowler",
    "bowler_wickets",
    "bowler_runs",
]
bowler_stats["bowler_overs"] = (
    df.groupby("bowler")["over"].count().reset_index()["over"]
) / 6
bowler_stats["bowler_economy"] = (
    bowler_stats["bowler_runs"] / bowler_stats["bowler_overs"]
)

# One-hot encode the categorical variables
categorical_cols = ["team_1", "team_2", "venue"]
df = pd.get_dummies(df, columns=categorical_cols)

# Merge the bowling stats back into the main DataFrame
df = pd.merge(df, bowler_stats, on="bowler", how="left")
df = df.fillna(0)

# Define the target variable (fantasy points)
df["fantasy_points"] = (
    df["runs.batter"]
    + (df["bowler_wickets"] * 20)
    + (df["player_fours"] * 4)
    + (df["player_sixes"] * 6)
    + (df["player_catches"] * 10)
    + (df["player_stumpings"] * 15)
    + (df["player_runouts"] * 10)
    - (df["bowler_runs"] / df["bowler_overs"] * 10)
)
df['team1'] = team1
df['team2'] = team2
df['venue'] = venue

In [17]:
c =[col for col in  list(df.columns) if col.startswith("venue_")]

In [18]:
c

['venue_ArunJaitleyStadium',
 'venue_BarabatiStadium',
 'venue_BarsaparaCricketStadium',
 'venue_BharatRatnaShriAtalBihariVajpayeeEkanaCricketStadium',
 'venue_BrabourneStadium',
 'venue_BuffaloPark',
 'venue_DeBeersDiamondOval',
 'venue_DrDYPatilSportsAcademy',
 'venue_DrYSRajasekharaReddyACA-VDCACricketStadium',
 'venue_DubaiInternationalCricketStadium',
 'venue_EdenGardens',
 'venue_FerozShahKotla',
 'venue_GreenPark',
 'venue_HimachalPradeshCricketAssociationStadium',
 'venue_HolkarCricketStadium',
 'venue_JSCAInternationalStadiumComplex',
 'venue_Kingsmead',
 'venue_MAChidambaramStadium',
 'venue_MChinnaswamyStadium',
 'venue_MaharajaYadavindraSinghInternationalCricketStadium',
 'venue_MaharashtraCricketAssociationStadium',
 'venue_NarendraModiStadium',
 'venue_NehruStadium',
 'venue_NewWanderersStadium',
 'venue_Newlands',
 'venue_OUTsuranceOval',
 'venue_PunjabCricketAssociationISBindraStadium',
 'venue_PunjabCricketAssociationStadium',
 'venue_RajivGandhiInternationalStadium',


In [39]:


# Cluster players into batters and bowlers
X = df[
    [
        "player_runs",
        "player_wickets",
        "player_fours",
        "player_sixes",
        "player_catches",
        "player_stumpings",
        "player_runouts",
        "bowler_wickets",
        "bowler_economy",
    ]
]
kmeans = KMeans(n_clusters=2, random_state=42)
df["player_type"] = kmeans.fit_predict(X)

# Split the data into training and testing sets
X = df[
    [
        "player_runs",
        "player_wickets",
        "player_fours",
        "player_sixes",
        "player_catches",
        "player_stumpings",
        "player_runouts",
        "bowler_wickets",
        "bowler_economy",
    ]
    + [col for col in df.columns if (col.startswith("team_") and  col.find(" ")!=-1)or col.startswith("venue_")]
]
y = df["fantasy_points"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train the models
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

gb_model = GradientBoostingRegressor(n_estimators=100, random_state=42)
gb_model.fit(X_train, y_train)

nn_model = MLPRegressor(hidden_layer_sizes=(16, 8), random_state=42)
nn_model.fit(X_train, y_train)




In [40]:
list(X.columns)

['player_runs',
 'player_wickets',
 'player_fours',
 'player_sixes',
 'player_catches',
 'player_stumpings',
 'player_runouts',
 'bowler_wickets',
 'bowler_economy',
 'team_1_Chennai Super Kings',
 'team_1_Deccan Chargers',
 'team_1_Delhi Capitals',
 'team_1_Delhi Daredevils',
 'team_1_Gujarat Lions',
 'team_1_Gujarat Titans',
 'team_1_Kings XI Punjab',
 'team_1_Kochi Tuskers Kerala',
 'team_1_Kolkata Knight Riders',
 'team_1_Lucknow Super Giants',
 'team_1_Mumbai Indians',
 'team_1_Pune Warriors',
 'team_1_Punjab Kings',
 'team_1_Rajasthan Royals',
 'team_1_Rising Pune Supergiant',
 'team_1_Rising Pune Supergiants',
 'team_1_Royal Challengers Bangalore',
 'team_1_Royal Challengers Bengaluru',
 'team_1_Sunrisers Hyderabad',
 'team_2_Chennai Super Kings',
 'team_2_Deccan Chargers',
 'team_2_Delhi Capitals',
 'team_2_Delhi Daredevils',
 'team_2_Gujarat Lions',
 'team_2_Gujarat Titans',
 'team_2_Kings XI Punjab',
 'team_2_Kochi Tuskers Kerala',
 'team_2_Kolkata Knight Riders',
 'team_2_Lu

In [None]:
def get_fantsy_players(team, opponent, venue):
    """
    Select the best Dream11 team based on predicted fantasy points for a given match.

    Args:
        team (str): Name of the first team
        opponent (str): Name of the second team
        venue (str): Name of the venue

    Returns:
        list: List of 11 players selected for Dream11 team
    """
    # Get all possible categories from the training data, excluding NaN values
    team1_values = df["team1"].dropna().unique()
    team2_values = df["team2"].dropna().unique()
    all_teams = sorted(list(set(team1_values) | set(team2_values)))
    all_venues = sorted(df["venue"].dropna().unique())

    # Create the input DataFrame with the current match details
    input_data = pd.DataFrame(
        {"team_1": [team], "team_2": [opponent], "venue": [venue]}
    )

    # Create dummy variables matching the training data structure
    # For team_1
    team_1_dummies = pd.DataFrame(
        0, index=input_data.index, columns=[f"team_1_{t}" for t in all_teams]
    )
    team_1_dummies[f"team_1_{team}"] = 1

    # For team_2
    team_2_dummies = pd.DataFrame(
        0, index=input_data.index, columns=[f"team_2_{t}" for t in all_teams]
    )
    team_2_dummies[f"team_2_{opponent}"] = 1

    # For venue
    venue_dummies = pd.DataFrame(
        0, index=input_data.index, columns=[f"venue_{v}" for v in all_venues]
    )
    venue_dummies[f"venue_{venue}"] = 1

    # Combine all dummy variables
    input_data = pd.concat(
        [input_data, team_1_dummies, team_2_dummies, venue_dummies], axis=1
    )

    # Get the players for both teams with error handling
    try:
        team_1_players = (
            team_players.loc[
                (team_players["team_1"] == team) & (team_players["team_2"] == opponent)
            ]
            .tail(1)["team_1_players"]
            .iloc[0]
        )
        
        team_2_players = (
            team_players.loc[
                (team_players["team_1"] == team) & (team_players["team_2"] == opponent)
            ]
            .tail(1)["team_2_players"]
            .iloc[0]
        )
    except (IndexError, KeyError):
        raise ValueError(
            f"No player data found for match between {team} and {opponent}"
        )

    all_players = np.concatenate([team_1_players, team_2_players])

    selection_features = {
        "player_runs": "mean",
        "player_wickets": "mean",
        "player_fours": "mean",
        "player_sixes": "mean",
        "player_catches": "mean",
        "player_stumpings": "mean",
        "player_runouts": "mean",
        "bowler_wickets": "mean",
        "bowler_economy": "mean",
        "player_type": "first",
    }
    for team in (all_teams):
        selection_features["team_1_"+team] = "min"
    for team in (all_teams):
        selection_features['team_2_'+team] = "min"
    for venue in (all_venues):
        selection_features['venue_'+venue] = "min"
    # Get player statistics
    player_stats = (
        df[df["batter"].isin(all_players)]
        .groupby("batter")
        .agg(
            selection_features
        )
        .reset_index()
    )
    player_stats["team_"+team] = 1
    player_stats["venue_"+venue] = 1
    # Handle case where no player statistics are found
    if player_stats.empty:
        raise ValueError("No player statistics found for the selected teams")

    # Add team and venue dummy columns from input_data
    dummy_cols = [
        col for col in input_data.columns if col.startswith(("team_", "venue_"))
    ]
    for col in dummy_cols:
        player_stats[col] = input_data[col].iloc[0]

    # Ensure features are in the same order as during training
    prediction_features = player_stats[
        [
            "player_runs",
            "player_wickets",
            "player_fours",
            "player_sixes",
            "player_catches",
            "player_stumpings",
            "player_runouts",
            "bowler_wickets",
            "bowler_economy",
        ]
        + 
            [col for col in player_stats.columns if col.startswith(("team_", "venue_"))]
        
    ]
    print(prediction_features.columns)
    try:
        prediction_features.drop(
            columns=["team_1", "team_2", "team_Sunrisers Hyderabad"], inplace=True
        )
    except Exception:
        print("Exception")

    # Make predictions using all models
    try:
        player_stats["rf_predicted_points"] = rf_model.predict(prediction_features)
        player_stats["gb_predicted_points"] = gb_model.predict(prediction_features)
        player_stats["nn_predicted_points"] = nn_model.predict(prediction_features)
    except ValueError as e:
        raise ValueError(f"Error making predictions: {str(e)}")

    # Calculate final predicted points
    player_stats["final_predicted_points"] = (
        player_stats["rf_predicted_points"]
        + player_stats["gb_predicted_points"]
        + player_stats["nn_predicted_points"]
    ) / 3

    # Sort players by predicted points
    player_stats = player_stats.sort_values("final_predicted_points", ascending=False)

    # Select players based on their roles
    batsmen = player_stats[player_stats["player_type"] == 0].head(8)["batter"].tolist()
    bowlers = player_stats[player_stats["player_type"] == 1].head(6)["batter"].tolist()

    # Select wicketkeeper (player with high runs and catching ability)
    keepers = (
        player_stats[
            (player_stats["player_type"] == 0)
            & (player_stats["player_catches"] > player_stats["player_catches"].mean())
        ]
        .head(2)["batter"]
        .tolist()
    )

    # Select all-rounders (players with both batting and bowling abilities)
    remaining_players = player_stats[
        ~player_stats["batter"].isin(batsmen + bowlers + keepers)
    ]
    all_rounders = (
        remaining_players[
            (remaining_players["player_runs"] > remaining_players["player_runs"].mean())
            & (
                remaining_players["bowler_wickets"]
                > remaining_players["bowler_wickets"].mean()
            )
        ]
        .head(6)["batter"]
        .tolist()
    )

    # Combine all players for final team
    dream11_team = batsmen + keepers + all_rounders + bowlers
    final_team = []
    # Ensure we have exactly 11 players
    print(team_1_players)
    for player in dream11_team:
        if (player in team_1_players) and (player not in final_team):
            final_team.append(player)
    if len(dream11_team) < 11:
        remaining = (
            remaining_players[~remaining_players["batter"].isin(dream11_team)]
            .head(11 - len(dream11_team))["batter"]
            .tolist()
        )

        for player in remaining:
            if (player in team_1_players) and (player not in final_team):
                final_team.append(player)

    return final_team[:11]

In [105]:
team_players

Unnamed: 0,team_1,team_2,date,team_1_players,team_2_players
0,Chennai Super Kings,Deccan Chargers,2008-05-06,"[PA Patel, S Anirudha, SP Fleming, SK Raina, S...","[PA Patel, S Anirudha, SP Fleming, SK Raina, S..."
1,Chennai Super Kings,Deccan Chargers,2009-04-27,"[PA Patel, SK Raina, ML Hayden, MS Dhoni, JDP ...","[PA Patel, SK Raina, ML Hayden, MS Dhoni, JDP ..."
2,Chennai Super Kings,Deccan Chargers,2009-05-04,"[M Vijay, ML Hayden, MS Dhoni, SK Raina, JA Mo...","[M Vijay, ML Hayden, MS Dhoni, SK Raina, JA Mo..."
3,Chennai Super Kings,Deccan Chargers,2010-04-10,"[M Vijay, ML Hayden, SK Raina, MS Dhoni, MEK H...","[M Vijay, ML Hayden, SK Raina, MS Dhoni, MEK H..."
4,Chennai Super Kings,Deccan Chargers,2010-04-22,"[M Vijay, ML Hayden, S Badrinath, SK Raina, MS...","[M Vijay, ML Hayden, S Badrinath, SK Raina, MS..."
...,...,...,...,...,...
1075,Sunrisers Hyderabad,Royal Challengers Bangalore,2019-03-31,"[JM Bairstow, DA Warner, V Shankar, YK Pathan,...","[JM Bairstow, DA Warner, V Shankar, YK Pathan,..."
1076,Sunrisers Hyderabad,Royal Challengers Bangalore,2019-05-04,"[WP Saha, MJ Guptill, MK Pandey, KS Williamson...","[WP Saha, MJ Guptill, MK Pandey, KS Williamson..."
1077,Sunrisers Hyderabad,Royal Challengers Bangalore,2021-10-06,"[JJ Roy, Abhishek Sharma, KS Williamson, PK Ga...","[JJ Roy, Abhishek Sharma, KS Williamson, PK Ga..."
1078,Sunrisers Hyderabad,Royal Challengers Bangalore,2023-05-18,"[Abhishek Sharma, RA Tripathi, AK Markram, H K...","[Abhishek Sharma, RA Tripathi, AK Markram, H K..."


In [77]:
for item in team_players.loc[team_players["team_1"] == "Mumbai Indians", "team_1_players"].tail(1):
    print(item)

['RG Sharma' 'Ishan Kishan' 'C Green' 'SA Yadav' 'Tilak Varma' 'TH David'
 'HC Brook' 'MA Agarwal' 'RA Tripathi' 'AK Markram' 'Abhishek Sharma'
 'H Klaasen' 'Abdul Samad' 'M Jansen' 'Washington Sundar' 'B Kumar'
 'M Markande']


In [18]:
team_players = df.groupby(["over","date"])["batter"].unique().reset_index()

In [19]:
team_players

Unnamed: 0,over,date,batter
0,0,2008-04-18,"[SC Ganguly, BB McCullum, R Dravid, W Jaffer]"
1,0,2008-04-19,"[PA Patel, ML Hayden, K Goel, JR Hopes, T Kohl..."
2,0,2008-04-20,"[AC Gilchrist, Y Venugopal Rao, WP Saha, BB Mc..."
3,0,2008-04-21,"[K Goel, JR Hopes, M Kaif]"
4,0,2008-04-22,"[AC Gilchrist, Y Venugopal Rao, G Gambhir, V S..."
...,...,...,...
16363,19,2024-05-18,"[C Green, GJ Maxwell, MK Lomror, MS Dhoni, SN ..."
16364,19,2024-05-19,"[JM Sharma, Shivam Singh, Sanvir Singh]"
16365,19,2024-05-21,"[V Viyaskanth, PJ Cummins]"
16366,19,2024-05-22,"[Swapnil Singh, KV Sharma]"


In [None]:
get_fantsy_players("Mumbai Indians","Chennai Super Kings","MAChidambaramStadium")

Index(['player_runs', 'player_wickets', 'player_fours', 'player_sixes',
       'player_catches', 'player_stumpings', 'player_runouts',
       'bowler_wickets', 'bowler_economy', 'team_1_Chennai Super Kings',
       'team_1_Deccan Chargers', 'team_1_Delhi Capitals',
       'team_1_Delhi Daredevils', 'team_1_Gujarat Lions',
       'team_1_Gujarat Titans', 'team_1_Kings XI Punjab',
       'team_1_Kochi Tuskers Kerala', 'team_1_Kolkata Knight Riders',
       'team_1_Lucknow Super Giants', 'team_1_Mumbai Indians',
       'team_1_Pune Warriors', 'team_1_Punjab Kings',
       'team_1_Rajasthan Royals', 'team_1_Rising Pune Supergiant',
       'team_1_Rising Pune Supergiants', 'team_1_Royal Challengers Bangalore',
       'team_1_Royal Challengers Bengaluru', 'team_1_Sunrisers Hyderabad',
       'team_2_Chennai Super Kings', 'team_2_Deccan Chargers',
       'team_2_Delhi Capitals', 'team_2_Delhi Daredevils',
       'team_2_Gujarat Lions', 'team_2_Gujarat Titans',
       'team_2_Kings XI Punjab',

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prediction_features.drop(


['JC Archer',
 'TH David',
 'Arshad Khan',
 'AM Rahane',
 'SA Yadav',
 'T Stubbs',
 'AT Rayudu',
 'PP Chawla',
 'RG Sharma']