In [1]:
import joblib
import warnings
import numpy as np
import pandas as pd
from utils import prizepicks_lines, database_connection_and_cursor
warnings.filterwarnings("ignore")

In [2]:
def total_maps(cs_data, map_df, map_three):
    stats = ["kills", "headshots", "assists", "deaths", "kast", "adr", "rating"]
    if map_three:
        for stat_column in stats:
            idx = cs_data.columns.get_indexer([stat_column])[0]
            stat_value = map_df[f"{stat_column}_map_1"] + map_df[f"{stat_column}_map_2"] + map_df[f"{stat_column}_map_3"]
            if stat_column in ["kast", "k_d_diff", "adr", "fk_diff", "rating"]:
                map_df.insert(loc=idx, column=stat_column, value=stat_value/3)
            else:
                map_df.insert(loc=idx, column=stat_column, value=stat_value)
        drop_cols = [f"{stat_column}_map_1" for stat_column in stats] + [f"{stat_column}_map_2" for stat_column in stats] + [f"{stat_column}_map_3" for stat_column in stats]
        map_df.drop(columns=drop_cols, inplace=True)
        map_df["map_number"] = "MAPS 1-3"
        return map_df
    else:
        for stat_column in stats:
            idx = cs_data.columns.get_indexer([stat_column])[0]
            stat_value = map_df[f"{stat_column}_map_1"] + map_df[f"{stat_column}_map_2"]
            if stat_column in ["kast", "adr", "rating"]:
                map_df.insert(loc=idx, column=stat_column, value=stat_value/2)
            else:
                map_df.insert(loc=idx, column=stat_column, value=stat_value)
        drop_cols = [f"{stat_column}_map_1" for stat_column in stats] + [f"{stat_column}_map_2" for stat_column in stats]
        map_df.drop(columns=drop_cols, inplace=True)
        map_df["map_number"] = "MAPS 1-2"
        return map_df

In [3]:
# Connect to the AWS database
connection, cursor = database_connection_and_cursor()
cursor.execute(
    """SELECT * FROM hltv_cs"""
)
cs_data = cursor.fetchall()
cols = [col[0] for col in cursor.description]
cs_data = pd.DataFrame(cs_data, columns=cols)

# Convert To Float Values
cs_data[["kast", "adr", "rating"]] = cs_data[["kast", "adr", "rating"]].astype("float")
cs_data["date"] = pd.to_datetime(cs_data["date"])

cs_data = cs_data.iloc[:, :].query("(map != 'Best of 3') and (map != 'Best of 2') and (map != 'All') and (map != 'Cache')").dropna().reset_index(drop=True)

cs_data.drop(columns=["k_d_diff", "fk_diff", "event", "date", "map", "team", "opponent", "player_name", "team_score", "opponent_score"], inplace=True)

cs_data = cs_data.groupby(by=["match_url"]).filter(lambda group: set(group["map_number"]).issubset({1, 2, 3}))

cs_data.reset_index(drop=True, inplace=True)

map_1 = cs_data[cs_data["map_number"] == 1]
map_2 = cs_data[cs_data["map_number"] == 2]
map_3 = cs_data[cs_data["map_number"] == 3]

target_cols = ["match_url", "player_url", "kills", "headshots", "assists", "deaths", "kast", "adr", "rating"]

map_1_2= pd.merge(
    left=map_1,
    right=map_2[target_cols],
    suffixes = ("_map_1", "_map_2"),
    on=["match_url", "player_url"],
)

map_1_2_3 = map_1_2.merge(
    right=map_3[target_cols],
    on=["match_url", "player_url"],
).rename(columns={"kills": "kills_map_3", "headshots": "headshots_map_3", "assists": "assists_map_3", 
                "deaths": "deaths_map_3", "kast": "kast_map_3", "adr": "adr_map_3", "rating": "rating_map_3"})

map_1_2 = total_maps(cs_data=cs_data, map_df=map_1_2, map_three=False)
map_1_2_3 = total_maps(cs_data=cs_data, map_df=map_1_2_3, map_three=True)

map_1["map_number"] = "MAPS 1"
map_3["map_number"] = "MAPS 3"

cs_data = pd.concat(
    [
        map_1,
        map_3,
        map_1_2,
        map_1_2_3
    ],
    ignore_index=True
)

[STATUS]: SUCCESSFULLY CREATED DB CONNECTION


In [4]:
# Teams Encoded
cursor.execute("SELECT * FROM teams_encoded")
team_mapper = cursor.fetchall()
teams_encoded_df = pd.DataFrame(team_mapper, columns=[col[0] for col in cursor.description]).set_index(["hltv_url", "map_number"])
team_mapper = teams_encoded_df.to_dict()["std"]

# Players Encoded
cursor.execute("SELECT * FROM players_encoded")
player_mapper = cursor.fetchall()
players_encoded_df = pd.DataFrame(player_mapper, columns=[col[0] for col in cursor.description]).set_index(["hltv_url", "map_number"])
player_mapper = players_encoded_df.to_dict()["std"]

In [5]:
# Player Map
cursor.execute("SELECT * FROM player_map")
data = cursor.fetchall()
player_hltv_df = pd.DataFrame(data, columns=[col[0] for col in cursor.description])
player_hltv_df.dropna(inplace=True)
player_hltv_df["player_id"] = player_hltv_df["player_id"].astype("int")
player_hltv_df.set_index("player_id", inplace=True)

# Team Map
cursor.execute("SELECT * FROM team_map")
data = cursor.fetchall()
team_hltv_df = pd.DataFrame(data, columns=[col[0] for col in cursor.description])
team_hltv_df.dropna(inplace=True)
team_hltv_df["team_id"] = team_hltv_df["team_id"].astype("int")
team_hltv_df.set_index("team_id", inplace=True)

In [6]:
# Load the new model
model = joblib.load(fr"C:\Users\kazir\Desktop\Projects\csgo_ml_project\notebooks\xgr_model_v3.joblib")

In [7]:
WEIGHTS = np.array([0.25, 0.20, 0.15, 0.125, 0.115, 0.10, 0.05, 0.01])
WEIGHT_COLS = ["kills", "headshots", "assists", "deaths", "kast", "adr", "rating"]

def make_predictions(team_id, player_id, opponent_team, map_type):
    model_inputs = {
    "wma_kills": None, "wma_headshots": None, "wma_assists": None, "wma_deaths": None, "wma_kast": None, "wma_adr": None,
    "wma_rating": None, "maps_1": 0, "maps_1_2": 0, "maps_1_3": 0, "maps_3": 0, "player_team_enc": None, "opponent_team_enc": None, "player_enc": None
    }
    TEAM_ID = int(team_id)
    PLAYER_ID = int(player_id)
    OPP_TEAM = opponent_team
    MAP_TYPE = map_type

    if ("(Combo)" in map_type) | ("First" in map_type) | ("AWP" in map_type):
        return 0

    kills_or_headshots = MAP_TYPE.split()[-1].lower()
    
    map_number = " ".join(MAP_TYPE.split()[:2])
    map_number_formatted = map_number.lower().replace(" ", "_").replace("-", "_")

    team_url = team_hltv_df.loc[TEAM_ID]["hltv_url"]
    opp_url = team_hltv_df[team_hltv_df["player_team"] == OPP_TEAM]
    # If opponent cannot be found
    if len(opp_url) == 0:
        print(f"[ML MODEL ERROR]: OPPONENT NOT FOUND ({opponent_team})")
        return 0
    
    opp_url = opp_url["hltv_url"].iloc[0]
    player_url = player_hltv_df.loc[PLAYER_ID]["hltv_url"]

    # ID ERRORS
    try:
        player_team_id = team_mapper.get((team_url, map_number))
    except TypeError:
        print(f"[TEAM ID ERROR]: {TEAM_ID}")
        return 0
    try:
        opp_team_id = team_mapper.get((opp_url, map_number))
    except TypeError:
        print(f"[OPPONENT NAME ERROR]: {OPP_TEAM}")
        return 0
    
    try:
        player_id = player_mapper.get((player_url, map_number))
    except TypeError:
        print(f"[PLAYER ID ERROR]: {PLAYER_ID}")
        return 0

    model_inputs["player_team_enc"] = player_team_id
    model_inputs["opponent_team_enc"] = opp_team_id
    model_inputs["player_enc"] = player_id
    model_inputs[map_number_formatted] = 1

    player_data_df = cs_data[
    (cs_data["map_number"] == map_number) &
    (cs_data["player_url"] == player_url)
    ]
    if len(player_data_df) < 8:
        print(f"[ML MODEL ERROR]: NOT ENOUGH DATA ({player_url} | {map_number})")
        return 0

    dot_product = player_data_df[WEIGHT_COLS].tail(8).apply(lambda group: np.dot(WEIGHTS[::-1], group), raw=True)
    kills, headshots, assists, deaths, kast, adr, rating = dot_product
    model_inputs["wma_kills"] = kills
    model_inputs["wma_headshots"] = headshots
    model_inputs["wma_assists"] = assists
    model_inputs["wma_deaths"] = deaths
    model_inputs["wma_kast"] = kast
    model_inputs["wma_adr"] = adr
    model_inputs["wma_rating"] = rating

    x = [np.array(list(model_inputs.values()))]
    prediction = model.predict(x)[0]
    if kills_or_headshots == "headshots":
        return prediction[1]
    else:
        return prediction[0]

In [8]:
prediction_df = pd.DataFrame(prizepicks_lines())

In [9]:
# Predictions
prediction_df["Model Prediction"] = prediction_df.apply(lambda row: make_predictions(team_id=row["Team ID"], player_id=row["Player ID"], opponent_team=row["Opp"], map_type=row["Type"]), axis=1)
prediction_df = prediction_df[prediction_df["Model Prediction"] != 0].drop(columns=["ID", "Player ID", "Team ID"])
prediction_df.reset_index(drop=True)

# Add difference and sort by teams
prediction_df.sort_values(by="Team", inplace=True)
prediction_df["Diff (+/-)"] = prediction_df["Model Prediction"] - prediction_df["Line Score"]
prediction_df["Model O/U"] = np.where(prediction_df["Line Score"] < prediction_df["Model Prediction"], "Over", "Under")
prediction_df["Model Prediction"] = prediction_df["Model Prediction"].round(3)

# Save to a CSV
prediction_df.to_csv("predictions.csv", index=False)

# Display
prediction_df.reset_index(drop=True)

[ML MODEL ERROR]: NOT ENOUGH DATA (https://www.hltv.org/stats/players/24111/junyme | MAPS 1-2)
[ML MODEL ERROR]: NOT ENOUGH DATA (https://www.hltv.org/stats/players/20133/svedjehed | MAPS 1-2)
[ML MODEL ERROR]: NOT ENOUGH DATA (https://www.hltv.org/stats/players/24672/fakerealityy | MAPS 1-2)
[ML MODEL ERROR]: NOT ENOUGH DATA (https://www.hltv.org/stats/players/24111/junyme | MAPS 1-2)
[ML MODEL ERROR]: NOT ENOUGH DATA (https://www.hltv.org/stats/players/20133/svedjehed | MAPS 1-2)
[ML MODEL ERROR]: NOT ENOUGH DATA (https://www.hltv.org/stats/players/24672/fakerealityy | MAPS 1-2)
[ML MODEL ERROR]: OPPONENT NOT FOUND (Rhyno)
[ML MODEL ERROR]: OPPONENT NOT FOUND (Rhyno)
[ML MODEL ERROR]: OPPONENT NOT FOUND (Rhyno)
[ML MODEL ERROR]: OPPONENT NOT FOUND (Rhyno)
[ML MODEL ERROR]: OPPONENT NOT FOUND (Rhyno)
[ML MODEL ERROR]: OPPONENT NOT FOUND (Rhyno)
[ML MODEL ERROR]: OPPONENT NOT FOUND (Rhyno)
[ML MODEL ERROR]: OPPONENT NOT FOUND (Rhyno)
[ML MODEL ERROR]: OPPONENT NOT FOUND (Rhyno)
[ML MOD

Unnamed: 0,Game Date,Game Time,Type,Name,Team,Opp,Line Score,Model Prediction,Diff (+/-),Model O/U
0,2025-02-27,05:00:00,MAPS 1-2 Headshots,shalfey,9 Pandas,Monte,15.0,15.035,0.035118,Over
1,2025-02-27,05:00:00,MAPS 1-2 Headshots,d1Ledez,9 Pandas,Monte,20.0,19.258,-0.742447,Under
2,2025-02-27,05:00:00,MAPS 1-2 Kills,r3salt,9 Pandas,Monte,31.5,28.460,-3.040287,Under
3,2025-02-27,05:00:00,MAPS 1-2 Headshots,r3salt,9 Pandas,Monte,19.5,15.436,-4.063755,Under
4,2025-02-27,05:00:00,MAPS 1-2 Headshots,Alv,9 Pandas,Monte,12.0,12.683,0.682826,Over
...,...,...,...,...,...,...,...,...,...,...
197,2025-02-27,07:00:00,MAPS 1-2 Headshots,cptkurtka023,kONO,Leo,18.0,17.093,-0.906992,Under
198,2025-02-27,07:00:00,MAPS 1-2 Kills,zogeN,kONO,Leo,26.0,26.995,0.994940,Over
199,2025-02-27,07:00:00,MAPS 1-2 Kills,s4ltovsk1yy,kONO,Leo,32.5,34.475,1.974621,Over
200,2025-02-27,07:00:00,MAPS 1-2 Kills,cptkurtka023,kONO,Leo,31.5,32.525,1.024601,Over
