In [1]:
import joblib
import sys
import os
import warnings
# Suppress every warning for the rest of the session (pandas warnings included).
warnings.filterwarnings("ignore")
# Put the sibling ../scraper directory first on sys.path so the star import below can find `utils`.
project_root = os.path.abspath(os.path.join("..", "scraper"))
sys.path.insert(0, project_root)
# NOTE(review): later cells use pd, np, database_connection_and_cursor and prizepicks_lines
# without importing them, so they presumably arrive through this star import — confirm
# against utils and prefer explicit imports.
from utils import *

In [2]:
# Open the AWS-hosted "CSGO" database and read the whole hltv_stats table.
# NOTE(review): connection and cursor are never closed in this notebook — confirm
# whether a later step still needs them before adding an explicit close().
connection, cursor = database_connection_and_cursor("CSGO")
cursor.execute("""SELECT * FROM hltv_stats""")
cs_data = cursor.fetchall()

# Column labels applied positionally to the fetched rows
# (presumably the table's column order — verify against the schema).
cols = [
    'ID', 'Event', 'Date', 'Map', 'Map Number', 'Team', 'Name',
    'Kills', 'Headshots', 'Assists', 'Deaths', 'Kast', 'K-D Diff', 'ADR', 'FK Diff', 'Rating',
    'Team Score', 'Opponent Score',
    'Teammate 1', 'Teammate 2', 'Teammate 3', 'Teammate 4',
    'Opponent 1', 'Opponent 2', 'Opponent 3', 'Opponent 4', 'Opponent 5',
    'Teammate 5', 'Opponent 6', 'Opponent 7', 'Teammate 6',
]
cs_data = pd.DataFrame(cs_data, columns=cols)

# The rate-style statistics are cast to float in one pass
cs_data = cs_data.astype({"Kast": "float", "ADR": "float", "Rating": "float"})

# One row per (date, player, team) combination
team_df = (
    cs_data[["Date", "Name", "Team"]]
    .drop_duplicates()
    .reset_index(drop=True)
)

# Look up the opposing team by matching "Opponent 1" on the same date
cs_data = (
    cs_data
    .merge(
        right=team_df.add_prefix("Opponent "),
        how="left",
        left_on=["Date", "Opponent 1"],
        right_on=["Opponent Date", "Opponent Name"],
    )
    .drop(columns=["Opponent Date", "Opponent Name"])
)

# Move "Opponent Team" from the end of the frame to position 5
opponents = cs_data.pop("Opponent Team")
cs_data.insert(loc=5, column="Opponent Team", value=opponents)

# Keep single-map rows only: drop the 'Best of 3', 'Best of 2' and 'All' rows
cs_stats = cs_data[~cs_data["Map"].isin(["Best of 3", "Best of 2", "All"])].reset_index(drop=True)

# Drop unused columns, then keep only series whose map numbers all fall within 1, 2 and 3
cs_stats_f = (
    cs_stats
    .drop(columns=["ID", "K-D Diff", "FK Diff", "Team Score", "Opponent Score"])
    .groupby(by=["Event", "Date", "Team", "Opponent Team"])
    .filter(lambda group: set(group["Map Number"]) <= {"1", "2", "3"})
    .reset_index(drop=True)
)

In [3]:
def total_maps_statistics(df: pd.DataFrame, three_maps: bool=False) -> pd.DataFrame:
    """
    Collapse per-map statistic columns into one combined column per statistic.

    For every statistic the frame must contain "<stat> Map 1" and "<stat> Map 2"
    (plus "<stat> Map 3" when three_maps is True). Kills, Headshots, Assists and
    Deaths are summed across the maps; Kast, ADR and Rating are averaged. Each
    combined column takes the position of its "Map 1" column, the per-map columns
    that were combined are dropped, and "Map Number" is relabelled.

    Params:
        df (pd.DataFrame): Frame holding the per-map statistic columns. Not modified.
        three_maps (bool): Combine maps 1-3 when True, maps 1-2 otherwise

    Returns:
        df (pd.DataFrame): Copy of the input with combined statistics and
            "Map Number" set to "MAPS 1-3" or "MAPS 1-2"

    Raises:
        KeyError: If one of the required per-map columns is missing
    """
    summed_stats = ["Kills", "Headshots", "Assists", "Deaths"]
    averaged_stats = ["Kast", "ADR", "Rating"]
    map_count = 3 if three_maps else 2

    df = df.copy()
    combined_cols = []
    for stat_column in summed_stats + averaged_stats:
        map_cols = [f"{stat_column} Map {number}" for number in range(1, map_count + 1)]

        # Accumulate left to right so the arithmetic matches (Map 1 + Map 2 [+ Map 3])
        total = df[map_cols[0]]
        for map_col in map_cols[1:]:
            total = total + df[map_col]
        if stat_column in averaged_stats:
            total = total / map_count

        # Position comes from the frame being transformed, not from a notebook global,
        # so the function works on any frame that carries the per-map columns.
        df.insert(loc=df.columns.get_loc(map_cols[0]), column=stat_column, value=total)
        combined_cols.extend(map_cols)

    df.drop(columns=combined_cols, inplace=True)
    df["Map Number"] = f"MAPS 1-{map_count}"
    return df

In [4]:
# Separate the maps to make the merges easier. .copy() makes map_1 and map_3
# independent frames, so relabelling their "Map Number" further down is a plain
# column assignment rather than a write into a filtered slice of cs_stats_f.
map_1 = cs_stats_f[cs_stats_f["Map Number"] == "1"].copy()
map_2 = cs_stats_f[cs_stats_f["Map Number"] == "2"]
map_3 = cs_stats_f[cs_stats_f["Map Number"] == "3"].copy()

# Columns that identify one player's appearance in one series
series_keys = ["Event", "Date", "Name", "Team", "Opponent Team"]

# Per-map statistics; these are also the columns the weighted moving average is applied to
weight_cols = ["Kills", "Headshots", "Assists", "Deaths", "Kast", "ADR", "Rating"]

# Merge the dataframes to calculate for Maps 1-2 (inner join: players present on both maps)
map_1_and_2 = pd.merge(
    left=map_1,
    right=map_2[series_keys + weight_cols],
    suffixes=(" Map 1", " Map 2"),
    on=series_keys,
)

# Merge the dataframe to calculate for Maps 1-3. The map 3 statistics do not clash
# with the already-suffixed columns, so no suffix is applied and they are renamed explicitly.
map_1_and_2_and_3 = map_1_and_2.merge(
    right=map_3[series_keys + weight_cols],
    on=series_keys,
).rename(columns={stat: f"{stat} Map 3" for stat in weight_cols})

# Collapse the per-map columns into combined totals / averages
map_1_and_2 = total_maps_statistics(df=map_1_and_2)
map_1_and_2_and_3 = total_maps_statistics(df=map_1_and_2_and_3, three_maps=True)

# Relabel the single-map frames (done after the merges, which use the raw frames)
map_1["Map Number"] = "MAPS 1"
map_3["Map Number"] = "MAPS 3"

# Stack the four map types; Map 2 on its own is deliberately excluded
cs_maps_and_stats = pd.concat(
    [
        map_1,
        map_3,
        map_1_and_2,
        map_1_and_2_and_3
    ],
    ignore_index=True
)

# Weighted moving average (weights). model_projections applies them reversed against
# the last n rows, so the final row of a player's history receives 0.25.
weights = np.array([0.25, 0.20, 0.15, 0.125, 0.115, 0.10, 0.05, 0.01])

# Window size (must equal len(weights))
n = 8

# Encode the players by using their mean rating
player_ratings = cs_data.groupby("Name")["Rating"].agg(['mean', 'std'])

# Make dictionaries to encode names and map back to the name
# NOTE(review): the reversed dictionaries are keyed by floats, so two players (or teams)
# with an identical value overwrite each other, and NaN values never look up cleanly.
player_mean_rating_dict = player_ratings['mean'].to_dict()
player_mean_rating_dict_reversed = {v: k for k, v in player_mean_rating_dict.items()}

# Encode the teams
team_ratings = cs_data.groupby(["Team"])["Rating"].agg(['mean', 'median', 'std'])

# Make dictionaries to encode teams (by the std of their ratings) and map back
team_std_rating_dict = team_ratings["std"].to_dict()
team_std_rating_dict_reversed = {v: k for k, v in team_std_rating_dict.items()}

In [None]:
# Load the serialized model that model_projections calls .predict on.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute arbitrary
# code — only load model artifacts that come from a trusted source.
model = joblib.load("xgr_model_v2.joblib")

def model_projections(name: str, team: str, opp_team: str, map_type: str):
    """
    Project a player's statistic for one betting line using the loaded model.

    Reads the notebook globals cs_maps_and_stats, weight_cols, weights, n,
    player_mean_rating_dict, team_std_rating_dict and model.

    Params:
        name (str): Player name; the lower-cased spelling is tried as a fallback
        team (str): Player's team
        opp_team (str): Opposing team
        map_type (str): Line type such as "MAP 1 Kills" or "MAPS 1-3 Headshots"

    Returns:
        The second model output when the line's last word is "Headshots", otherwise
        the first model output. Returns 0 when no projection can be made: combo
        lines, an empty or unknown map type, fewer than n rows of history, or a
        player/team that is missing from the encoding dictionaries.
    """
    # Anything that cannot be parsed as "<MAP(S)> <number> <stat>" gets the 0 sentinel
    if not isinstance(map_type, str) or not map_type.split():
        return 0
    if "(Combo)" in map_type:
        return 0

    # NOTE(review): any stat other than "Headshots" falls through to the first model
    # output (presumably Kills) — confirm no other stat types reach this function.
    stat_type = map_type.split()[-1]
    # Fix the naming convention: "MAP 1 ..." and "MAPS 1 ..." both become "MAPS 1"
    map_name = map_type.replace("MAPS", "MAP").replace("MAP", "MAPS")
    map_type = " ".join(map_name.split()[:2])

    # Insertion order of this dict is the feature order handed to the model
    model_inputs = {
        "WMA Kills": 0,
        "WMA Headshots": 0,
        "WMA Assists": 0,
        "WMA Deaths": 0,
        "WMA Kast": 0,
        "WMA ADR": 0,
        "WMA Rating": 0,
        "Map Number MAPS 1": 0,
        "Map Number MAPS 1-2": 0,
        "Map Number MAPS 1-3": 0,
        "Map Number MAPS 3": 0,
        "Team": 0,
        "Opponent Team": 0,
        "Name": "",
    }
    # An unknown map type would append an extra feature instead of setting a flag
    map_flag = f"Map Number {map_type}"
    if map_flag not in model_inputs:
        return 0

    history = cs_maps_and_stats[
        (cs_maps_and_stats["Team"] == team) & (cs_maps_and_stats["Map Number"] == map_type)
    ]
    df = history[history["Name"] == name]
    df_lower = history[history["Name"] == name.lower()]

    # A full window of n rows is required; the lower-cased spelling wins when it has one
    if len(df) < n and len(df_lower) < n:
        return 0
    if len(df_lower) >= n:
        df = df_lower
        name = name.lower()

    # Weighted moving average over the last n rows; weights are reversed so the
    # final row receives weights[0]
    dot_product = df[weight_cols].tail(n).apply(lambda group: np.dot(weights[::-1], group), raw=True)
    for col, value in dot_product.items():
        model_inputs[f"WMA {col}"] = value
    model_inputs[map_flag] = 1

    # If the data cannot be mapped using the dictionaries
    try:
        model_inputs["Name"] = player_mean_rating_dict[name]
        model_inputs["Team"] = team_std_rating_dict[team]
        model_inputs["Opponent Team"] = team_std_rating_dict[opp_team]
    except KeyError:
        return 0

    inp = [np.array(list(model_inputs.values()))]
    prediction = model.predict(inp)[0]
    if stat_type == "Headshots":
        return prediction[1]
    return prediction[0]

In [6]:
# Predictions: one row per PrizePicks line, with the model's projection attached
prediction_df = pd.DataFrame(prizepicks_lines())
prediction_df["Model Prediction"] = prediction_df.apply(
    lambda row: model_projections(row["Name"], row["Team"], row["Opp"], row["Type"]),
    axis=1,
)

# model_projections returns 0 when it cannot project a line, so drop those rows.
# Sort by team and renumber once here; reset_index returns a new frame, so its
# result has to be kept for the stored frame to get the clean index.
prediction_df = (
    prediction_df[prediction_df["Model Prediction"] != 0]
    .drop(columns="ID")
    .sort_values(by="Team")
    .reset_index(drop=True)
)

# Add the difference to the line; a projection equal to the line counts as "Under"
prediction_df["Diff (+/-)"] = prediction_df["Model Prediction"] - prediction_df["Line Score"]
prediction_df["Over/Under"] = np.where(prediction_df["Line Score"] < prediction_df["Model Prediction"], "Over", "Under")

# Save to a CSV
prediction_df.to_csv("predictions.csv", index=False)

# Display
prediction_df

Unnamed: 0,Game Date,Game Time,Type,Name,Team,Opp,Line Score,Model Prediction,Diff (+/-),Over/Under
0,2024-11-11,04:00:00,MAPS 1-3 Kills,r3salt,9 Pandas,ECLOT,47.5,44.877697,-2.622303,Under
1,2024-11-11,04:00:00,MAPS 1-3 Headshots,glowiing,9 Pandas,ECLOT,27.5,25.199430,-2.300570,Under
2,2024-11-11,04:00:00,MAPS 1-3 Headshots,d1Ledez,9 Pandas,ECLOT,29.0,28.895821,-0.104179,Under
3,2024-11-11,04:00:00,MAPS 1-3 Headshots,shalfey,9 Pandas,ECLOT,22.5,21.870558,-0.629442,Under
4,2024-11-11,04:00:00,MAPS 1-3 Headshots,r3salt,9 Pandas,ECLOT,28.5,26.425379,-2.074621,Under
...,...,...,...,...,...,...,...,...,...,...
244,2024-11-12,00:00:00,MAP 1 Kills,kauez,paiN,Nouns,14.0,14.662120,0.662120,Over
245,2024-11-12,00:00:00,MAP 1 Kills,nqz,paiN,Nouns,15.0,16.076330,1.076330,Over
246,2024-11-12,00:00:00,MAP 1 Kills,biguzera,paiN,Nouns,14.0,15.218978,1.218978,Over
247,2024-11-12,00:00:00,MAP 1 Kills,lux,paiN,Nouns,16.0,16.905682,0.905682,Over
