In [1]:
import joblib
import sys
import os
import io
import warnings
warnings.filterwarnings("ignore")
project_root = os.path.abspath(os.path.join("..", "scraper"))
sys.path.insert(0, project_root)
from thefuzz import fuzz
from thefuzz import process
import numpy as np
import pandas as pd
import boto3
from utils import prizepicks_lines

In [2]:
def read_csv_from_s3(bucket_name: str, object_key: str) -> pd.DataFrame:
    """
    Download a CSV object from S3 and parse it into a pandas DataFrame.

    The object's bytes are decoded as UTF-8 and the first CSV column becomes
    the DataFrame index.

    Params:
        bucket_name (str): Name of the S3 bucket holding the file.
        object_key (str): Key of the CSV object inside that bucket.

    Returns:
        pd.DataFrame: The parsed contents of the CSV object.
    """
    # Fetch the object through a fresh S3 client
    s3_object = boto3.client("s3").get_object(Bucket=bucket_name, Key=object_key)

    # Read the whole body into memory and decode it to text
    csv_text = s3_object["Body"].read().decode("utf-8")

    # Parse the text; column 0 is the index
    return pd.read_csv(io.StringIO(csv_text), index_col=[0])

In [3]:
# Pull the processed match data from S3 and parse the date column
cs_data = read_csv_from_s3("csgobucket1", "cs_data_processed.csv")
cs_data["Date"] = pd.to_datetime(cs_data["Date"])

# Distinct team / player names present in the data (candidate pools for name matching)
csgo_teams = set(cs_data["Team"])
csgo_players = set(cs_data["Name"])

# Weights of the weighted moving average, largest first (8 values summing to 1.0)
weights = np.array([0.25, 0.20, 0.15, 0.125, 0.115, 0.10, 0.05, 0.01])

# Stat columns that receive a weighted moving average
weight_cols = ["Kills", "Headshots", "Assists", "Deaths", "Kast", "ADR", "Rating"]

# Player encoding: each player name maps to that player's mean rating
player_ratings = cs_data.groupby("Name")["Rating"].agg(["mean", "std"])
player_rating_dict = player_ratings["mean"].to_dict()

# Team encoding: each team name maps to the standard deviation of its ratings.
# NOTE(review): players are encoded by mean but teams by std; confirm this matches
# how the model was trained before changing either one.
team_ratings = cs_data.groupby(["Team"])["Rating"].agg(["mean", "median", "std"])
team_rating_dict = team_ratings["std"].to_dict()

In [4]:
def normalize_team_name(team_name: str) -> "str | None":
    """
    Map a team name from an external source onto the matching name in csgo_teams.

    Filler words (e.g. "team", "esports") are removed from the lowercased name and
    the remainder is fuzzy-matched against csgo_teams with a token-sort ratio.
    Only a perfect score (100) is accepted as a match.

    Params:
        team_name (str): Team name as written by the external source.

    Returns:
        str | None: The best-matching name from csgo_teams, or None when the input
        is not a string, nothing is left after removing filler words, there are no
        candidates, or the best score is below 100.
    """
    # Missing values (None / NaN) can arrive from upstream data; treat them as "no match"
    if not isinstance(team_name, str):
        return None

    stop_words = {"esports", "esport", "teams", "team", "clan", "fe"}
    # split() with no argument also collapses repeated whitespace
    kept_words = [word for word in team_name.lower().split() if word not in stop_words]
    norm_team_name = " ".join(kept_words)
    if not norm_team_name:
        return None

    matches = process.extractBests(
        query=norm_team_name, choices=csgo_teams, scorer=fuzz.token_sort_ratio, limit=1
    )
    # An empty candidate pool yields an empty list; indexing it would raise IndexError
    if not matches:
        return None

    best_name, score = matches[0]
    return best_name if score == 100 else None

def normalize_player_name(player_name: str) -> "str | None":
    """
    Map a player name from an external source onto the matching name in csgo_players.

    A small alias table handles names that are spelled differently elsewhere; the
    result is then fuzzy-matched against csgo_players and only a perfect score
    (100) is accepted as a match.

    Params:
        player_name (str): Player name as written by the external source.

    Returns:
        str | None: The best-matching name from csgo_players, or None when the input
        is not a string, is blank, there are no candidates, or the best score is
        below 100.
    """
    # Missing values (None / NaN) can arrive from upstream data; treat them as "no match"
    if not isinstance(player_name, str):
        return None

    cleaned_name = player_name.strip()
    if not cleaned_name:
        return None

    special_names = {"LuckyV1": "Lucky"}
    # Look the alias up with the stripped name so padded input (" LuckyV1 ") resolves
    # instead of raising KeyError
    query_name = special_names.get(cleaned_name, cleaned_name)

    matches = process.extractBests(
        query=query_name, choices=csgo_players, scorer=fuzz.ratio, limit=1
    )
    # An empty candidate pool yields an empty list; indexing it would raise IndexError
    if not matches:
        return None

    best_name, score = matches[0]
    return best_name if score == 100 else None

In [5]:
# Load the serialized model that projection() calls .predict() on.
# The path is relative to the notebook's working directory.
# NOTE(review): joblib files are pickle-based and can execute code on load;
# only load model artifacts from a trusted source.
model = joblib.load("xgr_model_v2.joblib")

def projection(player_name: str, player_team: str, opp_team: str, map_type: str):
    """
    Produce the model's projection for one player prop.

    Params:
        player_name (str): Player name as given by the lines source.
        player_team (str): The player's team as given by the lines source.
        opp_team (str): The opposing team as given by the lines source.
        map_type (str): Prop description such as "MAPS 3 Kills" or "MAPS 1-2 Headshots".
            The first two words select the map-number feature; the last word is
            the stat type.

    Returns:
        The second model output for "Headshots" props and the first output for
        every other stat type, or 0 when no projection can be made (unsupported
        prop, unmatched player/team, unknown map number, or fewer rows of history
        than there are weights).
    """
    # A missing or blank prop description cannot be parsed
    if not isinstance(map_type, str) or not map_type.strip():
        return 0

    # The model is not set up for these stat types yet. Checked before the name
    # matching so unsupported props skip the fuzzy lookups entirely.
    if any(tag in map_type for tag in ("(Combo)", "First", "AWP")):
        return 0

    # Resolve names against the historical data; bail out on the first miss
    player_name_norm = normalize_player_name(player_name)
    if player_name_norm is None:
        return 0
    team_name_norm = normalize_team_name(player_team)
    if team_name_norm is None:
        return 0
    opp_team_norm = normalize_team_name(opp_team)
    if opp_team_norm is None:
        return 0

    # Kills or HS
    stat_type = map_type.split()[-1]

    # Normalize "MAP n" / "MAPS n" to "MAPS n" and keep only the first two words
    map_name = map_type.replace("MAPS", "MAP").replace("MAP", "MAPS")
    map_type = " ".join(map_name.split()[:2])
    map_feature = f"Map Number {map_type}"

    # The insertion order of this dict is the feature order handed to the model
    model_inputs = {
        "WMA Kills": 0, "WMA Headshots": 0, "WMA Assists": 0, "WMA Deaths": 0,
        "WMA Kast": 0, "WMA ADR": 0, "WMA Rating": 0,
        "Map Number MAPS 1": 0, "Map Number MAPS 1-2": 0, "Map Number MAPS 1-3": 0,
        "Map Number MAPS 3": 0, "Team": 0, "Opponent Team": 0, "Name": ""
    }

    # Setting an unknown map-number key would append an extra feature and change
    # the width of the model input, so treat it as "cannot project"
    if map_feature not in model_inputs:
        return 0

    # Filter the dataframe based on name, team, and map type
    df = cs_data[
        (cs_data["Name"] == player_name_norm) &
        (cs_data["Team"] == team_name_norm) &
        (cs_data["Map Number"] == map_type)
    ]

    # Not enough data: one row of history is needed per weight
    window = len(weights)
    if len(df) < window:
        return 0

    # Weighted moving average over the last `window` rows. The weights are reversed
    # so the largest weight lands on the last row.
    # NOTE(review): this assumes cs_data rows are in chronological order; confirm upstream.
    recent_rows = df[weight_cols].tail(window)
    wma = recent_rows.apply(lambda column: np.dot(weights[::-1], column), raw=True)
    for col, value in wma.items():
        model_inputs[f"WMA {col}"] = value

    # Set the map number feature
    model_inputs[map_feature] = 1

    # Encoded player / team features from the lookup dictionaries
    model_inputs["Name"] = player_rating_dict.get(player_name_norm)
    model_inputs["Team"] = team_rating_dict.get(team_name_norm)
    model_inputs["Opponent Team"] = team_rating_dict.get(opp_team_norm)

    # Single-row input for the model
    inp = [np.array(list(model_inputs.values()))]

    # Get the prediction from the model
    prediction = model.predict(inp)[0]

    # Return the corresponding stat based on stat_type
    if stat_type == "Headshots":
        return prediction[1]
    return prediction[0]

In [6]:
# Predictions: score every line returned by prizepicks_lines() with the model
prediction_df = pd.DataFrame(prizepicks_lines())
prediction_df["Model Prediction"] = prediction_df.apply(
    lambda row: projection(row["Name"], row["Team"], row["Opp"], row["Type"]), axis=1
)

# projection() returns 0 when it cannot score a line; keep those rows for inspection
not_predicted = prediction_df[prediction_df["Model Prediction"] == 0]

# Keep scored lines only, sorted by team, with a clean 0..n-1 index.
# The result of reset_index is assigned here; calling it without assignment has no effect.
prediction_df = (
    prediction_df[prediction_df["Model Prediction"] != 0]
    .drop(columns="ID")
    .sort_values(by="Team")
    .reset_index(drop=True)
)

# Difference between the model and the line, and the resulting over/under call
prediction_df["Diff (+/-)"] = prediction_df["Model Prediction"] - prediction_df["Line Score"]
prediction_df["Model O/U"] = np.where(
    prediction_df["Line Score"] < prediction_df["Model Prediction"], "Over", "Under"
)

# Save to a CSV
prediction_df.to_csv("predictions.csv", index=False)

# Display
prediction_df

Unnamed: 0,Game Date,Game Time,Type,Name,Team,Opp,Line Score,Model Prediction,Diff (+/-),Model O/U
0,2025-01-05,12:00:00,MAPS 3 Kills,mantuu,9INE,FLuffy Gangsters,16.0,19.105436,3.105436,Over
1,2025-01-05,12:00:00,MAPS 3 Kills,refrezh,9INE,FLuffy Gangsters,14.0,17.201313,3.201313,Over
2,2025-01-05,12:00:00,MAPS 3 Kills,raalz,9INE,FLuffy Gangsters,13.0,16.688299,3.688299,Over
3,2025-01-05,12:00:00,MAPS 3 Headshots,mantuu,9INE,FLuffy Gangsters,5.0,5.951986,0.951986,Over
4,2025-01-05,12:00:00,MAPS 3 Headshots,refrezh,9INE,FLuffy Gangsters,7.5,9.159211,1.659211,Over
5,2025-01-05,12:00:00,MAPS 3 Headshots,raalz,9INE,FLuffy Gangsters,8.0,10.193201,2.193201,Over
6,2025-01-05,13:00:00,MAPS 1-2 Headshots,TMB,ECSTATIC,kONO,15.0,17.247829,2.247829,Over
7,2025-01-05,13:00:00,MAPS 1-2 Headshots,n1Xen,ECSTATIC,kONO,19.0,18.186857,-0.813143,Under
8,2025-01-05,13:00:00,MAPS 1-2 Headshots,nut nut,ECSTATIC,kONO,17.0,18.065037,1.065037,Over
9,2025-01-05,13:00:00,MAPS 1-2 Headshots,Kristou,ECSTATIC,kONO,12.5,14.709985,2.209985,Over
