In [163]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from math import sqrt
from sklearn.model_selection import train_test_split, GridSearchCV, TimeSeriesSplit

#Player projections using decay

# Parameters
DECAY = 0.05
PATCH_MULTIPLIER = 25

# Distance between patches
def custom_patch_distance(train_patch, holdout_patch):
    hmaj, hmin = holdout_patch
    tmaj, tmin = train_patch
    return (hmaj - tmaj) * PATCH_MULTIPLIER + (hmin - tmin)

df = pd.read_csv("/Users/samharwood/Downloads/statstester.csv")
df['patch_tuple'] = df['match_patch'].apply(lambda x: tuple(map(int, str(x).split('.'))))

#Training and holdout split
unique_patches = sorted(df['patch_tuple'].unique(), reverse=True)
most_recent_patch = unique_patches[0]
holdout_df = df[df['patch_tuple'] == most_recent_patch].copy()
train_df = df[df['patch_tuple'] != most_recent_patch].copy()

#Holdout split; 1000 matches from most recent patch taken for holdout. Default was to include all matches on the most recent patch in the holdout, which would prevent predictions to be informed by the most recent, relevant data
if len(holdout_df) > 1000:
    holdout_df = holdout_df.sample(n=1000, random_state=42)
    remaining = df[(df['patch_tuple'] == most_recent_patch) & (~df.index.isin(holdout_df.index))]
    train_df = pd.concat([train_df, remaining])

# Filter players with >=5 matches in training
valid_players = train_df.groupby('player').filter(lambda g: len(g) >= 5)['player'].unique()
train_df = train_df[train_df['player'].isin(valid_players)].copy()

# Decayed feature averaging
def weighted_avg_feature(df, feature):
    df['weight'] = np.exp(-DECAY * df['patch_tuple'].apply(
        lambda x: custom_patch_distance(x, most_recent_patch)))
    return df.groupby('player').apply(
        lambda g: np.average(g[feature], weights=g['weight'])
    ).reset_index(name=f'pred_{feature}')

player_pred_kills = weighted_avg_feature(train_df.copy(), 'kills')
player_pred_kast_pct = weighted_avg_feature(train_df.copy(), 'kast_pct')

player_perf = player_pred_kills.merge(player_pred_kast_pct, on='player')

#Agent level coefficients from prior code

agent_coef_dict = {
    'Reyna':4.0584, 'Iso':3.9195, 'Raze':3.3379, 'Neon':2.7851,
    'Jett':2.6998, 'Yoru':2.3374, 'Chamber':2.1834, 'Sage':1.6417,
    'Clove':1.4078, 'Viper':1.3776, 'Phoenix':0.8329, 'Brimstone':0.6529,
    'Vyse':0.5889, 'Gekko':0.4869, 'Deadlock':0.2158, 'Tejo':0.1085,
    'Killjoy':-0.0277, 'Fade':-0.0562, 'Sova':-0.0831, 'Omen':-0.2596,
    'Astra':-0.6056, 'Skye':-0.8775, 'Kayo':-0.9327, 'Breach':-1.3188,
    'Harbor':-3.4416, 'Cypher':0.0
}

agent_counts = train_df.groupby(['player', 'agent']).size().unstack(fill_value=0)
all_agents = set(agent_counts.columns) | set(agent_coef_dict.keys())
for agent in all_agents:
    if agent not in agent_coef_dict:
        agent_coef_dict[agent] = 0.0

expected_coef = agent_counts.mul(agent_coef_dict).sum(axis=1).div(
    agent_counts.sum(axis=1)).rename('agent_coef')

player_perf = player_perf.merge(
    expected_coef.reset_index(), 
    on='player', 
    how='left'
).fillna(np.mean(list(agent_coef_dict.values())))

#Combined dataset

holdout_perf = holdout_df.groupby('player')['kills'].mean().reset_index(name='actual_kills')
combined = player_perf.merge(holdout_perf, on='player', how='inner')

X = combined[['pred_kills', 'pred_kast_pct', 'agent_coef']]
y = combined['actual_kills']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#XGBoost with Grid Search

param_grid = {
    'max_depth': [3, 4, 5],
    'learning_rate': [0.01, 0.02, 0.03],
    'reg_alpha': [0.5, 1],
    'reg_lambda': [0.5, 1],
    'gamma': [0, 0.1, 0.15]
}

xgb = XGBRegressor(
    n_estimators=1000,
    early_stopping_rounds=50,
    eval_metric='rmse',
    verbosity=0,
    random_state=42
)

grid = GridSearchCV(
    xgb, param_grid,
    cv=TimeSeriesSplit(3),
    scoring='neg_root_mean_squared_error',
    verbose=0,
    n_jobs=-1
)

grid.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=0)
best_model = grid.best_estimator_


train_pred = best_model.predict(X_train)
test_pred = best_model.predict(X_test)

print("\n=== Model Performance ===")
print(f"Train MAE: {mean_absolute_error(y_train, train_pred):.3f}")
print(f"Test MAE: {mean_absolute_error(y_test, test_pred):.3f}")
print(f"Train RMSE: {sqrt(mean_squared_error(y_train, train_pred)):.3f}")
print(f"Test RMSE: {sqrt(mean_squared_error(y_test, test_pred)):.3f}")
print(f"Train R²: {r2_score(y_train, train_pred):.3f}")
print(f"Test R²: {r2_score(y_test, test_pred):.3f}")

print("\nFeature Importances:")
for name, imp in zip(X.columns, best_model.feature_importances_):
    print(f"{name}: {imp:.4f}")

print("\nBest Hyperparameters:")
for param, value in grid.best_params_.items():
    print(f"{param}: {value}")

print(f"\nHoldout set size (rows): {holdout_df.shape[0]}")


  return df.groupby('player').apply(
  return df.groupby('player').apply(



=== Model Performance ===
Train MAE: 2.589
Test MAE: 3.280
Train RMSE: 3.309
Test RMSE: 4.051
Train R²: 0.308
Test R²: 0.114

Feature Importances:
pred_kills: 0.4460
pred_kast_pct: 0.2314
agent_coef: 0.3226

Best Hyperparameters:
gamma: 0.15
learning_rate: 0.01
max_depth: 3
reg_alpha: 1
reg_lambda: 1

Holdout set size (rows): 1200


In [8]:
# Comparison of agent stats to detemrine performance metrics
agent_stats = df.groupby('agent').agg({
    'first_kills': 'mean',
    'kast_pct': 'mean',
    'kills': 'mean'
}).reset_index()

# Merge with agent coeffs
agent_stats = agent_stats.merge(
    pd.DataFrame.from_dict(agent_coef_dict, orient='index', columns=['agent_coef']),
    left_on='agent', right_index=True
)

# Compute correlations
print(agent_stats[['agent_coef', 'first_kills', 'kast_pct']].corr())

             agent_coef  first_kills  kast_pct
agent_coef     1.000000     0.809385 -0.481137
first_kills    0.809385     1.000000 -0.661731
kast_pct      -0.481137    -0.661731  1.000000


In [29]:
#Final 2 map, XGB model

import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from math import sqrt
from sklearn.model_selection import train_test_split, GridSearchCV, TimeSeriesSplit

# Parameters
DECAY = 0.05
PATCH_MULTIPLIER = 25

# Distance between patches
def custom_patch_distance(train_patch, holdout_patch):
    hmaj, hmin = holdout_patch
    tmaj, tmin = train_patch
    return (hmaj - tmaj) * PATCH_MULTIPLIER + (hmin - tmin)

df = pd.read_csv("/Users/samharwood/Downloads/statstester.csv")
df['patch_tuple'] = df['match_patch'].apply(lambda x: tuple(map(int, str(x).split('.'))))

# Training and holdout split
unique_patches = sorted(df['patch_tuple'].unique(), reverse=True)
most_recent_patch = unique_patches[0]
holdout_df = df[df['patch_tuple'] == most_recent_patch].copy()
train_df = df[df['patch_tuple'] != most_recent_patch].copy()

#Holdout split; 1000 matches from most recent patch taken for holdout. Default was to include all matches on the most recent patch in the holdout, which would prevent predictions to be informed by the most recent, relevant data
if len(holdout_df) > 1000:
    holdout_df = holdout_df.sample(n=1000, random_state=42)
    remaining = df[(df['patch_tuple'] == most_recent_patch) & (~df.index.isin(holdout_df.index))]
    train_df = pd.concat([train_df, remaining])

# Filter players with >=5 matches in training
valid_players = train_df.groupby('player').filter(lambda g: len(g) >= 5)['player'].unique()
train_df = train_df[train_df['player'].isin(valid_players)].copy()

# Decayed feature averaging
def weighted_avg_feature(df, feature):
    df['weight'] = np.exp(-DECAY * df['patch_tuple'].apply(
        lambda x: custom_patch_distance(x, most_recent_patch)))
    return df.groupby('player').apply(
        lambda g: np.average(g[feature], weights=g['weight'])
    ).reset_index(name=f'pred_{feature}')

player_pred_kills = weighted_avg_feature(train_df.copy(), 'kills')
player_pred_kast_pct = weighted_avg_feature(train_df.copy(), 'kast_pct')

player_perf = player_pred_kills.merge(player_pred_kast_pct, on='player')

# Agent level coefficients from prior code
agent_coef_dict = {
    'Reyna': 4.0584, 'Iso': 3.9195, 'Raze': 3.3379, 'Neon': 2.7851,
    'Jett': 2.6998, 'Yoru': 2.3374, 'Chamber': 2.1834, 'Sage': 1.6417,
    'Clove': 1.4078, 'Viper': 1.3776, 'Phoenix': 0.8329, 'Brimstone': 0.6529,
    'Vyse': 0.5889, 'Gekko': 0.4869, 'Deadlock': 0.2158, 'Tejo': 0.1085,
    'Killjoy': -0.0277, 'Fade': -0.0562, 'Sova': -0.0831, 'Omen': -0.2596,
    'Astra': -0.6056, 'Skye': -0.8775, 'Kayo': -0.9327, 'Breach': -1.3188,
    'Harbor': -3.4416, 'Cypher': 0.0
}

agent_counts = train_df.groupby(['player', 'agent']).size().unstack(fill_value=0)
all_agents = set(agent_counts.columns) | set(agent_coef_dict.keys())

expected_coef = agent_counts.mul(agent_coef_dict).sum(axis=1).div(
    agent_counts.sum(axis=1)).rename('agent_coef')

player_perf = player_perf.merge(
    expected_coef.reset_index(), 
    on='player', 
    how='left'
).fillna(np.mean(list(agent_coef_dict.values())))

# Combine with holdout actuals: here, actual_kills is averaged per map
holdout_perf = holdout_df.groupby('player')['kills'].mean().reset_index(name='actual_kills')
combined = player_perf.merge(holdout_perf, on='player', how='inner')

# Multiplying kills by 2 so model learns, scales to 2 map totals
combined['actual_kills_2maps'] = combined['actual_kills'] * 2

# Define features and target accordingly.
X = combined[['pred_kills', 'pred_kast_pct', 'agent_coef']]
y = combined['actual_kills_2maps']  # Revised target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# XGBoost with Grid Search
param_grid = {
    'max_depth': [4, 5, 6],
    'learning_rate': [0.01, 0.02, 0.03],
    'reg_alpha': [0.5, 1],
    'reg_lambda': [0.5, 1],
    'gamma': [0, 0.1, 0.15]
}

xgb = XGBRegressor(
    n_estimators=1000,
    early_stopping_rounds=50,
    eval_metric='rmse',
    verbosity=0,
    random_state=42
)

grid = GridSearchCV(
    xgb, param_grid,
    cv=TimeSeriesSplit(3),
    scoring='neg_root_mean_squared_error',
    verbose=0,
    n_jobs=-1
)

grid.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=0)
best_model = grid.best_estimator_

# Evaluation on two-map outcomes
train_pred = best_model.predict(X_train)
test_pred = best_model.predict(X_test)

print("\n=== Two-Map Model Performance ===")
print(f"Train MAE: {mean_absolute_error(y_train, train_pred):.3f}")
print(f"Test MAE: {mean_absolute_error(y_test, test_pred):.3f}")
print(f"Train RMSE: {sqrt(mean_squared_error(y_train, train_pred)):.3f}")
print(f"Test RMSE: {sqrt(mean_squared_error(y_test, test_pred)):.3f}")
print(f"Train R²: {r2_score(y_train, train_pred):.3f}")
print(f"Test R²: {r2_score(y_test, test_pred):.3f}")

print("\nFeature Importances:")
for name, imp in zip(X.columns, best_model.feature_importances_):
    print(f"{name}: {imp:.4f}")

print("\nBest Hyperparameters:")
for param, value in grid.best_params_.items():
    print(f"{param}: {value}")


  return df.groupby('player').apply(
  return df.groupby('player').apply(



=== Two-Map Model Performance ===
Train MAE: 4.173
Test MAE: 6.507
Train RMSE: 5.275
Test RMSE: 8.006
Train R²: 0.599
Test R²: 0.258

Feature Importances:
pred_kills: 0.3916
pred_kast_pct: 0.2768
agent_coef: 0.3316

Best Hyperparameters:
gamma: 0.1
learning_rate: 0.01
max_depth: 5
reg_alpha: 0.5
reg_lambda: 0.5


In [53]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from math import sqrt

#Prior feature engineering before NN
df = pd.read_csv("/Users/samharwood/Downloads/statstester.csv")
df['patch_tuple'] = df['match_patch'].apply(lambda x: tuple(map(int, str(x).split('.'))))

# Parameters
DECAY = 0.05
PATCH_MULTIPLIER = 25

def custom_patch_distance(train_patch, holdout_patch):
    hmaj, hmin = holdout_patch
    tmaj, tmin = train_patch
    return (hmaj - tmaj) * PATCH_MULTIPLIER + (hmin - tmin)

# Split data
unique_patches = sorted(df['patch_tuple'].unique(), reverse=True)
most_recent_patch = unique_patches[0]
holdout_df = df[df['patch_tuple'] == most_recent_patch].copy()
train_df = df[df['patch_tuple'] != most_recent_patch].copy()

if len(holdout_df) > 1000:
    holdout_df = holdout_df.sample(n=1000, random_state=42)
    remaining = df[(df['patch_tuple'] == most_recent_patch) & (~df.index.isin(holdout_df.index))]
    train_df = pd.concat([train_df, remaining])

# Filter players
valid_players = train_df.groupby('player').filter(lambda g: len(g) >= 5)['player'].unique()
train_df = train_df[train_df['player'].isin(valid_players)].copy()

# Weighted averages
def weighted_avg_feature(df, feature):
    df['weight'] = np.exp(-DECAY * df['patch_tuple'].apply(
        lambda x: custom_patch_distance(x, most_recent_patch)))
    return df.groupby('player').apply(
        lambda g: np.average(g[feature], weights=g['weight'])
    ).reset_index(name=f'pred_{feature}')

player_pred_kills = weighted_avg_feature(train_df.copy(), 'kills')
player_pred_kast_pct = weighted_avg_feature(train_df.copy(), 'kast_pct')
player_perf = player_pred_kills.merge(player_pred_kast_pct, on='player')

# Agent coefficients
agent_coef_dict = {
    'Reyna':4.0584, 'Iso':3.9195, 'Raze':3.3379, 'Neon':2.7851,
    'Jett':2.6998, 'Yoru':2.3374, 'Chamber':2.1834, 'Sage':1.6417,
    'Clove':1.4078, 'Viper':1.3776, 'Phoenix':0.8329, 'Brimstone':0.6529,
    'Vyse':0.5889, 'Gekko':0.4869, 'Deadlock':0.2158, 'Tejo':0.1085,
    'Killjoy':-0.0277, 'Fade':-0.0562, 'Sova':-0.0831, 'Omen':-0.2596,
    'Astra':-0.6056, 'Skye':-0.8775, 'Kayo':-0.9327, 'Breach':-1.3188,
    'Harbor':-3.4416, 'Cypher':0.0
}

agent_counts = train_df.groupby(['player', 'agent']).size().unstack(fill_value=0)
expected_coef = agent_counts.mul(agent_coef_dict).sum(axis=1).div(
    agent_counts.sum(axis=1)).rename('agent_coef')
player_perf = player_perf.merge(expected_coef.reset_index(), on='player', how='left').fillna(0)

# Prepare final dataset
holdout_perf = holdout_df.groupby('player')['kills'].mean().reset_index(name='actual_kills')
combined = player_perf.merge(holdout_perf, on='player', how='inner')
combined['actual_kills_2maps'] = combined['actual_kills'] * 2
X = combined[['pred_kills', 'pred_kast_pct', 'agent_coef']]
y = combined['actual_kills_2maps']

#Preparing NN data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

#Grid Search
def create_model(learning_rate=0.001, dropout_rate=0.3, batch_size=64):
    model = Sequential([
        Dense(128, activation='swish', input_shape=(X_train.shape[1],)),
        BatchNormalization(),
        Dropout(dropout_rate),
        Dense(64, activation='swish'),
        BatchNormalization(),
        Dropout(dropout_rate),
        Dense(32, activation='swish'),
        Dense(1)
    ])
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='mae',
        metrics=['mae']
    )
    return model

#Grid params for testing
param_grid = {
    'learning_rate': [0.001, 0.0002],
    'dropout_rate': [0.25, 0.3],
    'batch_size': [64, 128]
}

best_score = float('inf')
best_params = {}
best_model = None

for lr in param_grid['learning_rate']:
    for dr in param_grid['dropout_rate']:
        for bs in param_grid['batch_size']:
            print(f"\nTesting lr={lr}, dropout={dr}, batch_size={bs}")
            
            model = create_model(
                learning_rate=lr,
                dropout_rate=dr,
                batch_size=bs
            )
            
            history = model.fit(
                X_train, y_train,
                validation_data=(X_val, y_val),
                epochs=200,
                batch_size=bs,
                callbacks=[
                    EarlyStopping(monitor='val_mae', patience=15, restore_best_weights=True),
                    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5)
                ],
                verbose=0
            )
            
            val_mae = min(history.history['val_mae'])
            if val_mae < best_score:
                best_score = val_mae
                best_params = {
                    'learning_rate': lr,
                    'dropout_rate': dr,
                    'batch_size': bs
                }
                best_model = model
                print(f"New best val_mae: {val_mae:.4f}")


print("Best Hyperparams")
print(f"Best Parameters: {best_params}")
print(f"Best Validation MAE: {best_score:.4f}")

#Test Mae/RMSE
y_pred = best_model.predict(X_test).flatten()
test_mae = mean_absolute_error(y_test, y_pred)
test_rmse = sqrt(mean_squared_error(y_test, y_pred))

print("Final Test Performance")
print(f"Test MAE: {test_mae:.3f}")
print(f"Test RMSE: {test_rmse:.3f}")


  return df.groupby('player').apply(
  return df.groupby('player').apply(
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Testing lr=0.001, dropout=0.25, batch_size=64
New best val_mae: 6.6014

Testing lr=0.001, dropout=0.25, batch_size=128


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Testing lr=0.001, dropout=0.3, batch_size=64


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


New best val_mae: 6.3693

Testing lr=0.001, dropout=0.3, batch_size=128


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Testing lr=0.0002, dropout=0.25, batch_size=64


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Testing lr=0.0002, dropout=0.25, batch_size=128


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Testing lr=0.0002, dropout=0.3, batch_size=64


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Testing lr=0.0002, dropout=0.3, batch_size=128


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Best Hyperparams
Best Parameters: {'learning_rate': 0.001, 'dropout_rate': 0.3, 'batch_size': 64}
Best Validation MAE: 6.3693
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
Final Test Performance
Test MAE: 6.699
Test RMSE: 8.049
