<a href="https://colab.research.google.com/github/reemchaaban/game_system/blob/main/data-processing/player_count_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import holidays
!pip install workalendar
from workalendar.usa import UnitedStates  # can change
from datetime import datetime, timedelta
import pickle

from google.colab import drive, files, userdata
import os

!pip install huggingface_hub transformers
from huggingface_hub import login
HF_TOKEN = userdata.get("HF_TOKEN")
login(token=HF_TOKEN)



In [None]:
# load dataset
drive.mount('/content/drive')
drive_base_path = '/content/drive/My Drive/503Nproj/player-count-history'
file_path = os.path.join(drive_base_path, 'player_count_history.csv')
df = pd.read_csv(file_path, parse_dates=['date'])

Mounted at /content/drive


In [None]:
# feature engineering
us_holidays = holidays.US()
cal = UnitedStates()
df['is_holiday_holidays_lib'] = df['date'].apply(lambda x: int(x in us_holidays))
df['is_holiday_workalendar'] = df['date'].apply(lambda x: int(cal.is_holiday(x)))
df['day_of_week'] = df['date'].dt.dayofweek  # i.e., monday=0, tuesday=1, ..., sunday=6

In [None]:
# target column & features
target_col = 'total players'
feature_cols = [col for col in df.columns if col not in ['date', target_col]]

In [None]:
# scaling
scaler_players = MinMaxScaler()
df[[target_col]] = scaler_players.fit_transform(df[[target_col]])  # train only on total players
with open('scaler_players.pkl', 'wb') as f:
    pickle.dump(scaler_players, f)
print(f"Scaler was trained on {scaler_players.n_features_in_} features")


scaler_features = MinMaxScaler()
df[feature_cols] = scaler_features.fit_transform(df[feature_cols])


Scaler was trained on 1 features


In [None]:
print(df.isnull().sum()) # get number of missing values
df.fillna(df.mean(), inplace=True)

date                       0
570                        0
730                        0
578080                     0
1172470                    0
                          ..
1086940                    0
total players              0
is_holiday_holidays_lib    0
is_holiday_workalendar     0
day_of_week                0
Length: 102, dtype: int64


In [None]:
print(df.dtypes)


date                       datetime64[ns]
570                               float64
730                               float64
578080                            float64
1172470                           float64
                                ...      
1086940                           float64
total players                     float64
is_holiday_holidays_lib           float64
is_holiday_workalendar            float64
day_of_week                       float64
Length: 102, dtype: object


In [None]:
# sequence preparation
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i + seq_length, :-1])  # features
        y.append(data[i + seq_length, -1])  # target variable i.e., total players
    return np.array(X), np.array(y)

seq_length = 30  # use last 30 days to predict
data = df[feature_cols].values
X, y = create_sequences(data, seq_length)

# train-validation-test split (0.7-0.15-0.15)
train_size = int(0.7 * len(X))
val_size = int(0.15 * len(X))
test_size = len(X) - train_size - val_size
X_train, y_train = X[:train_size], y[:train_size]
X_val, y_val = X[train_size:train_size+val_size], y[train_size:train_size+val_size]
X_test, y_test = X[train_size+val_size:], y[train_size+val_size:]


In [None]:
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}") #verify that X train & test shapes make sense
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

X_train shape: (489, 30, 99), y_train shape: (489,)
X_test shape: (106, 30, 99), y_test shape: (106,)


In [None]:
# LSTM model training
model = Sequential([
    LSTM(100, return_sequences=True, input_shape=(seq_length, X.shape[2])),
    Dropout(0.2),
    LSTM(100, return_sequences=False),
    Dropout(0.2),
    Dense(50, activation='relu'),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=30, batch_size=16, validation_data=(X_test, y_test))

Epoch 1/30


  super().__init__(**kwargs)


[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 65ms/step - loss: 0.1464 - val_loss: 0.1121
Epoch 2/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 64ms/step - loss: 0.1168 - val_loss: 0.1101
Epoch 3/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 37ms/step - loss: 0.1118 - val_loss: 0.1124
Epoch 4/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - loss: 0.1193 - val_loss: 0.1137
Epoch 5/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - loss: 0.1134 - val_loss: 0.1055
Epoch 6/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - loss: 0.1080 - val_loss: 0.1200
Epoch 7/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.1071 - val_loss: 0.1146
Epoch 8/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - loss: 0.1022 - val_loss: 0.0836
Epoch 9/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x78760887a9d0>

In [None]:
# model evaluation
y_pred = model.predict(X_test)
y_pred_inv = scaler_players.inverse_transform(y_pred.reshape(-1, 1))
y_test_inv = scaler_players.inverse_transform(y_test.reshape(-1, 1))

mae = mean_absolute_error(y_test_inv, y_pred_inv)
rmse = np.sqrt(mean_squared_error(y_test_inv, y_pred_inv))
r2 = r2_score(y_test_inv, y_pred_inv)

print(f"MAE: {mae}, RMSE: {rmse}, R^2: {r2}")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 124ms/step
MAE: 122608.39308176069, RMSE: 197654.14833836447, R^2: 0.9491055460797315


In [None]:
print(model.input_shape)  # expecting: (None, sequence_length, feature_size)


(None, 30, 99)


In [None]:
def predict_future(date):
    date = pd.to_datetime(date)
    day_of_week = date.weekday()
    is_holiday_holidays = int(date in us_holidays)
    is_holiday_workalendar = int(cal.is_holiday(date))

    last_seq = df[feature_cols].values[-seq_length:]  # ensure the shape is correct

    # ensure extra_features has same number of columns as features
    extra_features = np.zeros(len(feature_cols))  # replace null values w/ zeros
    extra_features[-3:] = [day_of_week / 6.0, is_holiday_holidays, is_holiday_workalendar]  # normalization

    # stack last_seq w/ extra_features
    last_seq = np.vstack([last_seq[1:], extra_features])  # (seq_length, num_features)

    # reshape sequence to (1, seq_length, num_features)
    last_seq = np.expand_dims(last_seq, axis=0)  # shape (1, seq_length, num_features)

    # ensure input shape matches model's expected input (99 features)
    last_seq = last_seq[:, :, :99]  # trim/select first 99 features

    print(f"last_seq shape before prediction: {last_seq.shape}")  # ensure last sequence shape is what we expect
    print(f"Last input sequence:\n{last_seq}")

    # prediction
    prediction = model.predict(last_seq)
    print(f"Raw model output: {prediction[0][0]}")  # debugging

    # check inverse transformation
    predicted_players_scaled = np.array([[prediction[0][0]]])  # shape should be (1, 1)

    predicted_players = scaler_players.inverse_transform(predicted_players_scaled)[0][0]

    print(f"Predicted total players (scaled): {prediction[0][0]}")
    print(f"Predicted total players after inverse transform: {predicted_players}")

    # debugging
    print("Scaler Min:", scaler_players.data_min_)
    print("Scaler Max:", scaler_players.data_max_)

    # manual computation of inverse transformation
    min_val = scaler_players.data_min_[0]  # min value used in training
    max_val = scaler_players.data_max_[0]  # max value used in training
    predicted_manual = min_val + (max_val - min_val) * prediction[0][0]

    print("Manually-calculated inverse transform:", predicted_manual)

    # extra debugging
    print(f"Day of week (normalized): {day_of_week / 6.0}")
    print(f"Holiday feature 1: {is_holiday_holidays}")
    print(f"Holiday feature 2: {is_holiday_workalendar}")

    return predicted_players

# test w/ future date
future_date = "2025-04-18"
predict_future(future_date)

last_seq shape before prediction: (1, 30, 99)
Last input sequence:
[[[0.32600305 0.8271795  0.76030521 ... 0.10815171 0.         0.        ]
  [0.36596354 0.91537016 0.8116333  ... 0.12318548 0.         0.        ]
  [0.13071803 0.71624328 0.75790826 ... 0.10026211 1.         1.        ]
  ...
  [0.06112511 0.90901566 0.91060804 ... 0.07094232 0.         0.        ]
  [0.2081065  1.         0.95082665 ... 0.09067778 0.         0.        ]
  [0.         0.         0.         ... 0.         0.66666667 0.        ]]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step
Raw model output: 0.36310824751853943
Predicted total players (scaled): 0.36310824751853943
Predicted total players after inverse transform: 5674342.0
Scaler Min: [4719905.]
Scaler Max: [7348425.]
Manually-calculated inverse transform: 5674342.290767431
Day of week (normalized): 0.6666666666666666
Holiday feature 1: 0
Holiday feature 2: 0


np.float32(5674342.0)

In [None]:
# from tensorflow import keras
model.export("player_count_model")

Saved artifact at 'player_count_model'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 30, 99), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  132448343699152: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132448343699728: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132448343699536: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132448343702416: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132448343698768: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132448343702992: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132448343703184: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132448343701840: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132448343702032: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132448343702608: TensorSpec(shape=(), dtype=tf.resource, name=None)


In [None]:
from huggingface_hub import HfApi, HfFolder, Repository, create_repo, upload_folder

upload_folder (
    repo_id="reemchaaban/player-count-prediction",
    folder_path="draft_player_count",
    path_in_repo="draft_player_count",
    commit_message="Initial commit from Google Colab"
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


fingerprint.pb:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

saved_model.pb:   0%|          | 0.00/126k [00:00<?, ?B/s]

variables.data-00000-of-00001:   0%|          | 0.00/1.33M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/reemchaaban/player-count-prediction/commit/58f689e9d9cc7a5a4a9de068a886b93779e058da', commit_message='Initial commit from Google Colab', commit_description='', oid='58f689e9d9cc7a5a4a9de068a886b93779e058da', pr_url=None, repo_url=RepoUrl('https://huggingface.co/reemchaaban/player-count-prediction', endpoint='https://huggingface.co', repo_type='model', repo_id='reemchaaban/player-count-prediction'), pr_revision=None, pr_num=None)

In [None]:
GITHUB_USERNAME = "reemchaaban"
GITHUB_EMAIL = "reem.chaabann@gmail.com"
REPO_NAME = "game_system"
BRANCH = "main"
TARGET_SUBDIR = "IEP1"

token = userdata.get('GITHUB_PAT')

!git config --global user.email "{GITHUB_EMAIL}"
!git config --global user.name "{GITHUB_USERNAME}"

!rm -rf {REPO_NAME}
!git clone https://{token}@github.com/{GITHUB_USERNAME}/{REPO_NAME}.git

!rm -rf {REPO_NAME}/{TARGET_SUBDIR}/model
!mkdir -p {REPO_NAME}/{TARGET_SUBDIR}/model
!ls
!cp player_count_model/fingerprint.pb {REPO_NAME}/{TARGET_SUBDIR}/model/
!cp player_count_model/saved_model.pb {REPO_NAME}/{TARGET_SUBDIR}/model/
!cp -r player_count_model/variables {REPO_NAME}/{TARGET_SUBDIR}/model/

!cp scaler_players.pkl {REPO_NAME}/{TARGET_SUBDIR}/utils/scaler_players.pkl

%cd {REPO_NAME}
!git add .
!git commit -m "Update IEP1 model and scaler from Colab"
!git push origin {BRANCH}

%cd ..

Cloning into 'game_system'...
remote: Enumerating objects: 93, done.[K
remote: Counting objects: 100% (93/93), done.[K
remote: Compressing objects: 100% (55/55), done.[K
remote: Total 93 (delta 26), reused 93 (delta 26), pack-reused 0 (from 0)[K
Receiving objects: 100% (93/93), 9.29 MiB | 20.20 MiB/s, done.
Resolving deltas: 100% (26/26), done.
drive  game_system  player_count_model	sample_data  scaler_players.pkl
/content/game_system
[main c01ae89] Update IEP1 model and scaler from Colab
 5 files changed, 1 insertion(+), 1 deletion(-)
 rewrite IEP1/model/variables/variables.data-00000-of-00001 (91%)
 rewrite IEP1/model/variables/variables.index (98%)
 rewrite IEP1/utils/scaler_players.pkl (100%)
Enumerating objects: 21, done.
Counting objects: 100% (21/21), done.
Delta compression using up to 2 threads
Compressing objects: 100% (11/11), done.
Writing objects: 100% (11/11), 1.17 MiB | 3.09 MiB/s, done.
Total 11 (delta 5), reused 0 (delta 0), pack-reused 0
remote: Resolving deltas: 