In [1]:
import json
import re
from os.path import join
import os

import pandas as pd
import numpy as np
from tqdm import tqdm
import xgboost as xgb
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import optuna
from sklearn.metrics import mean_absolute_error
import shap
from sklearn.metrics import log_loss
from sklearn.metrics import precision_recall_curve, auc
import cfbd

# Lift pandas' display truncation so wide/long frames render in full.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Build the CFBD client configuration from the API key stored under
# 'CFBD_API_KEY' in the config file one directory up.
with open('../config.json') as f:
    configuration = cfbd.Configuration(access_token=json.load(f)['CFBD_API_KEY'])

In [2]:
# Directory (relative to this notebook) used for data files.
DATA_DIR = '../data'
# Seasons 2013 through 2024 inclusive (the range end is exclusive).
YEARS = range(2013, 2025)

In [3]:
def convert_to_snake_case(cols):
    """Return the given names converted from camelCase to snake_case.

    Args:
        cols: Iterable of strings (e.g. DataFrame column labels).

    Returns:
        A new list with one lower-cased, underscore-separated name per
        input name, in the same order.
    """
    def _to_snake(name):
        # Pass 1: put an underscore before any capitalised word
        # (an uppercase letter followed by lowercase letters).
        partial = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
        # Pass 2: split a lowercase letter or digit from a following
        # uppercase letter (covers acronyms such as "totalPPA").
        return re.sub('([a-z0-9])([A-Z])', r'\1_\2', partial).lower()

    return [_to_snake(name) for name in cols]

# NOTE: use the advanced rushing stats

In [None]:
# Request advanced game stats for 2022, week 1 of the postseason.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.StatsApi(api_client)
    data = api_instance.get_advanced_game_stats(
        year=2022,
        week=1,
        season_type=cfbd.SeasonType('postseason'),
    )

# One row per returned record, with column labels converted to snake_case.
tmp = pd.DataFrame([record.to_dict() for record in data])
tmp.columns = convert_to_snake_case(tmp.columns)

# Pretty-print the first row's offense entry to inspect its nested structure.
print(json.dumps(tmp.offense.values[0], indent=2))

{
  "passingPlays": {
    "explosiveness": 1.8541393836035363,
    "successRate": 0.3870967741935484,
    "totalPPA": 1.487685319139701,
    "ppa": 0.04798984900450649
  },
  "rushingPlays": {
    "explosiveness": 0.8463265955213564,
    "successRate": 0.48484848484848486,
    "totalPPA": 7.5728849012688295,
    "ppa": 0.22948136064451
  },
  "passingDowns": {
    "explosiveness": 2.391809670287178,
    "successRate": 0.29411764705882354,
    "ppa": 0.4032850938806822
  },
  "standardDowns": {
    "explosiveness": 1.036167381745576,
    "successRate": 0.48936170212765956,
    "ppa": 0.04690901328589221
  },
  "openFieldYardsTotal": 8,
  "openFieldYards": 0.24242424242424243,
  "secondLevelYardsTotal": 32,
  "secondLevelYards": 0.9696969696969697,
  "lineYardsTotal": 119,
  "lineYards": 3.5939393939393938,
  "stuffRate": 0.09090909090909091,
  "powerSuccess": 0.6,
  "explosiveness": 1.2782463618422906,
  "successRate": 0.4375,
  "totalPPA": 9.06057022040853,
  "ppa": 0.1415714096938833,

In [17]:
# Pretty-print the offense entry of the first Air Force / 2022 row.
print(
    json.dumps(
        tmp.query('team=="Air Force" and season==2022').offense.values[0],
        indent=2,
    )
)

{
  "passingPlays": {
    "explosiveness": 3.084834488663667,
    "successRate": 0.5714285714285714,
    "totalPPA": 10.521407129473014,
    "ppa": 1.5030581613532876
  },
  "rushingPlays": {
    "explosiveness": 0.923499461339027,
    "successRate": 0.3880597014925373,
    "totalPPA": -0.008763854486361583,
    "ppa": -0.00013080379830390424
  },
  "passingDowns": {
    "explosiveness": 1.9496321186548522,
    "successRate": 0.35294117647058826,
    "ppa": 0.44910251364721143
  },
  "standardDowns": {
    "explosiveness": 1.0271888015641775,
    "successRate": 0.42105263157894735,
    "ppa": 0.05048948321024663
  },
  "openFieldYardsTotal": 43,
  "openFieldYards": 0.6417910447761194,
  "secondLevelYardsTotal": 59,
  "secondLevelYards": 0.8805970149253731,
  "lineYardsTotal": 191,
  "lineYards": 2.8492537313432837,
  "stuffRate": 0.19402985074626866,
  "powerSuccess": 0.5833333333333334,
  "explosiveness": 1.2116774649823123,
  "successRate": 0.40540540540540543,
  "totalPPA": 10.51264

In [18]:
# Pretty-print the defense entry of the first Air Force / 2022 row.
print(
    json.dumps(
        tmp.query('team=="Air Force" and season==2022').defense.values[0],
        indent=2,
    )
)

{
  "passingPlays": {
    "explosiveness": 1.728313514270139,
    "successRate": 0.36,
    "totalPPA": 5.563626383672537,
    "ppa": 0.22254505534690147
  },
  "rushingPlays": {
    "explosiveness": 1.7483239824847996,
    "successRate": 0.16666666666666666,
    "totalPPA": -4.979452773483793,
    "ppa": -0.20747719889515803
  },
  "passingDowns": {
    "explosiveness": 2.476020333520312,
    "successRate": 0.10526315789473684,
    "ppa": -0.16967593155011507
  },
  "standardDowns": {
    "explosiveness": 1.599643353757257,
    "successRate": 0.36666666666666664,
    "ppa": 0.12693387698803102
  },
  "openFieldYardsTotal": 0,
  "openFieldYards": 0,
  "secondLevelYardsTotal": 5,
  "secondLevelYards": 0.20833333333333334,
  "lineYardsTotal": 50,
  "lineYards": 2.091666666666667,
  "stuffRate": 0.20833333333333334,
  "powerSuccess": 0.75,
  "explosiveness": 1.7344705814131116,
  "successRate": 0.2653061224489796,
  "totalPPA": 0.5841736101887443,
  "ppa": 0.011921910412015188,
  "drives":

In [19]:
# Preview the first rows; offense/defense still hold nested dicts per row.
tmp.head()

Unnamed: 0,game_id,season,week,team,opponent,offense,defense
0,401441993,2022,1,Miami (OH),UAB,{'passingPlays': {'explosiveness': 1.854139383...,{'passingPlays': {'explosiveness': 2.074307527...
1,401441993,2022,1,UAB,Miami (OH),{'passingPlays': {'explosiveness': 2.074307527...,{'passingPlays': {'explosiveness': 1.854139383...
2,401441994,2022,1,Troy,UTSA,{'passingPlays': {'explosiveness': 1.613148087...,{'passingPlays': {'explosiveness': 1.034500487...
3,401441994,2022,1,UTSA,Troy,{'passingPlays': {'explosiveness': 1.034500487...,{'passingPlays': {'explosiveness': 1.613148087...
4,401441995,2022,1,Cincinnati,Louisville,{'passingPlays': {'explosiveness': 1.351053806...,{'passingPlays': {'explosiveness': 1.447940774...
