# Documentation

[Python Github Repo](https://github.com/CFBD/cfbd-python/tree/main)


# Install CFBD and perform Imports

In [None]:
pip install -U cfbd

In [None]:
pip install pyzipcode

In [None]:
pip install pandas

In [None]:
import requests, pandas as pd
from getpass import getpass
import numpy as np
import cfbd
import time
from cfbd.models.team import Team
from cfbd.rest import ApiException
from pprint import pprint

from cfbd.models.division_classification import DivisionClassification
from cfbd.models.game import Game
from cfbd.models.season_type import SeasonType

from cfbd.models.coach import Coach

from collections import defaultdict

from dataclasses import dataclass, asdict

from datetime import datetime, timedelta, timezone
from pydantic import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr

from cfbd import GameLine, BettingGame, GameWeather

from cfbd.models.venue import Venue

from pyzipcode import ZipCodeDatabase


# Constants

In [None]:
# Update This every week to pull the latest data
CURR_WEEK = 6
print(CURR_WEEK)

# Conference names that get their own indicator column on a game record.
# Any conference outside this set is flagged as FCS.
CONFERENCES = {
    "ACC",
    "American Athletic",
    "Big 12",
    "Big Ten",
    "Conference USA",
    "FBS Independents",
    "Mid-American",
    "Mountain West",
    "Pac-12",
    "SEC",
    "Sun Belt",
}

# Class Definitions

In [None]:
@dataclass
class GameRecord:
    """One flattened row of the dataset: a game plus coach, line and weather data.

    Every field has a default so a record can be created empty and filled in
    attribute by attribute. Field order is the key order of ``to_dict()``.
    """

    # --- Game ---
    id: int = None
    season: int = None
    week: int = None
    neutral: int = None
    conference_game: int = None
    venue: str = None
    venue_id: int = None

    # --- Home team (Game) ---
    home_team: str = None
    home_team_id: int = None
    home_conference: str = None
    home_points: int = None
    home_in_acc: int = None
    home_in_aac: int = None
    home_in_big12: int = None
    home_in_big10: int = None
    home_in_cusa: int = None
    home_independent: int = None
    home_in_mac: int = None
    home_in_mwc: int = None
    home_in_pac12: int = None
    home_in_sec: int = None
    home_in_sunbelt: int = None
    home_fcs: int = None
    home_time_change: int = None

    # --- Away team (Game) ---
    away_team: str = None
    away_team_id: int = None
    away_conference: str = None
    away_points: int = None
    away_in_acc: int = None
    away_in_aac: int = None
    away_in_big12: int = None
    away_in_big10: int = None
    away_in_cusa: int = None
    away_independent: int = None
    away_in_mac: int = None
    away_in_mwc: int = None
    away_in_pac12: int = None
    away_in_sec: int = None
    away_in_sunbelt: int = None
    away_fcs: int = None
    away_time_change: int = None

    # --- Coaches (derived) ---
    # Coach name and per-coach tenure columns are intentionally not stored;
    # only the interim flags and the tenure difference are kept.
    home_coach_interim: int = None
    away_coach_interim: int = None
    tenure_delta: int = None  # Home coach tenure - away coach tenure

    # --- Betting line (GameLine) ---
    spread: float = None
    spread_open: float = None
    home_favorite: int = None

    # --- Weather (GameWeather) ---
    temperature: float = None
    dew_point: float = None
    humidity: float = None
    precipitation: float = None
    snowfall: float = None
    wind_speed: float = None
    weather_condition: int = None
    weather_condition_str: str = None

    # Created and set to whether the game is included in the pick em pool
    of_interest: int = 0
    # A derived "result" column is intentionally not stored.
    covered: int = None
    start_date: datetime = None  # Game

    # --- Elo (Game); the replaced_* flags are computed ---
    home_pregame_elo: int = None
    replaced_home_elo: int = 0
    away_pregame_elo: int = None
    replaced_away_elo: int = 0

    # --- More betting line fields (GameLine) ---
    over_under: float = None
    over_under_open: float = None
    home_moneyline: float = None
    away_moneyline: float = None

    # --- More weather fields (GameWeather) ---
    game_indoors: int = None
    wind_dir: float = None
    atm_pressure: float = None

    def to_dict(self):
        """Return the record as a plain dict keyed by field name."""
        return asdict(self)

**CoachLite**

Store a small version of the coach object that contains some computed fields to handle cases where coaches were fired midseason.

In [None]:
@dataclass
class CoachLite:
    """Compact coach record with the derived fields used for mid-season changes.

    Defaults are declared on the fields, so the dataclass-generated
    ``__init__`` accepts the same positional and keyword arguments as the
    previous hand-written constructor.
    """

    name: str = ""
    tenure: int = 0
    fired: bool = False
    interim: bool = False
    # None until a coaching change is recorded for this coach.
    last_week_coached: int = None
    season: int = None
    # -1 means "not provided".
    season_games_coached: int = -1

    def to_dict(self):
        """Return the coach as a plain dict keyed by field name."""
        return asdict(self)

**VenueLite**

Store a small version of the venue that stores the UTC offset as the timezone. The default value is -5 (i.e. Eastern Time) because of the concentration of colleges on the East Coast. Also store the zip code to allow for replacing missing weather data with weather data for nearby games.

In [None]:
@dataclass
class VenueLite:
    """Compact venue record: identity, location, zip code and UTC offset.

    Every field has a default, so ``VenueLite()`` is valid. That is required
    for ``defaultdict(VenueLite)`` to be able to create an entry for an
    unknown venue id. ``timezone`` defaults to -5.
    """

    id: int = 0
    name: str = ""
    city: str = ""
    state: str = ""
    zip: str = ""
    timezone: int = -5

    def to_dict(self):
        """Return the venue as a plain dict keyed by field name."""
        return asdict(self)

# Utility

API configurations

In [None]:
# Free API key available at collegefootballdata.com
# The key is read interactively rather than hard-coded in the notebook.
API_KEY = getpass("Enter API Key")
# Bearer-token header built from the key; not referenced by the cfbd client calls in this notebook section.
HEADERS = {"Authorization": f"Bearer {API_KEY}"}

# Client configuration passed to every `cfbd.ApiClient(configuration)` below.
configuration = cfbd.Configuration(
    access_token = API_KEY
)

In [None]:
# Module-level lookup tables shared by the cells below.
# NOTE: teams_dict, coaches_dict, weather_dict and venues_dict are re-created
# by later cells before they are filled.
# Store all the teams
teams_dict = defaultdict(dict)
# Not used right now
games_dict = defaultdict(dict)
# Store all the coaches with key as a tuple (Team, Year)
coaches_dict = defaultdict(list)
# Store the weather where the key is the game ID
weather_dict = defaultdict(dict)
# Store the lines
lines_dict = defaultdict(GameLine)
# Store the venues dictionary where the key is the venue ID
venues_dict = defaultdict(VenueLite)
# Store venues with zip code as a key. Use this to replace weather with weather at nearby games when possible
venue_zip_dict = {}
# Use the zip code database to normalize the representation of game locations
zcdb = ZipCodeDatabase()

In [None]:
def success_msg(year, type):
    """Print ``"<year> <type> retrieved successfully"``.

    ``year`` is converted with ``str``; ``type`` must already be a string.
    """
    parts = (str(year), type, "retrieved successfully")
    print(" ".join(parts))

# Retrieve Data

# Games

**Get 2025 Games**

In [None]:
# Fetch the 2025 regular-season games into `games_2025`.
# NOTE(review): on failure only the message is printed and `games_2025` is not
# (re)assigned, so later cells that read it fail or see a stale value.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.GamesApi(api_client)
    year = 2025

    try:
        games_2025 = api_instance.get_games(year=year, season_type="regular")
        success_msg(year, "games")
    except Exception as e:
        print("Exception when calling GamesApi->get_games: %s\n" % e)

**Get 2025 Subset**

Get the games that have already occurred in 2025, as well as the games for the current week

In [None]:
def get_curr_year_subset(games: list[Game], from_week: int):
    """Return the games whose week is between ``from_week`` and ``CURR_WEEK``, inclusive."""
    selected = []
    for game in games:
        # Skip anything after the current week or before the requested start.
        if game.week > CURR_WEEK or game.week < from_week:
            continue
        selected.append(game)
    return selected

In [None]:
# Keep every 2025 game from week 0 up to and including CURR_WEEK.
games_2025_subset = get_curr_year_subset(games_2025, 0)

**Set "Of Interest" Games**

The concept is to set a special flag for games that you're most interested in checking against results. The below code is just an example of looking for the games I was interested in during week 4.

In [None]:
# From week 4, to update process
# Home teams of the games to track; matched against the current week only.
home_teams = {
    "Rutgers", "Clemson", "Wisconsin", "TCU", "Utah", "Nebraska",
    "Oklahoma", "UCF", "Duke", "Kansas", "Miami", "Indiana",
    "Washington State", "Baylor", "USC",
}
bet_games = [
    candidate
    for candidate in games_2025_subset
    if candidate.home_team in home_teams and candidate.week == CURR_WEEK
]
for game in bet_games:
    # One field per line, followed by a blank separator line.
    print(game.id, game.home_team, game.away_team, "", sep="\n")

**Get 2024 Games**

In [None]:
# Fetch the 2024 regular-season games into `games_2024`.
# NOTE(review): on failure only the message is printed and `games_2024` is not (re)assigned.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.GamesApi(api_client)
    year = 2024

    try:
        games_2024 = api_instance.get_games(year=year, season_type="regular")
        success_msg(year, "games")
    except Exception as e:
        print("Exception when calling GamesApi->get_games: %s\n" % e)

**Get 2023 Games**

In [None]:
# Fetch the 2023 regular-season games into `games_2023`.
# NOTE(review): on failure only the message is printed and `games_2023` is not (re)assigned.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.GamesApi(api_client)
    year = 2023

    try:
        games_2023 = api_instance.get_games(year=year, season_type="regular")
        success_msg(year, "games")
    except Exception as e:
        print("Exception when calling GamesApi->get_games: %s\n" % e)

**2022 Games**

In [None]:
# Fetch the 2022 regular-season games into `games_2022`.
# NOTE(review): on failure only the message is printed and `games_2022` is not (re)assigned.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.GamesApi(api_client)
    year = 2022

    try:
        games_2022 = api_instance.get_games(year=year, season_type="regular")
        success_msg(year, "games")
    except Exception as e:
        print("Exception when calling GamesApi->get_games: %s\n" % e)

**2021 Games**

In [None]:
# Fetch the 2021 regular-season games into `games_2021`.
# NOTE(review): on failure only the message is printed and `games_2021` is not (re)assigned.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.GamesApi(api_client)
    year = 2021

    try:
        games_2021 = api_instance.get_games(year=year, season_type="regular")
        success_msg(year, "games")
    except Exception as e:
        print("Exception when calling GamesApi->get_games: %s\n" % e)

**2020 Games**

In [None]:
# Fetch the 2020 regular-season games into `games_2020`.
# NOTE(review): on failure only the message is printed and `games_2020` is not (re)assigned.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.GamesApi(api_client)
    year = 2020

    try:
        games_2020 = api_instance.get_games(year=year, season_type="regular")
        success_msg(year, "games")
    except Exception as e:
        print("Exception when calling GamesApi->get_games: %s\n" % e)

# Coaches

**Get 2025 Coaches**

In [None]:
# Fetch the coaches for the 2025 season into `coaches_2025`.
# NOTE(review): on failure only the message is printed and `coaches_2025` is not (re)assigned.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.CoachesApi(api_client)
    year = 2025

    try:
        coaches_2025 = api_instance.get_coaches(year=year)
        success_msg(year, "coaches")
    except Exception as e:
        print("Exception when calling CoachesApi->get_coaches: %s\n" % e)

**Get 2024 Coaches**

In [None]:
# Fetch the coaches for the 2024 season into `coaches_2024`.
# NOTE(review): on failure only the message is printed and `coaches_2024` is not (re)assigned.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.CoachesApi(api_client)
    year = 2024

    try:
        coaches_2024 = api_instance.get_coaches(year=year)
        success_msg(year, "coaches")
    except Exception as e:
        print("Exception when calling CoachesApi->get_coaches: %s\n" % e)

**2023 Coaches**

In [None]:

# Fetch the coaches for the 2023 season into `coaches_2023`.
# NOTE(review): on failure only the message is printed and `coaches_2023` is not (re)assigned.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.CoachesApi(api_client)
    year = 2023

    try:
        coaches_2023 = api_instance.get_coaches(year=year)
        success_msg(year, "coaches")
    except Exception as e:
        print("Exception when calling CoachesApi->get_coaches: %s\n" % e)

**2022 Coaches**


In [None]:
# Fetch the coaches for the 2022 season into `coaches_2022`.
# NOTE(review): on failure only the message is printed and `coaches_2022` is not (re)assigned.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.CoachesApi(api_client)
    year = 2022

    try:
        coaches_2022 = api_instance.get_coaches(year=year)
        success_msg(year, "coaches")
    except Exception as e:
        print("Exception when calling CoachesApi->get_coaches: %s\n" % e)

**2021 Coaches**

In [None]:
# Fetch the coaches for the 2021 season into `coaches_2021`.
# NOTE(review): on failure only the message is printed and `coaches_2021` is not (re)assigned.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.CoachesApi(api_client)
    year = 2021

    try:
        coaches_2021 = api_instance.get_coaches(year=year)
        success_msg(year, "coaches")
    except Exception as e:
        print("Exception when calling CoachesApi->get_coaches: %s\n" % e)

**2020 Coaches**

In [None]:
# Fetch the coaches for the 2020 season into `coaches_2020`.
# NOTE(review): on failure only the message is printed and `coaches_2020` is not (re)assigned.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.CoachesApi(api_client)
    year = 2020

    try:
        coaches_2020 = api_instance.get_coaches(year=year)
        success_msg(year, "coaches")
    except Exception as e:
        print("Exception when calling CoachesApi->get_coaches: %s\n" % e)

**Build Coaches Map**

Function to create the map of coaches that accounts for mid-year coaching changes. The assumption is that there will only be 1 interim coach per season.

In [None]:
def update_coaches_dict(coaches: list[Coach], curr_year: int, games_response: list[Game]):
    """Add one season's coaches to ``coaches_dict`` keyed by ``(school, curr_year)``.

    A school's first coach is stored as a one-element list. When a second,
    differently named coach appears for the same school and season, the entry
    becomes ``[fired, interim]`` and the fired coach gets ``fired = True`` and
    ``last_week_coached`` set from the school's schedule.

    Args:
        coaches: Coach objects for ``curr_year``; only ``seasons[0]`` is read.
        curr_year: Season being processed.
        games_response: Games used to map "games coached" to a week number.
    """

    def calculate_tenure(hire_date, interim):
        # Calculate tenure to be the number of full seasons a coach has been HC
        if interim:
            return 0
        # A hire after August does not count the hiring year as a season.
        reduce_tenure = hire_date.month > 8
        return curr_year - (hire_date.year + reduce_tenure)

    def record_coaching_change(school, coach_to_add):
        key = (school, curr_year)
        try:
            existing_coach = coaches_dict[key][0]
            # To handle a weird case in 2021 data with Utah St
            if coach_to_add.name == existing_coach.name:
                return
            if coach_to_add.interim:
                interim, fired = coach_to_add, existing_coach
            else:
                interim, fired = existing_coach, coach_to_add
            fired.fired = True
            team_weeks = sorted(
                game.week
                for game in games_response
                if school in (game.home_team, game.away_team)
            )
            # Set the last week the fired coach was HC for proper game record data
            fired.last_week_coached = team_weeks[fired.season_games_coached - 1]
            coaches_dict[key] = [fired, interim]
        except Exception as exc:
            # Best effort: report the school and the cause, keep the existing
            # entry, and continue. KeyboardInterrupt/SystemExit are not caught.
            print(f"Exception occurred in {school}, {curr_year}: {exc!r}")

    for coach in coaches:
        season = coach.seasons[0]
        school = season.school
        name = " ".join([coach.first_name, coach.last_name])
        # A coach without a hire date is treated as an interim.
        interim = coach.hire_date is None
        coach_to_add = CoachLite(
            name=name,
            season=curr_year,
            interim=interim,
            tenure=calculate_tenure(coach.hire_date, interim),
            season_games_coached=season.games,
        )
        if (school, curr_year) in coaches_dict:
            record_coaching_change(school, coach_to_add)
        else:
            coaches_dict[(school, curr_year)].append(coach_to_add)

In [None]:
# Rebuild the coaches map from scratch so re-running this cell is repeatable.
coaches_dict = defaultdict(list)
coaches = [coaches_2025, coaches_2024, coaches_2023, coaches_2022, coaches_2021, coaches_2020]
years = [2025, 2024, 2023, 2022, 2021, 2020]
games = [games_2025_subset, games_2024, games_2023, games_2022, games_2021, games_2020]

# Process every listed season. Previously only the first five entries were
# processed, so 2020 games fell back to a default coach.
for coaches_for_year, season_year, games_for_year in zip(coaches, years, games):
    update_coaches_dict(coaches_for_year, season_year, games_for_year)


# Lines

**Get Lines 2025**

In [None]:
# Fetch the 2025 regular-season betting lines into `lines_2025`.
# NOTE(review): on failure only the message is printed and `lines_2025` is not (re)assigned.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.BettingApi(api_client)
    season_type = "regular"
    year = 2025

    try:
        lines_2025 = api_instance.get_lines(year=year, season_type=season_type)
        success_msg(year, "lines")
    except Exception as e:
        print("Exception when calling BettingApi->get_lines: %s\n" % e)

**Get Lines 2024**

In [None]:
# Fetch the 2024 regular-season betting lines into `lines_2024`.
# NOTE(review): on failure only the message is printed and `lines_2024` is not (re)assigned.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.BettingApi(api_client)
    season_type = "regular"
    year = 2024

    try:
        lines_2024 = api_instance.get_lines(year=year, season_type=season_type)
        success_msg(year, "lines")
    except Exception as e:
        print("Exception when calling BettingApi->get_lines: %s\n" % e)

**Get Lines 2023**

In [None]:
# Fetch the 2023 regular-season betting lines into `lines_2023`.
# NOTE(review): on failure only the message is printed and `lines_2023` is not (re)assigned.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.BettingApi(api_client)
    year = 2023
    season_type = "regular"

    try:
        lines_2023 = api_instance.get_lines(year=year, season_type=season_type)
        success_msg(year, "lines")
    except Exception as e:
        print("Exception when calling BettingApi->get_lines: %s\n" % e)

**Get Lines 2022**

In [None]:
# Fetch the 2022 regular-season betting lines into `lines_2022`.
# NOTE(review): on failure only the message is printed and `lines_2022` is not (re)assigned.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.BettingApi(api_client)
    year = 2022
    season_type = "regular"
    try:
        lines_2022 = api_instance.get_lines(year=year, season_type=season_type)
        success_msg(year, "lines")
    except Exception as e:
        print("Exception when calling BettingApi->get_lines: %s\n" % e)

**Get Lines 2021**


In [None]:
# Fetch the 2021 regular-season betting lines into `lines_2021`.
# NOTE(review): on failure only the message is printed and `lines_2021` is not (re)assigned.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.BettingApi(api_client)
    year = 2021
    season_type = "regular"

    try:
        lines_2021 = api_instance.get_lines(year=year, season_type=season_type)
        success_msg(year, "lines")
    except Exception as e:
        print("Exception when calling BettingApi->get_lines: %s\n" % e)

**Get Lines 2020**

In [None]:
# Fetch the 2020 regular-season betting lines into `lines_2020`.
# NOTE(review): on failure only the message is printed and `lines_2020` is not (re)assigned.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.BettingApi(api_client)
    year = 2020
    season_type = "regular"

    try:
        lines_2020 = api_instance.get_lines(year=year, season_type=season_type)
        success_msg(year, "lines")
    except Exception as e:
        print("Exception when calling BettingApi->get_lines: %s\n" % e)

**Build Lines Map**

The lines available were not consistent across years. The priorities are the mainstays: 1) consensus, 2) Bovada

If those two aren't available, we'll take the first line in the list.

In [None]:
# Providers to prefer, most preferred first.
LINE_PREFERENCE = ["consensus", "Bovada"]


def update_line_dict(betting_games: list[BettingGame]):
    """Store one book line per game in ``lines_dict``, keyed by game id.

    The line comes from the first provider in ``LINE_PREFERENCE`` that quoted
    the game; if none of them did, the first line in the list is used. Games
    with no lines are skipped.
    """
    for game in betting_games:
        if not game.lines:
            continue
        # First line seen for each provider.
        first_by_provider = {}
        for book_line in game.lines:
            first_by_provider.setdefault(book_line.provider, book_line)
        book_line_to_use = game.lines[0]
        for provider in LINE_PREFERENCE:
            if provider in first_by_provider:
                book_line_to_use = first_by_provider[provider]
                break
        lines_dict[game.id] = book_line_to_use


In [None]:
# Fold every season's betting games into lines_dict, oldest season first.
all_lines = [lines_2020, lines_2021, lines_2022, lines_2023, lines_2024, lines_2025]
for lines_for_year in all_lines:
  update_line_dict(lines_for_year)


# Venues

In [None]:
# Fetch every venue into `venues_all`.
# NOTE(review): on failure only the message is printed and `venues_all` is not (re)assigned.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.VenuesApi(api_client)

    try:
        venues_all = api_instance.get_venues()
        success_msg("all", "venues")
    except Exception as e:
        print("Exception when calling VenuesApi->get_venues: %s\n" % e)

**Venues Dict**

Map venue IDs to full venue details

In [None]:
# Ids of venues whose reported zip code could not be resolved in zcdb.
error_set = []
venues_dict = defaultdict(VenueLite)
for venue in venues_all:
    curr = VenueLite(id=venue.id, name=venue.name, city=venue.city, state=venue.state)
    found_zip = False
    if venue.zip:
        # Preferred source: the venue's own zip code.
        try:
            lookup = zcdb[venue.zip]
            curr.timezone = lookup.timezone
            curr.zip = lookup.zip
            found_zip = True
        except Exception:
            # Unknown zip code: record it and fall back to the city/state
            # search. KeyboardInterrupt/SystemExit are no longer swallowed.
            error_set.append(venue.id)
    if not found_zip:
        # Fallback: first zip code the database returns for the city and state.
        zc = zcdb.find_zip(city=curr.city, state=curr.state)
        if zc:
            to_use = zc[0]
            curr.zip = to_use.zip
            curr.timezone = to_use.timezone
        else:
            # Nothing found: keep an empty zip and use a UTC offset of -5.
            curr.timezone = -5
    venues_dict[venue.id] = curr

**Venues Dict by Zip Code**

Look up venues by zip code to allow for replacing weather details with weather details from nearby games.

In [None]:
# Venues dict by zip code
# Group the venues by zip code; venues without a zip share the "" key.
for venue in venues_dict.values():
    venue_zip_dict.setdefault(venue.zip, []).append(venue)

# Weather

**Get Weather 2025**

In [None]:
# Pull the 2025 regular-season weather observations into `weather_2025`.
with cfbd.ApiClient(configuration) as api_client:
    weather_api = cfbd.GamesApi(api_client)
    year, season_type = 2025, "regular"
    try:
        weather_2025 = weather_api.get_weather(season_type=season_type, year=year)
        success_msg(year, "weather")
    except Exception as e:
        print("Exception when calling GamesApi->get_weather: %s\n" % e)

**Get Weather 2024**

In [None]:
# Pull the 2024 regular-season weather observations into `weather_2024`.
with cfbd.ApiClient(configuration) as api_client:
    weather_api = cfbd.GamesApi(api_client)
    year, season_type = 2024, "regular"
    try:
        weather_2024 = weather_api.get_weather(season_type=season_type, year=year)
        success_msg(year, "weather")
    except Exception as e:
        print("Exception when calling GamesApi->get_weather: %s\n" % e)

**Weather 2023**

In [None]:
# Pull the 2023 regular-season FBS weather observations into `weather_2023`.
with cfbd.ApiClient(configuration) as api_client:
    weather_api = cfbd.GamesApi(api_client)
    year, season_type = 2023, "regular"
    try:
        weather_2023 = weather_api.get_weather(classification="fbs", season_type=season_type, year=year)
        success_msg(year, "weather")
    except Exception as e:
        print("Exception when calling GamesApi->get_weather: %s\n" % e)

**Weather 2022**

In [None]:
# Pull the 2022 regular-season FBS weather observations into `weather_2022`.
with cfbd.ApiClient(configuration) as api_client:
    weather_api = cfbd.GamesApi(api_client)
    year, season_type = 2022, "regular"
    try:
        weather_2022 = weather_api.get_weather(classification="fbs", season_type=season_type, year=year)
        success_msg(year, "weather")
    except Exception as e:
        print("Exception when calling GamesApi->get_weather: %s\n" % e)

**Weather 2021**

In [None]:
# Pull the 2021 regular-season FBS weather observations into `weather_2021`.
with cfbd.ApiClient(configuration) as api_client:
    weather_api = cfbd.GamesApi(api_client)
    year, season_type = 2021, "regular"
    try:
        weather_2021 = weather_api.get_weather(classification="fbs", season_type=season_type, year=year)
        success_msg(year, "weather")
    except Exception as e:
        print("Exception when calling GamesApi->get_weather: %s\n" % e)

**Weather 2020**

In [None]:
# Pull the 2020 regular-season FBS weather observations into `weather_2020`.
with cfbd.ApiClient(configuration) as api_client:
    weather_api = cfbd.GamesApi(api_client)
    year, season_type = 2020, "regular"
    try:
        weather_2020 = weather_api.get_weather(classification="fbs", season_type=season_type, year=year)
        success_msg(year, "weather")
    except Exception as e:
        print("Exception when calling GamesApi->get_weather: %s\n" % e)

In [None]:
def update_weather_map(weather_observations: list[GameWeather]):
    """Index each observation in ``weather_dict`` under its ``id``; later entries overwrite earlier ones."""
    weather_dict.update(
        (observation.id, observation) for observation in weather_observations
    )

In [None]:
# Rebuild the weather map from scratch, newest season first.
weather_dict = defaultdict(GameWeather)
weather_years = [weather_2025, weather_2024, weather_2023, weather_2022, weather_2021, weather_2020]
# The loop variable is not called `year`, so the notebook-level `year` integer
# is no longer rebound to a list of observations.
for weather_for_year in weather_years:
    update_weather_map(weather_for_year)


In [None]:
# Replace Weather with a nearby game
def replace_weather(venue: VenueLite, week: int, start_date: datetime, games_list: list[Game]):
    """Return weather from a same-day game at a nearby venue, or ``None``.

    Zip codes within 5, 15, 30 and then 50 (the radius unit is whatever
    ``zcdb.get_zipcodes_around_radius`` uses) of ``venue.zip`` are searched in
    that order. The first game at a different venue on the same calendar day
    that has an entry in ``weather_dict`` supplies the result.

    Args:
        venue: Venue of the game that lacks weather.
        week: Unused; kept so existing keyword callers keep working.
        start_date: Kickoff of the game that lacks weather.
        games_list: Games to search for same-day neighbours.
    """
    # Without a zip code there is nothing to search around.
    if not venue.zip:
        return None

    # Index the ids of games played on the target day by venue, once.
    target_day = start_date.date()
    same_day_games = {}
    for game in games_list:
        if game.start_date is not None and game.start_date.date() == target_day:
            same_day_games.setdefault(game.venue_id, []).append(game.id)

    for radius in (5, 15, 30, 50):
        nearby_zips = zcdb.get_zipcodes_around_radius(venue.zip, radius) or []
        for nearby_zip in nearby_zips:
            for close_venue in venue_zip_dict.get(nearby_zip.zip, []):
                if close_venue.id == venue.id:
                    continue
                for game_id in same_day_games.get(close_venue.id, []):
                    # Membership test first: indexing the defaultdict with a
                    # missing id would try to build a default observation.
                    if game_id in weather_dict:
                        return weather_dict[game_id]

    return None

# Get Teams

Retrieve all teams each season

**2025**

In [None]:
# Fetch every team for the 2025 season into `all_teams_2025`.
with cfbd.ApiClient(configuration) as api_client:
    # Create an instance of the API class
    api_instance = cfbd.TeamsApi(api_client)
    year = 2025 # int | Year or season (optional)

    try:
        all_teams_2025 = api_instance.get_teams(year=year)
        success_msg(year, "all teams")
    except Exception as e:
        # Name the method that is actually called (get_teams).
        print("Exception when calling TeamsApi->get_teams: %s\n" % e)

**2024**

In [None]:
# Fetch every team for the 2024 season into `all_teams_2024`.
with cfbd.ApiClient(configuration) as api_client:
    # Create an instance of the API class
    api_instance = cfbd.TeamsApi(api_client)
    year = 2024 # int | Year or season (optional)

    try:
        all_teams_2024 = api_instance.get_teams(year=year)
        success_msg(year, "all teams")
    except Exception as e:
        # Name the method that is actually called (get_teams).
        print("Exception when calling TeamsApi->get_teams: %s\n" % e)

**2023**

In [None]:
# Fetch every team for the 2023 season into `all_teams_2023`.
with cfbd.ApiClient(configuration) as api_client:
    # Create an instance of the API class
    api_instance = cfbd.TeamsApi(api_client)
    year = 2023 # int | Year or season (optional)

    try:
        all_teams_2023 = api_instance.get_teams(year=year)
        success_msg(year, "all teams")
    except Exception as e:
        # Name the method that is actually called (get_teams).
        print("Exception when calling TeamsApi->get_teams: %s\n" % e)

**2022**

In [None]:
# Fetch every team for the 2022 season into `all_teams_2022`.
with cfbd.ApiClient(configuration) as api_client:
    # Create an instance of the API class
    api_instance = cfbd.TeamsApi(api_client)
    year = 2022 # int | Year or season (optional)

    try:
        all_teams_2022 = api_instance.get_teams(year=year)
        success_msg(year, "all teams")
    except Exception as e:
        # Name the method that is actually called (get_teams).
        print("Exception when calling TeamsApi->get_teams: %s\n" % e)

**2021**

In [None]:
# Fetch every team for the 2021 season into `all_teams_2021`.
with cfbd.ApiClient(configuration) as api_client:
    # Create an instance of the API class
    api_instance = cfbd.TeamsApi(api_client)
    year = 2021 # int | Year or season (optional)

    try:
        all_teams_2021 = api_instance.get_teams(year=year)
        success_msg(year, "all teams")
    except Exception as e:
        # Name the method that is actually called (get_teams).
        print("Exception when calling TeamsApi->get_teams: %s\n" % e)

**2020**

In [None]:
# Fetch every team for the 2020 season into `all_teams_2020`.
with cfbd.ApiClient(configuration) as api_client:
    # Create an instance of the API class
    api_instance = cfbd.TeamsApi(api_client)
    year = 2020 # int | Year or season (optional)

    try:
        all_teams_2020 = api_instance.get_teams(year=year)
        success_msg(year, "all teams")
    except Exception as e:
        # Name the method that is actually called (get_teams).
        print("Exception when calling TeamsApi->get_teams: %s\n" % e)

**Teams Lite Class**

In [None]:
class TeamLite:
    """Compact team record: id, school name and UTC offset.

    The values are class-level defaults; callers overwrite them on the instance.
    """

    id: int = -1
    name: str = ""
    timezone: int = 0

    def to_str(self):
        """Return the three fields as ``"id: ..., name: ..., timezone: ..."``."""
        labelled = (("id", self.id), ("name", self.name), ("timezone", self.timezone))
        return ", ".join(f"{label}: {value}" for label, value in labelled)

**Build TeamLite**

In [None]:
def build_team_lite(team: Team):
    """Build a ``TeamLite`` from ``team``, taking the timezone from its home venue.

    The timezone falls back to -5 when the team has no location, the location
    has no id, or the venue id is not in ``venues_dict``.
    """
    curr = TeamLite()
    curr.id = team.id
    curr.name = team.school

    home_venue = team.location
    venue_id = getattr(home_venue, "id", None)
    # .get() avoids creating a default entry in the defaultdict for unknown ids.
    known_venue = venues_dict.get(venue_id) if venue_id is not None else None
    curr.timezone = known_venue.timezone if known_venue is not None else -5

    return curr

**Build Teams Dict**

In [None]:
# Team id -> TeamLite; an unknown id yields a default TeamLite (timezone 0).
teams_dict = defaultdict(TeamLite)

In [None]:
# Seasons are concatenated newest first, so for a team id present in several
# seasons the entry built from the oldest season is the one that is kept.
all_teams = all_teams_2025 + all_teams_2024 + all_teams_2023 + all_teams_2022 + all_teams_2021 + all_teams_2020
for team in all_teams:
    curr = build_team_lite(team)
    teams_dict[team.id] = curr

# Build the full dataset

In [None]:
# Successfully built GameRecord rows.
game_records_dataset = []
# Games whose record construction raised an exception.
problem_set = []

In [None]:
# GameRecord column suffix -> conference name as reported on the game.
_CONFERENCE_FLAG_NAMES = {
    "in_acc": "ACC",
    "in_aac": "American Athletic",
    "in_big12": "Big 12",
    "in_big10": "Big Ten",
    "in_cusa": "Conference USA",
    "independent": "FBS Independents",
    "in_mac": "Mid-American",
    "in_mwc": "Mountain West",
    "in_pac12": "Pac-12",
    "in_sec": "SEC",
    "in_sunbelt": "Sun Belt",
}


def _set_conference_flags(record, side, conference):
    """Set the ``<side>_*`` conference indicator columns and ``<side>_fcs``."""
    for suffix, conference_name in _CONFERENCE_FLAG_NAMES.items():
        setattr(record, f"{side}_{suffix}", int(conference == conference_name))
    setattr(record, f"{side}_fcs", int(conference not in CONFERENCES))


def _coach_for_week(coaches_to_consider, week):
    """Pick the coach in charge for ``week`` from a ``coaches_dict`` entry."""
    if len(coaches_to_consider) == 1:
        return coaches_to_consider[0]
    if not coaches_to_consider:
        # No coach on record for this team and season.
        return CoachLite()
    # Two-coach entries are stored as [fired, interim].
    fired, interim = coaches_to_consider[0], coaches_to_consider[1]
    return interim if week > fired.last_week_coached else fired


def _favorite_covered(game, spread):
    """Return whether the favorite beat the spread, or ``None`` without a final score."""
    if game.home_points is None or game.away_points is None:
        return None
    point_delta = game.away_points - game.home_points
    if spread < 0:
        # Home team favored: covered if it won by more than the spread.
        return point_delta < spread
    # Away team favored (or pick'em): covered if it won by more than the spread.
    return point_delta > spread


def _copy_weather(record, weather, game_indoors):
    """Copy the weather observation fields onto ``record``."""
    record.game_indoors = game_indoors
    record.temperature = weather.temperature
    record.dew_point = weather.dew_point
    record.humidity = weather.humidity
    record.precipitation = weather.precipitation
    record.snowfall = weather.snowfall
    record.wind_dir = weather.wind_direction
    record.wind_speed = weather.wind_speed
    record.atm_pressure = weather.pressure
    record.weather_condition = weather.weather_condition_code
    record.weather_condition_str = weather.weather_condition


def update_game_records(games: list[Game]):
    """Build a ``GameRecord`` per game and append it to ``game_records_dataset``.

    Games without a line, or whose line has no spread, are skipped. A game
    whose construction raises is printed and appended to ``problem_set``.

    Args:
        games: Games to convert; also the pool searched for nearby same-day
            games when a game has no weather observation of its own.
    """
    for game in games:
        try:
            curr = GameRecord()
            curr.id = game.id
            # Fields that will be used throughout construction
            game_id = game.id
            year = game.season
            week = game.week
            game_venue = venues_dict[game.venue_id]
            home_timezone = teams_dict[game.home_id].timezone
            away_timezone = teams_dict[game.away_id].timezone
            if game_id not in lines_dict or lines_dict[game_id].spread is None:
                continue

            # Fields from the Game response
            curr.season = year
            curr.week = week
            curr.start_date = game.start_date
            curr.neutral = int(game.neutral_site)
            curr.conference_game = int(game.conference_game)
            curr.venue = game.venue
            curr.venue_id = game.venue_id

            curr.home_team = game.home_team
            curr.home_team_id = game.home_id
            curr.home_conference = game.home_conference
            curr.home_points = game.home_points
            _set_conference_flags(curr, "home", game.home_conference)
            # Subtract the timezone of the home team from the timezone of the home venue.
            # E.g. If Ohio State were playing a neutral game in Dallas, the value would be -1
            curr.home_time_change = (game_venue.timezone - home_timezone) if curr.neutral else 0
            curr.home_pregame_elo = game.home_pregame_elo

            curr.away_team = game.away_team
            curr.away_team_id = game.away_id
            curr.away_conference = game.away_conference
            curr.away_points = game.away_points
            _set_conference_flags(curr, "away", game.away_conference)
            # E.g. Oregon @ Ohio State would give a value of 3
            curr.away_time_change = game_venue.timezone - away_timezone
            curr.away_pregame_elo = game.away_pregame_elo

            # Fields from the coaches dict
            home_coach = _coach_for_week(coaches_dict[(game.home_team, year)], week)
            away_coach = _coach_for_week(coaches_dict[(game.away_team, year)], week)
            curr.tenure_delta = home_coach.tenure - away_coach.tenure
            curr.home_coach_interim = int(home_coach.interim)
            curr.away_coach_interim = int(away_coach.interim)

            # Fields from the lines dict
            line = lines_dict[game_id]
            spread = line.spread
            curr.spread = spread
            curr.spread_open = line.spread_open
            curr.home_favorite = int(spread < 0)
            curr.over_under = line.over_under
            curr.over_under_open = line.over_under_open
            curr.home_moneyline = line.home_moneyline
            curr.away_moneyline = line.away_moneyline

            # Fields from the weather dict
            if game_id in weather_dict:
                game_weather = weather_dict[game_id]
                _copy_weather(curr, game_weather, int(game_weather.game_indoors))
            else:
                new_weather = replace_weather(venue=game_venue, week=week,
                                              start_date=game.start_date, games_list=games)
                if new_weather is not None:
                    # Assume game indoors to be false, we can't glean that from the weather at a nearby game
                    # Every field, including the condition string, comes from
                    # the nearby game's observation.
                    _copy_weather(curr, new_weather, 0)

            covered_spread = _favorite_covered(game, spread)
            curr.covered = int(covered_spread) if covered_spread is not None else None

            game_records_dataset.append(curr)
        except Exception as e:
            print(e)
            problem_set.append(game)

In [None]:
# Every fetched game, newest season first (2025 limited to weeks up to CURR_WEEK).
all_games = games_2025_subset + games_2024 + games_2023 + games_2022 + games_2021 + games_2020
# Keep only games whose home team is classified as FBS.
all_fbs_games = [game for game in all_games if game.home_classification == "fbs"]
# len(all_fbs_games)

In [None]:
# Reset both result lists so re-running the cell does not duplicate rows.
game_records_dataset = []
problem_set = []
update_game_records(all_fbs_games)

In [None]:
# Sanity Checks
# Number of rows built vs. number of games that raised during construction.
print(len(game_records_dataset), len(problem_set))

# List the season and week of each game that failed.
for game in problem_set:
    print(game.season, game.week)

# Post Processing Updates

In [None]:
from collections import Counter

Replace missing Elo values, then drop any rows that still lack a home or away Elo rating. The rows that remain unfilled appear to be mostly week 1 games against FCS opponents.

In [None]:
def replace_missing_elo(game):
    """Fill in a missing home and/or away pregame Elo on ``game`` in place.

    Replacement values are looked up in the module-level
    ``game_records_dataset``:

    * Week 1: the mean pregame Elo of all other week-1 appearances by teams
      from the same conference in the same season. At least six known values
      are required; otherwise no replacement is made.
    * Any later week: the pregame Elo the team carried into its game in the
      immediately preceding week of the same season. If the team has no such
      game (for example after a bye), no replacement is made.

    ``replaced_home_elo`` / ``replaced_away_elo`` is set to 1 only when a
    replacement value was actually found, so the flag never marks a rating
    that is still missing.

    Args:
        game: Record to update. Must expose ``id``, ``season``, ``week``,
            ``home_team``, ``away_team``, ``home_conference``,
            ``away_conference``, ``home_pregame_elo`` and
            ``away_pregame_elo``.

    Returns:
        None. ``game`` is mutated in place.
    """
    curr_id = game.id
    week = game.week
    season = game.season

    def replace_elo(team, conference):
        """Return a substitute pregame Elo for ``team``, or None if unavailable."""
        if week == 1:
            # No earlier game exists this season, so fall back to the
            # conference-wide average of known week-1 ratings.
            conf_home_games = [
                record for record in game_records_dataset
                if record.season == season and record.week == 1
                and record.home_conference == conference and record.id != curr_id
            ]
            conf_away_games = [
                record for record in game_records_dataset
                if record.season == season and record.week == 1
                and record.away_conference == conference and record.id != curr_id
            ]
            elo_arr = [record.home_pregame_elo for record in conf_home_games]
            elo_arr += [record.away_pregame_elo for record in conf_away_games]
            known_elo = [value for value in elo_arr if value is not None]
            # Too few samples would make the average unreliable.
            if len(known_elo) > 5:
                return np.mean(known_elo)
            return None

        # The season/week filter must apply to BOTH the home and the away
        # match; without the explicit grouping, "and" binds tighter than "or"
        # and every away game of the team from any season would qualify.
        for record in game_records_dataset:
            if record.season != season or record.week != week - 1:
                continue
            if record.away_team == team:
                return record.away_pregame_elo
            if record.home_team == team:
                return record.home_pregame_elo
        # The team did not play in the previous week: nothing to copy.
        return None

    if game.home_pregame_elo is None:
        replacement = replace_elo(game.home_team, game.home_conference)
        if replacement is not None:
            game.home_pregame_elo = replacement
            game.replaced_home_elo = 1
    if game.away_pregame_elo is None:
        replacement = replace_elo(game.away_team, game.away_conference)
        if replacement is not None:
            game.away_pregame_elo = replacement
            game.replaced_away_elo = 1

    return

In [None]:
# Back-fill missing Elo ratings in place and report how many games are
# affected before and after. A game missing both ratings is counted once, so
# the printed number really is "games missing at least one value".
no_away_elo = [game for game in game_records_dataset if game.away_pregame_elo is None]
no_home_elo = [game for game in game_records_dataset if game.home_pregame_elo is None]
pre_update = sum(
    1 for record in game_records_dataset
    if record.away_pregame_elo is None or record.home_pregame_elo is None
)
print(f"Before updates, there are {pre_update} games missing at least one ELO value")

# Processing order is kept as away-missing first, then home-missing:
# replacements read other records' ratings, so the order can affect results.
for game in no_away_elo:
    replace_missing_elo(game)

for game in no_home_elo:
    replace_missing_elo(game)

# Recompute after the replacements to see what could not be filled.
no_away_elo = [game for game in game_records_dataset if game.away_pregame_elo is None]
no_home_elo = [game for game in game_records_dataset if game.home_pregame_elo is None]
post_update = sum(
    1 for record in game_records_dataset
    if record.away_pregame_elo is None or record.home_pregame_elo is None
)
print(f"After updates, there are {post_update} games missing at least one ELO value")


Replace missing weather conditions with the mean or the mode, as appropriate. If the indoors flag is missing, we set it to false.

To-do later: Try using the mean and mode at the particular venue as the first priority, when available

In [None]:
# Impute missing weather fields across game_records_dataset.
# Continuous fields get the dataset-wide mean (rounded to one decimal);
# precipitation and the weather-condition code get the most common value.

# Maps each observed weather-condition code to the first label seen for it.
weather_conditions = {}

populated_temps = [game.temperature for game in game_records_dataset if game.temperature is not None]
MEAN_TEMPERATURE = round(np.mean(populated_temps), 1)
populated_dew_pts = [game.dew_point for game in game_records_dataset if game.dew_point is not None]
MEAN_DEW_PT = round(np.mean(populated_dew_pts), 1)
populated_humidity = [game.humidity for game in game_records_dataset if game.humidity is not None]
MEAN_HUMIDITY = round(np.mean(populated_humidity), 1)
populated_precipitation = [game.precipitation for game in game_records_dataset if game.precipitation is not None]
MODE_PRECIPITATION = Counter(populated_precipitation).most_common(1)[0][0]
populated_windspeed = [game.wind_speed for game in game_records_dataset if game.wind_speed is not None]
MEAN_WINDSPEED = round(np.mean(populated_windspeed), 1)

populated_weather_condition = [
    [game.weather_condition, game.weather_condition_str]
    for game in game_records_dataset
    if game.weather_condition is not None
]
for condition_code, condition_label in populated_weather_condition:
    # setdefault keeps the first label recorded for a code.
    weather_conditions.setdefault(condition_code, condition_label)
weather_condition_ints = [observation[0] for observation in populated_weather_condition]
MODE_WEATHER_CONDITION = Counter(weather_condition_ints).most_common(1)[0][0]
print(f"Populate missing weather conditions with {MODE_WEATHER_CONDITION}: {weather_conditions[MODE_WEATHER_CONDITION]}")

# NOTE(review): this is a plain arithmetic mean; if wind_dir is a compass
# bearing in degrees it ignores wrap-around at 360 - confirm this is intended.
populated_wind_dir = [game.wind_dir for game in game_records_dataset if game.wind_dir is not None]
MEAN_WIND_DIR = round(np.mean(populated_wind_dir), 1)
populated_atm_pressure = [game.atm_pressure for game in game_records_dataset if game.atm_pressure is not None]
MEAN_ATM_PRESSURE = round(np.mean(populated_atm_pressure), 1)


for game in game_records_dataset:
    if game.temperature is None:
        game.temperature = MEAN_TEMPERATURE
    if game.dew_point is None:
        game.dew_point = MEAN_DEW_PT
    if game.humidity is None:
        game.humidity = MEAN_HUMIDITY
    if game.precipitation is None:
        game.precipitation = MODE_PRECIPITATION
    if game.wind_speed is None:
        game.wind_speed = MEAN_WINDSPEED
    if game.weather_condition is None:
        game.weather_condition = MODE_WEATHER_CONDITION
        # Keep the human-readable label in step with the imputed code so the
        # exported record does not pair a code with an empty label.
        if game.weather_condition_str is None:
            game.weather_condition_str = weather_conditions[MODE_WEATHER_CONDITION]
    if game.wind_dir is None:
        game.wind_dir = MEAN_WIND_DIR
    if game.atm_pressure is None:
        game.atm_pressure = MEAN_ATM_PRESSURE
    # A missing indoors flag is treated as an outdoor game.
    if game.game_indoors is None:
        game.game_indoors = 0

# Debug aid:
# print(MEAN_TEMPERATURE, MEAN_DEW_PT, MEAN_HUMIDITY, MODE_PRECIPITATION, MEAN_WINDSPEED, MODE_WEATHER_CONDITION, MEAN_WIND_DIR, MEAN_ATM_PRESSURE)

# Drop Bad Rows

In [None]:
# Drop rows that are unusable for modelling: any row still missing a home or
# away pregame Elo (a little more than 1% as of 9/28) or missing over_under
# (only 1 row when last checked). Prints the row count before and after.
required_columns = ["away_pregame_elo", "home_pregame_elo", "over_under"]

df = pd.DataFrame(game_records_dataset)
old_len = df.shape[0]
df = df.dropna(subset=required_columns)
new_len = df.shape[0]
print(old_len, new_len)

# Get Dataset

In [None]:
# Build a date-stamped output file name of the form "cfb-ats-data-MMDD.csv".
# strftime yields zero-padded strings for both parts, which avoids the
# int + str TypeError the manual padding hit for months 10-12 and keeps the
# stamp unambiguous (e.g. "0111" vs "1101").
today = datetime.now()
month = today.strftime("%m")
date = today.strftime("%d")
today_str = month + date
file_name = f"cfb-ats-data-{today_str}.csv"

In [None]:
# Write the filtered dataset to the date-stamped CSV; index=False leaves the
# DataFrame's row index out of the file.
df.to_csv(file_name, index=False)

In [None]:
# Feature columns fed to the model, grouped by theme. Order is significant
# for anything that selects columns positionally, so it is kept as-is.
# NOTE(review): "replaced_home_elo" is listed but "replaced_away_elo" is not -
# confirm whether that asymmetry is intentional.
cols = [
    # Game context
    "season",
    "week",
    "neutral",
    "conference_game",
    "venue_id",
    # Home team identity and conference flags
    "home_team_id",
    "home_in_acc",
    "home_in_aac",
    "home_in_big12",
    "home_in_big10",
    "home_in_cusa",
    "home_independent",
    "home_in_mac",
    "home_in_mwc",
    "home_in_pac12",
    "home_in_sec",
    "home_in_sunbelt",
    "home_time_change",
    # Away team identity and conference flags
    "away_team_id",
    "away_in_acc",
    "away_in_aac",
    "away_in_big12",
    "away_in_big10",
    "away_in_cusa",
    "away_independent",
    "away_in_mac",
    "away_in_mwc",
    "away_in_pac12",
    "away_in_sec",
    "away_in_sunbelt",
    "away_fcs",
    "away_time_change",
    # Coaching
    "home_coach_interim",
    "away_coach_interim",
    "tenure_delta",
    # Betting line
    "spread",
    "home_favorite",
    # Weather
    "temperature",
    "dew_point",
    "humidity",
    "precipitation",
    "wind_speed",
    "weather_condition",
    # Ratings and totals
    "home_pregame_elo",
    "replaced_home_elo",
    "away_pregame_elo",
    "over_under",
    # Remaining weather / venue conditions
    "game_indoors",
    "wind_dir",
    "atm_pressure",
]
len(cols)
