In [3]:
import cfbd
from dotenv import load_dotenv
import json
import os
import pandas as pd

# Load environment variables from .env file
load_dotenv()

# Fail fast with a clear message when the key is absent: os.getenv would
# otherwise return None and the problem would only show up later as an
# authorization failure on the first API call.
cfbd_api_key = os.getenv("CFBD_API_KEY")
if not cfbd_api_key:
    raise RuntimeError(
        "CFBD_API_KEY is not set; add it to your environment or .env file."
    )

# Configure API key authorization: ApiKeyAuth
cfbd_config = cfbd.Configuration()
cfbd_config.api_key["Authorization"] = cfbd_api_key
cfbd_config.api_key_prefix["Authorization"] = "Bearer"

# Both API wrappers share a single client built from the same configuration.
cfbd_client = cfbd.ApiClient(cfbd_config)
teams_api = cfbd.TeamsApi(cfbd_client)
games_api = cfbd.GamesApi(cfbd_client)

In [6]:
# Read the local teams file as raw bytes and parse it as JSON.
with open("cfb_teams.json", "rb") as teams_file:
    teams = json.loads(teams_file.read())

In [24]:
# Tabulate the parsed team data; the bare name on the last line displays it.
teams_df = pd.DataFrame(data=teams)
teams_df

Unnamed: 0,id,school,mascot,abbreviation,alt_name1,alt_name2,alt_name3,conference,division,color,alt_color,logos,twitter,location
0,2005,Air Force,Falcons,AFA,,AFA,Air Force,Mountain West,Mountain,#004a7b,#ffffff,[http://a.espncdn.com/i/teamlogos/ncaa/500/200...,@AF_Football,"{'venue_id': 3713, 'name': 'Falcon Stadium', '..."
1,2006,Akron,Zips,AKR,,AKR,Akron,Mid-American,East,#00285e,#84754e,[http://a.espncdn.com/i/teamlogos/ncaa/500/200...,@ZipsFB,"{'venue_id': 3768, 'name': 'Summa Field at Inf..."
2,333,Alabama,Crimson Tide,ALA,,ALA,Alabama,SEC,West,#690014,#f1f2f3,[http://a.espncdn.com/i/teamlogos/ncaa/500/333...,@AlabamaFTBL,"{'venue_id': 3657, 'name': 'Bryant Denny Stadi..."
3,2026,Appalachian State,Mountaineers,APP,,APP,Appalachian St,Sun Belt,East,#000000,#ffcd00,[http://a.espncdn.com/i/teamlogos/ncaa/500/202...,@AppState_FB,"{'venue_id': 3792, 'name': 'Kidd Brewer Stadiu..."
4,12,Arizona,Wildcats,ARIZ,,ARIZ,Arizona,Pac-12,,#002449,#00205b,[http://a.espncdn.com/i/teamlogos/ncaa/500/12....,@ArizonaFBall,"{'venue_id': 3619, 'name': 'Arizona Stadium', ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126,98,Western Kentucky,Hilltoppers,WKU,,WKU,W Kentucky,Conference USA,,#F32026,#b3b5b8,[http://a.espncdn.com/i/teamlogos/ncaa/500/98....,@WKUFootball,"{'venue_id': 3796, 'name': 'Houchens Industrie..."
127,2711,Western Michigan,Broncos,WMU,,WMU,W Michigan,Mid-American,West,#532e1f,#8b7f79,[http://a.espncdn.com/i/teamlogos/ncaa/500/271...,@WMU_Football,"{'venue_id': 3980, 'name': 'Waldo Stadium', 'c..."
128,277,West Virginia,Mountaineers,WVU,,WVU,West Virginia,Big 12,,#FFC600,#eaaa00,[http://a.espncdn.com/i/teamlogos/ncaa/500/277...,@WVUFootball,"{'venue_id': 3842, 'name': 'Mountaineer Field ..."
129,275,Wisconsin,Badgers,WIS,,WIS,Wisconsin,Big Ten,West,#A00001,#f7f7f7,[http://a.espncdn.com/i/teamlogos/ncaa/500/275...,@BadgerFootball,"{'venue_id': 347, 'name': 'Camp Randall Stadiu..."


In [46]:
# Distinct school names from the teams frame, used later to filter the roster.
fbs_teams = set(teams_df["school"])
fbs_teams

{'Air Force',
 'Akron',
 'Alabama',
 'Appalachian State',
 'Arizona',
 'Arizona State',
 'Arkansas',
 'Arkansas State',
 'Army',
 'Auburn',
 'BYU',
 'Ball State',
 'Baylor',
 'Boise State',
 'Boston College',
 'Bowling Green',
 'Buffalo',
 'California',
 'Central Michigan',
 'Charlotte',
 'Cincinnati',
 'Clemson',
 'Coastal Carolina',
 'Colorado',
 'Colorado State',
 'Connecticut',
 'Duke',
 'East Carolina',
 'Eastern Michigan',
 'Florida',
 'Florida Atlantic',
 'Florida International',
 'Florida State',
 'Fresno State',
 'Georgia',
 'Georgia Southern',
 'Georgia State',
 'Georgia Tech',
 "Hawai'i",
 'Houston',
 'Illinois',
 'Indiana',
 'Iowa',
 'Iowa State',
 'James Madison',
 'Kansas',
 'Kansas State',
 'Kent State',
 'Kentucky',
 'LSU',
 'Liberty',
 'Louisiana',
 'Louisiana Monroe',
 'Louisiana Tech',
 'Louisville',
 'Marshall',
 'Maryland',
 'Memphis',
 'Miami',
 'Miami (OH)',
 'Michigan',
 'Michigan State',
 'Middle Tennessee',
 'Minnesota',
 'Mississippi State',
 'Missouri',
 'NC

In [72]:
# Fetch the 2022 team records and flatten each one into a plain dict:
# the record's own attributes first, then the attributes of its nested
# "_total" object merged in on top (keys keep their leading underscore).
team_records = [
    {**vars(record), **vars(vars(record)["_total"])}
    for record in games_api.get_team_records(year=2022)
]
len(team_records)

131

In [77]:
records_df = (
    pd.DataFrame(team_records)
    # Select relevant columns
    .loc[:, ["_year", "_team", "_games", "_wins", "_losses"]]
    # Remove _ from column names
    .rename(columns=lambda col: col[1:])
    # Fraction of games won
    .assign(win_percentage=lambda records: records["wins"] / records["games"])
)

records_df

Unnamed: 0,year,team,games,wins,losses,win_percentage
0,2022,Air Force,13,10,3,0.769231
1,2022,Akron,13,2,10,0.153846
2,2022,Alabama,13,11,2,0.846154
3,2022,Appalachian State,12,6,6,0.500000
4,2022,Arizona,12,5,7,0.416667
...,...,...,...,...,...,...
126,2022,Western Kentucky,14,9,5,0.642857
127,2022,Western Michigan,12,5,7,0.416667
128,2022,West Virginia,12,5,7,0.416667
129,2022,Wisconsin,13,7,6,0.538462


In [26]:
# Fetch the 2022 roster and convert each returned player object into a
# plain dict of its attributes.
roster = [vars(player) for player in teams_api.get_roster(year=2022)]
len(roster)

30420

In [62]:
# Build the player table from the roster dicts in one chain, so re-running
# the cell always starts from `roster` and yields the same result.
df = (
    pd.DataFrame(roster)
    # Select relevant columns
    .loc[
        :,
        [
            "_id",
            "_first_name",
            "_last_name",
            "_team",
            "_height",
            "_weight",
            "_position",
        ],
    ]
    # Remove _ from column names
    .rename(columns=lambda col: col[1:])
    # Limit to just FBS teams and to players with weights. The vectorized
    # isin/notna replace a per-row Python lambda and a negated isnull.
    .loc[lambda players: players["team"].isin(fbs_teams) & players["weight"].notna()]
    .reset_index(drop=True)
)
df

Unnamed: 0,id,first_name,last_name,team,height,weight,position
0,19014,Matt,Harmon,Kent State,77.0,254.0,LB
1,102597,Will,Rogers,Mississippi State,74.0,210.0,QB
2,107494,Trey,Sanders,Alabama,72.0,214.0,RB
3,136429,Cedric,Patterson III,Rice,71.0,190.0,WR
4,146583,John,Adams,Temple,74.0,185.0,WR
...,...,...,...,...,...,...,...
15475,5102327,John,Yayi-Bondje,Miami,68.0,175.0,DB
15476,5102330,Jacob,Fitts,Texas A&M,72.0,286.0,OL
15477,5102333,Davian,Sullivan,Clemson,76.0,195.0,WR
15478,5102350,Henry,Belvin IV,Duke,75.0,212.0,QB


## Save Data to CSVs

In [81]:
# Write both frames to CSV in the working directory, omitting the index column.
records_df.to_csv(path_or_buf="cfb_team_records_2022.csv", index=False)
df.to_csv(path_or_buf="cfb_players_2022.csv", index=False)