In [213]:
import os
import requests
from datetime import datetime

from tqdm import tqdm_notebook as tqdm

import pandas as pd
import numpy as np

In [217]:
class DownloadCollegeFootballData(object):
    """
    Download game results and per-game team statistics from the
    College Football Data API.

    Parameters
    ----------
    session : optional
        Any object exposing a ``requests``-compatible
        ``get(url, params=..., timeout=...)`` method, typically a
        ``requests.Session`` (which reuses connections and can carry default
        headers). When omitted, the module-level ``requests.get`` is used.
    """
    base_url = 'https://api.collegefootballdata.com/'

    # Seasons from 2018 up to, but not including, the current calendar year.
    # Evaluated once, when the class body is executed.
    years = list(range(2018, datetime.now().year))

    endpoints = {
        'games':  'games/',
        'stats': 'games/teams'
    }

    # Seconds to wait for a single HTTP response before giving up, so one
    # stalled connection cannot hang a download of many games indefinitely.
    timeout = 30

    # Injected HTTP client; None means "use the `requests` module".  Kept as a
    # class-level default so subclasses that skip __init__ still work.
    _http = None

    def __init__(self, session=None):
        self._http = session

    def _get(self, endpoint: str, params: dict):
        """
        GET one of the configured endpoints and return the response object.
        """
        # `requests` is only looked up when no client was injected
        client = self._http if self._http is not None else requests
        url = self.base_url + self.endpoints[endpoint]
        return client.get(url, params=params, timeout=self.timeout)

    @staticmethod
    def _game_stats_row(payload, game_id) -> pd.DataFrame:
        """
        Turn the stats payload of one game into a single-row wide frame.

        Each team's stat categories are prefixed with that team's
        ``homeAway`` value, the long (category, stat) records are pivoted into
        columns, and ``game_id`` is attached.  Raises (IndexError, KeyError,
        TypeError, ...) when the payload lacks the expected structure; the
        caller records such games instead of failing.
        """
        stats_by_team = payload[0]['teams']
        long_frames = []
        for team in stats_by_team:
            df_long = pd.DataFrame.from_records(team['stats'])
            df_long['category'] = team['homeAway'] + '_' + df_long['category']
            long_frames.append(df_long)
        df_game = pd.concat(long_frames)
        # Pivot: one column per category; 'first' keeps the first value seen
        df_game = pd.pivot_table(
            df_game, columns='category', values='stat', aggfunc='first'
        ).reset_index(drop=True)
        return df_game.assign(game_id=game_id)

    def download_games(self) -> pd.DataFrame:
        """
        Download game information (no stats) for every season in ``self.years``.

        Returns
        -------
        pd.DataFrame
            One row per game with columns game_id, season, week, season_type,
            home_team, away_team, home_points, away_points.  Empty (with those
            columns) when no season returned any game.

        Raises
        ------
        Whatever ``raise_for_status()`` of the HTTP client raises on a
        non-success response, and KeyError if a non-empty payload lacks one of
        the expected fields.
        """
        keep = ['id', 'season', 'week', 'season_type', 'home_team', 'away_team', 'home_points', 'away_points']
        renames = {'id': 'game_id'}

        frames = []
        for year in self.years:
            print(year)
            r = self._get('games', {'year': year})
            # Fail loudly on an HTTP error instead of trying to parse an error body
            r.raise_for_status()

            df_year = pd.DataFrame.from_records(r.json())
            if df_year.empty:
                # A season without games has no columns to select from
                continue

            # Only retain useful fields, rename as necessary
            frames.append(df_year[keep].rename(columns=renames))

        if not frames:
            # pd.concat raises on an empty list; return an empty, well-formed frame
            return pd.DataFrame(columns=[renames.get(col, col) for col in keep])

        return pd.concat(frames).reset_index(drop=True)

    def download_stats(self, df_games: pd.DataFrame = None) -> 'Tuple[pd.DataFrame, dict]':
        """
        Download team stats for each game.

        Parameters
        ----------
        df_games : pd.DataFrame, optional
            Frame with a ``game_id`` column.  When omitted, the games are
            fetched first via ``download_games()``.

        Returns
        -------
        (df_stats, not_working)
            ``df_stats`` has one row per game that yielded stats: the
            ``home_``/``away_`` prefixed stat categories plus ``game_id``.
            ``not_working`` maps each game_id that yielded no row to
            diagnostic data: the JSON payload when it could not be reshaped,
            or a dict with the ``status_code`` (and an ``error`` note for an
            unparsable body) when the request itself was unusable.
        """
        if df_games is None:
            df_games = self.download_games()
        assert 'game_id' in df_games.columns, "df_games must contain a 'game_id' column"
        print('stats')

        df_stats, not_working = [], {}
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # row order of the result is deterministic
        for game_id in dict.fromkeys(df_games['game_id']):
            r = self._get('stats', {'gameId': game_id})
            if r.status_code != 200:
                # Record the failure instead of silently dropping the game
                not_working[game_id] = {'status_code': r.status_code}
                continue

            try:
                payload = r.json()
            except ValueError:
                # JSON decode errors derive from ValueError
                not_working[game_id] = {
                    'status_code': r.status_code,
                    'error': 'response body is not valid JSON',
                }
                continue

            try:
                df_stats.append(self._game_stats_row(payload, game_id))
            except Exception:
                # Best effort: keep the payload for inspection and move on.
                # (Exception, not a bare except, so Ctrl-C still interrupts.)
                not_working[game_id] = payload

        if not df_stats:
            # pd.concat raises on an empty list
            return pd.DataFrame(columns=['game_id']), not_working

        df_stats = pd.concat(df_stats).reset_index(drop=True)

        return df_stats, not_working

In [218]:
# Instantiating performs no downloads; data is fetched only when a download_* method is called.
downloader = DownloadCollegeFootballData()

In [219]:
# No games frame is passed, so download_stats() first calls download_games() (which prints each
# season) and then requests the stats game by game. It returns a pair: the wide stats frame and
# a dict keyed by game_id for games whose response could not be parsed.
df_stats, didnt_work = downloader.download_stats()

2018
2019
stats


In [220]:
# One row per game that produced stats; columns are the home_/away_ stat categories plus game_id.
df_stats.shape

(1693, 71)

In [221]:
# Number of games whose stats response could not be parsed
len(didnt_work)

1

In [222]:
# Inspect the failures: game_id -> JSON payload that was returned for that game
didnt_work

{401117539: []}

In [223]:
# Preview; a stat category absent from a game's payload shows up as NaN after concatenation
df_stats.head()

Unnamed: 0,away_completionAttempts,away_defensiveTDs,away_firstDowns,away_fourthDownEff,away_fumblesLost,away_fumblesRecovered,away_interceptions,away_kickReturnTDs,away_kickReturnYards,away_kickReturns,away_kickingPoints,away_netPassingYards,away_passesDeflected,away_passingTDs,away_possessionTime,away_qbHurries,away_rushingAttempts,away_rushingTDs,away_rushingYards,away_sacks,away_tackles,away_tacklesForLoss,away_thirdDownEff,away_totalPenaltiesYards,away_totalYards,away_turnovers,away_yardsPerPass,away_yardsPerRushAttempt,home_completionAttempts,home_defensiveTDs,home_firstDowns,home_fourthDownEff,home_fumblesLost,home_fumblesRecovered,home_interceptions,home_kickReturnTDs,home_kickReturnYards,home_kickReturns,home_kickingPoints,home_netPassingYards,home_passesDeflected,home_passingTDs,home_possessionTime,home_puntReturnTDs,home_puntReturnYards,home_puntReturns,home_qbHurries,home_rushingAttempts,home_rushingTDs,home_rushingYards,home_sacks,home_tackles,home_tacklesForLoss,home_thirdDownEff,home_totalFumbles,home_totalPenaltiesYards,home_totalYards,home_turnovers,home_yardsPerPass,home_yardsPerRushAttempt,game_id,away_totalFumbles,home_interceptionTDs,home_interceptionYards,home_passesIntercepted,away_interceptionTDs,away_interceptionYards,away_passesIntercepted,away_puntReturnTDs,away_puntReturnYards,away_puntReturns
0,16-29,0,18,1-2,1,0,0,0,31,3,4,244,5,3,20:21,0,28,1,140,0,47,5,2-11,6-60,384,1,8.4,5.0,12-24,0,22,1-1,0,1,0,0,170,6,11,87,4,0,39:39,0.0,-1.0,1.0,2,50,3,310,3,33,5,4-14,0,7-54,397,0,3.6,6.2,401014972,,,,,,,,,,
1,22-47,0,12,0-5,0,1,1,0,87,4,4,296,2,1,22:33,1,16,0,-9,3,33,8,3-15,6-30,287,1,6.3,-0.6,13-20,1,20,0-0,1,0,0,0,43,3,10,267,8,1,37:27,0.0,20.0,2.0,7,46,2,226,4,25,4,7-15,1,4-45,493,1,13.4,4.9,401014973,2.0,1.0,26.0,1.0,,,,,,
2,19-29,0,22,2-3,0,2,3,0,57,3,6,209,2,2,37:21,2,48,2,231,3,37,5,4-14,6-55,440,3,7.2,4.8,11-23,0,15,4-5,2,0,2,0,120,5,2,91,2,1,22:39,,,,1,43,3,217,3,42,6,3-13,3,7-82,308,4,4.0,5.0,401014974,2.0,0.0,1.0,3.0,0.0,28.0,2.0,,,
3,24-37,0,24,0-0,0,0,0,0,33,2,5,272,5,2,31:48,4,38,3,171,1,36,2,6-14,7-62,443,0,7.4,4.5,25-46,0,23,2-5,0,0,0,0,50,2,4,357,2,3,28:12,0.0,15.0,2.0,3,27,1,87,2,44,5,5-15,1,8-71,444,0,7.8,3.2,401014975,,,,,,,,0.0,50.0,2.0
4,12-24,0,9,0-0,0,0,1,0,66,3,1,252,3,2,15:47,2,19,1,4,2,57,14,2-10,4-40,256,1,10.5,0.2,41-51,2,34,2-3,0,0,0,1,116,2,16,461,0,3,44:13,0.0,5.0,2.0,2,43,0,68,4,18,4,9-17,1,5-40,529,0,9.0,1.6,401014976,1.0,1.0,36.0,1.0,,,,0.0,8.0,1.0


In [129]:
# NOTE(review): `teams` is not assigned in any visible cell — presumably r.json()[0]['teams'] for a
# single game; confirm and define it explicitly so the notebook survives Restart & Run All.
teams

[{'school': 'LSU',
  'conference': 'SEC',
  'homeAway': 'home',
  'points': 37,
  'stats': [{'category': 'tacklesForLoss', 'stat': '4'},
   {'category': 'defensiveTDs', 'stat': '0'},
   {'category': 'tackles', 'stat': '31'},
   {'category': 'sacks', 'stat': '3'},
   {'category': 'qbHurries', 'stat': '1'},
   {'category': 'passesDeflected', 'stat': '4'},
   {'category': 'fumblesRecovered', 'stat': '0'},
   {'category': 'rushingTDs', 'stat': '0'},
   {'category': 'passingTDs', 'stat': '4'},
   {'category': 'kickingPoints', 'stat': '13'},
   {'category': 'interceptionYards', 'stat': '17'},
   {'category': 'interceptionTDs', 'stat': '0'},
   {'category': 'passesIntercepted', 'stat': '2'},
   {'category': 'possessionTime', 'stat': '33:38'},
   {'category': 'interceptions', 'stat': '0'},
   {'category': 'fumblesLost', 'stat': '0'},
   {'category': 'turnovers', 'stat': '0'},
   {'category': 'totalPenaltiesYards', 'stat': '5-45'},
   {'category': 'yardsPerRushAttempt', 'stat': '3.7'},
   {'cat

In [172]:
# Prototype of the per-game reshaping: build one long (category, stat) frame per team and prefix
# each category with the team's homeAway value so home and away stats get distinct names.
# NOTE(review): this rebinds `df_stats`, the same name that holds the download_stats() result.
df_stats = []
for team in teams:
    df_stat = pd.DataFrame.from_records(team['stats'])
    df_stat['category'] = team['homeAway'] + '_' + df_stat['category']
    df_stats.append(df_stat)
# Stack the per-team frames into a single long frame (the list name is reused for the result)
df_stats = pd.concat(df_stats)

In [171]:
# NOTE(review): execution count 171 precedes the cell above (172), so the output shown here may
# come from an earlier run of that cell.
df_stats.head()

Unnamed: 0,category,stat
0,home_tacklesForLoss,4
1,home_defensiveTDs,0
2,home_tackles,31
3,home_sacks,3
4,home_qbHurries,1


In [173]:
# Reshape long -> wide: every distinct category becomes a column ('first' keeps the first stat
# seen for a category), then the index is replaced by the default integer index.
df_pivoted = (
    df_stats
    .pivot_table(columns='category', values='stat', aggfunc='first')
    .reset_index(drop=True)
)
df_pivoted.head()

category,away_completionAttempts,away_defensiveTDs,away_firstDowns,away_fourthDownEff,away_fumblesLost,away_fumblesRecovered,away_interceptions,away_kickingPoints,away_netPassingYards,away_passesDeflected,away_passingTDs,away_possessionTime,away_qbHurries,away_rushingAttempts,away_rushingTDs,away_rushingYards,away_sacks,away_tackles,away_tacklesForLoss,away_thirdDownEff,away_totalPenaltiesYards,away_totalYards,away_turnovers,away_yardsPerPass,away_yardsPerRushAttempt,home_completionAttempts,home_defensiveTDs,home_firstDowns,home_fourthDownEff,home_fumblesLost,home_fumblesRecovered,home_interceptionTDs,home_interceptionYards,home_interceptions,home_kickingPoints,home_netPassingYards,home_passesDeflected,home_passesIntercepted,home_passingTDs,home_possessionTime,home_qbHurries,home_rushingAttempts,home_rushingTDs,home_rushingYards,home_sacks,home_tackles,home_tacklesForLoss,home_thirdDownEff,home_totalPenaltiesYards,home_totalYards,home_turnovers,home_yardsPerPass,home_yardsPerRushAttempt
0,20-43,0,20,3-3,0,0,2,4,225,3,1,26:22,5,25,0,61,2,47,3,3-13,3-17,286,2,5.2,2.4,28-38,0,26,0-0,0,0,0,17,0,13,349,4,2,4,33:38,1,36,0,132,3,31,4,9-16,5-45,481,0,9.2,3.7


In [162]:
# NOTE(review): stale output — execution count 162 predates cell 173, and (2, 28) does not match
# the one-row, home_/away_-prefixed frame displayed for that cell. Re-run after cell 173.
df_pivoted.shape

(2, 28)

In [163]:
# NOTE(review): stale output — execution count 163 predates cell 173, and these column names lack
# the home_/away_ prefixes that cell produces. Re-run after cell 173.
df_pivoted.columns

Index(['completionAttempts', 'defensiveTDs', 'firstDowns', 'fourthDownEff',
       'fumblesLost', 'fumblesRecovered', 'interceptionTDs',
       'interceptionYards', 'interceptions', 'kickingPoints',
       'netPassingYards', 'passesDeflected', 'passesIntercepted', 'passingTDs',
       'possessionTime', 'qbHurries', 'rushingAttempts', 'rushingTDs',
       'rushingYards', 'sacks', 'tackles', 'tacklesForLoss', 'thirdDownEff',
       'totalPenaltiesYards', 'totalYards', 'turnovers', 'yardsPerPass',
       'yardsPerRushAttempt'],
      dtype='object', name='category')