In [1]:
from pathlib import Path

import cfbd
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st

from config import api_key

# Build an authenticated cfbd client: the key imported from config.py is sent
# in the 'Authorization' field with a 'Bearer' prefix.
auth_field = 'Authorization'
configuration = cfbd.Configuration()
configuration.api_key[auth_field] = api_key
configuration.api_key_prefix[auth_field] = 'Bearer'

# Games endpoint client used by the cells that call get_games /
# get_team_game_stats.
api_client = cfbd.ApiClient(configuration)
api_instance = cfbd.GamesApi(api_client)

In [2]:
# Seasons analysed: 2004 through 2020 inclusive (range's upper bound is exclusive).
season = list(range(2004, 2021))
# Longer span starting in 1970; defined here but not referenced by the cells
# visible in this notebook.
season_extended = list(range(1970, 2021))

# Metric columns, in display order. Each starts as an empty-string placeholder
# that later cells overwrite season by season.
metric_columns = [
    "Avg Points Scored Per Game",
    "Avg Turnovers",
    "Avg Total Yards",
    "Avg Third Down Eff",
    "Avg Margin of Victory",
    "Avg Number of Punts",
    "Number of Punts",
    "Avg Loss of Yard Plays",
    "Avg Standard Gain Plays",
    "Avg Explosive Plays",
    "Num Loss of Yard Plays",
    "Num Standard Gain Plays",
    "Num Explosive Plays",
    "Number of Games",
]

# One row per season; the scalar "" is broadcast down every metric column.
score_df = pd.DataFrame({"Season": season, **dict.fromkeys(metric_columns, "")})

In [4]:
# Per-season scoring summary: for every game with a usable score, record the
# combined points and the absolute margin, then store the season averages in
# score_df.
for year in season:
    games = api_instance.get_games(year=year)
    total_points = []
    point_diff = []
    skipped_games = 0
    for game in games:
        try:
            home_points = int(game.home_points)
            away_points = int(game.away_points)
        except (TypeError, ValueError):
            # int() rejects a missing (None) or non-numeric score. Only those
            # two errors are swallowed so that anything else (network errors,
            # KeyboardInterrupt, ...) still surfaces.
            skipped_games += 1
            continue
        total_points.append(home_points + away_points)
        point_diff.append(abs(home_points - away_points))

    # One summary line per season instead of one "Skip Game" line per game.
    if skipped_games:
        print(f"Season {year}: skipped {skipped_games} game(s) without a usable score")

    if not total_points:
        # Nothing to average; leave the placeholders rather than divide by zero.
        print(f"Season {year}: no scored games, averages left unset")
        continue

    year_mask = score_df["Season"] == year
    score_df.loc[year_mask, ["Avg Points Scored Per Game"]] = sum(total_points) / len(total_points)
    score_df.loc[year_mask, ["Avg Margin of Victory"]] = sum(point_diff) / len(point_diff)

In [5]:
def _parse_team_stats(stats):
    """Extract one team's turnover, total-yard and third-down figures.

    Parameters
    ----------
    stats : iterable of mappings
        Each entry is read through its "category" and "stat" keys.

    Returns
    -------
    tuple of int
        ``(turnovers, total_yards, third_down_successes, third_down_attempts)``.
        The third-down pair comes from splitting the "thirdDownEff" stat on
        "-": the first part is the successes, the second the attempts.

    Raises
    ------
    KeyError
        If "turnovers", "totalYards" or "thirdDownEff" is absent. Raising here
        lets the caller skip the game instead of silently reusing the values
        parsed for a previous game.
    IndexError, ValueError
        If a stat is not in the expected numeric / "a-b" form.
    """
    # If a category appears more than once, the last entry wins.
    by_category = {entry["category"]: entry["stat"] for entry in stats}
    third_down_parts = by_category["thirdDownEff"].split("-")
    return (
        int(by_category["turnovers"]),
        int(by_category["totalYards"]),
        int(third_down_parts[0]),
        int(third_down_parts[1]),
    )


# Per-season team statistics: combine both teams' numbers for each game, then
# store the season averages and the game count in score_df.
for year in season:
    turnovers = []
    total_yards = []
    third_attempts = []
    third_successes = []
    num_games = 0
    for week in range(1, 17):
        try:
            game_teams = api_instance.get_team_game_stats(year=year, week=week)
        except Exception as e:
            # Best effort: a week the API cannot serve is reported and skipped.
            print(f"No data for season {year} week {week}. Skipping {e}")
            continue

        for game in game_teams:
            # Each game is handled on its own so that one malformed record
            # does not discard the remaining games of the week.
            try:
                stats_list_1 = game.teams[0]["stats"]
                stats_list_2 = game.teams[1]["stats"]
            except (IndexError, KeyError, TypeError) as e:
                print(f"Malformed game in season {year} week {week}. Skipping {e}")
                continue
            # Counted as soon as both teams' stat lists are present, before
            # the individual categories are parsed.
            num_games += 1
            try:
                turnover_1, tot_yards_1, third_made_1, third_att_1 = _parse_team_stats(stats_list_1)
                turnover_2, tot_yards_2, third_made_2, third_att_2 = _parse_team_stats(stats_list_2)
            except (KeyError, IndexError, ValueError, TypeError, AttributeError) as e:
                print(f"Incomplete stats for a game in season {year} week {week}. Skipping {e}")
                continue

            turnovers.append(turnover_1 + turnover_2)
            total_yards.append(tot_yards_1 + tot_yards_2)
            third_attempts.append(third_att_1 + third_att_2)
            third_successes.append(third_made_1 + third_made_2)

    year_mask = score_df["Season"] == year
    score_df.loc[year_mask, ["Number of Games"]] = num_games

    if not turnovers:
        # No game parsed for this season; avoid dividing by zero.
        print(f"No usable team stats for season {year}; averages left unset")
        continue

    score_df.loc[year_mask, ["Avg Turnovers"]] = sum(turnovers) / len(turnovers)
    score_df.loc[year_mask, ["Avg Total Yards"]] = sum(total_yards) / len(total_yards)
    # The two lists have equal length, so the ratio of their means reduces to
    # the ratio of their sums.
    attempts_total = sum(third_attempts)
    if attempts_total:
        score_df.loc[year_mask, ["Avg Third Down Eff"]] = sum(third_successes) / attempts_total

In [28]:
# The drives endpoint gets its own client name so the games client bound to
# `api_instance` is not overwritten (a later cell calls get_games on it).
drives_api = cfbd.DrivesApi(cfbd.ApiClient(configuration))

# Count each season's drives whose result is "PUNT" or "PUNT RETURN TD".
for year in season:
    drives = drives_api.get_drives(year=year)
    drive_results = pd.DataFrame([drive.to_dict() for drive in drives])
    drive_result_value_counts = drive_results["drive_result"].value_counts()
    # value_counts() has no entry for a result that never occurred, so use
    # .get(..., 0) instead of indexing, which would raise KeyError.
    punt_total = (
        drive_result_value_counts.get("PUNT", 0)
        + drive_result_value_counts.get("PUNT RETURN TD", 0)
    )
    score_df.loc[score_df["Season"] == year, ["Number of Punts"]] = punt_total
    print(f"{year} processed")

2004 processed
2005 processed
2006 processed
2007 processed
2008 processed
2009 processed
2010 processed
2011 processed
2012 processed
2013 processed
2014 processed
2015 processed
2016 processed
2017 processed
2018 processed
2019 processed
2020 processed


In [None]:
# Play-type ids sent to the plays endpoint, grouped by how their plays are
# bucketed below.
# NOTE(review): the meaning of each id is not visible in this notebook;
# confirm against the API's play-type listing.
rush_types = [5, 68]
pass_types = [24, 67, 51, 4]
neg_play_types = [7, 20]

# Minimum yards gained for a play to count as "explosive".
RUSH_EXPLOSIVE_YARDS = 20
PASS_EXPLOSIVE_YARDS = 30

# The plays endpoint gets its own client name so the games client bound to
# `api_instance` is not overwritten (a later cell calls get_games on it).
plays_api = cfbd.PlaysApi(cfbd.ApiClient(configuration))


def _bucket_plays(plays, explosive_yards):
    """Count plays by yardage bucket.

    Parameters
    ----------
    plays : iterable
        Objects exposing a ``yards_gained`` attribute.
    explosive_yards : int
        Threshold at or above which a play counts as explosive.

    Returns
    -------
    tuple of int
        ``(explosive, standard, loss)``: explosive is
        ``yards_gained >= explosive_yards``, standard is any other
        non-negative gain, loss is a negative gain.
    """
    explosive = standard = loss = 0
    for play in plays:
        if play.yards_gained >= explosive_yards:
            explosive += 1
        elif play.yards_gained >= 0:
            standard += 1
        else:
            loss += 1
    return explosive, standard, loss


# Per-season play counts, stored in the "Num ..." columns of score_df.
for year in season:
    explosive_plays = 0
    standard_plays = 0
    loss_plays = 0
    for week in range(1, 17):
        # Rush and pass plays share the same bucketing; only the threshold differs.
        for play_types, explosive_yards in (
            (rush_types, RUSH_EXPLOSIVE_YARDS),
            (pass_types, PASS_EXPLOSIVE_YARDS),
        ):
            for play_type in play_types:
                plays = plays_api.get_plays(year=year, week=week, play_type=play_type)
                explosive, standard, loss = _bucket_plays(plays, explosive_yards)
                explosive_plays += explosive
                standard_plays += standard
                loss_plays += loss
        # Every play of these types counts as a loss, whatever its yardage.
        for neg_play in neg_play_types:
            plays = plays_api.get_plays(year=year, week=week, play_type=neg_play)
            loss_plays += sum(1 for _ in plays)

    year_mask = score_df["Season"] == year
    score_df.loc[year_mask, ["Num Explosive Plays"]] = explosive_plays
    score_df.loc[year_mask, ["Num Standard Gain Plays"]] = standard_plays
    score_df.loc[year_mask, ["Num Loss of Yard Plays"]] = loss_plays
    print(f"{year} processed")

In [29]:
# Display the per-season metrics table.
# NOTE(review): the saved output below includes an "Avg Margin of Vitory"
# column that no cell in this notebook creates; presumably left over from an
# earlier run. Re-run on a fresh kernel to confirm.
score_df

Unnamed: 0,Season,Avg Points Scored Per Game,Avg Turnovers,Avg Total Yards,Avg Third Down Eff,Avg Margin of Victory,Avg Number of Punts,Number of Punts,Avg Loss of Yard Plays,Avg Standard Gain Plays,Avg Explosive Plays,Num Loss of Yard Plays,Num Standard Gain Plays,Num Explosive Plays,Number of Games,Avg Margin of Vitory
0,2004,52.252874,4.014903,746.57228,0.366142,,,4792,,,,,,,671,18.008621
1,2005,52.44058,4.076471,743.158824,0.364025,,,5949,,,,,,,680,17.895652
2,2006,47.369737,3.888742,674.646358,0.38594,,,6142,,,,,,,755,17.693421
3,2007,55.310705,4.021025,763.332457,0.39411,,,6673,,,,,,,761,17.678851
4,2008,52.783117,3.858442,726.136364,0.394913,,,7049,,,,,,,770,18.806494
5,2009,52.369509,3.5323,740.807494,0.390847,,,7191,,,,,,,774,17.684755
6,2010,54.346701,3.522639,754.135834,0.399503,,,7171,,,,,,,773,18.978008
7,2011,54.985843,3.475325,773.163636,0.404073,,,7421,,,,,,,770,18.831403
8,2012,56.498137,3.486957,799.163975,0.401242,,,7578,,,,,,,805,18.771429
9,2013,57.260976,3.406098,811.635366,0.400142,,,8198,,,,,,,820,20.397561


In [30]:
# Inspect column dtypes to see which metric columns still need a numeric cast
# (the saved output lists several as object).
score_df.dtypes

Season                          int64
Avg Points Scored Per Game    float64
Avg Turnovers                 float64
Avg Total Yards               float64
Avg Third Down Eff            float64
Avg Margin of Victory          object
Avg Number of Punts            object
Number of Punts                object
Avg Loss of Yard Plays         object
Avg Standard Gain Plays        object
Avg Explosive Plays            object
Num Loss of Yard Plays         object
Num Standard Gain Plays        object
Num Explosive Plays            object
Number of Games                object
Avg Margin of Vitory          float64
dtype: object

In [31]:
# Cast the two count columns to numeric dtypes before they are divided:
# games as int, punts as float.
for column_name, target_dtype in (("Number of Games", int), ("Number of Punts", float)):
    score_df[column_name] = score_df[column_name].astype(target_dtype)

In [33]:
# Punts per game: season punt total divided by the season's game count.
punts_per_game = score_df["Number of Punts"] / score_df["Number of Games"]
score_df["Avg Number of Punts"] = punts_per_game

In [None]:
# Per-game play averages: each "Num ... Plays" season total divided by the
# season's game count. The divisor column is "Number of Games"; the previous
# spelling "Numer of Games" matches no column and raised KeyError.
games_played = score_df["Number of Games"]
for bucket in ("Explosive", "Standard Gain", "Loss of Yard"):
    score_df[f"Avg {bucket} Plays"] = score_df[f"Num {bucket} Plays"] / games_played

In [13]:
# Cast the averaged metrics to float, one column at a time.
for metric in (
    "Avg Points Scored Per Game",
    "Avg Turnovers",
    "Avg Total Yards",
    "Avg Third Down Eff",
    "Avg Margin of Victory",
):
    score_df[metric] = score_df[metric].astype(float)

# The game count is a whole number, so it is cast to int instead.
score_df["Number of Games"] = score_df["Number of Games"].astype(int)
# "Avg Number of Punts" is deliberately not cast here.

KeyError: 'Avg Penalty Yards'

In [34]:
# Pairwise correlation (DataFrame.corr default method) between the numeric
# season-level metrics.
# Numeric columns are selected explicitly: columns still holding their ""
# placeholders are object dtype, and DataFrame.corr stopped dropping such
# columns silently in pandas 2.0, where it raises instead.
corr_matrix = score_df.select_dtypes(include="number").corr()
corr_matrix

Unnamed: 0,Season,Avg Points Scored Per Game,Avg Turnovers,Avg Total Yards,Avg Third Down Eff,Avg Number of Punts,Number of Punts,Number of Games,Avg Margin of Vitory
Season,1.0,0.773413,-0.978937,0.760679,0.565979,0.643202,0.478576,0.253071,0.403174
Avg Points Scored Per Game,0.773413,1.0,-0.70856,0.974344,0.529106,0.668223,0.500347,0.263938,0.510001
Avg Turnovers,-0.978937,-0.70856,1.0,-0.719644,-0.535351,-0.642197,-0.513307,-0.310425,-0.427674
Avg Total Yards,0.760679,0.974344,-0.719644,1.0,0.446182,0.668542,0.533458,0.31269,0.543571
Avg Third Down Eff,0.565979,0.529106,-0.535351,0.446182,1.0,0.607253,0.456646,0.28071,0.337635
Avg Number of Punts,0.643202,0.668223,-0.642197,0.668542,0.607253,1.0,0.896854,0.678448,0.661576
Number of Punts,0.478576,0.500347,-0.513307,0.533458,0.456646,0.896854,1.0,0.932375,0.755633
Number of Games,0.253071,0.263938,-0.310425,0.31269,0.28071,0.678448,0.932375,1.0,0.704263
Avg Margin of Vitory,0.403174,0.510001,-0.427674,0.543571,0.337635,0.661576,0.755633,0.704263,1.0


In [24]:
# One row per season with an empty-string placeholder for each tracked team's
# points-allowed average; a later cell fills these in.
team_labels = ("Clemson", "Florida", "FSU")
teams_df = pd.DataFrame(
    {"Season": season, **{f"{label} Points Allowed": "" for label in team_labels}}
)

In [25]:
# get_games lives on the games endpoint. Earlier cells rebind `api_instance`
# to drives/plays clients, so this cell builds its own games client rather
# than depend on which cell ran last.
games_api = cfbd.GamesApi(cfbd.ApiClient(configuration))

# Team name as passed to the API -> teams_df column receiving its average.
team_columns = {
    "Clemson": "Clemson Points Allowed",
    "Florida": "Florida Points Allowed",
    "Florida State": "FSU Points Allowed",
}


def _avg_points_allowed(games, team):
    """Average points scored against ``team`` across ``games``.

    Parameters
    ----------
    games : iterable
        Objects exposing ``home_team``, ``away_team``, ``home_points`` and
        ``away_points``.
    team : str
        Team name compared against ``home_team`` / ``away_team``.

    Returns
    -------
    float or None
        Mean of the opponent's points over the games ``team`` took part in.
        Games where that score is None are ignored. None is returned when no
        game qualifies, so the caller never divides by zero.
    """
    allowed = []
    for game in games:
        if game.away_team == team:
            opponent_points = game.home_points
        elif game.home_team == team:
            opponent_points = game.away_points
        else:
            continue
        # A missing score cannot be summed, so leave that game out.
        if opponent_points is not None:
            allowed.append(opponent_points)
    if not allowed:
        return None
    return sum(allowed) / len(allowed)


# Fill teams_df season by season, one API call per (season, team).
for year in season:
    for team, column in team_columns.items():
        games = games_api.get_games(year=year, team=team)
        average_allowed = _avg_points_allowed(games, team)
        if average_allowed is None:
            print(f"No scored games for {team} in {year}; {column} left unset")
            continue
        teams_df.loc[teams_df["Season"] == year, [column]] = average_allowed

In [36]:
# Write both result tables as CSV under Output/ (relative to the working
# directory). to_csv does not create missing directories, so make sure the
# folder exists first; exist_ok keeps re-runs from failing.
output_dir = Path("Output")
output_dir.mkdir(parents=True, exist_ok=True)

score_df.to_csv(output_dir / "metrics_by_season.csv", index=False, header=True)
teams_df.to_csv(output_dir / "teams_points_against.csv", index=False, header=True)