In [1]:
import os

import cfbd
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st

from config import api_key

# Build the cfbd client configuration. The key is imported from the local
# config module, so it is never written into the notebook itself.
configuration = cfbd.Configuration()
configuration.api_key_prefix['Authorization'] = 'Bearer'
configuration.api_key['Authorization'] = api_key

# Games endpoint client used by the scoring and team-stat cells.
api_client = cfbd.ApiClient(configuration)
api_instance = cfbd.GamesApi(api_client)

In [2]:
# Seasons covered by the analysis (2004 through 2020 inclusive) and a longer
# span starting in 1970.
season = list(range(2004, 2021))
season_extended = list(range(1970, 2021))

# Metric columns in their output order. Each one starts as an empty-string
# placeholder and is filled in season by season by the cells that follow.
METRIC_COLUMNS = [
    "Avg Points Scored Per Game",
    "Avg Turnovers",
    "Avg Total Yards",
    "Avg Third Down Eff",
    "Avg Margin of Victory",
    "Avg Number of Punts",
    "Number of Punts",
    "Avg Loss of Yard Plays",
    "Avg Standard Gain Plays",
    "Avg Explosive Plays",
    "Num Loss of Yard Plays",
    "Num Standard Gain Plays",
    "Num Explosive Plays",
    "Number of Games",
]

# One row per season; the scalar "" is broadcast down every metric column.
score_df = pd.DataFrame({"Season": season, **dict.fromkeys(METRIC_COLUMNS, "")})

In [4]:
# Per-season scoring summary: average combined points per game and average
# margin of victory, written into the matching row of score_df.
for year in season:
    games = api_instance.get_games(year=year)
    total_points = []
    point_diff = []
    for game in games:
        try:
            home_points = int(game.home_points)
            away_points = int(game.away_points)
        except (TypeError, ValueError):
            # int(None) raises TypeError, so a game without a recorded score
            # lands here. Other exception types are no longer swallowed, so
            # real failures stay visible.
            print("Skip Game")
            continue
        total_points.append(home_points + away_points)
        point_diff.append(abs(home_points - away_points))

    if not total_points:
        # Nothing to average; leave the placeholders rather than divide by zero.
        print(f"No scored games for season {year}. Skipping")
        continue

    season_rows = score_df["Season"] == year
    score_df.loc[season_rows, ["Avg Points Scored Per Game"]] = sum(total_points) / len(total_points)
    score_df.loc[season_rows, ["Avg Margin of Victory"]] = sum(point_diff) / len(point_diff)

In [5]:
def _team_game_totals(stats):
    """Pull the figures this analysis needs out of one team's stat list.

    Parameters
    ----------
    stats : iterable of mappings
        Entries with a "category" and a "stat" key, as read from
        ``game.teams[i]["stats"]``.

    Returns
    -------
    tuple of int
        ``(turnovers, total_yards, third_down_successes, third_down_attempts)``.

    Raises
    ------
    KeyError
        If one of the required categories is missing.
    IndexError, ValueError
        If a stat value does not have the expected form.
    """
    # If a category repeats, the last occurrence wins.
    by_category = {entry["category"]: entry["stat"] for entry in stats}
    third_down = by_category["thirdDownEff"].split("-")  # "successes-attempts"
    return (
        int(by_category["turnovers"]),
        int(by_category["totalYards"]),
        int(third_down[0]),
        int(third_down[1]),
    )


# Per-season team-stat summary: combined turnovers, total yards and third-down
# efficiency per game, plus the number of games that contributed.
for year in season:
    turnovers = []
    total_yards = []
    third_attempts = []
    third_successes = []
    num_games = 0
    for week in range(1, 17):
        try:
            game_teams = api_instance.get_team_game_stats(year=year, week=week)
        except Exception as e:
            print(f"No data for season {year} week {week}. Skipping {e}")
            continue

        for game in game_teams:
            # Parse each game on its own: one incomplete game no longer drops
            # the rest of the week, and no value can leak in from the
            # previously processed game.
            try:
                turnover_1, tot_yards_1, third_made_1, third_att_1 = _team_game_totals(game.teams[0]["stats"])
                turnover_2, tot_yards_2, third_made_2, third_att_2 = _team_game_totals(game.teams[1]["stats"])
            except (KeyError, IndexError, TypeError, ValueError) as e:
                print(f"Incomplete stats for a game in season {year} week {week}. Skipping {e}")
                continue

            turnovers.append(turnover_1 + turnover_2)
            total_yards.append(tot_yards_1 + tot_yards_2)
            third_attempts.append(third_att_1 + third_att_2)
            third_successes.append(third_made_1 + third_made_2)
            # Count only games that made it into the averages.
            num_games += 1

    season_rows = score_df["Season"] == year
    score_df.loc[season_rows, ["Number of Games"]] = num_games
    if num_games == 0:
        # Nothing to average; leave the placeholders rather than divide by zero.
        print(f"No team stats for season {year}. Skipping averages")
        continue

    score_df.loc[season_rows, ["Avg Turnovers"]] = sum(turnovers) / num_games
    score_df.loc[season_rows, ["Avg Total Yards"]] = sum(total_yards) / num_games
    if sum(third_attempts):
        # Season-wide conversions over attempts; mathematically equal to the
        # ratio of the two per-game means because both lists have equal length.
        score_df.loc[season_rows, ["Avg Third Down Eff"]] = sum(third_successes) / sum(third_attempts)

In [None]:
# Use a dedicated drives client instead of rebinding api_instance, so cells
# that call api_instance.get_games keep working on a top-to-bottom run.
drives_api = cfbd.DrivesApi(cfbd.ApiClient(configuration))

# Count punts per season from the drive results.
for year in season:
    drives = drives_api.get_drives(year=year)
    drive_results = pd.DataFrame([drive.to_dict() for drive in drives])
    if "drive_result" not in drive_results.columns:
        # An empty response yields a frame without columns.
        print(f"No drive data for season {year}. Skipping")
        continue

    drive_result_value_counts = drive_results["drive_result"].value_counts()
    # .get(..., 0): a label that never occurs in a season is absent from
    # value_counts(), and plain indexing would raise KeyError.
    punts = drive_result_value_counts.get("PUNT", 0) + drive_result_value_counts.get("PUNT RETURN TD", 0)
    score_df.loc[score_df["Season"] == year, ["Number of Punts"]] = punts
    print(f"{year} processed")

2004 processed
2005 processed
2006 processed
2007 processed
2008 processed
2009 processed
2010 processed
2011 processed
2012 processed
2013 processed
2014 processed
2015 processed


In [None]:
# Play-type ids grouped by how the loop below scores them.
# NOTE(review): presumably these ids come from the cfbd play-types listing; confirm.
rush_types = [5, 68]
pass_types = [24, 67, 51, 4]
neg_play_types = [7, 20]

# Use a dedicated plays client instead of rebinding api_instance, so cells
# that call api_instance.get_games keep working on a top-to-bottom run.
plays_api = cfbd.PlaysApi(cfbd.ApiClient(configuration))

# (play type id, yards needed to count as explosive): 20 for the rush types,
# 30 for the pass types. Request order matches rush_types then pass_types.
explosive_yardage = [(play_type, 20) for play_type in rush_types]
explosive_yardage += [(play_type, 30) for play_type in pass_types]

# Classify every play of each season as explosive, standard gain or loss.
for year in season:
    explosive_plays = 0
    standard_plays = 0
    loss_plays = 0
    for week in range(1, 17):
        for play_type, explosive_yards in explosive_yardage:
            plays = plays_api.get_plays(year=year, week=week, play_type=play_type)
            for play in plays:
                if play.yards_gained >= explosive_yards:
                    explosive_plays += 1
                elif play.yards_gained >= 0:
                    standard_plays += 1
                else:
                    loss_plays += 1
        for neg_play in neg_play_types:
            plays = plays_api.get_plays(year=year, week=week, play_type=neg_play)
            # Every play of these types counts as a loss regardless of yardage.
            loss_plays += sum(1 for _ in plays)

    season_rows = score_df["Season"] == year
    score_df.loc[season_rows, ["Num Explosive Plays"]] = explosive_plays
    score_df.loc[season_rows, ["Num Standard Gain Plays"]] = standard_plays
    score_df.loc[season_rows, ["Num Loss of Yard Plays"]] = loss_plays
    print(f"{year} processed")

In [None]:
# Show the per-season metrics table as this cell's output.
score_df

In [None]:
# List each column's dtype; the metric columns were created with "" placeholders.
score_df.dtypes

In [None]:
# Cast the raw count columns to numeric dtypes, games first and punts second.
for column, dtype in (("Number of Games", int), ("Number of Punts", float)):
    score_df[column] = score_df[column].astype(dtype)

In [None]:
# Punts per game for each season. The divisor column is "Number of Games";
# the previous key "Numer of Games" does not exist and raised KeyError.
score_df["Avg Number of Punts"] = score_df["Number of Punts"] / score_df["Number of Games"]

In [None]:
# Per-game play-outcome averages: each season total divided by that season's
# game count. The divisor column is "Number of Games"; the previous key
# "Numer of Games" does not exist and raised KeyError.
games_played = score_df["Number of Games"]
score_df["Avg Explosive Plays"] = score_df["Num Explosive Plays"] / games_played
score_df["Avg Standard Gain Plays"] = score_df["Num Standard Gain Plays"] / games_played
score_df["Avg Loss of Yard Plays"] = score_df["Num Loss of Yard Plays"] / games_played

In [13]:
# Convert the averaged metrics from placeholder/object dtype to float, in
# column order, then make the game count an integer.
float_columns = [
    "Avg Points Scored Per Game",
    "Avg Turnovers",
    "Avg Total Yards",
    "Avg Third Down Eff",
    "Avg Margin of Victory",
]
for column in float_columns:
    score_df[column] = score_df[column].astype(float)
score_df["Number of Games"] = score_df["Number of Games"].astype(int)
# NOTE: "Avg Number of Punts" is not cast in this cell.

KeyError: 'Avg Penalty Yards'

In [23]:
# Pairwise correlation between the numeric season metrics.
# score_df can still hold object-dtype columns (the "" placeholders). Since
# pandas 2.0, DataFrame.corr() no longer drops those silently and raises
# instead, so restrict to numeric columns explicitly. select_dtypes is used
# rather than corr(numeric_only=True) so this also runs on older pandas.
corr_matrix = score_df.select_dtypes(include="number").corr()
corr_matrix

Unnamed: 0,Season,Avg Points Scored Per Game,Avg Turnovers,Avg Total Yards,Avg Third Down Eff,Avg Margin of Vitory
Season,1.0,0.773413,-0.978937,0.760679,0.565979,0.403174
Avg Points Scored Per Game,0.773413,1.0,-0.70856,0.974344,0.529106,0.510001
Avg Turnovers,-0.978937,-0.70856,1.0,-0.719644,-0.535351,-0.427674
Avg Total Yards,0.760679,0.974344,-0.719644,1.0,0.446182,0.543571
Avg Third Down Eff,0.565979,0.529106,-0.535351,0.446182,1.0,0.337635
Avg Margin of Vitory,0.403174,0.510001,-0.427674,0.543571,0.337635,1.0


In [24]:
# One row per season with an empty-string placeholder for each tracked team's
# average points allowed; a later cell fills these in.
points_allowed_columns = [
    "Clemson Points Allowed",
    "Florida Points Allowed",
    "FSU Points Allowed",
]
teams_df = pd.DataFrame({"Season": season, **dict.fromkeys(points_allowed_columns, "")})

In [25]:
# Use a dedicated games client: earlier cells rebind api_instance to other
# endpoint clients, which have no get_games method on a top-to-bottom run.
games_api = cfbd.GamesApi(cfbd.ApiClient(configuration))

# Team name as passed to the API -> teams_df column that receives its average.
team_columns = {
    "Clemson": "Clemson Points Allowed",
    "Florida": "Florida Points Allowed",
    "Florida State": "FSU Points Allowed",
}


def _avg_points_allowed(games, team):
    """Return the mean points scored against ``team`` over ``games``.

    Parameters
    ----------
    games : iterable
        Game objects exposing ``home_team``, ``away_team``, ``home_points``
        and ``away_points``.
    team : str
        Team name to match against ``home_team`` / ``away_team``.

    Returns
    -------
    float or None
        The average of the opponent's points, or ``None`` when no game with
        a recorded opponent score involves ``team``.
    """
    allowed = []
    for game in games:
        if game.away_team == team:
            opponent_points = game.home_points
        elif game.home_team == team:
            opponent_points = game.away_points
        else:
            continue
        # Skip games without a recorded score; a None would break sum().
        if opponent_points is not None:
            allowed.append(opponent_points)
    return sum(allowed) / len(allowed) if allowed else None


# Average points allowed per season for each tracked team.
for year in season:
    season_rows = teams_df["Season"] == year
    for team, column in team_columns.items():
        games = games_api.get_games(year=year, team=team)
        average = _avg_points_allowed(games, team)
        if average is None:
            # Leave the placeholder rather than divide by zero.
            print(f"No scored games for {team} in {year}. Skipping")
            continue
        teams_df.loc[season_rows, [column]] = average

In [27]:
# Write both summary tables to CSV. Create the output folder first, because
# to_csv does not create missing directories and would fail without it.
os.makedirs("Output", exist_ok=True)
score_df.to_csv("Output/metrics_by_season.csv", index=False, header=True)
teams_df.to_csv("Output/teams_points_against.csv", index=False, header=True)