Import Python modules.

In [2]:
import json
import pandas as pd
import numpy as np


1. Read the detailed weekly regular-season game stats JSON file into a viewable pandas object.
2. Split the detailed game stats pandas object into [1] an "id" and [2] a "teams" pandas object.
3. Split the "teams" pandas object for each game into eight (8) lists...
   * [1] team_0, [2] team_0.id, [3] team_0.location, [4] team_0.points, [5] team_1, [6] team_1.id, [7] team_1.location, and [8] team_1.points

In [36]:
# Load one week of detailed regular-season game stats (JSON) as a pandas object.
# Both answers are stripped of surrounding whitespace so that a stray space
# typed at the prompt cannot end up inside the file path built below.
cfb_year = input('Input college football year?').strip()
week_number = input('Input regular season week number').strip()

# File name pattern, e.g. 2024-regular-1-detailedGamesCFB-JSON.json,
# stored inside the folder <year>_cfb_JSON-CSV/

json_detailed_weekly_games = cfb_year + '_cfb_JSON-CSV/' + cfb_year + '-regular-' + week_number + '-detailedGamesCFB-JSON.json'
detailed_weekly_games = pd.read_json(json_detailed_weekly_games)

# Keep everything except the nested 'teams' column (one row per game)
id_only = detailed_weekly_games.drop(columns=['teams'])
rows = id_only.shape[0]
print(str(rows) + ' games')

# Keep everything except the game 'id' column (one row per game)
teams_only = detailed_weekly_games.drop(columns=['id'])

# Collect one field from one side (team 0 or team 1) of every game and
# return it as a plain list with one entry per game.
def teams_column(all_games, team_number, teams_key, games=None):
    """Return ``teams_key`` of team ``team_number`` for the first ``all_games`` games.

    Parameters
    ----------
    all_games : int
        Number of games to read; games are looked up as 0 .. all_games - 1.
    team_number : int
        Position of the team inside each game's team list (this notebook
        uses 0 and 1).
    teams_key : str
        Key to read from the team's dictionary, e.g. 'school' or 'points'.
    games : indexable, optional
        Per-game team lists such that ``games[g][team_number][teams_key]``
        is valid. When omitted, the notebook-level ``teams_only['teams']``
        column is used, which is the original behaviour of this function.

    Returns
    -------
    list
        The requested value for each game, in game order. Empty when
        ``all_games`` is 0.
    """
    if games is None:
        # Fall back to the frame created when the weekly JSON file was loaded.
        games = teams_only['teams']
    return [games[game][team_number][teams_key] for game in range(all_games)]

# Read each per-team field for both sides of every game.
# Every pair is (team 0 values, team 1 values), one entry per game.
team0_list, team1_list = [
    teams_column(rows, side, 'school') for side in (0, 1)
]
team0_location_list, team1_location_list = [
    teams_column(rows, side, 'homeAway') for side in (0, 1)
]
team0_points_list, team1_points_list = [
    teams_column(rows, side, 'points') for side in (0, 1)
]
team0_school_id_list, team1_school_id_list = [
    teams_column(rows, side, 'schoolId') for side in (0, 1)
]

# Assemble the eight lists into one DataFrame (one row per game, 8 columns)
teams_detail = pd.DataFrame({
    'team0': team0_list,
    'team0.id': team0_school_id_list,
    'team0.location': team0_location_list,
    'team0.points': team0_points_list,
    'team1': team1_list,
    'team1.id': team1_school_id_list,
    'team1.location': team1_location_list,
    'team1.points': team1_points_list,
})

# Attach the team0 / team1 details to the game ids (rows matched on the index)
id_teams_locations_df = id_only.join(teams_detail)
id_teams_locations_df


Input college football year? 2024
Input regular season week number 2


78 games


Unnamed: 0,id,team0,team0.id,team0.location,team0.points,team1,team1.id,team1.location,team1.points
0,401643710,Wyoming,2751,home,13,Idaho,70,away,17
1,401628977,Northern Illinois,2459,away,16,Notre Dame,87,home,14
2,401628336,Arkansas,8,away,31,Oklahoma State,197,home,39
3,401636616,Tulane,2655,home,27,Kansas State,2306,away,34
4,401636618,Utah,254,home,23,Baylor,239,away,12
...,...,...,...,...,...,...,...,...,...
73,401628473,Eastern Michigan,2199,away,9,Washington,264,home,30
74,401628469,Boise State,68,away,34,Oregon,2483,home,37
75,401628344,Missouri,142,home,38,Buffalo,2084,away,0
76,401643705,Air Force,2005,home,7,San José State,23,away,17


In [37]:
# Read the nested "stats" entries of every team in every game and turn each
# team's entries into a small wide frame whose column labels carry a
# 'team0.' or 'team1.' prefix.
# The stats reported differ from team to team, so the stacked result has
# missing values wherever a team did not report a given stat.
#
# The frames are stacked here; later cells pair them up per game and combine
# them with "id_teams_locations_df".

accumulator = []
for game in range(rows):
    for team in range(2):
        # nested stats entries for this team in this game
        team_detailed_stats = teams_only['teams'][game][team]['stats']

        # tabulate the entries, then flip rows and columns via numpy
        stats_flipped = pd.DataFrame(team_detailed_stats).to_numpy().T

        # back to a DataFrame: the first row supplies the column labels,
        # the remaining row(s) are kept as the data
        stats_wide = pd.DataFrame(stats_flipped)
        stat_labels = stats_wide.iloc[0]
        stats_wide = stats_wide[1:]
        stats_wide.columns = stat_labels

        # label every column with the side it belongs to ('team0.' or 'team1.')
        side_prefix = 'team' + str(team) + '.'
        accumulator.append(stats_wide.add_prefix(side_prefix))

# accumulator holds the frames in loop order: team0 then team1 for game 0,
# team0 then team1 for game 1, and so on
big_df = pd.concat(accumulator)
big_df


Unnamed: 0,team0.rushingTDs,team0.puntReturnYards,team0.puntReturnTDs,team0.puntReturns,team0.passingTDs,team0.kickReturnYards,team0.kickReturnTDs,team0.kickReturns,team0.kickingPoints,team0.fumblesRecovered,...,team1.totalYards,team1.fourthDownEff,team1.thirdDownEff,team1.firstDowns,team0.interceptionYards,team0.interceptionTDs,team0.passesIntercepted,team1.interceptionYards,team1.interceptionTDs,team1.passesIntercepted
1,0,5,0,2,1,28,0,1,7,0,...,,,,,,,,,,
1,,,,,,,,,,,...,225,2-4,1-13,14,,,,,,
1,0,11,0,2,1,41,0,3,10,0,...,,,,,33,0,2,,,
1,,,,,,,,,,,...,286,0-0,3-10,17,,,,,,
1,3,1,0,1,1,16,0,1,7,0,...,,,,,0,0,1,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,,,,,,,,,,,...,169,1-3,1-12,9,,,,16,0,1
1,1,11,0,1,0,,,,1,0,...,,,,,41,0,1,,,
1,,,,,,,,,,,...,312,0-0,5-16,15,,,,0,0,2
1,1,4,0,2,2,17,0,1,7,0,...,,,,,38,1,2,,,


Group big_df every two rows, so that the team0 row and the team1 row of each game are combined into a single row.

In [38]:
# Give consecutive rows of big_df a shared key (rows 0-1 -> 0, rows 2-3 -> 1, ...)
# and sum each pair, so every pair of rows collapses into one row.
pair_number = np.arange(len(big_df)) // 2
group_big_df = big_df.groupby(pair_number).sum()
group_big_df


Unnamed: 0,team0.rushingTDs,team0.puntReturnYards,team0.puntReturnTDs,team0.puntReturns,team0.passingTDs,team0.kickReturnYards,team0.kickReturnTDs,team0.kickReturns,team0.kickingPoints,team0.fumblesRecovered,...,team1.totalYards,team1.fourthDownEff,team1.thirdDownEff,team1.firstDowns,team0.interceptionYards,team0.interceptionTDs,team0.passesIntercepted,team1.interceptionYards,team1.interceptionTDs,team1.passesIntercepted
0,0,5,0,2,1,28,0,1,7,0,...,225,2-4,1-13,14,0,0,0,0,0,0
1,0,11,0,2,1,41,0,3,10,0,...,286,0-0,3-10,17,33,0,2,0,0,0
2,3,1,0,1,1,16,0,1,7,0,...,385,1-2,5-16,21,0,0,1,73,1,1
3,1,2,0,1,2,152,0,6,9,0,...,396,1-1,2-10,19,0,0,0,0,0,1
4,0,37,0,4,2,14,0,1,5,1,...,223,2-4,4-16,12,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,0,6,0,2,0,34,0,2,9,1,...,501,1-1,4-9,23,0,0,0,0,0,0
74,3,43,0,3,1,0,0,0,10,2,...,353,0-0,4-12,16,0,0,0,0,0,0
75,5,0,0,0,0,0,0,0,8,0,...,169,1-3,1-12,9,15,0,1,16,0,1
76,1,11,0,1,0,0,0,0,1,0,...,312,0-0,5-16,15,41,0,1,0,0,2


In [39]:
# view header stats: list every column label of the grouped stats DataFrame
group_big_df.columns


Index(['team0.rushingTDs', 'team0.puntReturnYards', 'team0.puntReturnTDs',
       'team0.puntReturns', 'team0.passingTDs', 'team0.kickReturnYards',
       'team0.kickReturnTDs', 'team0.kickReturns', 'team0.kickingPoints',
       'team0.fumblesRecovered', 'team0.totalFumbles', 'team0.tacklesForLoss',
       'team0.defensiveTDs', 'team0.tackles', 'team0.sacks', 'team0.qbHurries',
       'team0.passesDeflected', 'team0.possessionTime', 'team0.interceptions',
       'team0.fumblesLost', 'team0.turnovers', 'team0.totalPenaltiesYards',
       'team0.yardsPerRushAttempt', 'team0.rushingAttempts',
       'team0.rushingYards', 'team0.yardsPerPass', 'team0.completionAttempts',
       'team0.netPassingYards', 'team0.totalYards', 'team0.fourthDownEff',
       'team0.thirdDownEff', 'team0.firstDowns', 'team1.rushingTDs',
       'team1.puntReturnYards', 'team1.puntReturnTDs', 'team1.puntReturns',
       'team1.passingTDs', 'team1.kickReturnYards', 'team1.kickReturnTDs',
       'team1.kickReturns', '

In [40]:
# Put the game/team identity columns and the grouped detailed stats side by
# side in one DataFrame; rows are aligned on the index of each frame.
cfb_weekly_games = pd.concat(
    objs=[id_teams_locations_df, group_big_df],
    axis='columns',
)
cfb_weekly_games


Unnamed: 0,id,team0,team0.id,team0.location,team0.points,team1,team1.id,team1.location,team1.points,team0.rushingTDs,...,team1.totalYards,team1.fourthDownEff,team1.thirdDownEff,team1.firstDowns,team0.interceptionYards,team0.interceptionTDs,team0.passesIntercepted,team1.interceptionYards,team1.interceptionTDs,team1.passesIntercepted
0,401643710,Wyoming,2751,home,13,Idaho,70,away,17,0,...,225,2-4,1-13,14,0,0,0,0,0,0
1,401628977,Northern Illinois,2459,away,16,Notre Dame,87,home,14,0,...,286,0-0,3-10,17,33,0,2,0,0,0
2,401628336,Arkansas,8,away,31,Oklahoma State,197,home,39,3,...,385,1-2,5-16,21,0,0,1,73,1,1
3,401636616,Tulane,2655,home,27,Kansas State,2306,away,34,1,...,396,1-1,2-10,19,0,0,0,0,0,1
4,401636618,Utah,254,home,23,Baylor,239,away,12,0,...,223,2-4,4-16,12,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,401628473,Eastern Michigan,2199,away,9,Washington,264,home,30,0,...,501,1-1,4-9,23,0,0,0,0,0,0
74,401628469,Boise State,68,away,34,Oregon,2483,home,37,3,...,353,0-0,4-12,16,0,0,0,0,0,0
75,401628344,Missouri,142,home,38,Buffalo,2084,away,0,5,...,169,1-3,1-12,9,15,0,1,16,0,1
76,401643705,Air Force,2005,home,7,San José State,23,away,17,1,...,312,0-0,5-16,15,41,0,1,0,0,2


In [41]:
# One row per game showing each team's total yards next to the total yards
# of its opponent, which is relabeled as that team's "defenseYards".
# Each totalYards column is therefore selected twice and then renamed.
# id | team0 | team0.id | team0.totalYards | team0.defenseYards |
# team1 | team1.id | team1.totalYards | team1.defenseYards
yards_source_columns = [
    'id',
    'team0', 'team0.id', 'team0.totalYards', 'team1.totalYards',
    'team1', 'team1.id', 'team1.totalYards', 'team0.totalYards',
]
yards_output_columns = [
    'id',
    'team0', 'team0.id', 'team0.totalYards', 'team0.defenseYards',
    'team1', 'team1.id', 'team1.totalYards', 'team1.defenseYards',
]
cfb_weekly_total_Yards = cfb_weekly_games[yards_source_columns]
cfb_weekly_total_Yards.columns = yards_output_columns
cfb_weekly_total_Yards


Unnamed: 0,id,team0,team0.id,team0.totalYards,team0.defenseYards,team1,team1.id,team1.totalYards,team1.defenseYards
0,401643710,Wyoming,2751,270,225,Idaho,70,225,270
1,401628977,Northern Illinois,2459,388,286,Notre Dame,87,286,388
2,401628336,Arkansas,8,648,385,Oklahoma State,197,385,648
3,401636616,Tulane,2655,491,396,Kansas State,2306,396,491
4,401636618,Utah,254,292,223,Baylor,239,223,292
...,...,...,...,...,...,...,...,...,...
73,401628473,Eastern Michigan,2199,204,501,Washington,264,501,204
74,401628469,Boise State,68,369,353,Oregon,2483,353,369
75,401628344,Missouri,142,518,169,Buffalo,2084,169,518
76,401643705,Air Force,2005,197,312,San José State,23,312,197


In [42]:
# Reshape the per-game yards table into one row per team (N rows x 4 columns).
# Columns: id (team id) | team | actual_Off | actual_Def
per_team_columns = ['id', 'team', 'actual_Off', 'actual_Def']

# team0 rows: offense is team0's own total yards, defense is team1's total yards
team0_total_yards = cfb_weekly_total_Yards[
    ['team0.id', 'team0', 'team0.totalYards', 'team1.totalYards']
].set_axis(per_team_columns, axis=1)

# team1 rows: offense is team1's own total yards, defense is team0's total yards
team1_total_yards = cfb_weekly_total_Yards[
    ['team1.id', 'team1', 'team1.totalYards', 'team0.totalYards']
].set_axis(per_team_columns, axis=1)

# Stack the team0 rows on top of the team1 rows; the original row labels are
# kept, so each label appears once in the team0 part and once in the team1 part
actual_off_def = pd.concat([team0_total_yards, team1_total_yards])
actual_off_def


Unnamed: 0,id,team,actual_Off,actual_Def
0,2751,Wyoming,270,225
1,2459,Northern Illinois,388,286
2,8,Arkansas,648,385
3,2655,Tulane,491,396
4,254,Utah,292,223
...,...,...,...,...
73,264,Washington,501,204
74,2483,Oregon,353,369
75,2084,Buffalo,169,518
76,23,San José State,312,197


In [43]:
# Look up selected teams by id; the filter keeps every row whose id matches
# any one of the listed values
selected_teams_filter = 'id == 41 or id == 152 or id == 201 or id == 326 or id == 2390'
actual_off_def.query(selected_teams_filter)


Unnamed: 0,id,team,actual_Off,actual_Def
34,2390,Miami,549,190
45,201,Oklahoma,252,318
31,326,Texas State,511,334
37,41,UConn,624,279
62,152,NC State,143,460
