Import Python modules.

In [1]:
import json
import pandas as pd
import numpy as np

1. Read regular and postseason basic game stats JSON file from an input year into pandas objects.
2. Concatinate regular and postseason games DataFrame into a larger DataFrame (N-games x 13), with game results and filtered game information.

In [2]:
cfb_year = input('Input college football year?')

json_regular_season = cfb_year + '_cfb_JSON-CSV/' + cfb_year + '-basicGameStatsCFB-JSON-regular.json'
json_post_season = cfb_year + '_cfb_JSON-CSV/' + cfb_year + '-basicGameStatsCFB-JSON-postseason.json'

regular_season = pd.read_json(json_regular_season)
post_season = pd.read_json(json_post_season)

# New DataFrame, but select 13 columns, not 33 columns
regular_season_games = pd.DataFrame(regular_season, columns=['id', 'season', 'season_type', 'week', 'completed', 'home_team', 'home_division', 'home_conference', 'home_points', 'away_team', 'away_division', 'away_conference', 'away_points'])
postseason_games = pd.DataFrame(post_season, columns=['id', 'season', 'season_type', 'week', 'completed', 'home_team', 'home_division', 'home_conference', 'home_points', 'away_team', 'away_division', 'away_conference', 'away_points'])

# Concatenate regular_season_games DataFrame and postseason_games DataFrame
cfb_season_games = pd.concat([regular_season_games, postseason_games], axis=0)


Input college football year? 2015


In [3]:
cfb_season_games.head()

Unnamed: 0,id,season,season_type,week,completed,home_team,home_division,home_conference,home_points,away_team,away_division,away_conference,away_points
0,400603840,2015,regular,1,True,South Carolina,fbs,SEC,17,North Carolina,fbs,ACC,13
1,400763593,2015,regular,1,True,UCF,fbs,American Athletic,14,Florida International,fbs,Conference USA,15
2,400756896,2015,regular,1,True,Wake Forest,fbs,ACC,41,Elon,fcs,CAA,3
3,400787299,2015,regular,1,True,Ball State,fbs,Mid-American,48,VMI,fcs,Southern,36
4,400763399,2015,regular,1,True,Central Michigan,fbs,Mid-American,13,Oklahoma State,fbs,Big 12,24


1. Read detailed postseason game stats JSON file into a viewable pandas object.
2. Split detailed postseason panda object into an [1] "id" and [2] "teams" pandas objects.
3. Split "teams" pandas for each game into four (4) lists...
   * [1] team_0, [2] team_0.location, [3] team_1, and [4] team_1.location

In [4]:
# load detailed postseason game stats JSON as a pandas object
json_detailed_postseason = cfb_year + '_cfb_JSON-CSV/' + cfb_year + '-postseason-1-detailedGamesCFB-JSON.json'
detailed_postseason = pd.read_json(json_detailed_postseason)

# split detailed_postseason pandas object to list id_only (N-games x 1)
id_only = detailed_postseason.drop(['teams'], axis=1)
rows = id_only.shape[0]
print(rows)

# split detailed_postseason pandas object to list teams (N-games x 1)
teams_only = detailed_postseason.drop(['id'], axis=1)

# split detailed_postseason 'DataFrame' into a [1] teams[j][0]['teams_key'] list
# & teams[j][1]['teams_key'] list (N-teams x 1)
def teams_column(all_games, team_number, teams_key):
    teams_list = []
    for game in range(all_games):
        teams_list.append((teams_only['teams'][game][team_number][teams_key]))
    return teams_list

team0_list = teams_column(rows, 0, 'school')
team1_list = teams_column(rows, 1, 'school')
team0_location_list = teams_column(rows, 0, 'homeAway')
team1_location_list = teams_column(rows, 1, 'homeAway')
team0_points_list = teams_column(rows, 0, 'points')
team1_points_list = teams_column(rows, 1, 'points')

# Combine the 4 list-Series into a DataFrame (N-teams x 4)
teams_detail = pd.DataFrame({'team_0': team0_list, 'team_0.location': team0_location_list, 'team_0.points': team0_points_list, 'team_1' : team1_list,  'team_1.location' : team1_location_list, 'team_1.points': team1_points_list})

# Join games id with team_0 and team_1 information
id_teams_locations_df = id_only.join(teams_detail, on=None)
id_teams_locations_df


41


Unnamed: 0,id,team_0,team_0.location,team_0.points,team_1,team_1.location,team_1.points
0,400852723,Air Force,home,36,California,away,55
1,400852725,Nevada,away,28,Colorado State,home,23
2,400852730,USC,away,21,Wisconsin,home,23
3,400852727,Memphis,home,10,Auburn,away,31
4,400852728,NC State,away,28,Mississippi State,home,51
5,400852729,Texas A&M,away,21,Louisville,home,27
6,400852733,Clemson,home,37,Oklahoma,away,17
7,400852732,Michigan State,away,0,Alabama,home,38
8,400852731,Houston,away,38,Florida State,home,24
9,400852737,Iowa,home,16,Stanford,away,45


In [22]:
# practicing reading JSON file with nested dictionaries
# read detailed category stats from 1 game, team_1 (get values from a "stats" dictionary)
# detailed stats list for each team will vary
test = detailed_postseason.drop(['id'], axis=1)
test02 = test['teams'][0][1]['stats']
test03 = pd.DataFrame(test02)

test04 = test03.to_numpy()
# print(test04)
# test04[0][0]

# number of statistics element *each team detailed stats will vary
stats_elements = test04.shape[0]

# will add if/else condition in "team_stats" function to update header list with either "team0." or "team1."
def team_stats(all_stats, row):
    team_stats_header = []
    for element in range(all_stats):
        team_stats_header.append((test04[element][row]))
    return team_stats_header
    
headers_list = team_stats(stats_elements, 0)
values_list = team_stats(stats_elements, 1)

# insert string to identify which team stat
headers_list = ['team0.' + i for i in headers_list]

# Transpose detailed stats values, add headers_list to DataFrame
team_detailed_stats = pd.DataFrame(values_list)
team_detailed_stats = team_detailed_stats.transpose()
team_detailed_stats.columns = headers_list
team_detailed_stats


Unnamed: 0,team0.fumblesRecovered,team0.rushingTDs,team0.puntReturnYards,team0.puntReturnTDs,team0.puntReturns,team0.passingTDs,team0.kickReturnYards,team0.kickReturnTDs,team0.kickReturns,team0.kickingPoints,...,team0.completionAttempts,team0.yardsPerPass,team0.rushingYards,team0.rushingAttempts,team0.yardsPerRushAttempt,team0.totalPenaltiesYards,team0.turnovers,team0.fumblesLost,team0.interceptions,team0.possessionTime
0,2,1,36,0,3,6,136,0,6,13,...,25-37,12.6,119,34,3.5,5-29,1,1,0,28:53
