In [44]:
import http
import http.client
import json
import os
import time
import urllib
import urllib.request
from datetime import datetime
from urllib.error import URLError, HTTPError, ContentTooShortError

import numpy as np
import pandas as pd

def download(url, num_retries=5, delay=1, retry_delay=10):
    """Fetch ``url`` and return the raw response body.

    Parameters
    ----------
    url : str
        Address to request.
    num_retries : int, default 5
        How many more attempts are allowed after an HTTP 5xx response.
    delay : float, default 1
        Seconds to sleep before every request.
    retry_delay : float, default 10
        Seconds to sleep before retrying after an HTTP 5xx response.

    Returns
    -------
    bytes or None
        The response body, or ``None`` when the request failed with a
        non-5xx error or the retries were exhausted. The failure is printed,
        not raised.
    """
    try:
        time.sleep(delay)
        # The context manager closes the connection even if read() raises.
        with urllib.request.urlopen(url) as response:
            return response.read()
    except (URLError, HTTPError, ContentTooShortError, http.client.HTTPException) as e:
        # http.client exceptions carry no ``reason`` attribute; fall back to
        # the exception itself so the handler cannot raise AttributeError.
        print('Download error:', getattr(e, 'reason', e), url)
        status = getattr(e, 'code', None)
        # Only HTTP 5xx responses are retried; everything else returns None.
        if num_retries > 0 and status is not None and 500 <= status < 600:
            time.sleep(retry_delay)
            return download(url, num_retries - 1, delay, retry_delay)
    return None


def nfl_calendar(season):
    """Build one season's calendar table from the ESPN scoreboard endpoint.

    Requests the scoreboard for ``season`` and reads
    ``leagues[0].calendar`` from the JSON response. Calendar items that have
    an ``entries`` list are expanded to one row per entry, with the parent's
    ``label``/``value``/``startDate``/``endDate`` kept under a
    ``seasontype.`` prefix. Items without ``entries`` become a single row.

    Parameters
    ----------
    season : int or str
        Value substituted into the ``dates`` query parameter and stored in
        the ``year`` column.

    Returns
    -------
    pandas.DataFrame
        Leading columns are ``year``, ``value``, ``str_startDate``
        (``YYYYMMDD``), ``daysForward`` (string) and ``url`` (a scoreboard
        URL built from the two previous columns). The remaining columns come
        from the JSON payload, followed by a constant ``group`` column of 20.
    """
    url = "http://site.api.espn.com/apis/site/v2/sports/football/nfl/scoreboard?dates={}".format(season)
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(url) as resp:
        calendar_items = json.loads(resp.read())['leagues'][0]['calendar']

    # Collect the pieces and concatenate once instead of growing a frame
    # inside the loop.
    frames = []
    for item in calendar_items:
        if 'entries' in item:
            frames.append(pd.json_normalize(
                data=item,
                record_path='entries',
                meta=['label', 'value', 'startDate', 'endDate'],
                meta_prefix='seasontype.',
                errors='ignore',
            ))
        else:
            frames.append(pd.json_normalize(item))
    full_schedule = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    # Parse each date column once and reuse it.
    start = pd.to_datetime(full_schedule['startDate'])
    end = pd.to_datetime(full_schedule['endDate'])

    full_schedule['year'] = season
    # Whole days between start and end, plus one; stored as text so it can
    # be concatenated into the URL.
    full_schedule['daysForward'] = ((end - start).dt.days + 1).astype(str)
    full_schedule['str_startDate'] = start.dt.strftime('%Y%m%d')
    full_schedule['url'] = (
        "http://site.api.espn.com/apis/site/v2/sports/football/nfl/scoreboard?groups=20&limit=1000&dates="
        + full_schedule['str_startDate']
        + '&daysForward='
        + full_schedule['daysForward']
    )

    # Move the derived columns to the front; keep the rest in original order.
    lead_cols = ['year', 'value', 'str_startDate', 'daysForward', 'url']
    other_cols = [col for col in full_schedule.columns if col not in lead_cols]
    full_schedule = full_schedule[lead_cols + other_cols]

    full_schedule['group'] = 20

    return full_schedule
   



pd.set_option('display.max_columns',None)

# Fetch every season's calendar once, keyed by season year.
calendar = {}
for x in range(2001,2022):
    year_calendar = nfl_calendar(x)
    calendar[x] = year_calendar

# DataFrame.append was removed in pandas 2.0; concatenate all seasons in a
# single call. The per-season index is kept (no ignore_index) so the CSV
# index column matches what repeated append() produced.
calendar_table = pd.concat(list(calendar.values()))

# Same data as plain records, one list per season, for the JSON export.
data_dict = {
    key: frame.to_dict(orient='records')
    for key, frame in calendar.items()
}

with open('nfl_calendar.json','w') as fp:
    json.dump(
        data_dict,
        fp,
        indent=4,
        sort_keys=True)

# NOTE(review): the loop above starts at 2001 but the file name says 2002.
# The name is left unchanged in case something downstream reads it; confirm
# which of the two is intended.
calendar_table.to_csv('nfl_calendar_2002_2021.csv')

In [88]:
def nfl_schedule(year):
    """Return one row per competition for every game of ``year``.

    For each of the season types ``pre``, ``reg`` and ``post`` the ESPN
    scoreboard endpoint is fetched through ``download``. For every event the
    first competition is taken, a few bulky keys are dropped, the two
    competitors are replaced by ``home``/``away`` team entries, and the
    result is flattened with ``pd.json_normalize``.

    Parameters
    ----------
    year : int or str
        Value substituted into the ``dates`` query parameter and stored in
        the ``season`` column.

    Returns
    -------
    pandas.DataFrame
        Flattened competitions with a ``season`` column, exact duplicate
        rows removed.

    Raises
    ------
    RuntimeError
        If ``download`` returns ``None`` for one of the season types.
    """
    print(f"Working on {year}")
    season_types = ['pre','reg','post']
    # Keys stripped from each competitor / competition before flattening.
    # They are constant, so they are built once and not per event.
    bad_keys = ['linescores', 'statistics', 'leaders',  'records']
    del_keys = ['notes','leaders','competitors', 'broadcasts','geoBroadcasts', 'headlines']

    frames = []
    for season_type in season_types:
        # NOTE(review): confirm the endpoint honours these string labels for
        # ``seasontype``; the final drop_duplicates() would mask it if the
        # same events came back for all three requests.
        url = "http://site.api.espn.com/apis/site/v2/sports/football/nfl/scoreboard?seasontype={}&limit=1000&dates={}".format(season_type, year)
        resp = download(url=url)
        if resp is None:
            # download() signals failure with None; fail with the URL here
            # so json.loads(None) does not raise an opaque TypeError.
            raise RuntimeError(f"Download failed for {url}")

        for event in json.loads(resp)['events']:
            competition = event['competitions'][0]
            first = competition['competitors'][0]
            second = competition['competitors'][1]

            for competitor in (first, second):
                for k in bad_keys:
                    competitor.pop(k, None)
            for competitor in (first, second):
                competitor['team'].pop('links', None)

            # The first competitor counts as home only when flagged 'home';
            # the second counts as away only when flagged 'away'. When both
            # land on the same side, the second competitor wins.
            if first['homeAway'] == 'home':
                competition['home'] = first['team']
            else:
                competition['away'] = first['team']
            if second['homeAway'] == 'away':
                competition['away'] = second['team']
            else:
                competition['home'] = second['team']

            for k in del_keys:
                competition.pop(k, None)

            frames.append(pd.json_normalize(competition))

    # DataFrame.append was removed in pandas 2.0; concatenate once. The
    # per-row index (0 for every row) is kept, as with repeated append().
    ev = pd.concat(frames) if frames else pd.DataFrame()
    ev['season'] = year

    return ev.drop_duplicates()

# Lift pandas' display limits so the preview below shows every row and column.
pd.set_option('display.max_rows',None)
pd.set_option('display.max_columns',None)

# Pull a single season as a quick check of nfl_schedule; the bare name on the
# last line renders the whole frame as the cell output.
f = nfl_schedule(2020)
f

Working on 2020
pre
reg
post


In [90]:
# to_csv does not create missing directories, so make sure the target for the
# per-season files exists before the downloads start.
os.makedirs("nfl/schedules", exist_ok=True)

# Fetch every season, write its own CSV, and keep the frame keyed by year.
schedule = {}
for x in range(2001,2022):
    year_schedule = nfl_schedule(x)
    year_schedule.to_csv(f"nfl/schedules/nfl_games_info_{x}.csv")
    schedule[x]=year_schedule

# DataFrame.append was removed in pandas 2.0; concatenate all seasons in a
# single call, keeping each frame's index as repeated append() did.
schedule_table = pd.concat(list(schedule.values()))

# NOTE(review): the loop above starts at 2001 but the file name says 2002.
# The name is left unchanged in case something downstream reads it; confirm
# which of the two is intended.
schedule_table.to_csv('nfl_games_info_2002_2021.csv')

Working on 2001
pre
reg
post
Working on 2002
pre
reg
post
Working on 2003
pre
Download error: Bad Gateway http://site.api.espn.com/apis/site/v2/sports/football/nfl/scoreboard?seasontype=pre&limit=1000&dates=2003
Download error: Bad Gateway http://site.api.espn.com/apis/site/v2/sports/football/nfl/scoreboard?seasontype=pre&limit=1000&dates=2003
Download error: Bad Gateway http://site.api.espn.com/apis/site/v2/sports/football/nfl/scoreboard?seasontype=pre&limit=1000&dates=2003
reg
post
Working on 2004
pre
reg
post
Working on 2005
pre
reg
post
Working on 2006
pre
reg
post
Working on 2007
pre
reg
post
Working on 2008
pre
reg
post
Working on 2009
pre
reg
post
Working on 2010
pre
reg
post
Working on 2011
pre
reg
post
Working on 2012
pre
reg
post
Working on 2013
pre
reg
post
Working on 2014
pre
reg
post
Working on 2015
pre
reg
post
Working on 2016
pre
reg
post
Working on 2017
pre
reg
post
Working on 2018
pre
reg
post
Working on 2019
pre
reg
post
Working on 2020
pre
reg
post
Working on 2021
pr