In [1]:
# imports
import pandas as pd
import numpy as np
import sys
import argparse
from NFL_Dataloader import GameSummary,NFLAPI_Processor
from scrapers import ApiGameLinks,LoadProjections,LoadRankings
from NFL_RefMaps import TableColumns
from NFL_Metrics import SkillPoints
from sqlalchemy import create_engine
from sqlalchemy.types import VARCHAR

In [2]:
def update_table(conn,table,temp_table):
    """Copy every row of ``temp_table`` into ``table`` with ``REPLACE INTO``.

    Args:
        conn: object exposing ``execute(sql)``; the statement is sent as a
            plain string.
        table: name of the destination table.
        temp_table: name of the staging table whose rows are selected.

    Both names are spliced into the statement text verbatim, so they must be
    trusted identifiers.
    """
    statement = "".join(["REPLACE INTO ", table, " (select * from ", temp_table, ")"])
    conn.execute(statement)

def check_table(conn,table):
    """Return True when ``table`` exists in the connection's current database.

    Args:
        conn: object exposing ``execute(sql)`` whose result supports
            ``fetchone()``.
        table: table name to look up; it is formatted into the statement
            text verbatim, so it must be a trusted identifier.

    Returns:
        bool: True when exactly one matching table is found, False otherwise.

    The lookup is limited to ``DATABASE()`` (the connection's default schema).
    Without that filter a same-named table in another schema on the server
    changes the count, and the function would report an existing table as
    missing.
    """
    sql = '''SELECT COUNT(*)
        FROM information_schema.tables
        WHERE table_schema = DATABASE()
        AND table_name = '{}'
        '''.format(table)
    result = conn.execute(sql)
    return result.fetchone()[0] == 1

def remove_tmp_tables(conn):
    """Drop every staging table whose name ends with the literal ``_tmp``.

    Args:
        conn: object exposing ``execute(sql)``; the SELECT result must be
            iterable and yield rows whose first item is the table name.

    In a SQL ``LIKE`` pattern ``_`` matches any single character, so the
    query alone would also return tables that merely end in ``tmp``. Each
    name is therefore re-checked in Python before it is dropped.
    """
    # NOTE(review): the doubled %% is presumably there so the driver's
    # %-style parameter formatting leaves a single literal % - confirm.
    sql = '''SELECT table_name
            FROM information_schema.tables
            WHERE table_name like '%%_tmp'
            '''

    result = conn.execute(sql)
    # Collect the names first so the SELECT result is fully consumed before
    # further statements are issued on the same connection.
    tmp_tables = [row[0] for row in result if row[0].endswith('_tmp')]
    for name in tmp_tables:
        conn.execute('DROP TABLE IF EXISTS %s'%name)

In [3]:
def load_fp_metrics(data,metric,week):
    """Store each frame of ``data`` in its ``fpros_<key>_<metric>`` table.

    Args:
        data: mapping of key -> DataFrame whose index has an ``idx`` level.
        metric: suffix appended to the table name.
        week: accepted for the caller's convenience; not used in this body.

    When the target table already exists the frame is written to
    ``<table>_tmp`` and merged in through ``update_table``; otherwise the
    frame creates the table directly. Uses the module-level ``conn``.
    """
    for key, frame in data.items():
        target = 'fpros_' + key + '_' + metric
        already_there = check_table(conn, target)
        # Size the idx column to the longest idx value present in this frame.
        longest_idx = frame.index.get_level_values('idx').str.len().max()
        column_types = {'idx': VARCHAR(longest_idx)}
        if not already_there:
            frame.to_sql(target, con=conn, if_exists='replace', dtype=column_types)
            continue
        staging = target + '_tmp'
        frame.to_sql(staging, con=conn, if_exists='replace', dtype=column_types)
        update_table(conn, target, staging)

In [4]:
# scrape play by play from API for week
def scrape_api(conn,season,week):
    """Scrape play-by-play data for one week and merge it into ``nfl_pbp``.

    Args:
        conn: connection handed to ``DataFrame.to_sql`` and ``update_table``.
        season: season passed to ``ApiGameLinks``.
        week: week passed to ``ApiGameLinks``.

    Raises:
        ValueError: raised by ``pd.concat(..., verify_integrity=True)`` when
            a game's index values repeat ones already collected in this call.

    Each game is staged in ``nfl_pbp_tmp`` and merged on its own. Only the
    rows added for the current game are uploaded; the running frame is kept
    solely so the integrity check and column alignment span all games.
    """
    print("Getting game links for "+str(season)+" week "+str(week)+" . . .")
    api_games = ApiGameLinks(season,week)
    print("Getting game ids . . .")
    gameids = api_games.get_gameids()
    # Empty frame carrying the expected column layout, indexed by idx.
    pbp_df = pd.DataFrame(columns=TableColumns().nflapi['pbp_cols'])
    pbp_df = pbp_df.set_index('idx')
    for game in gameids:
        print("Processing "+str(game)+" ...")
        pbp = NFLAPI_Processor(game).process_nflapi()
        rows_before = len(pbp_df)
        pbp_df = pd.concat([pbp_df,pbp],verify_integrity=True)
        # concat appends in order, so everything past rows_before belongs to
        # this game and is already aligned to the accumulated columns.
        game_rows = pbp_df.iloc[rows_before:]
        if game_rows.empty:
            # Nothing to stage; an empty idx would also give no VARCHAR length.
            print("No play-by-play rows for "+str(game)+"; table not updated.")
            continue
        idx_len = game_rows.index.get_level_values('idx').str.len().max()
        game_rows.to_sql('nfl_pbp_tmp', con=conn, if_exists='replace',dtype={'idx': VARCHAR(idx_len)})
        update_table(conn,'nfl_pbp','nfl_pbp_tmp')
        print("Table updated with "+str(game)+" stats.")

In [5]:
# generate game summaries
def generate_game_summaries(conn,season,week):
    """Build the game summary and skill-point tables for one week and store them.

    Args:
        conn: connection handed to ``DataFrame.to_sql`` and ``update_table``.
        season: season passed to ``GameSummary``.
        week: week passed to ``GameSummary``.

    Each frame is written to a ``*_tmp`` staging table and then merged into
    its permanent table through ``update_table``.
    """
    gs = GameSummary(season,week)
    game_summary = gs.get_summary()
    skillpoints = gs.get_skillpoints()
    game_summary.to_sql('nfl_game_summary_tmp', con=conn, if_exists='replace',index='gameid')
    # update_table takes the connection as its first argument; omitting it
    # raised TypeError before any merge happened.
    update_table(conn,'nfl_game_summary','nfl_game_summary_tmp')
    skillpoints.to_sql('nfl_team_skillpoints_tmp', con=conn, if_exists='replace',index='idx')
    update_table(conn,'nfl_team_skillpoints','nfl_team_skillpoints_tmp')

In [6]:
def scrape_fpros_stats(conn,season,week):
    """Load the week's projections and rankings and store both.

    Args:
        conn: accepted but not forwarded; ``load_fp_metrics`` is called
            without it.
        season: season passed to ``LoadProjections`` and ``LoadRankings``.
        week: week passed to both loaders and on to ``load_fp_metrics``.
    """
    # Both loaders run before anything is stored, projections first.
    loaded = (
        ('projections', LoadProjections(season,week).projections),
        ('rankings', LoadRankings(season,week).rankings),
    )
    for metric, frames in loaded:
        load_fp_metrics(frames, metric, week)

In [7]:
# MySQL (pymysql driver) on localhost:3306, database nfl_db, user root with
# an empty password.
NFL_DB_URL = 'mysql+pymysql://root:@localhost:3306/nfl_db'
nfldb_engine = create_engine(NFL_DB_URL)
# Module-level connection shared by the cells below.
conn = nfldb_engine.connect()

In [8]:
# scrape 2019 stats
# season = 2019
# weeks = np.arange(1,17).tolist()

# for week in weeks:
#     scrape_api(conn,season,week)

In [9]:
# scrape missing games from 2018
# season = 2018
# weeks = [16,17]
# for week in weeks:
#     scrape_api(conn,season,week)

In [10]:
# playoff weeks, omit pro bowl (week 21)
# seasons = np.arange(2010,2020).tolist()
# weeks = [17,18,19,20,22]
# for season in seasons:
#     for week in weeks:
#         scrape_api(conn,season,week)

In [11]:
# generate_game_summaries(conn,season,week)
# scrape_fpros_stats(conn,season,week)
# Scrape play-by-play for 2019 weeks 18, 19, 20 and 22 (week 21 is left out).
season, weeks = 2019, [18,19,20,22]
for week in weeks:
    scrape_api(conn,season,week)

Getting game links for 2019 week 17 . . .


KeyError: '17'