# NFL Player Data Scraping

In [10]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from adjustText import adjust_text

import requests
import csaps

# Apply the notebook-wide seaborn theme: white-grid style, pastel palette,
# slightly enlarged fonts, and grey-scale overrides for grid and tick colours.
sns.set(
    style="whitegrid",
    palette="pastel",
    # NOTE(review): presumably this font must be installed locally -- confirm.
    font='Circular Spotify Text',
    font_scale=1.1,
    rc={
        'grid.color': '.95',
        'xtick.color': '.3',
        'ytick.color': '.3',
    },
)


In [2]:
%%capture
# Notebook-flavoured progress bar. `tqdm.tqdm_notebook` is a deprecated alias
# of `tqdm.notebook.tqdm`, so import the class from its current location.
from tqdm.notebook import tqdm

# Register tqdm's pandas integration. `pandas` is a classmethod, so call it on
# the class instead of creating a throwaway bar instance that is never closed.
tqdm.pandas()

## Data fetching functions

In [3]:
def get_teams(start=1970, stop=2019, timeout=30):
    """Fetch the team ids listed by the NFL teams feed for each season.

    Args:
        start: First season (year) to query, inclusive.
        stop: Last season (year) to query, inclusive.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        dict mapping the season as a string (e.g. ``'2010'``) to the list of
        ``teamId`` values found in that season's response. Seasons whose
        request failed, or whose body was not valid JSON, are reported with
        ``print`` and left out of the result.

    Raises:
        KeyError: If a decoded response has no ``'teams'`` entry, or a team
            record has no ``'teamId'``.
    """
    team_dict = {}

    for year in range(start, stop + 1):
        url = 'http://www.nfl.com/feeds-rs/teams/{}.json'.format(year)

        try:
            response = requests.get(url, timeout=timeout).json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # Skip this season: falling through would either hit an undefined
            # `response` (first year) or silently reuse the previous season's
            # payload under the wrong key.
            print(e)
            continue

        team_dict[str(year)] = [team['teamId'] for team in response['teams']]

    return team_dict
    

In [4]:
def get_players(team_dict, timeout=30):
    """Download every roster referenced by ``team_dict`` into one DataFrame.

    Args:
        team_dict: Mapping of season -> iterable of team ids, as produced by
            ``get_teams``.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        pandas.DataFrame with one row per entry of each roster response's
        ``'teamPlayers'`` list. The columns listed below always come first;
        any additional keys found in the player records follow in order of
        first appearance. Values are kept as ``object`` dtype so they are
        stored exactly as received. Rosters whose request failed, or whose
        body was not valid JSON, are reported with ``print`` and skipped.

    Raises:
        KeyError: If a decoded response has no ``'teamPlayers'`` entry.
    """
    columns = [
        'season',
        'nflId',
        'status',
        'displayName',
        'firstName',
        'lastName',
        'esbId',
        'gsisId',
        'middleName',
        'birthDate',
        'homeTown',
        'collegeId',
        'collegeName',
        'positionGroup',
        'position',
        'height',
        'weight',
        'teamAbbr',
        'teamSeq',
        'teamId',
        'teamFullName',
    ]

    # One request per (season, team) pair.
    total = sum(len(team_ids) for team_ids in team_dict.values())

    # Collect plain records and build the frame once at the end; appending to
    # a DataFrame row by row copies the whole frame each time, and
    # `DataFrame.append` no longer exists in pandas >= 2.0.
    rows = []

    with tqdm(total=total, desc='Requests') as pbar:
        for season, team_ids in team_dict.items():
            for team_id in team_ids:
                url = 'http://www.nfl.com/feeds-rs/roster/{}/{}.json'.format(team_id, season)

                try:
                    response = requests.get(url, timeout=timeout).json()
                except (requests.exceptions.RequestException, ValueError) as e:
                    print(e)
                    continue
                finally:
                    # Count failed requests too so the bar reaches its total.
                    pbar.update(1)

                # NOTE(review): assumes each player record is a dict -- confirm
                # against the feed.
                rows.extend(response['teamPlayers'])

    df = pd.DataFrame(rows, dtype=object)
    extra_columns = [col for col in df.columns if col not in columns]

    return df.reindex(columns=columns + extra_columns)
    

In [13]:
# Season range to scrape; both bounds are passed straight to get_teams.
start, end = 2010, 2019

# Look up the team ids for each season, then download every roster.
teams = get_teams(start, end)
players = get_players(teams)

# Save the combined roster table next to the notebook.
players.to_csv('nfl_players_{}-{}.csv'.format(start, end))

HBox(children=(IntProgress(value=0, description='Requests', max=340, style=ProgressStyle(description_width='in…


