# Week 15 -- Kicking Data Collection

In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import time
import warnings
warnings.simplefilter('ignore')

In [2]:
%%capture

from tqdm import tqdm_notebook as tqdm
from tqdm import tnrange
# Instantiates the notebook tqdm and calls its pandas() hook.
# NOTE(review): presumably this registers tqdm's progress_* helpers on pandas
# objects; nothing in the visible cells uses them -- confirm it is still needed.
tqdm().pandas()

In [3]:
# Lift the column display limit so wide stat tables are shown in full.
pd.set_option('display.max_columns', None)

In [4]:
# Render floats with two decimal places when DataFrames are displayed.
pd.set_option('display.float_format', lambda x: '%.2f' % x)

## Initiating DataFrame

In [5]:
# Empty module-level frame that get_kicking() fills in place. The column order
# is player identity (PLAYER, POSITION, TEAM) followed by the twelve stat
# columns in the order they are scraped.
kicking_df = pd.DataFrame(columns = ['PLAYER', 'POSITION', 'TEAM', 'GP', 'FGM_A', 'FG%', 'LNG', '_1_19', '_20_29', 
                                     '_30_39', '_40_49', 'OVER50', 'XPM_A', 'XP%', 'PTS'])

## Scraping Functions

In [6]:
def get_name_pos_team_missing_team(soup):
    """
    Extract the name, position and team of every kicker listed in a parsed page.

    Each 'CellPlayerName--long' span inside the first 'tbody' is read as text,
    leading newlines are dropped, and the remainder is split on newlines.
    The name is taken from line 0, the position from line 2 and the team from
    line 5; spaces are removed from position and team (not from the name).

    Parameters:
        soup: the parsed html to search through; must contain a 'tbody'.

    Returns:
        tuple (names, positions, teams) of three equally long lists of strings.

    Raises:
        AttributeError: if soup.find('tbody') returns None.
        IndexError: if a span's text has fewer than six newline-separated parts.
    """
    container = soup.find('tbody')
    names = []
    positions = []
    teams = []
    # Search the table once and split each span's text once, instead of
    # re-running the search for every field of every player.
    for span in container.find_all('span', class_='CellPlayerName--long'):
        parts = span.get_text().lstrip('\n').split('\n')
        names.append(parts[0])
        positions.append(parts[2].replace(' ', ''))
        teams.append(parts[5].replace(' ', ''))

    return names, positions, teams

In [7]:
def get_12first(soup):
    """
    Collect the first stat column from the table body of a parsed stats page.

    The numeric cells of the first 'tbody' are treated as one flat sequence
    with 12 cells per row, so this keeps cells 0, 12, 24, ...
    get_kicking stores the result under 'GP'.

    Parameters:
        soup: the parsed html to search through; must contain a 'tbody'.

    Returns:
        list of cell texts for the first column, newlines and spaces removed.

    Raises:
        AttributeError: if soup.find('tbody') returns None.
    """
    container = soup.find('tbody')
    # Query once and stride through the flat cell list (12 cells per row).
    cells = container.find_all('td', class_='TableBase-bodyTd TableBase-bodyTd--number')
    return [cell.get_text().replace('\n', '').replace(' ', '') for cell in cells[0::12]]

In [8]:
def get_12second(soup):
    """
    Collect the second stat column from the table body of a parsed stats page.

    The numeric cells of the first 'tbody' are treated as one flat sequence
    with 12 cells per row, so this keeps cells 1, 13, 25, ...
    get_kicking stores the result under 'FGM_A'.

    Parameters:
        soup: the parsed html to search through; must contain a 'tbody'.

    Returns:
        list of cell texts for the second column, newlines and spaces removed.

    Raises:
        AttributeError: if soup.find('tbody') returns None.
    """
    container = soup.find('tbody')
    # Query once and stride through the flat cell list (12 cells per row).
    cells = container.find_all('td', class_='TableBase-bodyTd TableBase-bodyTd--number')
    return [cell.get_text().replace('\n', '').replace(' ', '') for cell in cells[1::12]]

In [9]:
def get_12third(soup):
    """
    Collect the third stat column from the table body of a parsed stats page.

    The numeric cells of the first 'tbody' are treated as one flat sequence
    with 12 cells per row, so this keeps cells 2, 14, 26, ...
    get_kicking stores the result under 'FG%'.

    Parameters:
        soup: the parsed html to search through; must contain a 'tbody'.

    Returns:
        list of cell texts for the third column, newlines and spaces removed.

    Raises:
        AttributeError: if soup.find('tbody') returns None.
    """
    container = soup.find('tbody')
    # Query once and stride through the flat cell list (12 cells per row).
    cells = container.find_all('td', class_='TableBase-bodyTd TableBase-bodyTd--number')
    return [cell.get_text().replace('\n', '').replace(' ', '') for cell in cells[2::12]]

In [10]:
def get_12fourth(soup):
    """
    Collect the fourth stat column from the table body of a parsed stats page.

    The numeric cells of the first 'tbody' are treated as one flat sequence
    with 12 cells per row, so this keeps cells 3, 15, 27, ...
    get_kicking stores the result under 'LNG'.

    Parameters:
        soup: the parsed html to search through; must contain a 'tbody'.

    Returns:
        list of cell texts for the fourth column, newlines and spaces removed.

    Raises:
        AttributeError: if soup.find('tbody') returns None.
    """
    container = soup.find('tbody')
    # Query once and stride through the flat cell list (12 cells per row).
    cells = container.find_all('td', class_='TableBase-bodyTd TableBase-bodyTd--number')
    return [cell.get_text().replace('\n', '').replace(' ', '') for cell in cells[3::12]]

In [11]:
def get_12fifth(soup):
    """
    Collect the fifth stat column from the table body of a parsed stats page.

    The numeric cells of the first 'tbody' are treated as one flat sequence
    with 12 cells per row, so this keeps cells 4, 16, 28, ...
    get_kicking stores the result under '_1_19'.

    Parameters:
        soup: the parsed html to search through; must contain a 'tbody'.

    Returns:
        list of cell texts for the fifth column, newlines and spaces removed.

    Raises:
        AttributeError: if soup.find('tbody') returns None.
    """
    container = soup.find('tbody')
    # Query once and stride through the flat cell list (12 cells per row).
    cells = container.find_all('td', class_='TableBase-bodyTd TableBase-bodyTd--number')
    return [cell.get_text().replace('\n', '').replace(' ', '') for cell in cells[4::12]]

In [12]:
def get_12sixth(soup):
    """
    Collect the sixth stat column from the table body of a parsed stats page.

    The numeric cells of the first 'tbody' are treated as one flat sequence
    with 12 cells per row, so this keeps cells 5, 17, 29, ...
    get_kicking stores the result under '_20_29'.

    Parameters:
        soup: the parsed html to search through; must contain a 'tbody'.

    Returns:
        list of cell texts for the sixth column, newlines and spaces removed.

    Raises:
        AttributeError: if soup.find('tbody') returns None.
    """
    container = soup.find('tbody')
    # Query once and stride through the flat cell list (12 cells per row).
    cells = container.find_all('td', class_='TableBase-bodyTd TableBase-bodyTd--number')
    return [cell.get_text().replace('\n', '').replace(' ', '') for cell in cells[5::12]]

In [13]:
def get_12seventh(soup):
    """
    Collect the seventh stat column from the table body of a parsed stats page.

    The numeric cells of the first 'tbody' are treated as one flat sequence
    with 12 cells per row, so this keeps cells 6, 18, 30, ...
    get_kicking stores the result under '_30_39'.

    Parameters:
        soup: the parsed html to search through; must contain a 'tbody'.

    Returns:
        list of cell texts for the seventh column, newlines and spaces removed.

    Raises:
        AttributeError: if soup.find('tbody') returns None.
    """
    container = soup.find('tbody')
    # Query once and stride through the flat cell list (12 cells per row).
    cells = container.find_all('td', class_='TableBase-bodyTd TableBase-bodyTd--number')
    return [cell.get_text().replace('\n', '').replace(' ', '') for cell in cells[6::12]]

In [14]:
def get_12eighth(soup):
    """
    Collect the eighth stat column from the table body of a parsed stats page.

    The numeric cells of the first 'tbody' are treated as one flat sequence
    with 12 cells per row, so this keeps cells 7, 19, 31, ...
    get_kicking stores the result under '_40_49'.

    Parameters:
        soup: the parsed html to search through; must contain a 'tbody'.

    Returns:
        list of cell texts for the eighth column, newlines and spaces removed.

    Raises:
        AttributeError: if soup.find('tbody') returns None.
    """
    container = soup.find('tbody')
    # Query once and stride through the flat cell list (12 cells per row).
    cells = container.find_all('td', class_='TableBase-bodyTd TableBase-bodyTd--number')
    return [cell.get_text().replace('\n', '').replace(' ', '') for cell in cells[7::12]]

In [15]:
def get_12ninth(soup):
    """
    Collect the ninth stat column from the table body of a parsed stats page.

    The numeric cells of the first 'tbody' are treated as one flat sequence
    with 12 cells per row, so this keeps cells 8, 20, 32, ...
    get_kicking stores the result under 'OVER50'.

    Parameters:
        soup: the parsed html to search through; must contain a 'tbody'.

    Returns:
        list of cell texts for the ninth column, newlines and spaces removed.

    Raises:
        AttributeError: if soup.find('tbody') returns None.
    """
    container = soup.find('tbody')
    # Query once and stride through the flat cell list (12 cells per row).
    cells = container.find_all('td', class_='TableBase-bodyTd TableBase-bodyTd--number')
    return [cell.get_text().replace('\n', '').replace(' ', '') for cell in cells[8::12]]

In [16]:
def get_12tenth(soup):
    """
    Collect the tenth stat column from the table body of a parsed stats page.

    The numeric cells of the first 'tbody' are treated as one flat sequence
    with 12 cells per row, so this keeps cells 9, 21, 33, ...
    get_kicking stores the result under 'XPM_A'.

    Parameters:
        soup: the parsed html to search through; must contain a 'tbody'.

    Returns:
        list of cell texts for the tenth column, newlines and spaces removed.

    Raises:
        AttributeError: if soup.find('tbody') returns None.
    """
    container = soup.find('tbody')
    # Query once and stride through the flat cell list (12 cells per row).
    cells = container.find_all('td', class_='TableBase-bodyTd TableBase-bodyTd--number')
    return [cell.get_text().replace('\n', '').replace(' ', '') for cell in cells[9::12]]

In [17]:
def get_12eleventh(soup):
    """
    Collect the eleventh stat column from the table body of a parsed stats page.

    The numeric cells of the first 'tbody' are treated as one flat sequence
    with 12 cells per row, so this keeps cells 10, 22, 34, ...
    get_kicking stores the result under 'XP%'.

    Parameters:
        soup: the parsed html to search through; must contain a 'tbody'.

    Returns:
        list of cell texts for the eleventh column, newlines and spaces removed.

    Raises:
        AttributeError: if soup.find('tbody') returns None.
    """
    container = soup.find('tbody')
    # Query once and stride through the flat cell list (12 cells per row).
    cells = container.find_all('td', class_='TableBase-bodyTd TableBase-bodyTd--number')
    return [cell.get_text().replace('\n', '').replace(' ', '') for cell in cells[10::12]]

In [18]:
def get_12twelfth(soup):
    """
    Collect the twelfth stat column from the table body of a parsed stats page.

    The numeric cells of the first 'tbody' are treated as one flat sequence
    with 12 cells per row, so this keeps cells 11, 23, 35, ...
    get_kicking stores the result under 'PTS'.

    Parameters:
        soup: the parsed html to search through; must contain a 'tbody'.

    Returns:
        list of cell texts for the twelfth column, newlines and spaces removed.

    Raises:
        AttributeError: if soup.find('tbody') returns None.
    """
    container = soup.find('tbody')
    # Query once and stride through the flat cell list (12 cells per row).
    cells = container.find_all('td', class_='TableBase-bodyTd TableBase-bodyTd--number')
    return [cell.get_text().replace('\n', '').replace(' ', '') for cell in cells[11::12]]

In [19]:
def get_kicking(url, timeout=30):
    """
    Download a kicking stats page and load it into the module-level kicking_df.

    The page is fetched, parsed with BeautifulSoup's 'html.parser', and each
    column of kicking_df is overwritten with the values scraped from it.

    Parameters:
        url: the site you need to scrape.
        timeout: seconds to wait for the server before giving up (default 30).

    Returns:
        None. The results are written into the global kicking_df in place.

    Raises:
        requests.exceptions.RequestException: if the request fails, times out,
            or the server answers with an HTTP error status.
    """
    html = requests.get(url, timeout=timeout)
    # Stop on 4xx/5xx responses instead of trying to parse an error page,
    # which would otherwise fail later with an unrelated AttributeError.
    html.raise_for_status()
    soup = BeautifulSoup(html.content, 'html.parser')

    kicking_df['PLAYER'], kicking_df['POSITION'], kicking_df['TEAM'] = get_name_pos_team_missing_team(soup)

    # Stat columns in table order, each paired with the scraper for that position.
    stat_scrapers = (
        ('GP', get_12first),
        ('FGM_A', get_12second),
        ('FG%', get_12third),
        ('LNG', get_12fourth),
        ('_1_19', get_12fifth),
        ('_20_29', get_12sixth),
        ('_30_39', get_12seventh),
        ('_40_49', get_12eighth),
        ('OVER50', get_12ninth),
        ('XPM_A', get_12tenth),
        ('XP%', get_12eleventh),
        ('PTS', get_12twelfth),
    )
    for column, scrape in stat_scrapers:
        kicking_df[column] = scrape(soup)

## Scraping Kicking Data

In [20]:
# Stats page to scrape; get_kicking() fills the global kicking_df from it.
url = 'https://www.cbssports.com/nfl/stats/player/kicking/nfl/regular/all/'
get_kicking(url)

In [21]:
# Preview the first rows to sanity-check the scrape.
kicking_df.head()

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS
0,Younghoe Koo,K,ATL,12,34,97.1,54,0-0,8-8,10-10,8-9,7-7,28,89.3,124
1,Jason Sanders,K,MIA,13,32,93.8,56,1-1,7-7,4-4,10-12,8-8,30,100.0,120
2,Rodrigo Blankenship,K,IND,13,30,90.0,44,0-0,7-7,11-12,9-10,0-1,37,94.6,116
3,Daniel Carlson,K,LV,13,28,92.9,54,0-0,13-13,6-6,3-5,4-4,39,97.4,116
4,Tyler Bass,K,BUF,13,31,80.6,58,1-1,6-6,6-9,8-9,4-6,40,95.0,113


## Pickle DataFrame

In [22]:
# Persist the scraped frame to a pickle file named 'kicking_data'
# (relative path, no file extension).
kicking_df.to_pickle('kicking_data')