# Web Scraping Rosters

## Setup Environment

In [1]:
import requests
import lxml.html as lh
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time

## Create Team List

In [None]:
# Display names of the NFL franchises whose rosters are scraped
team_ls = [
    'Arizona Cardinals',
    'Atlanta Falcons',
    'Baltimore Ravens',
    'Buffalo Bills',
    'Carolina Panthers',
    'Chicago Bears',
    'Cincinnati Bengals',
    'Cleveland Browns',
    'Dallas Cowboys',
    'Denver Broncos',
    'Detroit Lions',
    'Green Bay Packers',
    'Houston Texans',
    'Indianapolis Colts',
    'Jacksonville Jaguars',
    'Kansas City Chiefs',
    'Las Vegas Raiders',
    'Los Angeles Chargers',
    'Los Angeles Rams',
    'Miami Dolphins',
    'Minnesota Vikings',
    'New England Patriots',
    'New Orleans Saints',
    'New York Giants',
    'New York Jets',
    'Philadelphia Eagles',
    'Pittsburgh Steelers',
    'San Francisco 49ers',
    'Seattle Seahawks',
    'Tampa Bay Buccaneers',
    'Tennessee Titans',
    'Washington Redskins',
]

# Turn each display name into its URL slug: lower-case, spaces become hyphens
team_ls = ['-'.join(full_name.lower().split(' ')) for full_name in team_ls]

## Create Year-Toggle Function

In [None]:
# Changes the year parameter on a given page
def next_year(driver, year_idx, wait=3):
    """Pick an entry from the roster page's year dropdown.

    Clicks the element with id ``main-dropdown``, collects every ``<button>``
    under the dropdown's container element and clicks the one at position
    ``year_idx``, then sleeps so the page has time to re-render.

    Args:
        driver: Selenium WebDriver that has a roster page loaded.
        year_idx: Position of the button to click within the container's
            list of buttons.
        wait: Seconds to sleep after the click. Defaults to 3.

    Raises:
        IndexError: If the container holds no button at ``year_idx``.
    """
    # The find_element_by_* helpers were removed in Selenium 4.3. The generic
    # find_element(by, value) form exists in Selenium 3 and 4 alike; "xpath"
    # and "tag name" are the string values of By.XPATH and By.TAG_NAME.
    driver.find_element("xpath", '//*[@id="main-dropdown"]').click()

    # Absolute path to the container that holds the dropdown's buttons
    dropdown_container = driver.find_element(
        "xpath",
        '/html/body/app-root/app-nfl/app-roster/div/div/div[2]/div/div/div[1]/div/div/div',
    )
    year_buttons = dropdown_container.find_elements("tag name", "button")
    year_buttons[year_idx].click()

    # Give the page time to load the newly selected season
    time.sleep(wait)

## Create Scraping Function

In [None]:
# Create scraping function
def sel_scrape(driver, team, year):
    """Scrape the first ``<table>`` on the current page into a DataFrame.

    The first ``<tr>`` supplies the column names (its ``<th>`` cells); every
    later ``<tr>`` supplies one data row (its ``<td>`` cells). All cell text
    is stripped of surrounding whitespace.

    Three columns are appended:
        * ``Merge Name`` - first letter of the first word of ``Name``, a
          period, then the second space-separated word of ``Name``
          (e.g. ``Kyler Murray`` -> ``K.Murray``).
        * ``Team`` - ``team`` with hyphens turned into spaces, title-cased.
        * ``Year`` - ``year`` as given.

    Args:
        driver: Selenium WebDriver that has a roster page loaded.
        team: Hyphenated team slug, e.g. ``'arizona-cardinals'``.
        year: Season label stored in the ``Year`` column.

    Returns:
        pandas.DataFrame with the table contents plus the three columns above.

    Raises:
        IndexError: If the table contains no ``<tr>`` elements.
        KeyError: If the table has no ``Name`` column.
    """
    # The find_element(s)_by_* helpers were removed in Selenium 4.3. The
    # generic find_element(by, value) form exists in Selenium 3 and 4 alike;
    # "tag name" and "xpath" are the string values of By.TAG_NAME / By.XPATH.
    main_table = driver.find_element("tag name", "table")

    # Fetch the row elements once and reuse them for header and body
    table_rows = main_table.find_elements("xpath", ".//tr")
    header = [th.text.strip() for th in table_rows[0].find_elements("xpath", ".//th")]
    rows = [
        [td.text.strip() for td in tr.find_elements("xpath", ".//td")]
        for tr in table_rows[1:]
    ]

    # Compile in dataframe
    df = pd.DataFrame(rows, columns=header)

    # Split names once. Calling split without a positional limit avoids the
    # keyword-only ``n`` argument that pandas 2.0 enforces.
    name_parts = df['Name'].str.split(' ')
    df['Merge Name'] = name_parts.str[0].str[0] + '.' + name_parts.str[1]
    df['Team'] = team.replace('-', ' ').title()
    df['Year'] = year

    return df

## Run Script

In [401]:
# Script entry point: scrape every team/season and write one CSV.
# The original guard compared the undefined name __main__ with the literal
# '__name__', which raises NameError; the standard check is used here.
if __name__ == '__main__':

    # Initialize basic objects needed: url, per-page frames and year list
    url = 'https://www.lineups.com/nfl/roster/'
    frames = []
    years = [2020, 2019, 2018, 2017]

    # Keep track of run time
    start_time = time.time()

    # For every team in the list
    for team in team_ls:

        # Open the link in a fresh browser session
        driver = webdriver.Chrome()
        try:
            driver.get(url + team)

            # For each of the listed years
            for idx, year in enumerate(years):

                print("Starting {} {}".format(team, year))

                # Scrape the page; frames are combined once after the loop
                # instead of re-copying the accumulated data on every page
                frames.append(sel_scrape(driver, team, year))

                # Change to next year
                # NOTE(review): the index passed is that of the year just
                # scraped, and the toggle also runs after the last year.
                # Whether button `idx` selects the following season depends
                # on the dropdown's button order - confirm against the page.
                next_year(driver, idx)
        finally:
            # Always end the browser session, even if a scrape fails
            driver.quit()

    df = pd.concat(frames) if frames else pd.DataFrame()

    print("--- %s seconds ---" % (time.time() - start_time))

    # Export csv
    df.to_csv('../../1. Raw-Data/nfl-roster.csv', index=False)

1592946784.8835697
Starting arizona-cardinals 2020
Starting arizona-cardinals 2019
Starting arizona-cardinals 2018
Starting arizona-cardinals 2017
Starting atlanta-falcons 2020
Starting atlanta-falcons 2019
Starting atlanta-falcons 2018
Starting atlanta-falcons 2017
Starting baltimore-ravens 2020
Starting baltimore-ravens 2019
Starting baltimore-ravens 2018
Starting baltimore-ravens 2017
Starting buffalo-bills 2020
Starting buffalo-bills 2019
Starting buffalo-bills 2018
Starting buffalo-bills 2017
Starting carolina-panthers 2020
Starting carolina-panthers 2019
Starting carolina-panthers 2018
Starting carolina-panthers 2017
Starting chicago-bears 2020
Starting chicago-bears 2019
Starting chicago-bears 2018
Starting chicago-bears 2017
Starting cincinnati-bengals 2020
Starting cincinnati-bengals 2019
Starting cincinnati-bengals 2018
Starting cincinnati-bengals 2017
Starting cleveland-browns 2020
Starting cleveland-browns 2019
Starting cleveland-browns 2018
Starting cleveland-browns 2017
S

Unnamed: 0,Pos,Name,Number,Rating,Ranking,Height,Weight,Age,Birthday,Exp.,Drafted,Draft Round,Draft Pick,College,Merge Name,Team,Year
0,QB,Kyler Murray,1,77,#27 QB,"5'10""",207,22,8/6/97,2,2019,1.0,1.0,Oklahoma,K.Murray,Arizona Cardinals,2020
1,QB,Drew Anderson,3,70,#62 QB,"6'4""",223,24,10/17/95,1,2019,,,Murray State,D.Anderson,Arizona Cardinals,2020
2,QB,Brett Hundley,7,67,#120 QB,"6'3""",226,27,6/14/93,6,2015,5.0,147.0,UCLA,B.Hundley,Arizona Cardinals,2020
3,RB,Kenyan Drake,41,82,#36 RB,"6'1""",210,26,1/25/94,5,2016,3.0,73.0,Alabama,K.Drake,Arizona Cardinals,2020
4,RB,Chase Edmonds,29,74,#128 RB,"5'9""",205,24,4/12/96,3,2018,4.0,134.0,Fordham,C.Edmonds,Arizona Cardinals,2020
