# Web Scraping Positions

## Setup Environment

In [1]:
import requests
import lxml.html as lh
import pandas as pd
# Approach adapted from this guide: https://towardsdatascience.com/web-scraping-html-tables-with-python-c9baba21059

## Create Function to Extract Table

In [99]:
def scrape_table(url, team, year=2019, timeout=30):
    """Scrape the roster table found at ``url`` into a DataFrame.

    Every ``<tr>`` element on the page is collected. The first one is used as
    the header row, and the following rows are read as data until a row whose
    cell count differs from the most common cell count on the page is reached.

    Parameters
    ----------
    url : str
        Address of the page to download.
    team : str
        Hyphenated, lower-case team slug (e.g. ``'new-york-jets'``); it is
        converted to a display name for the ``Team`` column.
    year : int, default 2019
        Value written to the ``Year`` column of every row.
    timeout : float, default 30
        Seconds to wait for the server before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per scraped table row, with the page's own columns plus
        ``Merge Name`` (first initial + '.' + second word of ``Name``),
        ``Team`` and ``Year``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the page contains no ``<tr>`` rows.
    KeyError
        If the header row has no ``Name`` column.
    """
    # Download the page; a timeout keeps a stalled host from hanging the
    # notebook, and raise_for_status() stops us from parsing an error page.
    page = requests.get(url, timeout=timeout)
    page.raise_for_status()

    # Parse the HTML and collect everything stored between <tr>..</tr>.
    doc = lh.fromstring(page.content)
    tr_elements = doc.xpath('//tr')
    if not tr_elements:
        raise ValueError("No <tr> rows found at {}".format(url))

    # The most common cell count is taken to be the width of the roster table.
    widths = [len(row) for row in tr_elements]
    mode = max(set(widths), key=widths.count)

    # One (title, values) pair per header cell.
    col = [(cell.text_content().strip(), []) for cell in tr_elements[0]]

    # The first row is the header, so data starts on the second row.
    for row in tr_elements[1:]:
        # A row of a different width is treated as the end of the table.
        if len(row) != mode:
            break
        for i, cell in enumerate(row.iterchildren()):
            col[i][1].append(str(cell.text_content()).strip())

    df = pd.DataFrame({title: column for (title, column) in col})

    # Keep only the first line of the name cell.
    df['Name'] = df['Name'].str.split('\n').str[0].str.strip()

    # First initial + '.' + second word, e.g. 'Kyler Murray' -> 'K.Murray'.
    # Splitting once and indexing avoids passing ``n`` positionally, which
    # pandas 2.x rejects.
    name_parts = df['Name'].str.split(' ')
    df['Merge Name'] = name_parts.str[0].str[0] + '.' + name_parts.str[1]

    # Capitalise word by word: str.title() would also upper-case the letter
    # after a digit and produce 'San Francisco 49Ers'.
    df['Team'] = ' '.join(
        word.capitalize() for word in team.replace('-', ' ').split(' ')
    )
    df['Year'] = year

    return df

## Define Team List

In [None]:
# URL slugs for every team: lower-case, words joined by hyphens
# (e.g. 'Arizona Cardinals' -> 'arizona-cardinals').
team_ls = [
    '-'.join(full_name.lower().split(' '))
    for full_name in (
        'Arizona Cardinals',
        'Atlanta Falcons',
        'Baltimore Ravens',
        'Buffalo Bills',
        'Carolina Panthers',
        'Chicago Bears',
        'Cincinnati Bengals',
        'Cleveland Browns',
        'Dallas Cowboys',
        'Denver Broncos',
        'Detroit Lions',
        'Green Bay Packers',
        'Houston Texans',
        'Indianapolis Colts',
        'Jacksonville Jaguars',
        'Kansas City Chiefs',
        'Las Vegas Raiders',
        'Los Angeles Chargers',
        'Los Angeles Rams',
        'Miami Dolphins',
        'Minnesota Vikings',
        'New England Patriots',
        'New Orleans Saints',
        'New York Giants',
        'New York Jets',
        'Philadelphia Eagles',
        'Pittsburgh Steelers',
        'San Francisco 49ers',
        'Seattle Seahawks',
        'Tampa Bay Buccaneers',
        'Tennessee Titans',
        'Washington Redskins',
    )
]

## Iterate through List

In [100]:
# Base address of the roster pages; each team slug is appended to it.
url='https://www.lineups.com/nfl/roster/'

# Collect one frame per team and concatenate once at the end. Calling
# pd.concat inside the loop recopies every accumulated row on each pass.
team_frames = []

for team in team_ls:
    print("Starting {}".format(team))
    team_frames.append(scrape_table(url+team, team))

# Each team's frame keeps its own row labels, so labels repeat across teams.
# pd.concat raises on an empty list, so fall back to an empty frame.
df = pd.concat(team_frames) if team_frames else pd.DataFrame()

df.head()

Starting arizona-cardinals
https://www.lineups.com/nfl/roster/arizona-cardinals
Starting atlanta-falcons
https://www.lineups.com/nfl/roster/atlanta-falcons
Starting baltimore-ravens
https://www.lineups.com/nfl/roster/baltimore-ravens
Starting buffalo-bills
https://www.lineups.com/nfl/roster/buffalo-bills
Starting carolina-panthers
https://www.lineups.com/nfl/roster/carolina-panthers
Starting chicago-bears
https://www.lineups.com/nfl/roster/chicago-bears
Starting cincinnati-bengals
https://www.lineups.com/nfl/roster/cincinnati-bengals
Starting cleveland-browns
https://www.lineups.com/nfl/roster/cleveland-browns
Starting dallas-cowboys
https://www.lineups.com/nfl/roster/dallas-cowboys
Starting denver-broncos
https://www.lineups.com/nfl/roster/denver-broncos
Starting detroit-lions
https://www.lineups.com/nfl/roster/detroit-lions
Starting green-bay-packers
https://www.lineups.com/nfl/roster/green-bay-packers
Starting houston-texans
https://www.lineups.com/nfl/roster/houston-texans
Starting

Unnamed: 0,Pos,Name,Number,Rating,Ranking,Height,Weight,Age,Birthday,Exp.,Drafted,Draft Round,Draft Pick,College,Merge Name,Team,Year
0,QB,Kyler Murray,1,77,#27 QB,"5'10""",207,22,8/7/97,2,2019,1.0,1.0,Oklahoma,K.Murray,Arizona Cardinals,2019
1,QB,Drew Anderson,3,70,#62 QB,"6'4""",223,24,10/18/95,1,2019,,,Murray State,D.Anderson,Arizona Cardinals,2019
2,QB,Brett Hundley,7,67,#120 QB,"6'3""",226,27,6/15/93,6,2015,5.0,147.0,UCLA,B.Hundley,Arizona Cardinals,2019
3,RB,Kenyan Drake,41,82,#36 RB,"6'1""",210,26,1/26/94,5,2016,3.0,73.0,Alabama,K.Drake,Arizona Cardinals,2019
4,RB,Chase Edmonds,29,74,#128 RB,"5'9""",205,24,4/13/96,3,2018,4.0,134.0,Fordham,C.Edmonds,Arizona Cardinals,2019
