In [1]:
# Import standard libraries
import time
from io import StringIO
# Import layer dependencies
from requests_html import AsyncHTMLSession
import numpy as np
import pandas as pd

In [2]:
# Config
# Create the requests_html async session; the scraping cell awaits
# `session.get(...)` on this object, so it must exist before that cell runs
session = AsyncHTMLSession()

In [16]:
# Scrape the OurLads site to find out which players are punt/kick returners
r = await session.get("https://www.ourlads.com/nfldepthcharts/depthcharts.aspx")
# Render the JavaScript
await r.html.arender()
# Find the table to scrape
element = r.html.find('#ctl00_phContent_gvChart')[0].html

# Transform data
# Convert to dataframe
ourlads = pd.read_html(element)[0]
# Filter for only the needed columns
ourlads = ourlads[['Team', 'Pos', 'Player 1', 'Player 2', 'Player 3', 'Player 4', 'Player 5']]
# Rename columns of Position Ranks; limit number of ranks to three
ourlads = ourlads.rename(columns={
    'Player 1':'1',
    'Player 2':'2',
    'Player 3':'3',
    'Player 4':'3',
    'Player 5':'3',
})
# Filter only relevant positions
posList = ['LWR', 'RWR', 'SWR', 'TE', 'QB', 'RB', 'PK', 'PR', 'KR', 'RES']
ourlads = ourlads.loc[ourlads['Pos'].isin(posList)]

# Transpose columns to rows to get position ranks in row form rather than column
ourlads = ourlads.melt(id_vars=["Team", "Pos"], 
    var_name="posRank", 
    value_name="playerName")
# Create id_ourlads column
ourlads = ourlads.dropna(subset='playerName')
# ourlads['lName'] = ourlads['playerName'].str.split(", ", expand=True)[0]
ourlads['fName'] = ourlads['playerName'].str.split(", ", expand=True)[1].str.split(" ", expand=True)[0]
# ourlads['id_ourlads'] = ourlads['fName'] + " " + ourlads['lName']
# ourlads['id_ourlads'] = ourlads['id_ourlads'].str.replace(".", "")
# ourlads['id_ourlads'] = ourlads['id_ourlads'].str.title()
# ourlads['id_ourlads'] = [" ".join(x.split(" ")[:2]) for x in ourlads['id_ourlads']]
ourlads['id_ourlads'] = ourlads['playerName']

# Find Punt Returners & Kick Returners
condition1 = ourlads['Pos']=='PR'
condition2 = ourlads['Pos']=='KR'
condition3 = ourlads['posRank']=="1"
prs = ourlads.loc[condition1 & condition3]['id_ourlads'].unique()
krs = ourlads.loc[condition2 & condition3]['id_ourlads'].unique()
# Make columns to mark players who are punt returners or kick returners
ourlads.loc[ourlads['id_ourlads'].isin(prs), 'PR'] = True
ourlads.loc[ourlads['id_ourlads'].isin(krs), 'KR'] = True

# Filter out players who are not punt returners or kick returners
ourlads = ourlads.loc[(ourlads['PR']==True) | (ourlads['KR']==True)]
ourlads = ourlads.drop_duplicates(subset='id_ourlads', ignore_index=True)

ourlads.head()

  ourlads = pd.read_html(element)[0]


Unnamed: 0,Team,Pos,posRank,playerName,fName,id_ourlads,PR,KR
0,ARZ,SWR,1,"Dortch, Greg SF21",Greg,"Dortch, Greg SF21",True,
1,ARZ,KR,1,"Dallas, DeeJay U/Sea",DeeJay,"Dallas, DeeJay U/Sea",,True
2,ATL,PR,1,"Williams, Avery 21/5",Avery,"Williams, Avery 21/5",True,True
3,BAL,PR,1,"Wallace, Tylan 21/4",Tylan,"Wallace, Tylan 21/4",True,
4,BAL,KR,1,"Hill, Justice 19/4",Justice,"Hill, Justice 19/4",,True


In [17]:
# Anti-join: keep the scraped returners whose id_ourlads does not yet appear
# in the id lookup file, and save them for manual review
ids = pd.read_csv("test_data/lu_ids.csv")
already_known = ourlads['id_ourlads'].isin(ids['id_ourlads'])
ourlads_to_be_added = ourlads.loc[~already_known]
ourlads_to_be_added.to_csv(
    "test_data/ourlads_to_be_added.csv",
    index=False,
)
ourlads_to_be_added

Unnamed: 0,Team,Pos,posRank,playerName,fName,id_ourlads,PR,KR
0,ARZ,SWR,1,"Dortch, Greg SF21",Greg,"Dortch, Greg SF21",True,
1,ARZ,KR,1,"Dallas, DeeJay U/Sea",DeeJay,"Dallas, DeeJay U/Sea",,True
2,ATL,PR,1,"Williams, Avery 21/5",Avery,"Williams, Avery 21/5",True,True
3,BAL,PR,1,"Wallace, Tylan 21/4",Tylan,"Wallace, Tylan 21/4",True,
4,BAL,KR,1,"Hill, Justice 19/4",Justice,"Hill, Justice 19/4",,True
5,BUF,SWR,1,"Shakir, Khalil 22/5",Khalil,"Shakir, Khalil 22/5",,True
6,BUF,PR,1,"Hardy, Daequan 24/6",Daequan,"Hardy, Daequan 24/6",True,
7,CAR,PR,1,"Smith-Marsette, Ihmir T/KC",Ihmir,"Smith-Marsette, Ihmir T/KC",True,
8,CAR,KR,1,"Blackshear, Raheem P/Buf",Raheem,"Blackshear, Raheem P/Buf",,True
9,CHI,PR,1,"CARTER, DEANDRE U/LV",DEANDRE,"CARTER, DEANDRE U/LV",True,


In [18]:
# Export only the player id and the two returner flags to csv
output_columns = ['id_ourlads', 'PR', 'KR']
ourlads_output = ourlads.loc[:, output_columns]
ourlads_output.to_csv(
    'test_data/ourlads.csv',
    index=False,
)
# The written file is what gets uploaded to s3