In [96]:
import json
import os
import re
from datetime import date
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

In [14]:
# Get MyFantasyLeague players' id, name, position and team
urlString = "https://api.myfantasyleague.com/2022/export?TYPE=players"
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# surfaces HTTP errors here instead of silently parsing an error page.
response = requests.get(urlString, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'xml')
players = soup.find_all('player')
# One row per <player> element, built from its XML attributes.
data = [
    [player.get("id"), player.get("name"), player.get("position"), player.get("team")]
    for player in players
]
# Passing columns= to the constructor also gives a well-formed empty frame
# when the export contains no players (assigning .columns afterwards would raise).
player_df = pd.DataFrame(data, columns=['PlayerID', 'Name', 'Position', 'Team'])
player_df

Unnamed: 0,PlayerID,Name,Position,Team
0,0151,"Bills, Buffalo",TMWR,BUF
1,0152,"Colts, Indianapolis",TMWR,IND
2,0153,"Dolphins, Miami",TMWR,MIA
3,0154,"Patriots, New England",TMWR,NEP
4,0155,"Jets, New York",TMWR,NYJ
...,...,...,...,...
2733,15994,"Weatherford, Sterling",LB,IND
2734,15995,"Turner, DJ",WR,LVR
2735,15996,"Virgil, Jalen",WR,DEN
2736,15997,"Davis, Danny",WR,GBP


In [53]:
# Clean MFL data
mfl_df = player_df.copy()
# Select only relevant positions
mfl_df = mfl_df.loc[mfl_df['Position'].isin(['QB', 'WR', 'RB', 'TE', 'PK', 'Def'])]
mfl_df = mfl_df.reset_index(drop=True)

# Clean Name column: "Last, First" -> "First Last"
to_join = mfl_df['Name'].str.split(", ", n=1, expand=True)
to_join.columns = ['lname', 'fname']
mfl_df['Name'] = to_join['fname'] + " " + to_join['lname']
# Change to upper case
mfl_df['Name'] = mfl_df['Name'].str.upper()
# Drop punctuation. regex=False makes each pattern a literal character; with
# regex semantics "." would match every character, and relying on the pandas
# default raises a FutureWarning.
for punctuation in (".", ",", "'"):
    mfl_df['Name'] = mfl_df['Name'].str.replace(punctuation, "", regex=False)

# Clean position column
mfl_df['Position'] = mfl_df['Position'].replace('Def', 'DF')

# Clean Team column (Series.replace matches the whole value 'FA*' literally)
mfl_df['Team'] = mfl_df['Team'].replace('FA*', 'FA')

# Change column names
mfl_df = mfl_df.rename(columns={
    'PlayerID': 'id_mfl',
    'Name': 'player',
    'Position': 'pos_mfl',
    'Team': 'team',
})
mfl_df


  from ipykernel import kernelapp as app


Unnamed: 0,id_mfl,player,pos_mfl,team
0,0501,BUFFALO BILLS,DF,BUF
1,0502,INDIANAPOLIS COLTS,DF,IND
2,0503,MIAMI DOLPHINS,DF,MIA
3,0504,NEW ENGLAND PATRIOTS,DF,NEP
4,0505,NEW YORK JETS,DF,NYJ
...,...,...,...,...
1089,15992,BJ BAYLOR,RB,FA
1090,15993,LANCE MCCUTCHEON,WR,LAR
1091,15995,DJ TURNER,WR,LVR
1092,15996,JALEN VIRGIL,WR,DEN


In [43]:
# Get OurLads data
# Set Selenium settings.
# Page-load strategy "none" makes driver.get() return immediately, so the
# explicit wait below decides when enough of the page has arrived.
# A fresh ChromeOptions object is used instead of mutating the shared
# DesiredCapabilities.CHROME dict and passing the deprecated
# desired_capabilities= keyword.
options = webdriver.ChromeOptions()
options.page_load_strategy = "none"

# Scrape web for the depth-chart table
url = "https://www.ourlads.com/nfldepthcharts/depthcharts.aspx"
CHART_XPATH = "//table[@id='ctl00_phContent_gvChart']"

# NOTE(review): machine-specific chromedriver location; consider making this configurable.
PATH = "/Applications/chromedriver"
driver = webdriver.Chrome(service=Service(PATH), options=options)
try:
    wait = WebDriverWait(driver, 20)
    driver.get(url)

    # Wait (up to 20 s) for the depth-chart table to be present, then stop
    # loading the rest of the page.
    wait.until(EC.presence_of_element_located((By.XPATH, CHART_XPATH)))
    driver.execute_script("window.stop();")

    chart_html = driver.find_element(By.XPATH, value=CHART_XPATH).get_attribute("outerHTML")
finally:
    # Always close the browser, even on timeout, so no Chrome process is leaked.
    driver.quit()

# Wrap the markup in StringIO: passing a literal HTML string to read_html is deprecated.
ourlads_scrape = pd.read_html(StringIO(chart_html))[0]

In [90]:
# Clean ourlads_df data
df = ourlads_scrape.copy()
df = df[['Team', 'Pos', 'Player 1', 'Player 2', 'Player 3', 'Player 4', 'Player 5']]

# Transform the "Player N" columns into rows, keeping N (as a string) as the
# depth rank. Rows are stacked rank by rank: all "Player 1" rows first, etc.
complete = pd.concat(
    [
        df[['Team', 'Pos', f'Player {rank}']]
        .rename(columns={f'Player {rank}': 'Player'})
        .assign(posRank=str(rank))
        for rank in range(1, 6)
    ],
    axis=0,
    ignore_index=True,
)

# Clean Position column
# Select only relevant positions
posList = ['LWR', 'RWR', 'SWR', 'TE', 'QB', 'RB', 'PK', 'PR', 'KR', 'RES']
# .copy() gives an independent frame, so the assignments below modify
# ourlads_df itself rather than a slice of `complete` (this was the source of
# the SettingWithCopyWarning, and is a silent no-op under copy-on-write).
ourlads_df = complete.loc[complete['Pos'].isin(posList)].copy()
# Convert WR roles to "WR"
ourlads_df['Pos'] = ourlads_df['Pos'].replace(["LWR", "RWR", "SWR"], "WR")
# posRank becomes position + depth, e.g. "WR" + "1" -> "WR1"
ourlads_df['posRank'] = ourlads_df['Pos'] + ourlads_df['posRank']
ourlads_df = (
    ourlads_df
    .reset_index(drop=True)
    .dropna()
    # Keep the first (lowest-rank) listing of a player per team and position
    .drop_duplicates(subset=['Player', 'Team', 'Pos'])
)

# Create columns for KRs and PRs: one row per returner, with the returner
# rank (e.g. "KR1") stored in a column named after the role
krs = (
    ourlads_df.loc[ourlads_df['Pos'] == 'KR', ['Team', 'Player', 'posRank']]
    .rename(columns={'posRank': 'KR'})
)
prs = (
    ourlads_df.loc[ourlads_df['Pos'] == 'PR', ['Team', 'Player', 'posRank']]
    .rename(columns={'posRank': 'PR'})
)
# Join KR and PR dfs back onto main ourlads df
ourlads_df = (
    ourlads_df
    .merge(krs, how='left', on=['Player', 'Team'])
    .merge(prs, how='left', on=['Player', 'Team'])
)
# Players who are not returners get "NO"
ourlads_df[['KR', 'PR']] = ourlads_df[['KR', 'PR']].fillna("NO")

# Clean name column: "Last, First <extra>" -> "First Last".
# Parsing on the comma (rather than on spaces) keeps multi-word surnames
# intact; entries without a comma are left unchanged.
names = ourlads_df['Player'].str.extract(r"^\s*(?P<last>[^,]+?)\s*,\s*(?P<first>\S+)")
ourlads_df['Player'] = (names['first'] + " " + names['last']).fillna(ourlads_df['Player'])
# Change to upper case
ourlads_df['Player'] = ourlads_df['Player'].str.upper()
# Drop punctuation (explicit character class, so "." is not a regex wildcard)
ourlads_df['Player'] = ourlads_df['Player'].str.replace(r"[.,']", "", regex=True)

# Change column names and order
ourlads_df = (
    ourlads_df[['Player', 'Pos', 'Team', 'posRank', 'KR', 'PR']]
    .rename(columns={'Player': 'player', 'Pos': 'pos_ol', 'Team': 'team'})
)

# Remove separate rows for PRs and KRs (their ranks now live in the KR/PR columns)
ourlads_df = ourlads_df.loc[~ourlads_df['pos_ol'].isin(["KR", "PR"])]

# Drop position column
ourlads_df = ourlads_df.drop(columns=['pos_ol'])
ourlads_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,player,team,posRank,KR,PR
0,AJ GREEN,ARZ,WR1,NO,NO
1,MARQUISE BROWN,ARZ,WR1,NO,NO
2,RONDALE MOORE,ARZ,WR1,KR1,PR1
3,ZACH ERTZ,ARZ,TE1,NO,NO
4,KYLER MURRAY,ARZ,QB1,NO,NO
...,...,...,...,...,...
1147,JULIUS CHESTNUT,TEN,RB5,NO,NO
1148,CURTIS HODGES,WAS,TE5,NO,NO
1149,ELI WOLF,WAS,TE5,NO,NO
1150,JARET PATTERSON,WAS,RB5,KR3,NO


In [121]:
# Merge MyFantasyLeague and OurLads data
# NOTE(review): the join needs identical team abbreviations in both sources;
# the OurLads output shows codes such as 'ARZ' - confirm they line up with the
# MFL codes, otherwise those players never match and fall through to the
# default rank below.
mfl_ol_df = mfl_df.merge(ourlads_df, how='left', on=['player', 'team'])
# Clean merged df
# Team defenses are tagged "DF1" regardless of whether they matched
mfl_ol_df.loc[mfl_ol_df['pos_mfl'] == 'DF', 'posRank'] = "DF1"
# Assign the result back instead of chained fillna(inplace=True), which is
# deprecated and has no effect under pandas copy-on-write.
mfl_ol_df[['KR', 'PR']] = mfl_ol_df[['KR', 'PR']].fillna("NO")

# Clean posRanks: collapse the depth ranks into coarser tiers.
# Any value missing from this mapping becomes NaN and gets the default below.
mfl_ol_df['posRank'] = mfl_ol_df['posRank'].map({
    'RES1': 'RES',
    'RES2': 'RES',
    'RES3': 'RES',
    'RES4': 'RES',
    'RES5': 'RES',
    'QB1': 'QB1',
    'QB2': 'QB2',
    'QB3': 'QB3',
    'QB4': 'QB3',
    'QB5': 'QB3',
    'RB1': 'RB1',
    'RB2': 'RB2',
    'RB3': 'RB3',
    'RB4': 'RB3',
    'RB5': 'RB3',
    'RB6': 'RB3',
    'RB7': 'RB3',
    'WR1': 'WR1',
    'WR2': 'WR1',
    'WR3': 'WR1',
    'WR4': 'WR2',
    'WR5': 'WR2',
    'WR6': 'WR2',
    'WR7': 'WR3',
    'WR8': 'WR3',
    'TE1': 'TE1',
    'TE2': 'TE2',
    'TE3': 'TE3',
    'TE4': 'TE3',
    'TE5': 'TE3',
    'PK1': 'PK1',
    'PK2': 'PK2',
    'PK3': 'PK3',
    'DF1': 'DF1',
})

# Flag players whose rank mapped to "RES" in a separate RES column
is_reserve = mfl_ol_df['posRank'] == "RES"
mfl_ol_df['RES'] = "NO"
mfl_ol_df.loc[is_reserve, 'RES'] = "RES"

# Players with no rank (unmatched or unmapped) and "RES" players both default
# to the third tier of their MFL position, e.g. "WR" -> "WR3"
needs_default = mfl_ol_df['posRank'].isna() | is_reserve
mfl_ol_df.loc[needs_default, 'posRank'] = mfl_ol_df.loc[needs_default, 'pos_mfl'] + "3"
mfl_ol_df

Unnamed: 0,id_mfl,player,pos_mfl,team,posRank,KR,PR,RES
0,0501,BUFFALO BILLS,DF,BUF,DF1,NO,NO,NO
1,0502,INDIANAPOLIS COLTS,DF,IND,DF1,NO,NO,NO
2,0503,MIAMI DOLPHINS,DF,MIA,DF1,NO,NO,NO
3,0504,NEW ENGLAND PATRIOTS,DF,NEP,DF1,NO,NO,NO
4,0505,NEW YORK JETS,DF,NYJ,DF1,NO,NO,NO
...,...,...,...,...,...,...,...,...
1089,15992,BJ BAYLOR,RB,FA,RB3,NO,NO,NO
1090,15993,LANCE MCCUTCHEON,WR,LAR,WR1,NO,NO,NO
1091,15995,DJ TURNER,WR,LVR,WR3,NO,NO,NO
1092,15996,JALEN VIRGIL,WR,DEN,WR2,NO,NO,NO
