# Imports

In [1]:
import re
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup as BS
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
from datetime import date, datetime, timedelta

# Create yesterday and today variables

In [2]:
# Read the clock once so both dates derive from the same instant; two separate
# date.today() calls could straddle midnight and leave the dates two days apart.
today = date.today()
yesterday = today - timedelta(days=1)

# Scrape current rankings

In [3]:
# Wayback Machine snapshot (timestamp 20210302174949) of the KeepTradeCut devy rankings page
url = 'https://web.archive.org/web/20210302174949/https://keeptradecut.com/devy-rankings'

# Fail loudly on a hung connection or an HTTP error status; otherwise an error
# page would be parsed and silently produce an empty rankings frame.
response = requests.get(url, timeout=60)
response.raise_for_status()

# Name the parser explicitly so the parse result does not depend on which
# optional parser libraries happen to be installed in the environment.
rankingsSoup = BS(response.content, 'html.parser')

# Readable construction of the devy-rankings dataframe

# Output columns, in the order they appear in the final frame
ranking_columns = [
    'rank',
    'player',
    'playerhref',
    'positionrank',
    'schoolabv',
    'overalltier',
    'value',
]

# Relevant divs: one 'onePlayer' div per ranked entry
playerlines = rankingsSoup.find_all('div', 'onePlayer')

# Build one record per player line instead of maintaining parallel lists
ranking_records = []
for playerline in playerlines:

    # The first anchor in the line supplies both the display name and the href
    player_link = playerline.find('a')

    ranking_records.append({
        'rank': int(playerline.find('div', {'class':'rank-number'}).find('p').text),
        'player': player_link.text,
        # hrefs on the page are site-relative, so prefix the site root
        'playerhref': 'https://keeptradecut.com' + player_link['href'],
        'positionrank': playerline.find('p', {'class':'position'}).text,
        'schoolabv': playerline.find('span', {'class' : 'player-team'}).text,
        'overalltier': playerline.find('div', 'player-info').find('p').text,
        'value': int(playerline.find('div', {'class':'value'}).find('p').text),
    })

# Convert to DataFrame; passing columns keeps the schema even if no lines were found
wayback_devy_ranks = pd.DataFrame(ranking_records, columns=ranking_columns)

# Add date column labelling the snapshot date (Mar. 2, 2021)

In [4]:
# Every row gets the same fixed label, matching the 20210302 timestamp of the
# scraped Wayback snapshot; the column holds plain strings at this point.
snapshot_date_label = 'Mar. 2, 2021'
wayback_devy_ranks['date'] = snapshot_date_label

# Convert the date column to datetime and create the historical values df from the devy ranks (the CSV appends in this cell are commented out)

In [5]:
# Parse the text date label into a datetime column (overwrites the column in place)
parsed_dates = pd.to_datetime(wayback_devy_ranks['date'])
wayback_devy_ranks['date'] = parsed_dates

# Player / date / value slice of the rankings
value_columns = ['player', 'date', 'value']
wayback_values = wayback_devy_ranks[value_columns]

# wayback_devy_ranks.to_csv(f'../../data/ktc_historical_devy_ranks.csv', 
#                           mode = 'a', 
#                           index = False, 
#                           header = False
#                          )
# wayback_values.to_csv(f'../../data/ktc_historical_devy_values.csv', 
#                       mode = 'a', 
#                       index = False, 
#                       header = False
#                      )

In [6]:
# Strip the leading "https://keeptradecut.com/web/20210302174949/" archive prefix
# from each player href, keeping the remainder of the URL.
# An anchored replace (rather than extract) leaves hrefs without the prefix
# untouched, so re-running this cell no longer turns the column into NaN, and
# the dots are escaped so they only match literal dots.
wayback_devy_ranks['playerhref'] = wayback_devy_ranks['playerhref'].str.replace(
    r'^https://keeptradecut\.com/web/20210302174949/',
    '',
    regex=True,
)

In [7]:
# Preview the first 50 rows of the rankings frame
wayback_devy_ranks.iloc[:50]

Unnamed: 0,rank,player,playerhref,positionrank,schoolabv,overalltier,value,date
0,1,Trevor Lawrence,https://keeptradecut.com/devy-rankings/players...,QB1,CLEM,Tier 1,9975,2021-03-02
1,2,Justin Fields,https://keeptradecut.com/devy-rankings/players...,QB2,OSU,Tier 2,9530,2021-03-02
2,3,Ja'Marr Chase,https://keeptradecut.com/devy-rankings/players...,WR1,LSU,Tier 2,9425,2021-03-02
3,4,Najee Harris,https://keeptradecut.com/devy-rankings/players...,RB1,ALA,Tier 2,9228,2021-03-02
4,5,Zach Wilson,https://keeptradecut.com/devy-rankings/players...,QB3,BYU,Tier 3,8891,2021-03-02
5,6,Travis Etienne,https://keeptradecut.com/devy-rankings/players...,RB2,CLEM,Tier 3,8890,2021-03-02
6,7,Kyle Pitts,https://keeptradecut.com/devy-rankings/players...,TE1,FLA,Tier 3,8841,2021-03-02
7,8,Trey Lance,https://keeptradecut.com/devy-rankings/players...,QB4,NDSU,Tier 3,8712,2021-03-02
8,9,DeVonta Smith,https://keeptradecut.com/devy-rankings/players...,WR2,ALA,Tier 3,8597,2021-03-02
9,10,Breece Hall,https://keeptradecut.com/devy-rankings/players...,RB3,ISU,Tier 3,8494,2021-03-02


# Player page scrapes

In [8]:
# Location of the local geckodriver binary used to launch Firefox.
# NOTE(review): this is a machine-specific absolute path, and the recorded output
# shows a warning on the webdriver.Firefox(executable_path=...) call — newer
# Selenium releases presumably expect a Service object instead; confirm against
# the installed version.
GECKODRIVER_PATH = r'C:/Users/jrior/.ipython/drivers/geckodriver.exe'

# Per-player result frames, concatenated in a later cell
player_attributes_list = []
player_value_history_list = []

# Keep only rows whose player label has no 4-digit number in it
# (presumably this drops draft-pick entries, which have no player page — verify).
dr_no_picks = wayback_devy_ranks.loc[
    ~wayback_devy_ranks['player'].str.contains(r'\d{4}'),
    ['player', 'playerhref'],
]

for ind, row in dr_no_picks.iterrows():

    # save variables from the rankings row
    player = row['player']
    href = row['playerhref']

    # A fresh browser is started for every player page
    driver = webdriver.Firefox(executable_path=GECKODRIVER_PATH)
    try:
        # --- fetch: open the page, click "All Time", grab the rendered HTML ---
        try:
            driver.get(f"{href}")
            alltime = driver.find_element(By.ID, "all-time")
            try:
                alltime.click()
            except Exception:
                # If the first click fails, click the "dont-know" element and
                # retry (presumably it dismisses an overlay covering the button).
                dontknow = driver.find_element(By.ID, "dont-know")
                dontknow.click()
                alltime.click()
            content = driver.page_source
        finally:
            # Shut the browser down exactly once, on success and on failure.
            # Previously the driver was closed on the success path and again in
            # the error handler, so a parse error after the fetch hit an
            # already-closed session; a failing driver.get() leaked the browser.
            driver.quit()

        # --- parse: make the soup (parser named explicitly for consistent results) ---
        soup = BS(content, 'html.parser')

        # get player measurables and append to list
        card = soup.find('div', {'class':'pd-measurables devy-measurables'})
        row_titles = card.find_all('p', {'class':'row-title'})
        row_values = card.find_all('p', {'class':'row-value'})
        player_attributes = pd.DataFrame({
            'name': [f'{player}'] * len(row_titles),
            'attribute': [rowtitle.text.strip('\n\t') for rowtitle in row_titles],
            'value': [rowvalue.text.strip('\n\t') for rowvalue in row_values],
        })
        player_attributes_list.append(player_attributes)

        # get player value history (one hover group per graph point) and append to list
        valgraphsoup = soup.find("div", {"id":"pd-value-graph"})
        hovergroups = valgraphsoup.find_all("g", {"class":"hoverGroup"})
        player_value_history = pd.DataFrame({
            'name': [f'{player}'] * len(hovergroups),
            'date': [hg.find("text", {"class":"hoverDate"}).text for hg in hovergroups],
            'value': [hg.find("text", {"class":"graphVal hoverVal"}).text for hg in hovergroups],
        })
        player_value_history_list.append(player_value_history)
    except Exception as exc:
        # Best effort: report the player and the kind of failure, then move on.
        # Catching Exception (not a bare except) keeps Ctrl-C / kernel interrupt working.
        print(f'error at {player} ({type(exc).__name__})')

    # Pause for 10 seconds whenever the row's index label + 1 is a multiple of 50
    if (ind + 1) % 50 == 0:
        time.sleep(10)

  driver = webdriver.Firefox(executable_path=r'C:/Users/jrior/.ipython/drivers/geckodriver.exe')


error at Joe Ngata
error at Coleridge Stroud


In [59]:
# Stack the per-player attribute frames, then trim surrounding whitespace
# from the two text columns
attributes = pd.concat(player_attributes_list)
attributes = attributes.assign(
    **{text_col: attributes[text_col].str.strip() for text_col in ('attribute', 'value')}
)

# Stack the per-player value histories into one frame
historical_values = pd.concat(player_value_history_list)

# Parse the date text into datetimes
parsed_history_dates = pd.to_datetime(historical_values['date'])
historical_values['date'] = parsed_history_dates

# Keep only the first row seen for each (name, date) pair
dedupe_keys = ['name', 'date']
historical_values_clean = historical_values.drop_duplicates(subset=dedupe_keys, keep='first')

# Read in dynasty player data and join with historical values to identify which players were drafted

In [62]:
# Load the three existing CSVs (dynasty values, devy values, devy attributes),
# read in that order
historical_dynasty, historical_devy, devy_attributes = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../../data/ktc_historical_dynasty_values.csv',
        '../../data/ktc_historical_devy_values.csv',
        '../../data/ktc_devy_player_attributes.csv',
    )
)

In [63]:
# Distinct names already present in the devy history file
player_already_in_devy_history = pd.unique(historical_devy['name'])

# Distinct names present in the dynasty history file
player_became_rookie = pd.unique(historical_dynasty['name'])

In [64]:
# NOTE(review): despite the "mask_out" names, both masks mark the rows that are
# KEPT — they are used directly as the boolean selector below.

# Rows for names found in the dynasty history, dated before 2021-03-17
cutoff_date = '2021-03-17'
in_dynasty_history = historical_values_clean['name'].isin(player_became_rookie)
before_cutoff = historical_values_clean['date'] < cutoff_date
mask_out_rookie_zeros = in_dynasty_history & before_cutoff

# Rows for names that are not already in the devy history file
in_devy_history = historical_values_clean['name'].isin(player_already_in_devy_history)
mask_out_duplicate_devy_histories = ~in_devy_history

# Keep rows that satisfy both conditions
rows_to_keep = mask_out_rookie_zeros & mask_out_duplicate_devy_histories
wayback_historical_values = historical_values_clean.loc[rows_to_keep]

In [65]:
# Keep only attribute rows for names not already present in the attributes file
known_attribute_names = devy_attributes['name'].unique()
is_new_name = ~attributes['name'].isin(known_attribute_names)
wayback_attributes = attributes[is_new_name]

# Append back

In [70]:
# Append each frame to its CSV without header or index.
# mode='a' means re-running this cell appends the same rows again.
append_targets = (
    (wayback_devy_ranks, '../../data/ktc_historical_devy_ranks.csv'),
    (wayback_attributes, '../../data/ktc_devy_player_attributes.csv'),
    (wayback_historical_values, '../../data/ktc_historical_devy_values.csv'),
)
for frame, csv_path in append_targets:
    frame.to_csv(csv_path, mode='a', header=False, index=False)