# Imports

In [1]:
import re
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup as BS
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
from datetime import date, datetime, timedelta

# Create yesterday and today variables

In [2]:
# Read the clock once so the two dates are always exactly one day apart,
# even when the cell happens to run across midnight.
today = date.today()
yesterday = today - timedelta(days=1)

# Scrape current rankings, then save to csv

In [3]:
url = 'https://keeptradecut.com/devy-rankings?page=0&filters=QB|WR|RB|TE&format=2'

# A timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error page into an immediate exception
# instead of letting it be parsed as if it were the rankings table.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and warns), so results can differ between machines.
rankingsSoup = BS(response.content, 'html.parser')

# Readable construction of devy-rankings dataframe

# Column order of the resulting DataFrame. Passing it to the constructor also
# keeps the columns present when the page yields no player rows.
rankings_columns = [
    'rank',
    'player',
    'playerhref',
    'positionrank',
    'schoolabv',
    'overalltier',
    'value',
]

# Relevant divs: every <div class="onePlayer"> is read as one ranking row
playerlines = rankingsSoup.find_all('div', 'onePlayer')

# One record per player line
rankings_rows = []
for playerline in playerlines:

    # The first <a> in the line supplies both the display name and the
    # relative link to the player's page
    playerlink = playerline.find('a')

    rankings_rows.append({
        'rank': int(playerline.find('div', {'class': 'rank-number'}).find('p').text),
        'player': playerlink.text,
        'playerhref': 'https://keeptradecut.com' + playerlink['href'],
        'positionrank': playerline.find('p', {'class': 'position'}).text,
        'schoolabv': playerline.find('span', {'class': 'player-team'}).text,
        'overalltier': playerline.find('div', 'player-info').find('p').text,
        'value': int(playerline.find('div', {'class': 'value'}).find('p').text),
    })

# Convert to DataFrame and stamp every row with the scrape date
devy_ranks = pd.DataFrame(rankings_rows, columns=rankings_columns)
devy_ranks['date'] = today

In [4]:
# Write the current rankings to csv. The file name itself carries no date;
# the scrape date travels in the frame's 'date' column. to_csv() overwrites
# any existing file at this path.
# NOTE(review): the file is called "historical" but each run replaces it --
# confirm whether appending to earlier snapshots was intended.
devy_ranks.to_csv('../../data/ktc_historical_devy_ranks.csv', index=False)

# Player page scrapes

In [5]:
# Base class of Selenium's click/interaction errors; imported here because
# the file's import cell does not bring it in.
from selenium.common.exceptions import WebDriverException

# Path to the local geckodriver binary used to launch Firefox.
# NOTE(review): `executable_path` is deprecated in Selenium 4 in favour of a
# Service object -- confirm the installed Selenium version before changing it.
geckodriver_path = r'C:/Users/jrior/.ipython/drivers/geckodriver.exe'

player_attributes_list = []
player_value_history_list = []

# Skip rows whose 'player' text contains four consecutive digits
# (presumably draft picks labelled with a year, going by the variable name --
# verify against the rankings page).
dr_no_picks = devy_ranks.loc[~devy_ranks['player'].str.contains(r'\d{4}'), ['player', 'playerhref']]

# Launch the browser once and reuse it for every player page. The finally
# clause guarantees the browser and its geckodriver process are shut down
# even if a page fails part-way through the loop.
driver = webdriver.Firefox(executable_path=geckodriver_path)
try:
    player_pages = zip(dr_no_picks['player'], dr_no_picks['playerhref'])
    for pages_scraped, (player, href) in enumerate(player_pages, start=1):

        # go to player's href page and click "All Time" so the value graph
        # shows its full history
        driver.get(href)
        alltime = driver.find_element(By.ID, "all-time")
        try:
            alltime.click()
        except WebDriverException:
            # The first click failed; click the "dont-know" element and retry
            # (presumably that dismisses an overlay covering the button).
            driver.find_element(By.ID, "dont-know").click()
            alltime.click()

        # make the soup (explicit parser keeps results the same across machines)
        soup = BS(driver.page_source, 'html.parser')

        # get player measurables and append to list
        card = soup.find('div', {'class': 'pd-measurables devy-measurables'})
        rowtitles = card.find_all('p', {'class': 'row-title'})
        rowvalues = card.find_all('p', {'class': 'row-value'})
        player_attributes = pd.DataFrame({
            'name': [player] * len(rowtitles),
            'attribute': [rowtitle.text.strip('\n\t') for rowtitle in rowtitles],
            'value': [rowvalue.text.strip('\n\t') for rowvalue in rowvalues],
        })
        player_attributes_list.append(player_attributes)

        # get player value history and append to list
        valgraphsoup = soup.find("div", {"id": "pd-value-graph"})
        hovergroups = valgraphsoup.find_all("g", {"class": "hoverGroup"})
        player_value_history = pd.DataFrame({
            'name': [player] * len(hovergroups),
            'date': [hg.find("text", {"class": "hoverDate"}).text for hg in hovergroups],
            'value': [hg.find("text", {"class": "graphVal hoverVal"}).text for hg in hovergroups],
        })
        player_value_history_list.append(player_value_history)

        # Pause after every 50 pages actually fetched. Counting pages rather
        # than testing the DataFrame index label keeps the throttle regular
        # even though filtering left gaps in the index.
        if pages_scraped % 50 == 0:
            time.sleep(10)
finally:
    # quit() ends the whole WebDriver session; close() would only close the
    # current window and leave geckodriver running.
    driver.quit()


# Concatenate attributes and historical values, respectively, then save to csv

In [13]:
# Concatenate the per-player attribute frames and use strip() to clean
# leftover whitespace (e.g. \n) from the entries. ignore_index gives the
# combined frame one unique 0..n-1 index instead of repeating every
# per-player index.
attributes = pd.concat(player_attributes_list, ignore_index=True)
for col in ['attribute', 'value']:
    attributes[col] = attributes[col].str.strip()

# concatenate the historical values together
historical_values = pd.concat(player_value_history_list, ignore_index=True)

# convert date column to datetime
historical_values['date'] = pd.to_datetime(historical_values['date'])

# drop duplicate player-date pairs from historical values (only the first
# entry of each name/date pair is kept)
historical_values_clean = historical_values.drop_duplicates(subset=['name', 'date'], keep='first')

# Write each df to csv. The file names carry no date and to_csv() overwrites
# any existing file at these paths.
attributes.to_csv('../../data/ktc_devy_player_attributes.csv', index=False)
historical_values_clean.to_csv('../../data/ktc_historical_devy_values.csv', index=False)