In [1]:
import numpy as np
import pandas as pd
import re
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [2]:
def espn_login(driver=None, poll_interval=0.5):  # Likely return to this to put it in another file
    '''
    If there is no active WebDriver session, open one and request ESPN login.
    Wait until login has been registered before advancing.

    Login is considered registered as soon as the page title is no longer
    'Log In'; the title is polled every `poll_interval` seconds.  There is no
    timeout: the call blocks until the title changes.
    Args:
        driver: selenium WebDriver instance, logged in OR out of ESPN.  When
                None, a new Chrome session is started.
        poll_interval: seconds to sleep between title checks (default 0.5)
    Return:
        driver: selenium WebDriver instance, logged in to ESPN

    '''
    if driver is None:
        # Raw string: '\C' and '\c' are not valid escape sequences, so the
        # plain literal triggers a warning on current Python versions.
        driver = webdriver.Chrome(r'C:\ChromeDriver\chromedriver.exe')
    driver.get('http://www.espn.com/login/')
    # Ask the driver for the title directly instead of re-parsing the whole
    # document on every iteration, and sleep between polls so the loop does
    # not spin a CPU core while the user types their credentials.
    while driver.title == 'Log In':
        time.sleep(poll_interval)
    return driver

In [None]:
# Empty 10-row scratch frame (rows labelled 0-9) with two columns.  The second
# column label is still the empty string -- looks like a placeholder; TODO name it.
lineup_df = pd.DataFrame(
    columns=['team', ''],
    index=list(range(10)),
)

In [None]:
class Player:
    '''
    A single player entry: a name, a position and a score.
    Args:
        name: player name; stored privately and exposed through the read-only
              `name` property
        position: the player's position (stored as given)
        score: the player's score (stored as given)
    '''
    def __init__(self, name, position, score):
        self._name = name
        self.position = position
        self.score = score

    @property
    def name(self):
        ''' Read-only access to the name passed at construction. '''
        return self._name

    def __repr__(self):
        return 'Player(name={!r}, position={!r}, score={!r})'.format(
            self._name, self.position, self.score)
        
    
class Team:
    '''
    A team made up of two groups of players.
    Args:
        starters: iterable of objects with a `score` attribute; these are the
                  players counted by compute_actual_score
        bench: the remaining players (stored as given; not read by any method
               implemented so far)
    '''
    def __init__(self, starters, bench):
        self.starters = starters
        self.bench = bench

    def compute_actual_score(self):
        ''' Return the sum of `score` over the starters (0 when there are none). '''
        return sum(player.score for player in self.starters)

    def compute_best_score(self):
        '''
        Placeholder.  The method previously had no body at all, which made the
        whole class definition a syntax error.  The lineup rules needed to pick
        a "best" lineup are not defined yet, so fail loudly rather than return
        a made-up number.
        Raises:
            NotImplementedError: always
        '''
        raise NotImplementedError('compute_best_score is not implemented yet')
    

In [3]:
class MyPoint:
    ''' Pair of coordinates exposed through the read-only properties x and y. '''

    def __init__(self, x, y):
        # Values live on underscore-prefixed attributes; the properties below
        # define getters only, so assigning to .x or .y raises AttributeError.
        self._x, self._y = x, y

    x = property(lambda self: self._x, doc='First coordinate (read-only).')
    y = property(lambda self: self._y, doc='Second coordinate (read-only).')

# Experiment: can arbitrary Python objects live in a DataFrame column?
my_list = [MyPoint(coord, coord) for coord in (1, 2)]
print(my_list)

# One row per point: its two coordinates plus the point object itself.
plane_pd = pd.DataFrame({
    'X': [point.x for point in my_list],
    'Y': [point.y for point in my_list],
    'O': my_list,
})
print(plane_pd.dtypes)
print(plane_pd)

[<__main__.MyPoint object at 0x00000220D3575F28>, <__main__.MyPoint object at 0x00000220D3575EF0>]
X     int64
Y     int64
O    object
dtype: object
   X  Y                                                O
0  1  1  <__main__.MyPoint object at 0x00000220D3575F28>
1  2  2  <__main__.MyPoint object at 0x00000220D3575EF0>


In [8]:
# Fetch the object stored at row label 0, column 'O', and read its x property.
plane_pd.at[0, 'O'].x

1

In [None]:
def scrape_lineups(driver=None):
    '''
    Scrape the paginated player table of the hard-coded ESPN league page and
    return one row per player.

    NOTE(review): despite the name, the body walks the league's "freeagency"
    player listing rather than per-team weekly lineups -- it looks like a
    starting point copied from a player scraper.  Confirm the intended target.
    Args:
        driver: selenium WebDriver instance (optional), logged in OR out of ESPN
    Return:
        player_df: pandas DataFrame with columns name, pos, team, owner and
                   keeper_val, indexed 0..n-1.  keeper_val is never filled in
                   here and is left as NaN.
    Raises:
        Selenium errors raised while clicking the NEXT link propagate to the
        caller.  Only a timeout while waiting for the link is treated as "no
        more pages".
    '''
    # Imported locally so this cell does not depend on an edit to the import
    # cell; selenium is already a dependency of this notebook.
    from selenium.common.exceptions import TimeoutException

    columns = ['name', 'pos', 'team', 'owner', 'keeper_val']
    poll_interval = 0.25  # seconds between checks while a page is loading

    driver = espn_login(driver)
    driver.implicitly_wait(5)
    driver.get('http://games.espn.com/ffl/freeagency?leagueId=2205911&teamId='
               '6&seasonId=2017#&seasonId=2017&avail=-1')

    # Rows are collected as plain dicts and turned into a DataFrame once at
    # the end: DataFrame.append was removed in pandas 2.0, and growing a frame
    # page by page copies all earlier rows on every iteration.
    records = []
    last_player_logged = None
    while True:
        # Check to see if the page has loaded by testing if the last player
        # on the page is the same as the last one logged on the previous page.
        # Keep polling (with a short sleep) while the table is missing from
        # the DOM or still shows the previous page.
        while True:
            innerHTML = driver.execute_script(
                "return document.getElementsByTagName('html')[0].innerHTML")
            html = BeautifulSoup(innerHTML, 'html.parser')
            player_table = html.find(id='playertable_0')
            if player_table is not None:
                table_rows = player_table.find_all('tr')
                if table_rows:
                    last_cells = table_rows[-1].find_all('td')
                    if (last_cells
                            and last_cells[0].get_text() != last_player_logged):
                        break
            time.sleep(poll_interval)

        # Parse every player row on the current page; the first two <tr>
        # elements are skipped, exactly as before.
        for row in table_rows[2:]:
            cells = row.find_all('td')
            last_player_logged = cells[0].get_text()
            name_team_pos = re.split(', |\xa0', last_player_logged)
            if len(name_team_pos) == 2:  # This applies only to d/st entries
                # No separate team field: use the first word of the name.
                team_name = name_team_pos[0].split(' ')[0]
                name_team_pos = [name_team_pos[0], team_name, name_team_pos[1]]
            records.append({
                'name': name_team_pos[0],
                'team': name_team_pos[1],
                'pos': name_team_pos[2],
                'owner': cells[2].get_text(),
            })

        # If another page becomes available within 3 seconds, click it to
        # advance.  Otherwise, assume the final page has been reached and
        # exit the loop.  Only the wait timeout counts as "no more pages";
        # any other failure is a real error and is allowed to surface.
        try:
            next_link = WebDriverWait(driver, 3).until(
                EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, 'NEXT')))
        except TimeoutException:
            print('scraping complete')
            break
        next_link.click()

    # dtype=object mirrors the frames the previous implementation produced;
    # keeper_val is absent from every record and therefore comes out as NaN.
    return pd.DataFrame(records, columns=columns, dtype=object)