In [1]:
import time, re, csv,espn
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait # available since 2.4.0
from selenium.webdriver.support import expected_conditions as EC # available since 2.26.0
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
from datetime import datetime

# Seasons to scrape: 2016 through the current calendar year, inclusive.
seasons = range(2016, datetime.now().year + 1)

# Raw string literals keep \d, \s, \w and \. as regex escapes instead of
# relying on Python passing unknown string escapes through unchanged.
# Each pattern pulls one value out of an ESPN URL or a piece of page text.
teamidpattern = re.compile(r'teamId=(?P<id>\d+)')
# Case-sensitive; get_week_formatted() compiles its own case-insensitive copy.
weekpattern = re.compile(r'(?P<wktype>(WEEK|ROUND))\s(?P<wknum>\d+)')
seasonidpattern = re.compile(r'seasonId=(?P<season>\d+)')
scoringperiodidpattern = re.compile(r'scoringPeriodId=(?P<id>\d+)')
# First run of word characters that is immediately followed by a period.
bidresultpattern = re.compile(r'\w+(?=\.)')

def outcome(score1, score2):
    """Classify score1 against score2.

    Returns 'W' when score1 is greater, 'L' when it is smaller and 'T'
    in every other case.
    """
    if score1 > score2:
        return 'W'
    if score1 < score2:
        return 'L'
    return 'T'

def class_not_leagueSettingsTable(tag):
    """Tag predicate: class string starts with "tableBody" but not "leagueSettingsTable".

    Passed to BeautifulSoup's find() to locate the schedule table. The
    result is truthy (a match object) for a hit and falsy otherwise.
    """
    has_class = tag.has_attr('class')
    if not has_class:
        return has_class
    # tag['class'] is a list of class names, so join it before matching.
    class_string = ' '.join(tag['class'])
    if re.match("leagueSettingsTable", class_string):
        return False
    return re.match("tableBody", class_string)

def class_playertablebody(tag):
    """Tag predicate: truthy when the tag's class string starts with "playerTableTable".

    Written in the shape of a BeautifulSoup find()/find_all() filter.
    """
    has_class = tag.has_attr('class')
    if not has_class:
        return has_class
    # tag['class'] is a list of class names, so join it before matching.
    class_string = ' '.join(tag['class'])
    return re.match("playerTableTable", class_string)

def class_playerrow(tag):
    """Tag predicate: truthy when the tag's class string starts with "pncPlayerRow".

    Written in the shape of a BeautifulSoup find()/find_all() filter.
    """
    has_class = tag.has_attr('class')
    if not has_class:
        return has_class
    # tag['class'] is a list of class names, so join it before matching.
    class_string = ' '.join(tag['class'])
    return re.match("pncPlayerRow", class_string)

def get_week_formatted(wk):
    """Turn heading text such as "WEEK 3" or "ROUND 1" into a short week label.

    Parameters
    ----------
    wk : str
        Text containing "WEEK <n>" or "ROUND <n>" (matched case-insensitively;
        the first occurrence wins).

    Returns
    -------
    str
        "<n>" for a WEEK heading, "P<n>" for a ROUND heading.

    Raises
    ------
    ValueError
        If `wk` contains neither marker.
    """
    # Raw string so \s and \d are regex escapes rather than string escapes.
    weekpattern = re.compile(r'(?P<wktype>(WEEK|ROUND))\s(?P<wknum>\d+)', flags=re.IGNORECASE)
    found = weekpattern.search(wk)
    if found is None:
        raise ValueError('no WEEK/ROUND marker found in %r' % (wk,))
    wknum = found.group('wknum')
    return wknum if found.group('wktype').upper() == "WEEK" else "P" + wknum




In [2]:
# Names keyed by ESPN team id (as a string). Each later roster is written as
# the changes relative to an earlier one.
pre2010 = {'1':'Scott', '2':'Brent', '3':'JMT', '4':'JJ', '5':'Tim', '6':'Jeremy', '7':'Kyle', '8':'Thomas', '9':'Schwartz', '10':'Blackwell'}
t2010 = {**pre2010, '11':'Tony', '12':'Doogs'}
t2011 = {**t2010, '12':'JonBurriss'}
t2012 = {**t2010, '12':'Paul'}
t2016 = {**t2012, '10':'Goss'}

# Season -> roster in effect that season. The mapping ends at 2017, so a
# lookup for any later season raises KeyError.
teams = {season: pre2010 for season in (2008, 2009)}
teams.update({2010: t2010, 2011: t2011})
teams.update({season: t2012 for season in range(2012, 2016)})
teams.update({season: t2016 for season in (2016, 2017)})

## Create a Selenium webdriver for Chrome & Login 
##### Originally used Requests, but the ESPN site redesign broke the login form.

In [3]:
# Obtain the Selenium driver from the project-local `espn` helper module.
# Every scraping cell below reuses this one `driver` object.
# NOTE(review): the recorded output ("Press Enter to continue...") suggests
# getdriver() pauses for a manual step, presumably the ESPN login — confirm
# in espn.py.
driver = espn.getdriver()

Press Enter to continue...


##  Create and Parse Soups for Weekly Team Results

In [4]:
# Header row, followed by two rows per played matchup (one from each team's
# point of view).
results = [('SEASON','SCORINGPERIOD', 'WEEK_NM','TEAM','TEAMNAME','SCORE','OPPONENT','OPPONENTNAME','OPPONENTSCORE', 'OUTCOME')]
# Box-score links gathered here are visited later by the quick-box cell.
quickboxurls = []

for season in seasons:
    print('')
    print(season)
    url = 'http://games.espn.go.com/ffl/schedule?leagueId=111414&seasonId='+str(season)
    driver.get(url)
    cur_season = BeautifulSoup(driver.page_source, "lxml")

    # Locate the schedule table; skip the season when it is missing rather
    # than failing with an AttributeError on None.
    schedule_table = cur_season.find(class_not_leagueSettingsTable)
    first_tr = schedule_table.tr if schedule_table is not None else None
    if first_tr is None:
        print('no schedule table found; skipping season')
        continue

    week = None  # set by the most recent weekly header row
    for current_row in [first_tr] + list(first_tr.next_siblings):
        # Text nodes between rows (line feeds, whitespace) carry no data.
        if isinstance(current_row, str):
            continue

        # Row classes must be inspected before the score so that `week` is
        # set by the time the first matchup row is reached.
        class_ = current_row.get('class', [])
        if 'tableSubHead' in class_:    # Header row, do not process
            continue
        if 'tableHead' in class_:       # Weekly header.  Grab week # and move on
            week = get_week_formatted(current_row.td.text)
            print(week, end=" ")
            continue

        try:
            score_cell = current_row.contents[11]
        except IndexError:      # Spacer row
            continue

        raw_score = score_cell.text.rstrip('*')
        if raw_score in ('Preview', 'Box'):      # Game has not been played yet
            continue

        box_href = score_cell.a['href']
        quickboxurls.append('http://games.espn.com' + box_href)
        scoringperiod = scoringperiodidpattern.search(box_href).group('id')

        team1 = teamidpattern.search(current_row.contents[1].a.get('href')).group('id')
        team2 = teamidpattern.search(current_row.contents[7].a.get('href')).group('id')

        # raw_score looks like "<team1 score>-<team2 score>".
        score_parts = raw_score.split('-')
        team1score = float(score_parts[0])
        team2score = float(score_parts[1])

        # Raises KeyError when `teams` has no roster for this season; extend
        # `teams` before scraping a season it does not cover.
        roster = teams[int(season)]
        results.append((season, scoringperiod, week, team1, roster[team1], team1score, team2, roster[team2], team2score, outcome(team1score, team2score)))
        results.append((season, scoringperiod, week, team2, roster[team2], team2score, team1, roster[team1], team1score, outcome(team2score, team1score)))



2016
1 2 3 4 5 6 7 8 9 10 11 12 13 14 P1 P2 
2017
1 2 3 4 5 6 7 8 9 10 11 12 13 14 P1 P2 

In [5]:
# Write the matchup rows to a file named after the first and last season scraped.
first_season, last_season = seasons[0], seasons[-1]
matchup_path = 'data/matchup_results_' + str(first_season) + '_thru_' + str(last_season) + '.txt'
with open(matchup_path, 'w', newline = '\n') as matchup_file:
    csv.writer(matchup_file).writerows(results)

In [6]:
# Header row, followed by one row per player listed in each team's box score.
boxresults = [('SEASON','SCORINGPERIOD', 'WEEK_NM','TEAM','TEAMNAME','SLOT','PLAYERID','PLAYERNAME','PLAYEROPP','GAMEOUTCOME', 'PLAYERPOINTS', 'STARTERPOINTS', 'BENCHPOINTS')]

for quickboxurl in quickboxurls:
    driver.get(quickboxurl)
    cur_matchup = BeautifulSoup(driver.page_source, "lxml")

    season = seasonidpattern.search(driver.current_url).group('season')
    scoringperiod = scoringperiodidpattern.search(driver.current_url).group('id')
    week = get_week_formatted(cur_matchup.select('.games-pageheader')[0].em.text)
    # Pages for seasons from 2015 on are parsed with different selectors
    # than earlier ones; decide once per page.
    modern_layout = int(season) >= 2015

    # Start from the table header carrying the team name: class=playerTable is
    # used for both starters and bench, which would yield every team twice.
    for box in cur_matchup.select('.playertableTableHeader'):
        cur_team_box = box.parent.parent.parent
        # The inline style of the team box tells the left team from the right
        # one; the index picks the matching link inside #teamInfos.
        left_or_right_box = 0 if re.search('left', cur_team_box['style'], re.IGNORECASE) else 1

        cur_team_id = teamidpattern.search(cur_matchup.find(id='teamInfos').find_all('a')[left_or_right_box].get('href')).group('id')

        if modern_layout:
            starterpts = cur_team_box.select('.totalScore')[0].text
        else:
            starterpts = cur_team_box.select('.playerTableBgRowTotals')[0].select('.appliedPoints')[0].text

        # The bench total element is not always present; default to '0'
        # explicitly instead of hiding every possible error behind a bare except.
        bench_tag = cur_matchup.find(id='tmInactivePts_' + str(cur_team_id))
        benchpts = bench_tag.text if bench_tag is not None else '0'

        for player in cur_team_box.select('.pncPlayerRow'):
            name_cells = player.select('.playertablePlayerName')
            slot = player.select('.playerSlot')[0].text if modern_layout else name_cells[0].text.split()[-1]
            # Stops at the first IR or empty row: any rows after it in this
            # team box are not recorded.
            if slot.upper() == 'IR' or player.select('td')[1].text.strip() == '':
                break

            playerid = player.find('a')['playerid'] if modern_layout else 'null'
            playername = player.find('a').text if modern_layout else name_cells[0].text

            # The cell right after the player name holds the opponent (or BYE).
            opp_cell = name_cells[0].next_sibling
            if re.search('BYE', opp_cell.text, re.IGNORECASE):
                playeropp = 'BYE'
                gameoutcome = 'BYE'
            elif modern_layout:
                playeropp = opp_cell.text
                gameoutcome = player.select('.gameStatusDiv')[0].text[2:]
            else:
                links = player.find_all('a')
                playeropp = links[0].text
                gameoutcome = links[1].text
            playerpoints = player.select('.playertableStat')[0].text

            boxresults.append((season, scoringperiod, week, cur_team_id, teams[int(season)][cur_team_id], slot, playerid, playername, playeropp, gameoutcome, playerpoints, starterpts, benchpts))

# Write the per-player box-score rows to disk.
with open('data/quickbox.txt', 'w', newline = '\n') as quickbox_file:
    csv.writer(quickbox_file).writerows(boxresults)


In [None]:
# NOTE(review): scratch/debug cell. It repeats the per-player parsing from the
# quick-box loop for a single row and defines neither `player` nor `season`;
# both must already exist in the kernel (they are loop variables of the
# quick-box cell), so this cell fails on a fresh kernel. Nothing computed here
# is stored or written anywhere. Candidate for deletion once the parsing is settled.
slot = player.select('.playerSlot')[0].text if int(season) >= 2015 else player.select('.playertablePlayerName')[0].text.split()[-1]

# Debug variant of the loop's early exit: prints instead of breaking.
if slot.upper() == 'IR' or player.select('td')[1].text.strip()=='':
#    break
    print('break')
    
# Unlike the loop, id and name fall back to 'null' when the second cell is empty.
playerid = player.find('a')['playerid'] if int(season) >= 2015 and player.select('td')[1].text.strip()!='' else 'null'
playername = player.find('a').text if int(season) >= 2015 and player.select('td')[1].text.strip()!='' else 'null'#player.select('.playertablePlayerName')[0].text
if re.search('BYE', player.select('.playertablePlayerName')[0].next_sibling.text, re.IGNORECASE):
    playeropp = 'BYE'
    gameoutcome = 'BYE'
else:
    playeropp = player.select('.playertablePlayerName')[0].next_sibling.text if int(season) >= 2015 else player.find_all('a')[0].text
    gameoutcome = player.select('.gameStatusDiv')[0].text[2:] if int(season) >= 2015 else player.find_all('a')[1].text
playerpoints = player.select('.playertableStat')[0].text

In [18]:


# Header columns mirror, in order, the six fields appended for every bid below.
bids = [('AUCTIONDATE','TEAMID','PLAYERID','NAME','BID','BIDRESULT')]

# The waiver report page has one <option> per auction date.
url = 'http://games.espn.go.com/ffl/waiverreport?leagueId=111414'
driver.get(url)
auction_result_page = BeautifulSoup(driver.page_source, "lxml")

for option in auction_result_page.find_all('option'):
    auction_date = option.get('value')
    if not auction_date:    # <option> without a value: no report to fetch
        continue
    url = 'http://games.espn.go.com/ffl/waiverreport?leagueId=111414&date='+auction_date
    driver.get(url)
    cur_auction = BeautifulSoup(driver.page_source, "lxml")

    # One <tr class="tableBody"> per bid.
    for row in cur_auction.find_all('tr', attrs={'class':'tableBody'}):
        owner = row.contents[2].a
        teamId = teamidpattern.search(owner.get('href')).group('id')
        player = row.contents[4].a
        bidAmt = row.contents[6].string.lstrip('$')
        # First word that is directly followed by a period in the result cell.
        bidresult = bidresultpattern.search(row.contents[7].text).group()
        bids.append((auction_date, teamId, player.get('playerid'), player.string, bidAmt, bidresult))



SyntaxError: can't assign to function call (<ipython-input-18-9678c0a5f04f>, line 12)

In [None]:
# Write the collected free-agent auction bids to disk.
with open('data/fa_auction_bids.txt', 'w', newline = '') as bids_file:
    csv.writer(bids_file).writerows(bids)