In [1]:
import time, re, csv,espn
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait # available since 2.4.0
from selenium.webdriver.support import expected_conditions as EC # available since 2.26.0
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
from datetime import datetime

# Extracts the numeric ESPN team id from a link's query string (e.g. "...teamId=7...").
# Raw strings are used so that \d and \s reach the regex engine untouched instead of
# being treated as (invalid) Python string escapes.
teamidpattern = re.compile(r'teamId=(?P<id>\d+)')
# Matches a schedule header such as "WEEK 3" or "ROUND 1"; wktype is the label, wknum the number.
weekpattern = re.compile(r'(?P<wktype>(WEEK|ROUND))\s(?P<wknum>\d+)')

def outcome(score1, score2):
        """Classify a matchup from the point of view of the first score.

        Returns 'W' when score1 is greater than score2, 'L' when it is smaller,
        and 'T' in every other case (i.e. the scores compare as neither).
        """
        if score1 > score2:
                return 'W'
        if score1 < score2:
                return 'L'
        return 'T'

def class_not_leagueSettingsTable(tag):
        """Tag filter: truthy when the tag's class string starts with "tableBody".

        tag['class'] is a list of class names, so it is joined with spaces before
        being matched. A tag without a class attribute yields False; a class string
        beginning with "leagueSettingsTable" yields False; otherwise the result of
        re.match("tableBody", ...) is returned (a match object or None).

        NOTE(review): re.match only anchors at the start of the string, so the
        "leagueSettingsTable" exclusion applies only when that class comes first.
        Confirm this is the intended selection.
        """
        if not tag.has_attr('class'):
                return False
        class_string = ' '.join(tag['class'])
        if re.match("leagueSettingsTable", class_string):
                return False
        return re.match("tableBody", class_string)


In [2]:
# Maps the team id captured from each schedule link (as a string) to the owner's display name.
teams = {
        '1': 'Scott',
        '2': 'Brent',
        '3': 'JMT',
        '4': 'JJ',
        '5': 'Tim',
        '6': 'Jeremy',
        '7': 'Kyle',
        '8': 'Thomas',
        '9': 'Schwartz',
        '10': 'Blackwell/Goss',
        '11': 'Tony',
        '12': 'Paul',
}


## Create a Selenium webdriver for Chrome & Login 
##### Originally used Requests, but the ESPN site redesign broke the login form.

In [3]:
# Obtain the browser driver used for all page fetches below.
# NOTE(review): espn is a project-local module; getdriver() presumably returns a
# logged-in Selenium Chrome webdriver (per the heading above) — confirm in espn.py.
driver = espn.getdriver()

##  Create and Parse Soups for Weekly Team Results

In [4]:
# First tuple is the CSV header; every played matchup then contributes two rows,
# one from each team's point of view.
results = [('SEASON','WEEK','TEAM','TEAMNAME','SCORE','OPPONENT','OPPONENTNAME','OPPONENTSCORE', 'OUTCOME')]

for season in range(2016,datetime.now().year+1):
        print('')
        print(season)
        url = 'http://games.espn.go.com/ffl/schedule?leagueId=111414&seasonId='+str(season)
        driver.get(url)
        cur_season = BeautifulSoup(driver.page_source, "lxml")

        # Locate the schedule table and its first row; fail with a readable message
        # instead of an AttributeError on None when the page is not what we expect.
        schedule_table = cur_season.find(class_not_leagueSettingsTable)
        first_row = schedule_table.tr if schedule_table is not None else None
        if first_row is None:
                raise RuntimeError('No schedule table found for season ' + str(season) + ' at ' + url)

        # Reset per season so a stale week label from the previous season is never reused.
        week = None

        # Walk the first row and all of its following siblings in document order.
        for current_row in [first_row, *first_row.next_siblings]:
                # Text nodes between rows (line feeds, other whitespace, comments) are strings; skip them.
                if isinstance(current_row, str):
                        continue

                row_classes = current_row.get('class', [])

                if 'tableSubHead' in row_classes:       # Header row, do not process
                        continue
                if 'tableHead' in row_classes:  # Weekly header.  Grab week # and move on
                        week_match = weekpattern.search(current_row.td.text)
                        wknum = week_match.group('wknum')
                        # Regular-season weeks keep their number; "ROUND n" headers become "Pn".
                        week = wknum if week_match.group('wktype') == "WEEK" else "P"+wknum
                        print(week, end=" ")
                        continue

                cells = current_row.contents
                if len(cells) <= 11:    # Spacer row: no result cell at index 11
                        continue

                raw_score = cells[11].text.rstrip('*')
                if raw_score == 'Preview':      # Game has not been played yet
                        continue

                if week is None:
                        raise ValueError('Matchup row found before any WEEK/ROUND header in season ' + str(season))

                # Result text has the form "<team1 score>-<team2 score>".
                score_parts = raw_score.split('-')

                # Child index 1 holds the first team's link, index 7 the second team's link.
                team1 = teamidpattern.search(cells[1].a.get('href')).group('id')
                team1score = float(score_parts[0])

                team2 = teamidpattern.search(cells[7].a.get('href')).group('id')
                team2score = float(score_parts[1])

                results.append((season, week, team1, teams[team1], team1score, team2, teams[team2], team2score, outcome(team1score, team2score)))
                results.append((season, week, team2, teams[team2], team2score, team1, teams[team1], team1score, outcome(team2score, team1score)))



2016
1 2 3 4 5 6 7 8 9 10 11 12 13 14 P1 P2 
2017
1 2 3 4 5 6 7 8 9 10 11 12 13 14 P1 P2 

In [5]:
# Dump the header plus all matchup rows to a comma-separated text file.
with open('data/matchup_results_2016_2017w10.txt', 'w', newline = '\n') as outfile:
        csv.writer(outfile).writerows(results)

     SEASON WEEK TEAM        TEAMNAME  SCORE OPPONENT    OPPONENTNAME  \
0      2016    1    1           Scott   86.0        8          Thomas   
1      2016    1    8          Thomas   77.0        1           Scott   
2      2016    1    4              JJ  138.0        3             JMT   
3      2016    1    3             JMT   84.0        4              JJ   
4      2016    1   11            Tony  113.0        7            Kyle   
5      2016    1    7            Kyle  117.5       11            Tony   
6      2016    1    5             Tim  139.5        2           Brent   
7      2016    1    2           Brent   91.0        5             Tim   
8      2016    1    9        Schwartz  125.5       12            Paul   
9      2016    1   12            Paul  114.0        9        Schwartz   
10     2016    1    6          Jeremy  107.0       10  Blackwell/Goss   
11     2016    1   10  Blackwell/Goss  151.5        6          Jeremy   
12     2016    2    1           Scott  111.5       

Unnamed: 0_level_0,ACTUAL,EXPECTED,LUCK
TEAMNAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jeremy,3.0,5.242084,-2.242084
Tony,2.0,4.135005,-2.135005
Thomas,3.0,3.656543,-0.656543
Schwartz,5.0,5.318632,-0.318632
Scott,6.0,6.063151,-0.063151
JJ,5.0,4.999648,0.000352
Paul,6.0,5.954416,0.045584
Tim,5.0,4.936495,0.063505
Blackwell/Goss,4.0,3.633566,0.366434
JMT,6.0,5.24968,0.75032
