In [7]:
import re
from time import sleep

from pandas import DataFrame

from fsUtils import isFile
from ioUtils import getFile, saveFile
from webUtils import getWebData, getHTML

# Download Standings Website to Get Full List of Teams

In [None]:
# Request the ESPN men's college basketball standings page.
# NOTE(review): getWebData is a project helper; presumably it stores the fetched
# page at `savename` (the next section reads that path) -- confirm.
url = "https://www.espn.com/mens-college-basketball/standings"
savename = "data/standings.p"
getWebData(base=url, savename=savename, useSafari=False)

## Parse Standings Data

In [None]:
# Build a {teamID: {"Name", "Ref"}} lookup from the cached standings page.
teamsInfo = {}
bsdata = getHTML('data/standings.p')

tables = bsdata.findAll("table")
for table in tables:
    trs = table.findAll("tr")
    for tr in trs:
        tds = tr.findAll("td")
        # Only single-cell rows are inspected for team links.
        if len(tds) != 1:
            continue
        refs = tds[0].findAll("a")
        # Only cells holding exactly three links are used; the third link
        # supplies both the team name and the team URL.
        if len(refs) != 3:
            continue
        primeRef = refs[2]
        teamName = primeRef.text
        href     = primeRef.attrs['href']
        href     = "".join(["http://www.espn.com", href])
        teamID   = href.split("/")
        # The second-to-last URL segment is used as the team ID and must be numeric.
        # Catch only the conversion failure so unrelated errors (and Ctrl-C) surface.
        try:
            int(teamID[-2])
        except ValueError:
            raise ValueError("Cannot get team ID for {0} -> {1}".format(teamName, href))
        teamsInfo[teamID[-2]] = {"Name": teamName, "Ref": href}

print("Found {0} Division I Teams".format(len(teamsInfo)))
saveFile(idata=teamsInfo, ifile="data/teams.p", debug=True)

# Download Team Sites

In [None]:
# Download each team's page once; pages already on disk are not fetched again.
teamsInfo = getFile("data/teams.p")
for teamID, teamData in teamsInfo.items():
    name = teamData["Name"]
    href = teamData["Ref"]
    print("Downloading {0} Data".format(name))
    url = href
    savename = "data/teams/{0}.p".format(name)
    if not isFile(savename):
        try:
            getWebData(base=url, savename=savename, useSafari=False)
            sleep(2)
        except Exception:
            # Best effort: report the failure and move on to the next team.
            # `Exception` (not a bare except) keeps Ctrl-C able to stop the loop.
            print("Trouble with {0}".format(name))
            sleep(1)

## Parse Team Site Data

In [None]:
def getTeamID(ref, shift):
    """Return one "/"-separated segment of ``ref``.

    Parameters
    ----------
    ref : str
        URL or path to split on "/".
    shift : int
        Index of the wanted segment (negative values count from the end).

    Returns
    -------
    str
        The selected segment, unconverted.

    Raises
    ------
    ValueError
        If ``ref`` cannot be split or ``shift`` does not select a segment.
    """
    try:
        teamID = ref.split("/")[shift]
    except (AttributeError, IndexError, TypeError):
        # AttributeError: ref is not a string; IndexError: shift out of range;
        # TypeError: shift is not a valid index.
        raise ValueError("Cannot parse team ID [{0}]".format(ref))
    return teamID


def getGameID(ref):
    """Return the integer that follows the last "=" in ``ref``.

    Parameters
    ----------
    ref : str
        Link text such as ".../boxscore?gameId=401169817".

    Returns
    -------
    int
        The parsed game ID.

    Raises
    ------
    ValueError
        If ``ref`` is not a string or its tail is not an integer.
    """
    try:
        gameID = int(ref.split("=")[-1])
    except (AttributeError, TypeError, ValueError):
        raise ValueError("Cannot parse game ID [{0}]".format(ref))
    return gameID
        

def parseGameOpp(gameOpp):
    """Extract the opponent's team ID from the logo image of a schedule entry.

    The ID is the last "/"-separated segment of the image ``src`` that precedes
    ".png".

    Parameters
    ----------
    gameOpp : element exposing ``find("img")``
        The logo container of one schedule entry.

    Returns
    -------
    str or list
        The ID as a string of digits, or the one-element list ``[None]`` when
        the container holds no image.

    Raises
    ------
    ValueError
        If the image source cannot be split or the extracted ID is not numeric.
    """
    img = gameOpp.find("img")
    if img is None:
        # NOTE(review): this returns a list while the normal path returns a
        # string; kept unchanged because stored results may rely on it.
        return [None]

    src = img.attrs['src']
    try:
        vals = src.split(".png")
    except AttributeError:
        raise ValueError("Cannot parse opponent src")

    opp = getTeamID(vals[0], -1)

    # Validate only; the ID stays a string.
    try:
        int(opp)
    except ValueError:
        raise ValueError("Opponent is not an integer [{0}]".format(opp))

    return opp
        
        

def parseGameInfo(gameInfo):
    """Read the home/away marker and opponent name from a game-info element.

    Text starting with "@ " yields ``IsHomeGame`` False; text starting with
    "vs " yields ``IsHomeGame`` True. The rest of the text, stripped, is the
    opponent.

    Parameters
    ----------
    gameInfo : element exposing ``.text``

    Returns
    -------
    dict
        ``{"IsHomeGame": bool, "Opponent": str}``

    Raises
    ------
    ValueError
        If the text starts with neither marker.
    """
    label = gameInfo.text
    # (marker, home flag); the marker's trailing space is left for strip() to remove.
    for marker, atHome in (("@ ", False), ("vs ", True)):
        if label.startswith(marker):
            opponent = label[len(marker) - 1:].strip()
            return {"IsHomeGame": atHome, "Opponent": opponent}

    raise ValueError("Not sure how to parse game Info [{0}]".format(gameInfo))
        
        
        
def parseGameResult(gameResult):
    """Translate a game-result element into win/loss flags.

    "W" maps to ``IsWin`` True, "L" to False and empty text to None.
    ``IsDone`` is always True in the returned dict.

    Parameters
    ----------
    gameResult : element exposing ``.text``

    Returns
    -------
    dict
        ``{"IsWin": bool or None, "IsDone": True}``

    Raises
    ------
    ValueError
        For any other text.
    """
    outcomes = {"L": False, "W": True, "": None}
    marker = gameResult.text
    if marker not in outcomes:
        raise ValueError("Not sure how to parse game Result [{0}]".format(gameResult))

    return {"IsWin": outcomes[marker], "IsDone": True}

        
    
def parseGameScore(gameScore, info, result, opp):
    """Split a score element ("75-60") into the team's and the opponent's points.

    The larger number goes to the team when ``result["IsWin"]`` is True and to
    the opponent when it is False.

    Parameters
    ----------
    gameScore : element exposing ``.text``
        Not read when ``result["IsDone"]`` is False.
    info : dict
        Accepted for interface compatibility; not used.
    result : dict
        Must hold "IsDone" and "IsWin". Mutated in place: "IsDone" is set to
        None when the score text is "SUSP".
    opp : object
        Accepted for interface compatibility; not used.

    Returns
    -------
    dict
        ``{"Team Score": int or None, "Opp Score": int or None}``; both None
        when the game is not done or the score text is "SUSP".

    Raises
    ------
    ValueError
        If the score element is missing, the text is not two dash-separated
        integers (and not "SUSP"), or scores exist without a win/loss flag.
    """
    if result['IsDone'] is False:
        return {"Team Score": None, "Opp Score": None}

    if gameScore is None:
        raise ValueError("There is no score div in the schedule")

    text = gameScore.text
    scores = text.split('-')
    if len(scores) != 2:
        if text == "SUSP":
            # The caller merges `result` afterwards, so this flag is how it
            # learns that the game has no final score.
            result["IsDone"] = None
            return {"Team Score": None, "Opp Score": None}
        raise ValueError("Not sure how to parse scores [{0}]".format(text))

    try:
        scores = [int(x) for x in scores]
    except ValueError:
        raise ValueError("Scores are not integers [{0}]".format(text))

    isWin = result["IsWin"]
    if isWin is True:
        teamScore, oppScore = max(scores), min(scores)
    elif isWin is False:
        teamScore, oppScore = min(scores), max(scores)
    else:
        # Without a win/loss flag the two numbers cannot be attributed; this
        # used to surface as an UnboundLocalError.
        raise ValueError("Cannot assign scores without a win/loss result [{0}]".format(text))

    return {"Team Score": teamScore, "Opp Score": oppScore}
    
    

def getSchedule(bsdata):
    """Collect one record per game from the "club-schedule" sections of a page.

    Every such section must contain exactly two <ul> lists; the entries of the
    second list are parsed. Each record merges the game ID, the opponent ID and
    the dicts returned by parseGameInfo, parseGameResult and parseGameScore.

    Parameters
    ----------
    bsdata : parsed page exposing ``findAll``

    Returns
    -------
    list of dict
        One record per <li> entry, in page order.

    Raises
    ------
    ValueError
        If a section does not hold two lists, or an entry lacks its link,
        logo div or game-info div.
    """
    games = []
    for section in bsdata.findAll("section", {"class": "club-schedule"}):
        lists = section.findAll("ul")
        if len(lists) != 2:
            raise ValueError("Cannot parse schedule")

        for entry in lists[1].findAll('li'):
            link = entry.find("a")
            if link is None:
                raise ValueError("There is no link in the schedule")
            gameID = getGameID(link.attrs['href'])

            logoDiv = link.find("div", {"class": "logo"})
            if logoDiv is None:
                raise ValueError("There is no logo div in the schedule")
            opp = parseGameOpp(logoDiv)

            infoDiv = link.find("div", {"class": "game-info"})
            if infoDiv is None:
                raise ValueError("There is no game-info div in the schedule")
            info = parseGameInfo(infoDiv)

            # No result div means the game has not been played yet.
            resultDiv = link.find("div", {"class": "game-result"})
            if resultDiv is None:
                result = {"IsWin": None, "IsDone": False}
            else:
                result = parseGameResult(resultDiv)

            scoreDiv = link.find("div", {"class": "score"})
            score = parseGameScore(scoreDiv, info, result, opp)

            # `result` is merged after parseGameScore ran because that call
            # may have changed its "IsDone" entry.
            games.append({"GameID": gameID, "OppNo": opp, **info, **result, **score})

    return games


def getConference(bsdata):
    """Read the conference heading and standings table from a team page.

    Parameters
    ----------
    bsdata : parsed page exposing ``find``

    Returns
    -------
    dict
        ``{"Conference": heading text or None, "Teams": {teamID: row}}`` where
        each row maps "Name", "CONF", "GB" and "OVR" to the cell texts of a
        four-cell table row. When the standings article or its <h1> is
        missing, "Conference" is None and "Teams" is empty.

    Raises
    ------
    ValueError
        If the heading exists but the "mod-data" table does not, or a
        four-cell row has no link to take the team ID from.
    """
    confData = {"Conference": None, "Teams": {}}

    standings = bsdata.find("article", {"class": "sub-module standings"})
    if standings is None:
        return confData

    h1 = standings.find("h1")
    if h1 is None:
        return confData
    confData["Conference"] = h1.text

    table = standings.find("table", {"class": "mod-data"})
    if table is None:
        raise ValueError("Could not find standings-conference table!")

    headings = ["Name", "CONF", "GB", "OVR"]
    for tr in table.findAll("tr"):
        tds = tr.findAll("td")
        # Rows with any other cell count are ignored.
        if len(tds) != 4:
            continue
        link = tds[0].find("a")
        if link is None:
            raise ValueError("Could not find team link in standings row!")
        # The team ID is the second-to-last segment of the link target.
        teamID = getTeamID(link.attrs['href'], -2)
        confData["Teams"][teamID] = dict(zip(headings, [x.text for x in tds]))

    return confData


# Parse every cached team page into its schedule and conference standings.
seasonData = {}
for teamID, teamData in teamsInfo.items():
    name = teamData["Name"]
    href = teamData["Ref"]
    print("==>",name)
    savename = "data/teams/{0}.p".format(name)

    # The download step only logs failures, so a team's page may be missing
    # on disk; skip it instead of failing the whole run.
    if not isFile(savename):
        print("Skipping {0}: {1} was not downloaded".format(name, savename))
        continue

    bsdata     = getHTML(savename)
    schedule   = getSchedule(bsdata)
    conference = getConference(bsdata)

    seasonData[teamID] = {"Name": name, "Ref": href, "Schedule": schedule, "Conference": conference}

print("Parsed {0} Division I Team Schedules".format(len(seasonData)))
saveFile(idata=seasonData, ifile="data/schedules.p", debug=True)

# Organize Team/Game Data

In [2]:
# Load the parsed schedules from data/schedules.p (the file written by the parsing section above).
seasonData = getFile("data/schedules.p")

In [3]:
# Team name -> team ID; filled by the organizing loop below.
teamNameIDMap = {}

In [4]:
# Accumulators filled by the organizing loop below.
fullTeamData = {}     # teamID -> {"Name", "Conference"}
gamesData = {}        # gameID -> one record per finished game
conferenceData = {}   # teamID -> {"Conference"}

In [13]:
# Season labels such as "2019-20" that appear inside the conference heading.
seasonLabel = re.compile(r"\b\d{4}-\d{2}\b")

# Flatten the per-team schedules into one record per finished game and record
# each team's cleaned conference name.
for teamID, teamData in seasonData.items():
    name = teamData["Name"]
    schedule   = teamData["Schedule"]
    conference = teamData["Conference"]

    teamNameIDMap[name] = teamID

    # Reduce the heading to the bare conference name by dropping the word
    # "Standings" and any season label.
    confName  = conference["Conference"]
    if confName is not None:
        confName = confName.replace("Standings", "").strip()
        confName = seasonLabel.sub("", confName).strip()

    for gameData in schedule:
        gameID = gameData["GameID"]
        # A game appears in both teams' schedules; keep the first occurrence.
        if gamesData.get(gameID) is not None:
            continue

        oppID  = gameData['OppNo']
        isHome = gameData["IsHomeGame"]
        isDone = gameData["IsDone"]
        # IsDone is False for games without a result and None for suspended ones.
        if isDone is False or isDone is None:
            continue

        gameResult = {"ID": gameID}
        if isHome:
            gameResult["HomeTeam"] = teamID
            gameResult["AwayTeam"] = oppID
            gameResult["HomeTeamScore"] = gameData["Team Score"]
            gameResult["AwayTeamScore"] = gameData["Opp Score"]
        else:
            gameResult["HomeTeam"] = oppID
            gameResult["AwayTeam"] = teamID
            gameResult["HomeTeamScore"] = gameData["Opp Score"]
            gameResult["AwayTeamScore"] = gameData["Team Score"]

        gamesData[gameID] = gameResult
    fullTeamData[teamID] = {"Name": name, "Conference": confName}
    conferenceData[teamID] = {"Conference": confName}

In [20]:
# One row per game, indexed by game ID; the redundant "ID" column is dropped.
gamesDF = (
    DataFrame(gamesData)
    .T
    .rename_axis("GameID")
    .drop(columns=["ID"])
)
for writeGames, target in ((gamesDF.to_pickle, "data/gamesDF.p"), (gamesDF.to_csv, "data/gamesDF.csv")):
    writeGames(target)

In [21]:
# One row per team, indexed by team ID.
teamsDF = DataFrame(fullTeamData).T.rename_axis("TeamID")
for writeTeams, target in ((teamsDF.to_pickle, "data/teamsDF.p"), (teamsDF.to_csv, "data/teamsDF.csv")):
    writeTeams(target)

In [12]:
from pandas import DataFrame

Unnamed: 0,Name
261,Vermont Catamounts
2619,Stony Brook Seawolves
42,Hartford Hawks
399,Albany Great Danes
2378,UMBC Retrievers
160,New Hampshire Wildcats
2349,UMass Lowell River Hawks
311,Maine Black Bears
2066,Binghamton Bearcats
248,Houston Cougars


# Download Game Data

In [None]:
# Inspect the collected game records (this renders the whole dict).
gamesData

In [None]:
#https://www.espn.com/mens-college-basketball/boxscore?gameId=401169817

In [10]:
# Download the box-score page of every collected game; pages already on disk
# are not fetched again.
for gameID in gamesData.keys():
    url="https://www.espn.com/mens-college-basketball/boxscore?gameId={0}".format(gameID)
    savename = "data/games/{0}.p".format(gameID)
    print("Downloading {0} Game Data".format(gameID))
    if not isFile(savename):
        try:
            getWebData(base=url, savename=savename, useSafari=False)
            sleep(2)
        except Exception:
            # Report the game that failed (the message used to show a stale
            # team name) and continue. `Exception` keeps Ctrl-C able to stop
            # the loop.
            print("Trouble with {0}".format(gameID))
            sleep(1)

Downloading 401170104 Game Data
Downloading 401170985 Game Data
Downloading 401170986 Game Data
Downloading 401168458 Game Data
Downloading 401168197 Game Data
Downloading 401170987 Game Data
Downloading 401183489 Game Data
Downloading 401170988 Game Data
Downloading 401170989 Game Data
Downloading 401169454 Game Data
Downloading 401170990 Game Data
Downloading 401170991 Game Data
Downloading 401170426 Game Data
Downloading 401170211 Game Data
Downloading 401170992 Game Data
Downloading 401170979 Game Data
Downloading 401170993 Game Data
Downloading 401170845 Game Data
Downloading 401170873 Game Data
Downloading 401170921 Game Data
Downloading 401170940 Game Data
Downloading 401170962 Game Data
Downloading 401170994 Game Data
Downloading 401170923 Game Data
Downloading 401170878 Game Data
Downloading 401170964 Game Data
Downloading 401170853 Game Data
Downloading 401170982 Game Data
Downloading 401170995 Game Data
Downloading 401170967 Game Data
Downloading 401168442 Game Data
Download

Downloading 401170421 Game Data
Downloading 401170432 Game Data
Downloading 401170442 Game Data
Downloading 401170462 Game Data
Downloading 401170480 Game Data
Downloading 401170494 Game Data
Downloading 401170341 Game Data
Downloading 401168424 Game Data
Downloading 401170357 Game Data
Downloading 401170363 Game Data
Downloading 401170379 Game Data
Downloading 401166098 Game Data
Downloading 401170400 Game Data
Downloading 401170413 Game Data
Downloading 401170428 Game Data
Downloading 401170455 Game Data
Downloading 401168353 Game Data
Downloading 401170343 Game Data
Downloading 401170352 Game Data
Downloading 401172034 Game Data
Downloading 401171979 Game Data
Downloading 401170395 Game Data
Downloading 401170402 Game Data
Downloading 401170404 Game Data
Downloading 401170411 Game Data
Downloading 401170416 Game Data
Downloading 401170422 Game Data
Downloading 401170429 Game Data
Downloading 401166134 Game Data
Downloading 401169601 Game Data
Downloading 401169615 Game Data
Download

  --> This file is 41.6kB.
data/games/401166418.p size -> 42 kB
Downloading 401166423 Game Data
  --> This file is 41.5kB.
data/games/401166423.p size -> 41 kB
Downloading 401166431 Game Data
  --> This file is 41.6kB.
data/games/401166431.p size -> 42 kB
Downloading 401166454 Game Data
  --> This file is 41.8kB.
data/games/401166454.p size -> 42 kB
Downloading 401186434 Game Data
  --> This file is 41.8kB.
data/games/401186434.p size -> 42 kB
Downloading 401166470 Game Data
  --> This file is 41.5kB.
data/games/401166470.p size -> 41 kB
Downloading 401166487 Game Data
  --> This file is 41.5kB.
data/games/401166487.p size -> 42 kB
Downloading 401166494 Game Data
  --> This file is 41.7kB.
data/games/401166494.p size -> 42 kB
Downloading 401166501 Game Data
  --> This file is 41.3kB.
data/games/401166501.p size -> 41 kB
Downloading 401166512 Game Data
  --> This file is 41.0kB.
data/games/401166512.p size -> 41 kB
Downloading 401166520 Game Data
  --> This file is 40.7kB.
data/games/40

Downloading 401166437 Game Data
  --> This file is 34.4kB.
data/games/401166437.p size -> 34 kB
Downloading 401166450 Game Data
  --> This file is 34.7kB.
data/games/401166450.p size -> 35 kB
Downloading 401186195 Game Data
  --> This file is 34.9kB.
data/games/401186195.p size -> 35 kB
Downloading 401186437 Game Data
  --> This file is 34.7kB.
data/games/401186437.p size -> 35 kB
Downloading 401166469 Game Data
  --> This file is 34.4kB.
data/games/401166469.p size -> 34 kB
Downloading 401166478 Game Data
  --> This file is 34.4kB.
data/games/401166478.p size -> 34 kB
Downloading 401166486 Game Data
  --> This file is 34.3kB.
data/games/401166486.p size -> 34 kB
Downloading 401166493 Game Data
  --> This file is 34.3kB.
data/games/401166493.p size -> 34 kB
Downloading 401166506 Game Data
  --> This file is 34.6kB.
data/games/401166506.p size -> 35 kB
Downloading 401166514 Game Data
  --> This file is 34.1kB.
data/games/401166514.p size -> 34 kB
Downloading 401166533 Game Data
  --> Th

Downloading 401171533 Game Data
  --> This file is 34.2kB.
data/games/401171533.p size -> 34 kB
Downloading 401171034 Game Data
  --> This file is 34.5kB.
data/games/401171034.p size -> 34 kB
Downloading 401171518 Game Data
  --> This file is 34.1kB.
data/games/401171518.p size -> 34 kB
Downloading 401171545 Game Data
  --> This file is 34.6kB.
data/games/401171545.p size -> 35 kB
Downloading 401171547 Game Data
  --> This file is 35.6kB.
data/games/401171547.p size -> 36 kB
Downloading 401171548 Game Data
  --> This file is 35.4kB.
data/games/401171548.p size -> 35 kB
Downloading 401171549 Game Data
  --> This file is 34.5kB.
data/games/401171549.p size -> 34 kB
Downloading 401171550 Game Data
  --> This file is 34.8kB.
data/games/401171550.p size -> 35 kB
Downloading 401171551 Game Data
  --> This file is 34.8kB.
data/games/401171551.p size -> 35 kB
Downloading 401171552 Game Data
  --> This file is 34.9kB.
data/games/401171552.p size -> 35 kB
Downloading 401171553 Game Data
  --> Th

Downloading 401171479 Game Data
  --> This file is 34.4kB.
data/games/401171479.p size -> 34 kB
Downloading 401171417 Game Data
  --> This file is 34.0kB.
data/games/401171417.p size -> 34 kB
Downloading 401171032 Game Data
  --> This file is 34.1kB.
data/games/401171032.p size -> 34 kB
Downloading 401171502 Game Data
  --> This file is 34.3kB.
data/games/401171502.p size -> 34 kB
Downloading 401171503 Game Data
  --> This file is 34.5kB.
data/games/401171503.p size -> 34 kB
Downloading 401171483 Game Data
  --> This file is 34.3kB.
data/games/401171483.p size -> 34 kB
Downloading 401169802 Game Data
  --> This file is 35.7kB.
data/games/401169802.p size -> 36 kB
Downloading 401171505 Game Data
  --> This file is 34.9kB.
data/games/401171505.p size -> 35 kB
Downloading 401171506 Game Data
  --> This file is 34.4kB.
data/games/401171506.p size -> 34 kB
Downloading 401171507 Game Data
  --> This file is 34.7kB.
data/games/401171507.p size -> 35 kB
Downloading 401171508 Game Data
  --> Th

Downloading 401166020 Game Data
  --> This file is 35.8kB.
data/games/401166020.p size -> 36 kB
Downloading 401166072 Game Data
  --> This file is 35.9kB.
data/games/401166072.p size -> 36 kB
Downloading 401166079 Game Data
  --> This file is 35.9kB.
data/games/401166079.p size -> 36 kB
Downloading 401182561 Game Data
  --> This file is 35.6kB.
data/games/401182561.p size -> 36 kB
Downloading 401166104 Game Data
  --> This file is 35.4kB.
data/games/401166104.p size -> 35 kB
Downloading 401166110 Game Data
  --> This file is 35.4kB.
data/games/401166110.p size -> 35 kB
Downloading 401166140 Game Data
  --> This file is 36.2kB.
data/games/401166140.p size -> 36 kB
Downloading 401166149 Game Data
  --> This file is 34.7kB.
data/games/401166149.p size -> 35 kB
Downloading 401166154 Game Data
  --> This file is 35.3kB.
data/games/401166154.p size -> 35 kB
Downloading 401166161 Game Data
  --> This file is 35.2kB.
data/games/401166161.p size -> 35 kB
Downloading 401166170 Game Data
  --> Th

Downloading 401166106 Game Data
  --> This file is 34.7kB.
data/games/401166106.p size -> 35 kB
Downloading 401166112 Game Data
  --> This file is 34.4kB.
data/games/401166112.p size -> 34 kB
Downloading 401166127 Game Data
  --> This file is 35.2kB.
data/games/401166127.p size -> 35 kB
Downloading 401169910 Game Data
  --> This file is 35.1kB.
data/games/401169910.p size -> 35 kB
Downloading 401166144 Game Data
  --> This file is 35.1kB.
data/games/401166144.p size -> 35 kB
Downloading 401166148 Game Data
  --> This file is 34.9kB.
data/games/401166148.p size -> 35 kB
Downloading 401166158 Game Data
  --> This file is 34.3kB.
data/games/401166158.p size -> 34 kB
Downloading 401166182 Game Data
  --> This file is 34.4kB.
data/games/401166182.p size -> 34 kB
Downloading 401166187 Game Data
  --> This file is 34.7kB.
data/games/401166187.p size -> 35 kB
Downloading 401166209 Game Data
  --> This file is 34.0kB.
data/games/401166209.p size -> 34 kB
Downloading 401166217 Game Data
  --> Th

Downloading 401166132 Game Data
  --> This file is 35.2kB.
data/games/401166132.p size -> 35 kB
Downloading 401166146 Game Data
  --> This file is 34.0kB.
data/games/401166146.p size -> 34 kB
Downloading 401166153 Game Data
  --> This file is 34.3kB.
data/games/401166153.p size -> 34 kB
Downloading 401166240 Game Data
  --> This file is 34.6kB.
data/games/401166240.p size -> 35 kB
Downloading 401166023 Game Data
  --> This file is 35.6kB.
data/games/401166023.p size -> 36 kB
Downloading 401166071 Game Data
  --> This file is 35.2kB.
data/games/401166071.p size -> 35 kB
Downloading 401166092 Game Data
  --> This file is 35.3kB.
data/games/401166092.p size -> 35 kB
Downloading 401166108 Game Data
  --> This file is 34.3kB.
data/games/401166108.p size -> 34 kB
Downloading 401166119 Game Data
  --> This file is 34.3kB.
data/games/401166119.p size -> 34 kB
Downloading 401172763 Game Data
  --> This file is 35.7kB.
data/games/401172763.p size -> 36 kB
Downloading 401166131 Game Data
  --> Th

Downloading 401171639 Game Data
  --> This file is 34.3kB.
data/games/401171639.p size -> 34 kB
Downloading 401171640 Game Data
  --> This file is 33.8kB.
data/games/401171640.p size -> 34 kB
Downloading 401171618 Game Data
  --> This file is 34.2kB.
data/games/401171618.p size -> 34 kB
Downloading 401171641 Game Data
  --> This file is 34.1kB.
data/games/401171641.p size -> 34 kB
Downloading 401171599 Game Data
  --> This file is 34.1kB.
data/games/401171599.p size -> 34 kB
Downloading 401171642 Game Data
  --> This file is 33.8kB.
data/games/401171642.p size -> 34 kB
Downloading 401171574 Game Data
  --> This file is 34.0kB.
data/games/401171574.p size -> 34 kB
Downloading 401171603 Game Data
  --> This file is 33.9kB.
data/games/401171603.p size -> 34 kB
Downloading 401171646 Game Data
  --> This file is 34.0kB.
data/games/401171646.p size -> 34 kB
Downloading 401171579 Game Data
  --> This file is 34.0kB.
data/games/401171579.p size -> 34 kB
Downloading 401170556 Game Data
  --> Th

Downloading 401174439 Game Data
  --> This file is 34.6kB.
data/games/401174439.p size -> 35 kB
Downloading 401174440 Game Data
  --> This file is 34.1kB.
data/games/401174440.p size -> 34 kB
Downloading 401170628 Game Data
  --> This file is 34.8kB.
data/games/401170628.p size -> 35 kB
Downloading 401173708 Game Data
  --> This file is 34.9kB.
data/games/401173708.p size -> 35 kB
Downloading 401173710 Game Data
  --> This file is 34.8kB.
data/games/401173710.p size -> 35 kB
Downloading 401173715 Game Data
  --> This file is 35.0kB.
data/games/401173715.p size -> 35 kB
Downloading 401174441 Game Data
  --> This file is 34.5kB.
data/games/401174441.p size -> 34 kB
Downloading 401171571 Game Data
  --> This file is 34.2kB.
data/games/401171571.p size -> 34 kB
Downloading 401171580 Game Data
  --> This file is 34.3kB.
data/games/401171580.p size -> 34 kB
Downloading 401171559 Game Data
  --> This file is 34.6kB.
data/games/401171559.p size -> 35 kB
Downloading 401171560 Game Data
  --> Th

Downloading 401171814 Game Data
  --> This file is 34.2kB.
data/games/401171814.p size -> 34 kB
Downloading 401171789 Game Data
  --> This file is 34.1kB.
data/games/401171789.p size -> 34 kB
Downloading 401171815 Game Data
  --> This file is 34.1kB.
data/games/401171815.p size -> 34 kB
Downloading 401171816 Game Data
  --> This file is 34.2kB.
data/games/401171816.p size -> 34 kB
Downloading 401171817 Game Data
  --> This file is 34.1kB.
data/games/401171817.p size -> 34 kB
Downloading 401171819 Game Data
  --> This file is 34.4kB.
data/games/401171819.p size -> 34 kB
Downloading 401172425 Game Data
  --> This file is 34.5kB.
data/games/401172425.p size -> 35 kB
Downloading 401172426 Game Data
  --> This file is 34.9kB.
data/games/401172426.p size -> 35 kB
Downloading 401169825 Game Data
  --> This file is 35.3kB.
data/games/401169825.p size -> 35 kB
Downloading 401182571 Game Data
  --> This file is 34.8kB.
data/games/401182571.p size -> 35 kB
Downloading 401172427 Game Data
  --> Th

Downloading 401172373 Game Data
  --> This file is 35.0kB.
data/games/401172373.p size -> 35 kB
Downloading 401172374 Game Data
  --> This file is 34.3kB.
data/games/401172374.p size -> 34 kB
Downloading 401172375 Game Data
  --> This file is 34.7kB.
data/games/401172375.p size -> 35 kB
Downloading 401172376 Game Data
  --> This file is 34.6kB.
data/games/401172376.p size -> 35 kB
Downloading 401176593 Game Data
  --> This file is 34.6kB.
data/games/401176593.p size -> 35 kB
Downloading 401169819 Game Data
  --> This file is 35.6kB.
data/games/401169819.p size -> 36 kB
Downloading 401172740 Game Data
  --> This file is 34.8kB.
data/games/401172740.p size -> 35 kB
Downloading 401175804 Game Data
  --> This file is 34.9kB.
data/games/401175804.p size -> 35 kB
Downloading 401173674 Game Data
  --> This file is 35.9kB.
data/games/401173674.p size -> 36 kB
Downloading 401174735 Game Data
  --> This file is 34.8kB.
data/games/401174735.p size -> 35 kB
Downloading 401174742 Game Data
  --> Th

Downloading 401175987 Game Data
  --> This file is 34.7kB.
data/games/401175987.p size -> 35 kB
Downloading 401169801 Game Data
  --> This file is 35.6kB.
data/games/401169801.p size -> 36 kB
Downloading 401176057 Game Data
  --> This file is 34.2kB.
data/games/401176057.p size -> 34 kB
Downloading 401172459 Game Data
  --> This file is 35.0kB.
data/games/401172459.p size -> 35 kB
Downloading 401174066 Game Data
  --> This file is 35.0kB.
data/games/401174066.p size -> 35 kB
Downloading 401176058 Game Data
  --> This file is 97.3kB.
data/games/401176058.p size -> 97 kB
Downloading 401176059 Game Data
  --> This file is 34.1kB.
data/games/401176059.p size -> 34 kB
Downloading 401176002 Game Data
  --> This file is 34.2kB.
data/games/401176002.p size -> 34 kB
Downloading 401176033 Game Data
  --> This file is 34.5kB.
data/games/401176033.p size -> 35 kB
Downloading 401175981 Game Data
  --> This file is 34.2kB.
data/games/401175981.p size -> 34 kB
Downloading 401176046 Game Data
  --> Th

Downloading 401170632 Game Data
  --> This file is 35.0kB.
data/games/401170632.p size -> 35 kB
Downloading 401173560 Game Data
  --> This file is 35.3kB.
data/games/401173560.p size -> 35 kB
Downloading 401176012 Game Data
  --> This file is 34.9kB.
data/games/401176012.p size -> 35 kB
Downloading 401175992 Game Data
  --> This file is 34.3kB.
data/games/401175992.p size -> 34 kB
Downloading 401176034 Game Data
  --> This file is 34.4kB.
data/games/401176034.p size -> 34 kB
Downloading 401176003 Game Data
  --> This file is 34.5kB.
data/games/401176003.p size -> 34 kB
Downloading 401175984 Game Data
  --> This file is 34.5kB.
data/games/401175984.p size -> 35 kB
Downloading 401176036 Game Data
  --> This file is 34.5kB.
data/games/401176036.p size -> 35 kB
Downloading 401175998 Game Data
  --> This file is 34.2kB.
data/games/401175998.p size -> 34 kB
Downloading 401175740 Game Data
  --> This file is 34.5kB.
data/games/401175740.p size -> 35 kB
Downloading 401175999 Game Data
  --> Th

Downloading 401174067 Game Data
  --> This file is 34.4kB.
data/games/401174067.p size -> 34 kB
Downloading 401173558 Game Data
  --> This file is 34.8kB.
data/games/401173558.p size -> 35 kB
Downloading 401175133 Game Data
  --> This file is 34.6kB.
data/games/401175133.p size -> 35 kB
Downloading 401175134 Game Data
  --> This file is 34.3kB.
data/games/401175134.p size -> 34 kB
Downloading 401172518 Game Data
  --> This file is 34.4kB.
data/games/401172518.p size -> 34 kB
Downloading 401172494 Game Data
  --> This file is 34.2kB.
data/games/401172494.p size -> 34 kB
Downloading 401175137 Game Data
  --> This file is 34.3kB.
data/games/401175137.p size -> 34 kB
Downloading 401174467 Game Data
  --> This file is 33.9kB.
data/games/401174467.p size -> 34 kB
Downloading 401174451 Game Data
  --> This file is 34.5kB.
data/games/401174451.p size -> 35 kB
Downloading 401175139 Game Data
  --> This file is 34.3kB.
data/games/401175139.p size -> 34 kB
Downloading 401172472 Game Data
  --> Th

KeyboardInterrupt: 

## Parse Game Data

In [None]:
# Load the cached page of the first game only: the `break` ends the loop after
# one iteration. With an empty gamesData nothing is loaded.
for gameID in gamesData.keys():    
    savename = "data/games/{0}.p".format(gameID)
    bsdata = getHTML(savename)
    break

In [None]:
gameStats = {}


def parseStatValue(val):
    """Convert one box-score cell to an int, or to a list of ints when the
    text holds dash-separated values.

    Raises ValueError when a part is not an integer.
    """
    parts = val.split('-')
    if len(parts) == 1:
        try:
            return int(val)
        except ValueError:
            raise ValueError("Could not convert {0} to an integer".format(val))
    try:
        return [int(v) for v in parts]
    except ValueError:
        raise ValueError("Could not convert {0} to an integers".format(val))


div = bsdata.find("div", {"data-module": "boxscore"})
if div is None:
    raise ValueError("There is no box score!")


# Team IDs are taken from the logo file names, in the order the team-name divs
# appear; a team without a logo gets None.
teamNames = div.findAll("div", {"class": "team-name"})
teamIDs = []
for teamName in teamNames:
    img = teamName.find("img")
    if img is None:
        teamID = None
    else:
        src = img.attrs['src']
        try:
            vals = src.split(".png")
        except AttributeError:
            raise ValueError("Cannot parse opponent src")

        teamID = getTeamID(vals[0], -1)
    teamIDs.append(teamID)


tables = div.findAll("table", {"class": "mod-data"})
for i,table in enumerate(tables):
    teamID = teamIDs[i]
    teamGameResults = {}

    headers = table.findAll("th")
    headers = [x.text for x in headers]
    players = table.findAll("tr")
    teamPlayers = []
    for player in players:
        tds = player.findAll("td")
        # Only rows with exactly 14 cells are processed.
        if len(tds) != 14:
            continue

        ## 1st entry is name/id
        name  = tds[0]
        if name.text == "TEAM":
            # Team totals row: the first two cells are skipped.
            tds = [x.text for x in tds[2:]]
            teamData = dict(zip(headers[2:14], tds))
            for k in teamData.keys():
                teamData[k] = parseStatValue(teamData[k])

            teamGameResults["Team"] = teamData
            teamGameResults["Players"] = teamPlayers
            gameStats[teamID] = teamGameResults

            ### Totals are stored; move on to the next row
            continue

        url   = name.find('a').attrs['href']
        pid   = getTeamID(url, -2)
        spans = name.findAll("span")
        playerName = spans[0].text
        playerPos  = spans[2].text
        tds = [x.text for x in tds[1:]]
        playerValues = tds

        # The first five player rows are labelled starters, the rest bench.
        isStarter = len(teamPlayers) <= 4
        # NOTE(review): the two groups read different header slices --
        # presumably the table repeats its header row for the bench; confirm.
        if isStarter:
            playerData = dict(zip(headers[1:14], playerValues))
        else:
            playerData = dict(zip(headers[15:], playerValues))

        for k in playerData.keys():
            playerData[k] = parseStatValue(playerData[k])
        playerData["Name"] = playerName
        playerData["Position"] = playerPos
        playerData["ID"] = pid
        playerData["Status"] = "Starter" if isStarter else "Bench"

        teamPlayers.append(playerData)

    # Only the first table is processed.
    break

In [None]:
# Team IDs for which box-score statistics were stored.
gameStats.keys()

In [None]:

    def downloadTeamStatisticsDataByYear(self, idval, name, year, debug=False):
        """Download one team's season statistics page unless it is already saved.

        Parameters
        ----------
        idval : team ID inserted into the statistics URL.
        name : team name used in the saved file name ("<name>-<year>.p").
        year : season appended to the URL and used to pick the output directory.
        debug : when True, print the URL and target file before downloading.

        Returns None. After a download the method sleeps for 15 to 17 seconds.
        """
        url = join(
            self.getBase(),
            "college-football/team/stats/_/id/{0}/season".format(idval),
            str(year),
        )
        savename = setFile(self.getYearlyStatisticsDir(year), "{0}-{1}.p".format(name, year))

        if not isFile(savename):
            if debug:
                print("Downloading {0} to {1}".format(url, savename))
            getWebData(base=url, savename=savename, useSafari=False)
            # Randomised pause between requests.
            sleep(15+2*random())

In [None]:

                        
         
    ############################################################################################################
    # Team Games
    ############################################################################################################   
    def downloadGameDataByID(self, gameID, year, test=False, debug=False):
        """Download the play-by-play page of one game into the season's games directory.

        A saved file whose size rounds to less than 1 kB is removed first.

        Parameters
        ----------
        gameID : ID inserted into the play-by-play URL and the file name.
        year : season used to pick the games directory.
        test : when True, only print the URL and target file; nothing is downloaded.
        debug : accepted but not used by this method.

        Returns None. After a download the method sleeps for 6 seconds.
        """
        from os.path import getsize

        url = "http://www.espn.com/college-football/playbyplay?gameId={0}".format(gameID)
        savename = setFile(self.getYearlyGamesDir(year), "{0}.p".format(gameID))

        # NOTE(review): a larger existing file is neither removed nor skipped
        # here -- presumably getWebData handles existing files; confirm.
        if isFile(savename) and round(getsize(savename)/1e3) < 1:
            removeFile(savename, debug=True)

        if test:
            print("Downloading {0} to {1}".format(url,savename))
        else:
            getWebData(base=url, savename=savename, dtime=6, useSafari=True, debug=True)
            sleep(6)
            
            
    def downloadGameData(self, debug=False, verydebug=False, years=(2013, 2014, 2015)):
        """Download the play-by-play page of every game listed in the saved season results.

        Each ".p" file in the season results directory is loaded, and every
        game of every team in it is passed to ``downloadGameDataByID``.

        Parameters
        ----------
        debug : forwarded to ``downloadGameDataByID`` as its ``debug`` argument.
            It was previously passed positionally and landed in ``test``, so
            ``debug=True`` suppressed the downloads.
        verydebug : accepted but not used.
        years : seasons to process; the default is the list that used to be
            hard-coded. Pass None to process every season found.

        Returns None.
        """
        resultsDir = self.getSeasonResultsDir()
        files = findExt(resultsDir, ext=".p", debug=False)

        print("Sleeping for 5 seconds...")
        sleep(5)

        for ifile in files:
            seasonData = getFile(ifile)
            year       = seasonData.getYear()
            if years is not None and year not in years:
                continue

            teams = seasonData.teams
            for teamID,teamData in teams.items():
                for gameData in teamData.games:
                    gameID = gameData["Game"].gameID
                    self.downloadGameDataByID(gameID, year, debug=debug)