# Football Parsing Code

In [1]:
## Basic stuff
%load_ext autoreload
%autoreload
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))

## Python Version
import sys
print("Python: {0}".format(sys.version))

import datetime as dt
start = dt.datetime.now()

print("Notebook Last Run Initiated: "+str(start))

Python: 3.6.6 |Anaconda custom (64-bit)| (default, Jun 28 2018, 11:07:29) 
[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]
Notebook Last Run Initiated: 2019-03-13 11:19:33.439669


In [2]:
from os.path import join
from fsUtils import mkSubDir, setFile, isFile, removeFile
from ioUtils import getFile, saveFile
from fileUtils import getBaseFilename, getBasename, getDirname
from webUtils import getWebData, getHTML
from timeUtils import printDateTime, getDateTime, addMonths
from searchUtils import findExt
from time import sleep
from random import random

class output:
    """
    Holds the root directory under which all downloaded/parsed data is saved.

    Parameters
    ----------
    dirval : str
        Root save directory. Defaults to the original hard-coded location so
        existing callers (which call ``output.__init__(self)`` with no extra
        arguments) keep working, while other machines can pass their own path.
    """
    def __init__(self, dirval="/Volumes/Blue/Football"):
        self.name   = "output"
        self.dirval = dirval

    def getSaveDir(self):
        """Return the root save directory."""
        return self.dirval

    
class espn:
    """Holds the base URL of the ESPN web site that the downloaders build on."""

    def __init__(self):
        self.name, self.baseurl = "espn", "http://www.espn.com"

    def getBase(self):
        """Return the site base URL (no trailing slash)."""
        site = self.baseurl
        return site

In [3]:
############################################################################################################
# Game Class
############################################################################################################
class game:
    """
    A single game as seen from one team's schedule page.

    teamA is the team whose schedule the game was read from and teamB is the
    opponent; the result/score attributes carry the matching A/B suffix.

    Parameters
    ----------
    gameID : game identifier
    date : date of the game
    teamA, teamB : identifiers of the two teams
    teamAResult, teamBResult : result codes for each team
    teamAScore, teamBScore : points scored by each team
    location : identifier of the site of the game
    extra : dict, optional
        Optional flags; the keys 'OT' and 'Bowl' are read. Missing keys (or
        no dict at all) leave the corresponding attribute as None.
    """
    def __init__(self, gameID, date, teamA, teamB, teamAResult, teamBResult, teamAScore, teamBScore, location, extra=None):
        # A None default avoids sharing one mutable dict between all calls.
        if extra is None:
            extra = {}

        self.gameID = gameID
        self.date   = date
        self.teamA  = teamA
        self.teamB  = teamB
        self.teamAScore  = teamAScore
        self.teamBScore  = teamBScore
        self.teamAResult  = teamAResult
        self.teamBResult  = teamBResult
        self.location = location

        self.OT   = extra.get('OT')
        self.Bowl = extra.get('Bowl')

    def getWinner(self):
        """Return the team with the higher score, or "T" when the scores are equal."""
        if self.teamAScore > self.teamBScore:
            return self.teamA
        elif self.teamAScore < self.teamBScore:
            return self.teamB
        else:
            return "T"


    def getGame(self):
        """Return a dict describing the game (currently only its "GameID")."""
        retval = {"GameID": self.gameID}
        # The original built this dict but fell off the end and returned None.
        return retval

        
        
############################################################################################################
# Team Class
############################################################################################################
class team:
    """One team's season: identity fields plus the list of games it played."""

    def __init__(self, year, teamName, teamMascot, teamID):
        self.year, self.teamName = year, teamName
        self.teamMascot, self.teamID = teamMascot, teamID
        self.games = []


    def getTeamID(self):
        """Return the team identifier."""
        return self.teamID


    def addGame(self, game):
        """Append a game, recording its teamAResult as this team's result."""
        self.games.append({"Result": game.teamAResult, "Game": game})


    def setStatistics(self):
        """
        Count wins/losses/ties over the stored games and store the totals.

        Raises ValueError (before storing anything) when some game has a
        result other than "W", "L" or "T".
        """
        tally = {}
        for code in ("W", "L", "T"):
            tally[code] = sum(1 for entry in self.games if entry["Result"] == code)
        played = len(self.games)
        if played != tally["W"]+tally["L"]+tally["T"]:
            raise ValueError("The sum of wins, losses, and ties does not match total number of games!")
        self.wins, self.losses, self.ties = tally["W"], tally["L"], tally["T"]
        self.ngames = played


    def summary(self):
        """Print one fixed-width line: year, name, wins, losses, ties, games."""
        row = "{0: <6}{1: <50}{2: <5}{3: <5}{4: <5}{5: <5}"
        fields = (self.year, self.teamName, self.wins, self.losses, self.ties, self.ngames)
        print(row.format(*fields))


    def getGames(self):
        """Return the list of {"Result", "Game"} records."""
        return self.games
        
        
############################################################################################################
# Season Class
############################################################################################################
class season:
    """Container for one year's teams, keyed by team ID."""

    def __init__(self, year):
        self.year = year
        self.teams, self.games = {}, {}

    def getYear(self):
        """Return the year this season object was created with."""
        return self.year

    def addTeam(self, team):
        """Store the team under the key returned by its getTeamID()."""
        self.teams[team.getTeamID()] = team

In [10]:
class historical(espn, output):
    def __init__(self):
        """
        Initialise both parents and set up the season/results/games sub-directories.

        Each sub-directory path is whatever mkSubDir returns for the root save
        directory and the sub-directory name (presumably it creates the
        directory if needed -- TODO confirm against fsUtils).
        """
        espn.__init__(self)
        output.__init__(self)
        # Assign the name AFTER the parent initialisers: each of them sets
        # self.name itself, so assigning it first left it as "output".
        self.name = "historical"

        self.seasonDir  = mkSubDir(self.getSaveDir(), "season")
        self.resultsDir = mkSubDir(self.getSaveDir(), "results")
        self.gamesDir   = mkSubDir(self.getSaveDir(), "games")
        
        
    def getSeasonDir(self):
        """Return the "season" sub-directory path stored by __init__."""
        return self.seasonDir
        
    def getResultsDir(self):
        """Return the "results" sub-directory path stored by __init__."""
        return self.resultsDir
        
    def getGamesDir(self):
        """Return the "games" sub-directory path stored by __init__."""
        return self.gamesDir
        
        
    def getYearlySeasonDir(self, year):
        """Return mkSubDir's result for the per-year folder inside the season directory."""
        return mkSubDir(self.getSeasonDir(), str(year))
        
        
    def getYearlyGamesDir(self, year):
        """Return mkSubDir's result for the per-year folder inside the games directory."""
        return mkSubDir(self.getGamesDir(), str(year))
        
        
    def downloadTeamStandingsByYear(self, year, debug=False):
        """
        Download the college-football standings page for one season.

        The page is saved as "<year>.p" in the season directory; nothing is
        downloaded when that file already exists.

        Parameters
        ----------
        year : season year (converted with str()).
        debug : when True, print the URL before downloading.
        """
        savename  = setFile(self.getSeasonDir(), str(year)+".p")
        if isFile(savename):
            return

        # Join URL parts with "/" explicitly: os.path.join is meant for
        # filesystem paths and uses the platform separator.
        suburl = "college-football/standings/_/season"
        url    = "/".join([self.getBase(), suburl, str(year)])

        if debug:
            print("Downloading {0}".format(url))
        getWebData(base=url, savename=savename, useSafari=False)
        # Randomised pause after each request (10-12 s).
        sleep(10+2*random())


    def downloadTeamStandings(self, startYear=2003, endYear=2018, debug=False):
        """Download the standings page for every season from startYear to endYear inclusive."""
        lastYear = endYear + 1
        for seasonYear in range(startYear, lastYear):
            self.downloadTeamStandingsByYear(seasonYear, debug)
        
        
    def downloadTeamDataByYear(self, idval, name, year, debug=False):
        """
        Download one team's schedule page for one season.

        The page is saved as "<name>-<year>.p" in the yearly season directory;
        nothing is downloaded when that file already exists.

        Parameters
        ----------
        idval : team id used in the schedule URL.
        name : team name used in the saved file name.
        year : season year (converted with str() for the URL).
        debug : when True, print the URL and target file before downloading.
        """
        outputdir = self.getYearlySeasonDir(year)
        savename  = setFile(outputdir, "{0}-{1}.p".format(name, year))
        if isFile(savename):
            return

        # Join URL parts with "/" explicitly: os.path.join is meant for
        # filesystem paths and uses the platform separator.
        suburl = "college-football/team/schedule/_/id/{0}/season".format(idval)
        url    = "/".join([self.getBase(), suburl, str(year)])

        if debug:
            print("Downloading {0} to {1}".format(url, savename))
        getWebData(base=url, savename=savename, useSafari=False)
        # Randomised pause after each request (15-17 s).
        sleep(15+2*random())
            
            
    def parseAndDownloadTeamYearlyStandings(self):
        """
        Read every saved standings page and download each listed team's schedule.

        For each ".p" file found in the season directory, the file's base name
        is taken as the year. Every <a> tag carrying a "data-clubhouse-uid"
        attribute contributes a (team id, team name) pair taken from the last
        two components of its href. Each pair is then passed to
        downloadTeamDataByYear, which fetches a URL such as
        http://www.espn.com/college-football/team/schedule/_/id/201/season/2005

        Raises
        ------
        ValueError
            If one team id appears with two different names on the same page.
        """
        files = findExt(self.getSeasonDir(), ext=".p", debug=False)
        for ifile in files:
            year     = getBaseFilename(ifile)
            htmldata = getFile(ifile)
            bsdata   = getHTML(htmldata)

            idVals = {}
            for link in bsdata.findAll("a"):
                attrs = link.attrs
                if attrs.get("data-clubhouse-uid") is None:
                    continue

                href  = attrs['href']
                name  = getBasename(href)
                idval = getBasename(getDirname(href))

                if idVals.get(idval) is not None and idVals[idval] != name:
                    raise ValueError("Error in ID for this year!")
                idVals[idval] = name

            for idVal,name in idVals.items():
                # The keyword must be `year`: downloadTeamDataByYear has no
                # `season` parameter, so the previous call raised TypeError.
                self.downloadTeamDataByYear(idVal, name, year=str(year), debug=True)
            
            
    def parseTeamYearlyStandings(self, startYear=2003, endYear=2018, debug=False, verydebug=False):
        """
        Parse the saved team schedule pages into season/team/game objects.

        For every year from startYear to endYear inclusive, each ".p" file in
        the yearly season directory is parsed into a `team` holding one `game`
        per completed, regularly dated schedule row. All teams of a year are
        collected in a `season` object that is saved as "<year>.p" in the
        results directory. Source pages look like
        http://www.espn.com/college-football/team/schedule/_/id/201/season/2005

        Parameters
        ----------
        startYear, endYear : first and last season to parse.
        debug : print a one-line summary per team AND remove any schedule
            file that produced zero games.
        verydebug : print per-file, per-table and per-game details.

        Raises
        ------
        ValueError
            When a page lacks the og:url meta tag, or a schedule row cannot be
            interpreted (missing column, unknown home/away marker, unexpected
            result layout, non-integer score, unknown outcome, missing link).
        """
        for year in range(startYear, endYear+1):
            seasonDir = self.getYearlySeasonDir(year)
            files = findExt(seasonDir, ext=".p", debug=False)

            seasonData = season(year)

            for ifile in files:
                nameyear = getBaseFilename(ifile)
                htmldata = getFile(ifile)
                bsdata   = getHTML(htmldata)
                teamName = nameyear.replace("-{0}".format(year), "")


                metadata = bsdata.find("meta", {"property": "og:url"})
                if metadata is None:
                    raise ValueError("Could not find basic team meta data for this file! {0}".format(ifile))

                # The year read from the page URL gets its own name. It used to
                # overwrite the loop variable `year`, which then leaked into
                # the team-name stripping of later files and into the name of
                # the saved results file.
                try:
                    content  = metadata.attrs['content']
                    teamYear = getBasename(content)
                    teamID   = getBasename(getDirname(getDirname(content)))
                except Exception:
                    raise ValueError("Could not get team year and ID from meta data: {0}".format(metadata))

                if verydebug:
                    print(teamYear,'\t',teamID,'\t',ifile)


                ## Create Team Object
                teamData = team(year=teamYear, teamName=teamName, teamMascot=None, teamID=teamID)

                tables = bsdata.findAll("table", {"class": "Table2__table"})
                if verydebug:
                    print("\tFound {0} game tables".format(len(tables)))
                for it,table in enumerate(tables):
                    trs = table.findAll("tr")

                    # The second row supplies the column names; rows after it are games.
                    headers = trs[1]
                    headers = [x.text for x in headers.findAll("td") if x is not None]

                    gameRows = trs[2:]
                    totalGames = len(gameRows)

                    if verydebug:
                        print("\tFound {0} potential games".format(totalGames))

                    for ig,tr in enumerate(gameRows):
                        tds = tr.findAll("td")
                        gameData = dict(zip(headers, tds))
                        extra    = {"OT": False, "Bowl": False}


                        ## Get the Date
                        try:
                            date = gameData["Date"]
                        except KeyError:
                            print(ifile)
                            raise ValueError("No date for this game! {0}".format(gameData))
                        date = date.text

                        ## Only Keep Games With Regular Dates
                        try:
                            dateval = "{0} {1}".format(date.split(", ")[-1], teamYear)
                            date    = getDateTime(dateval)
                        except Exception:
                            date    = None

                        if date is None:
                            continue

                        ## Check for January Games (in the following year)
                        if date.month == 1:
                            date = addMonths(date, 12)


                        ## Get the Opponent
                        try:
                            opponent = gameData["Opponent"]
                        except KeyError:
                            # Report the row; this used to format the `game` class.
                            raise ValueError("No opponent for this game! {0}".format(gameData))

                        # Opponents without a link fall back to the cell text and ID 0.
                        try:
                            oppolink = opponent.find("a")
                            oppohref = oppolink.attrs['href']
                            opponame = getBasename(oppohref)
                            oppoID   = getBasename(getDirname(oppohref))
                        except Exception:
                            opponame = opponent.text
                            oppoID   = 0


                        try:
                            gamespan = opponent.find("span", {"class": "pr2"})
                            gametype = gamespan.text
                        except Exception:
                            raise ValueError("Could not find game type from {0}".format(opponent))

                        # "vs" records this team as the location, "@" the opponent.
                        if gametype == "vs":
                            location = teamID
                        elif gametype == "@":
                            location = oppoID
                        else:
                            raise ValueError("Location --> {0}".format(gametype))


                        if verydebug:
                            print("\t{0}/{1}\t{2}\t{3: <4}{4: <50}".format(ig, totalGames, printDateTime(date), gametype, opponame), end="\t")


                        ## Get the Result
                        try:
                            result = gameData["Result"]
                        except KeyError:
                            # Report the row; this used to format the `game` class.
                            raise ValueError("No result for this game! {0}".format(gameData))

                        # Rows without result spans are skipped.
                        spans = result.findAll("span")
                        if len(spans) == 0:
                            continue
                        if len(spans) != 2:
                            raise ValueError("There are {0} spans in this row!: {1}".format(len(spans), result))
                        outcome = spans[0].text.strip()
                        score   = spans[1].text.strip()

                        if score.endswith("OT"):
                            # Update the flag in place so the "Bowl" key is kept.
                            extra["OT"] = True
                            score = score[:-3].strip()

                        try:
                            scores  = [int(x) for x in score.split('-')]
                        except Exception:
                            raise ValueError("Could not create integer scores from {0}".format(spans))

                        # For a loss the first score is assigned to the opponent.
                        if outcome == 'W':
                            teamScore  = scores[0]
                            oppoScore  = scores[1]
                            teamResult = "W"
                            oppoResult = "L"
                        elif outcome == "L":
                            teamScore = scores[1]
                            oppoScore = scores[0]
                            teamResult = "L"
                            oppoResult = "W"
                        elif outcome == "T":
                            teamScore = scores[0]
                            oppoScore = scores[1]
                            teamResult = "T"
                            oppoResult = "T"
                        else:
                            raise ValueError("Did not recognize game outcome {0}".format(outcome))


                        ## Get the Game
                        try:
                            gamelink = result.find("a")
                            gamehref = gamelink.attrs['href']
                            gameID   = getBasename(gamehref)
                        except Exception:
                            raise ValueError("Could not find href in link! {0}".format(result))


                        if verydebug:
                            print("{0}  {1}".format(teamResult, "-".join(str(x) for x in [teamScore,oppoScore])))


                        ## Create game object (extra was previously built but never passed on)
                        gameObject = game(gameID=gameID, date=date, teamA=teamID, teamB=oppoID,
                                          teamAResult=teamResult, teamBResult=oppoResult,
                                          teamAScore=teamScore, teamBScore=oppoScore, location=location,
                                          extra=extra)


                        ## Append game to team data
                        teamData.addGame(gameObject)


                ## Show Summary
                teamData.setStatistics()
                if debug:
                    teamData.summary()
                    # NOTE(review): files with zero games are deleted only in debug mode.
                    if teamData.ngames == 0:
                        removeFile(ifile, debug=True)

                seasonData.addTeam(teamData)

            savename = setFile(self.getResultsDir(), "{0}.p".format(year))
            saveFile(idata=seasonData, ifile=savename, debug=True)
            
            
    def downloadGameData(self, debug=False, verydebug=False, years=(2016, 2017, 2018)):
        """
        Fetch the play-by-play page of every game in the saved season results.

        Each ".p" file in the results directory is loaded as a season object.
        For every game of every team in a selected season, the page is stored
        as "<gameID>.p" in the yearly games directory:

        * if an older copy exists under /Volumes/Seagate/Football/Games/Plays
          and the target is missing, the old copy is re-saved and no download
          happens;
        * an existing target that rounds to 0 kB is removed;
        * a missing target is downloaded, followed by a 6 second pause.

        Parameters
        ----------
        debug, verydebug : accepted for interface compatibility; not used.
        years : container of season years to process. Defaults to the
            previously hard-coded 2016-2018 selection.
        """
        from os.path import getsize

        resultsDir = self.getResultsDir()
        files = findExt(resultsDir, ext=".p", debug=False)

        gameType = "playbyplay"
        print("Sleeping for 5 seconds...")
        sleep(5)

        for ifile in files:
            seasonData = getFile(ifile)
            year = seasonData.getYear()
            if year not in years:
                continue
            gamesDir = self.getYearlyGamesDir(year)

            for teamID,teamData in seasonData.teams.items():
                for gameData in teamData.games:
                    gameID   = gameData["Game"].gameID
                    savename = setFile(gamesDir, "{0}.p".format(gameID))

                    # Reuse a previously downloaded copy when one exists.
                    prevLocation = "/Volumes/Seagate/Football/Games/Plays/{0}.html".format(gameID)
                    if isFile(prevLocation) and not isFile(savename):
                        # The context manager closes the handle the old code leaked.
                        with open(prevLocation, "r") as fh:
                            data = fh.read()
                        saveFile(idata=data, ifile=savename, debug=True)
                        continue

                    # Remove files whose size rounds to 0 kB so they are fetched again.
                    if isFile(savename):
                        size = round(getsize(savename)/1e3)
                        if size < 1:
                            removeFile(savename, debug=True)

                    if not isFile(savename):
                        url = "{0}/college-football/{1}?gameId={2}".format(self.getBase(), gameType, gameID)
                        getWebData(base=url, savename=savename, dtime=6, useSafari=True, debug=True)
                        sleep(6)

In [11]:
# Build the downloader/parser; its constructor calls mkSubDir for the
# "season", "results" and "games" sub-directories of the save directory.
hist = historical()

In [12]:
# Fetch play-by-play pages for the games listed in the saved season results.
hist.downloadGameData()
# Example play-by-play URL: http://www.espn.com/college-football/playbyplay?gameId=401022514

Sleeping for 5 seconds...
Removing /Volumes/Blue/Football/games/2017/400933853.p
  --> This file is 77.0kB.
/Volumes/Blue/Football/games/2017/400933853.p size -> 77 kB
Removing /Volumes/Blue/Football/games/2018/401013032.p
  --> This file is 77.0kB.
/Volumes/Blue/Football/games/2018/401013032.p size -> 77 kB
Removing /Volumes/Blue/Football/games/2018/401013180.p
  --> This file is 75.7kB.
/Volumes/Blue/Football/games/2018/401013180.p size -> 76 kB
Removing /Volumes/Blue/Football/games/2018/401032054.p
  --> This file is 77.4kB.
/Volumes/Blue/Football/games/2018/401032054.p size -> 77 kB
Removing /Volumes/Blue/Football/games/2018/401013092.p
  --> This file is 74.6kB.
/Volumes/Blue/Football/games/2018/401013092.p size -> 75 kB


In [None]:

########### Python 3.2 #############
import http.client, urllib.request, urllib.parse, urllib.error, base64

# NOTE: '{subscription key}', '{format}', '{date}' and '{body}' below are
# template placeholders and must be replaced with real values before running.
# Read the key from the environment rather than committing it to the notebook.
headers = {
    # Request headers
    'Ocp-Apim-Subscription-Key': '{subscription key}',
}

params = urllib.parse.urlencode({
})

conn = None
try:
    conn = http.client.HTTPSConnection('api.fantasydata.net')
    conn.request("GET", "/v3/cfb/stats/{format}/GamesByDate/{date}?%s" % params, "{body}", headers)
    response = conn.getresponse()
    data = response.read()
    print(data)
except Exception as e:
    # Only OSError-style exceptions carry errno/strerror; reading them
    # unconditionally raised AttributeError inside the handler for others.
    print("[Errno {0}] {1}".format(getattr(e, "errno", None), getattr(e, "strerror", None) or e))
finally:
    # Close the connection on the error path as well.
    if conn is not None:
        conn.close()

####################################

In [None]:
# Example game page: http://www.espn.com/college-football/game/_/gameId/401022514
# (the bare URL used to sit here as code, which is a SyntaxError)
data = getFile("/Volumes/Blue/Football/games/2003/232562006.p")
data

In [None]:
#hist.downloadTeamStandings()

In [None]:
#hist.parseAndDownloadTeamYearlyStandings()

In [None]:
#hist.parseTeamYearlyStandings(debug=True, verydebug=False)

In [None]:
def fillConv():
    """
    Build the team-name conversion tables.

    Returns
    -------
    (convs, invconvs) : tuple of dict
        `convs` maps a short/alternate team name to its long form;
        `invconvs` is the reverse mapping. When several short names share a
        long form, the one listed last in `convs` wins in `invconvs`.
    """
    convs = {
        "Miami (FL)": "Miami (Florida)",
        "Miami (OH)": "Miami (Ohio)",
        "Florida Intl": "Florida International",
        "UCF": "Central Florida",
        "San José State": "San Jose State",
        "San Jos\xe9 State": "San Jose State",
        "UNLV": "Nevada-Las Vegas",
        "Kent State": "Kent",
        "BYU": "Brigham Young",
        "LSU": "Louisiana State",
        "Louisiana Monroe": "Louisiana-Monroe",
        "Louisiana Lafayette": "Louisiana-Lafayette",
        "TCU": "Texas Christian",
        "MTSU": "Middle Tennessee State",
        "Middle Tennessee": "Middle Tennessee State",
        "SMU": "Southern Methodist",
        "UTEP": "Texas-El Paso",
        "Texas San Antonio": "Texas-San Antonio",
        "BGSU": "Bowling Green State",
        "Bowling Green": "Bowling Green State",
        #"NCSU": "North Carolina State",
        "NC State": "North Carolina State",
        "USC": "Southern California",
        "Ole Miss": "Mississippi",

        "Presbyterian College": "Presbyterian",
        "The Citadel": "Citadel",
        "UC Davis": "California-Davis",
        "VMI": "Virginia Military Institute",
        "Stephen F Austin": "Stephen F. Austin",
    }

    # dict.iteritems() does not exist on Python 3; items() works on both 2 and 3.
    invconvs = {}
    for k,v in convs.items():
        invconvs[v] = k
    return convs, invconvs

def InvTeamConv(team):
    """Return the short name for a long team name, or `team` itself when unknown."""
    convs, invconvs = fillConv()
    return invconvs.get(team) or team

def TeamConv(team):
    """Return the long name for a short team name, or `team` itself when unknown."""
    convs, invconvs = fillConv()
    return convs.get(team) or team

In [None]:
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Sun Dec  3 15:00:12 2017

@author: tgadfort
"""

import sys
if '/Users/tgadfort/Python' not in sys.path:
    sys.path.insert(0, '/Users/tgadfort/Python')

from fsio import setFile, isFile
from fileio import save
from path import getTeamsDBDir
from htmlParser import getHTML
from download import getURL

def getConferenceBaseURL():
    """Return the ESPN college-football standings URL (with trailing slash)."""
    standings = "http://www.espn.com/college-football/standings/"
    return standings

def getFBSURL():
    """Return the FBS standings URL prefix; the season year is appended by callers."""
    return "{0}{1}".format(getConferenceBaseURL(), "_/season/")

def getFCSURL():
    """Return the FCS standings URL prefix; the season year is appended by callers."""
    return "{0}{1}".format(getConferenceBaseURL(), "_/view/fcs/season/")

def getDIIURL():
    """Return the Division II standings URL prefix; the season year is appended by callers."""
    return "{0}{1}".format(getConferenceBaseURL(), "_/view/d2/season/")

def getDIIIURL():
    """Return the Division III standings URL prefix; the season year is appended by callers."""
    return "{0}{1}".format(getConferenceBaseURL(), "_/view/d3/season/")


def downloadConferenceData(yearID, debug = True):
    """
    Download the FBS, FCS, DII and DIII standings pages for one season.

    Each page is saved as "<division>-<yearID>.p" in the teams DB directory
    and is skipped when that file already exists.
    """
    divisions = (
        ("FBS-",  getFBSURL),
        ("FCS-",  getFCSURL),
        ("DII-",  getDIIURL),
        ("DIII-", getDIIIURL),
    )
    for prefix, urlFunc in divisions:
        url = urlFunc()+str(yearID)
        savename = setFile(getTeamsDBDir(), prefix+str(yearID)+".p")
        if isFile(savename):
            continue
        getURL(url, savename, debug)


def getConferenceData(yearID, confName, debug = True):
    """
    Parse one saved standings page into team lookup tables.

    Parameters
    ----------
    yearID : season year; part of the saved file name.
    confName : division label ('FBS', 'FCS', 'DII', 'DIII'); part of the
        saved file name and stored as each team's division.
    debug : accepted for interface compatibility; not used.

    Returns
    -------
    (teamMap, teamData) : tuple of dict
        teamMap holds three kinds of keys in one dict: team id ->
        {"CONF", "DIV"}, conference name -> list of team ids, and
        confName -> list of team ids.
        teamData maps team id -> {header: cell text} plus "Conference" and
        "Division".

    NOTE(review): tables are matched to conference captions by position, so
    a page with more <table> elements than captions raises IndexError.
    """
    savename = setFile(getTeamsDBDir(), confName+"-"+str(yearID)+".p")
    bsdata   = getHTML(savename)

    conferences = []
    for h2 in bsdata.findAll("h2", {"class": "table-caption"}):
        span = h2.find("span", {"class": "long-caption"})
        if span:
            conferences.append(span.text)

    teamData = {}
    teamMap  = {}

    tables = bsdata.findAll("table")
    for i,table in enumerate(tables):
        conferenceName = conferences[i]
        headers = ["Team"]
        for th in table.findAll("th"):
            span = th.find("span", {"class": "tooltip"})
            if span:
                headers.append(span.text)

        for tr in table.findAll("tr", {"class": "standings-row"}):
            tds = tr.findAll("td")

            ## Name
            # Rows without a team link/name are skipped. `except Exception`
            # (instead of a bare except) lets KeyboardInterrupt propagate.
            try:
                ref      = tds[0].find("a").attrs['href']
                teamID   = ref.split("/")[-2]
                niceName = tds[0].find("span", {"class": "team-names"}).text
            except Exception:
                continue

            teamMap[teamID] = {"CONF": conferenceName, "DIV": confName}
            teamMap.setdefault(conferenceName, []).append(teamID)
            teamMap.setdefault(confName, []).append(teamID)

            # The first column is the display name; the rest are the cell texts.
            localData = [niceName] + [td.text for td in tds[1:]]
            teamData[teamID] = dict(zip(headers, localData))
            teamData[teamID]["Conference"] = conferenceName
            teamData[teamID]["Division"]   = confName

    return teamMap, teamData


def createConferenceData(yearID, debug = False):
    """
    Merge the FBS/FCS/DII/DIII lookup tables of one season and save them.

    Later divisions overwrite earlier ones on key collisions. The results are
    written to "<yearID>-Map.p" and "<yearID>-Data.p" in the teams DB directory.
    """
    perDivision = [getConferenceData(yearID, division, debug)
                   for division in ('FBS', 'FCS', 'DII', 'DIII')]

    teamMap  = {}
    teamData = {}
    for divisionMap, divisionData in perDivision:
        teamMap.update(divisionMap)
        teamData.update(divisionData)

    for suffix, payload in (("-Map.p", teamMap), ("-Data.p", teamData)):
        savename = setFile(getTeamsDBDir(), str(yearID)+suffix)
        save(savename, payload, debug = True)

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Wed Dec  9 17:36:45 2015

@author: tgadfort
"""

import TeamConv as tc
import teamNum as tn
import getHTML as web
import os
import glob
import random


def getURL(basepath, url, htmlid, force):
    """
    Download `url` to "<basepath>/<htmlid>.html" via web.getHTML.

    Parameters
    ----------
    basepath : existing directory to save into.
    url : address to download.
    htmlid : identifier used as the file's base name.
    force : when False, an already existing target file is not downloaded again.

    Returns
    -------
    False when basepath does not exist or the download was skipped;
    otherwise None (whether or not the download succeeded).
    """
    # print() calls replace Python 2 print statements, which are a
    # SyntaxError on Python 3; the printed text is unchanged.
    if not os.path.exists(basepath):
        print(' ---> No save path')
        print(basepath)
        print(htmlid)
        return False

    savehtml=basepath+"/"+str(htmlid)+".html"
    if os.path.exists(savehtml) and force == False:
        print(' ---> Save path exists, but force -> FALSE')
        return False

    result=web.getHTML(url, savehtml)
    if result:
        print(' --->',url,'  in  ',savehtml)
    else:
        print(" Did not download",url)



def parseHistoricalSchedule(schedule):
    """
    Extract game ids from schedule HTML containing `recap?id=<id>">` links.

    Returns a dict mapping each id string to its integer value. Chunks whose
    id is not terminated by `">` or is not an integer are ignored.
    """
    games={}
    for val in schedule.split("recap?id="):
        pos = val.find("\">")
        if pos == -1:
            # No terminator: val[:-1] would silently drop the last character
            # and could yield a truncated, wrong id.
            continue
        gameid = val[:pos]
        try:
            games[gameid] = int(gameid)
        except ValueError:
            continue

    return games
    
    


def parseSchedule(base, schedule):
    """
    Collect game links from the <li> items of a schedule section.

    Returns a dict mapping each game id to `base` + the link's href path; the
    first link seen for an id is kept.
    """
    marker = "href=\"/college-football/game?gameId="
    found = {}
    for item in schedule.split("<li>"):
        if "gameId=" not in item:
            continue
        start = item.find(marker)
        stop  = item.find("\">", start+1)
        # Skip the 6 characters of 'href="' to keep only the path.
        path = item[start+6:stop]
        gameid = path.split("=")[1]
        link = base + path
        if found.get(gameid) is None:
            found[gameid] = link
    return found
    
 
def getHistoricalSchedule(basepath, teamid, force):
    """Download a team's schedule page for each season from 2002 through 2014 via getURL."""
    prefix = "http://espn.go.com/college-football/team/schedule/_/id/"+teamid+"/year/"
    for seasonYear in range(2002, 2015):
        yearid = str(seasonYear)
        getURL(basepath, prefix+yearid + "/", teamid + "-" + yearid, force)

    
def getTeamURL(teamnum):
    """
    Return the list of [label, url] pairs to download for a team.

    Only the "Team" page is configured at present.
    """
    ttypes={}
    ttypes["Team"] = "team"
    urls=[]
    # dict.iteritems() does not exist on Python 3; items() works on both 2 and 3.
    for k,v in ttypes.items():
        url="http://espn.go.com/college-football/"+v+"/_/id/"+str(teamnum)
        urls.append([k,url])
    return urls


def getGameURLs(gameid):
    """
    Return the list of [label, url] pairs to download for a game.

    Labels are "Plays", "Matchup" and "BoxScore"; each label doubles as the
    sub-directory name used by getGame.
    """
    gtypes={}
    gtypes["Plays"]    = "playbyplay"
    gtypes["Matchup"]  ="matchup"
    gtypes["BoxScore"] = "boxscore"
    urls=[]
    # dict.iteritems() does not exist on Python 3; items() works on both 2 and 3.
    for k,v in gtypes.items():
        url="http://scores.espn.go.com/college-football/"+v+"?gameId="+str(gameid)
        urls.append([k,url])
    return urls



def getTeam(teamnum, locbase, test=False, force=False):
    """Download every page listed by getTeamURL into "<locbase>/<label>" (`test` is unused)."""
    for pair in getTeamURL(teamnum):
        label, link = pair[0], pair[1]
        getURL(locbase + "/" + label, link, teamnum, force)
    
    
def getGame(gameid, locbase, test=False, force=False):
    """Download every page listed by getGameURLs into "<locbase>/<label>" (`test` is unused)."""
    for pair in getGameURLs(gameid):
        label, link = pair[0], pair[1]
        getURL(locbase + "/" + label, link, gameid, force)
        


def getTeamHistoricalGames(schedulebase, teamnum):
    """
    Collect the game ids from all saved schedule files of one team.

    Every file matching "<schedulebase>/<teamnum>-*" is scanned for lines
    containing the game-schedule list, and the ids found by
    parseHistoricalSchedule are merged into one dict.
    """
    teamgames={}
    for teamhtml in glob.glob(schedulebase + "/" + str(teamnum) + "-*"):
        # Read inside a context manager so each file handle is closed.
        with open(teamhtml) as fh:
            fdata = [x.strip('\r\n').strip('\t') for x in fh]

        for line in fdata:
            if line.find("<ul class=\"game-schedule\">") != -1:
                games = parseHistoricalSchedule(line)
                teamgames.update(games)

    return teamgames
    

def getTeamGames(locbase, webbase, teamnum):
    """
    Return the game links found in a team's saved page.

    Returns
    -------
    False when "<locbase>/<teamnum>.html" does not exist, None when the file
    has no club-schedule section, otherwise the dict built by parseSchedule
    from the first matching line.
    """
    teamhtml=locbase + "/" + str(teamnum) + ".html"
    if not os.path.exists(teamhtml):
        # print() replaces a Python 2 print statement (SyntaxError on Python 3).
        print("No team file:",teamhtml)
        return False

    # Read inside a context manager so the file handle is closed.
    with open(teamhtml) as fh:
        fdata = [x.strip('\r\n').strip('\t') for x in fh]

    for line in fdata:
        if line.find("<section class=\"club-schedule\" data-module=\"schedule\"") != -1:
            games = parseSchedule(webbase, line)
            return games
    return None


def parseTeam(base, teamhtml, fbsteams):
    """
    Download the play-by-play page of every game on an FBS team's saved page.

    The team name is read from the og:title meta line; the first word, the
    first two words, and the two-word name + " State" are each converted with
    tc.TeamConv and looked up in `fbsteams`. If none is found the function
    returns without downloading. For every game link in the club-schedule
    section, the play-by-play page is saved to "Games/Plays/<gameid>.html"
    unless that file already exists.

    Parameters
    ----------
    base : site prefix prepended to the relative game links.
    teamhtml : path of the saved team page.
    fbsteams : mapping whose truthy entries mark FBS team names.
    """
    # Read inside a context manager so the file handle is closed.
    with open(teamhtml) as fh:
        fdata = [x.strip('\r\n').strip('\t') for x in fh]

    # print() calls replace Python 2 print statements (SyntaxError on Python 3).
    for line in fdata:
        if line.find("<meta property=\"og:title\" content=\"") != -1:
            pos1=line.find("content=")
            pos2=line.find("College Football")
            name=line[pos1+9:pos2].strip()
            vals=name.split()
            if not vals:
                # An empty title cannot match any team (vals[0] would raise IndexError).
                print("I don't think [",name,"] is an FBS team")
                return

            test1=tc.TeamConv(vals[0])
            test2=tc.TeamConv(" ".join(vals[:2]))
            test3=tc.TeamConv(test2+" State")

            fbsteam=False
            for candidate in (test1, test2, test3):
                if fbsteams.get(candidate):
                    fbsteam = True
                    print(candidate,"is an FBS team")
            if not fbsteam:
                print("I don't think [",name,"] is an FBS team")
                return

        if line.find("<section class=\"club-schedule\" data-module=\"schedule\"") != -1:
            games = parseSchedule(base, line)
            # parseSchedule returns {gameid: url}. Iterating the dict itself
            # yields ids only, so the old `game.split("=")[1]` raised IndexError.
            for idval, gameurl in games.items():
                playbyplay = gameurl.replace("game?", "playbyplay?")
                saveval="Games/Plays/"+idval+".html"
                if os.path.exists(saveval):
                    continue
                print(playbyplay,'--->',saveval)
                # The downloader is imported as `web` here; a bare getHTML is
                # not defined in this module.
                web.getHTML(playbyplay, saveval)

                

def stripTeamNum(line):
    """Extract the team number and logo address from an ``<img src="...png">`` tag.

    The logo address is the text between ``src="`` and the following
    ``.png`` (inclusive); the team number is the integer file name of that
    address (the part after the last ``/`` without the ``.png`` suffix).

    Returns
    -------
    (teamnum, logoaddr) : (int, str)

    Raises
    ------
    ValueError
        When the file name of the logo address is not an integer.  The
        original reached the same point by calling an undefined ``f()``.
    """
    pos1 = line.find("src=\"")
    pos2 = line.find(".png", pos1+1)
    logoaddr = line[pos1+5:pos2+4]
    pos = logoaddr.rfind("/")
    try:
        teamnum = int(logoaddr[pos+1:-4])
    except ValueError:
        print("Could not get team number", logoaddr)
        raise
    return teamnum, logoaddr
    
    
def getTeamNum(line):
    """Return ``(teamnum, logoaddr)`` for a line holding a logo cell.

    Lines without the ``class="logo">`` marker yield the sentinel pair
    ``(-1, "NoLogoAddr")``; otherwise the work is delegated to
    ``stripTeamNum``.
    """
    marker = "class=\"logo\">"
    if marker not in line:
        return -1, "NoLogoAddr"
    number, address = stripTeamNum(line)
    return number, address





In [None]:
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 14 15:27:02 2015

@author: tgadfort
"""


import TeamConv as tc
import teamNum as tn
import getHTML as web
import os
import json


def _takeTeamAndScore(tokens):
    """Consume ``<team words ...> <integer score>`` from the front of ``tokens``.

    Returns ``(team, score, remaining_tokens)``.  ``team`` and ``score`` are
    ``None`` (and ``tokens`` is returned untouched) when no token parses as
    an integer.
    """
    for idx, token in enumerate(tokens):
        try:
            value = int(token)
        except ValueError:
            continue
        return " ".join(tokens[:idx]), value, tokens[idx + 1:]
    return None, None, tokens


def _splitFusedScore(token):
    """Split a token whose score is glued to the name, e.g. ``"Rutgers6"``.

    The longest run of 1-4 trailing digits wins.  Returns
    ``(prefix, score)`` or ``(None, None)`` when the token has no digit tail.
    """
    for width in (4, 3, 2, 1):
        tail = token[-width:]
        if len(tail) == width and all(ch in "0123456789" for ch in tail):
            return token[:-width], int(tail)
    return None, None


def parseHistoricalScores(scorepath, teampath, histjsonfile):
    """Parse the saved yearly team and score listings and dump them as JSON.

    For every year from 1869 through 2019 the files ``<teampath>/<year>.html``
    and ``<scorepath>/<year>.html`` are read when both exist.  The result
    written to ``histjsonfile`` maps the year (as a string) to
    ``{'teams': {team: conference}, 'scores': [record, ...]}`` where each
    record has the keys ``date, team1, conf1, score1, team2, conf2, score2,
    comment``.  Teams that are absent from the team listing are recorded
    with the conference ``"DivII"``.

    Differences from the earlier version of this function:
    * blank or too-short lines are skipped instead of raising IndexError;
    * a score glued to the last token keeps the preceding team words and
      also recognises a single trailing digit;
    * a parsed score of 0 is no longer mistaken for "missing";
    * lines whose scores cannot be found are reported and skipped instead of
      passing an unparsed token list to ``tc.TeamConv``.
    """
    historicaldata = {}
    for year in range(1869, 2020):
        key = str(year)

        teamfile  = teampath + "/" + key + ".html"
        scorefile = scorepath + "/" + key + ".html"
        if not (os.path.exists(teamfile) and os.path.exists(scorefile)):
            continue

        with open(teamfile) as handle:
            teamdata = [x.strip('\r\n') for x in handle]
        with open(scorefile) as handle:
            scoredata = [x.strip('\r\n') for x in handle]

        # The listing is indentation-coded: a letter in column 0 is skipped,
        # a letter in column 1 starts a conference, a letter in column 2 is
        # a team belonging to the current conference.  Slices (not indexes)
        # keep short or blank lines from raising IndexError.
        yearteams = {}
        conference = None
        for team in teamdata[1:]:
            if team[0:1].isalpha():
                continue
            if team[1:2].isalpha():
                conference = team.strip()
                continue
            if team[2:3].isalpha():
                yearteams[tc.TeamConv(team.strip())] = conference

        yearscores = []
        for score in scoredata:
            vals = score.split()
            if not vals:
                continue

            ## Date
            date = vals[0]

            ## Team1 and its score
            team1, score1, rest = _takeTeamAndScore(vals[1:])
            if score1 is None:
                print('\tCould not parse score line:', score)
                continue

            ## Team2 and its score (possibly glued to the last token)
            team2, score2, rest = _takeTeamAndScore(rest)
            if score2 is None and rest:
                prefix, score2 = _splitFusedScore(rest[-1])
                if score2 is not None:
                    team2 = " ".join(rest[:-1] + [prefix]).strip()
                    rest = []
            if score2 is None:
                print('\tCould not parse score line:', score)
                continue

            ## Whatever follows the second score is kept as a comment
            comment = " ".join(rest) if rest else None

            team1 = tc.TeamConv(team1)
            team2 = tc.TeamConv(team2)

            sval = {}
            sval['date']    = date
            sval['team1']   = team1
            sval['conf1']   = yearteams.setdefault(team1, "DivII")
            sval['score1']  = score1
            sval['team2']   = team2
            sval['conf2']   = yearteams.setdefault(team2, "DivII")
            sval['score2']  = score2
            sval['comment'] = comment
            yearscores.append(sval)

        historicaldata[key] = {}
        historicaldata[key]['teams']  = yearteams
        historicaldata[key]['scores'] = yearscores
        print('\t', key)

    with open(histjsonfile, "w") as handle:
        json.dump(historicaldata, handle)
    print('\tWrote', len(historicaldata), "scores to", histjsonfile)

        

def getHistoricalTeams(basepath, force):
    """Download the yearly team listings for 1869-2013 into ``basepath``.

    Each year whose URL passes ``web.checkURL`` is saved as
    ``<basepath>/<year>.html`` through ``web.getHTML``.

    Returns
    -------
    False when ``basepath`` does not exist, or when a target file already
    exists and ``force`` is False; otherwise None after the last year.
    """
    for year in range(1869, 2014):
        yearid = str(year)
        url = "http://wilson.engr.wisc.edu/rsfc/history/howell/cf" + yearid + "tms.txt"
        if not web.checkURL(url):
            continue

        if not os.path.exists(basepath):
            print(' ---> No save path', basepath, '\t', yearid)
            return False
        savehtml = basepath + "/" + yearid + ".html"

        # NOTE(review): the first already-saved year aborts the whole loop,
        # so a partial download cannot be resumed without force -- confirm
        # this is intended before changing it to skip just that year.
        if os.path.exists(savehtml) and force == False:
            print(' ---> Save path exists, but force -> FALSE')
            return False

        if web.getHTML(url, savehtml):
            print(' --->', url, '  in  ', savehtml)
        else:
            print(" Did not download", url)



def getHistoricalScores(basepath, force):
    """Download the yearly game listings for 1869-2013 into ``basepath``.

    Each year whose URL passes ``web.checkURL`` is saved as
    ``<basepath>/<year>.html`` through ``web.getHTML``.

    Returns
    -------
    False when ``basepath`` does not exist, or when a target file already
    exists and ``force`` is False; otherwise None after the last year.
    """
    for year in range(1869, 2014):
        yearid = str(year)
        url = "http://wilson.engr.wisc.edu/rsfc/history/howell/cf" + yearid + "gms.txt"
        if not web.checkURL(url):
            continue

        if not os.path.exists(basepath):
            print(' ---> No save path', basepath, '\t', yearid)
            return False
        savehtml = basepath + "/" + yearid + ".html"

        # NOTE(review): the first already-saved year aborts the whole loop,
        # so a partial download cannot be resumed without force -- confirm
        # this is intended before changing it to skip just that year.
        if os.path.exists(savehtml) and force == False:
            print(' ---> Save path exists, but force -> FALSE')
            return False

        if web.getHTML(url, savehtml):
            print(' --->', url, '  in  ', savehtml)
        else:
            print(" Did not download", url)



def getScores(basepath, savehtml, yearid, force):
    """Download the 2015 schedule page to ``savehtml``.

    ``basepath`` and ``yearid`` are accepted but not used.

    Returns
    -------
    The falsy result of ``web.checkURL`` when that result equals False,
    False when ``savehtml`` already exists and ``force`` is False, and
    True otherwise.
    """
    # NOTE(review): the year is hard-coded in the URL although a ``yearid``
    # argument is accepted -- confirm whether it should be used here.
    url = 'http://www.jhowell.net/cf/scores/Sked2015.htm'
    result = web.checkURL(url)
    if result == False:
        print(' --->', url, ' = ', result)
        return result

    if os.path.exists(savehtml) and force == False:
        print(' ---> Save path exists, but force -> FALSE')
        return False

    result = web.getHTML(url, savehtml)
    if result:
        print(' --->', url, '  in  ', savehtml)
    else:
        print(" Did not download", url)

    # NOTE(review): True is returned even when the download was reported
    # as failed just above.
    return True


def parseScores(txt):
    """Parse every ``<table>`` of a saved schedule page.

    Parameters
    ----------
    txt : path of the saved page.

    Returns
    -------
    dict mapping the team name reported by ``parseTable`` to
    ``{'conf': ..., 'games': [...]}``.
    """
    # Read through a context manager so the handle is always closed.
    with open(txt) as handle:
        fdata = [x.strip('\r\n') for x in handle]

    yeardata = {}
    nlines = len(fdata)
    i = 0
    while i < nlines:
        if fdata[i].find("<table") != -1:
            # Collect the lines up to (not including) the closing tag.  The
            # bounds check keeps an unterminated table at the end of the
            # file from raising IndexError.
            tdata = []
            while i < nlines and fdata[i].find("</table>") == -1:
                tdata.append(fdata[i])
                i += 1

            teamdata = parseTable(tdata)
            yeardata[teamdata['name']] = {}
            yeardata[teamdata['name']]['conf'] = teamdata['conf']
            yeardata[teamdata['name']]['games'] = teamdata['games']
        i += 1
    return yeardata



def parseTable(table):
    """Turn the lines of one team ``<table>`` into a team record.

    ``table[0]`` is skipped, ``table[1]`` is handed to ``getName`` for the
    team name and conference, and every later line is handed to
    ``getGame``; falsy results from ``getGame`` are dropped.

    Returns
    -------
    dict with the keys ``"games"`` (list), ``"name"`` and ``"conf"``.
    """
    # Closing tags carry no information for the line-based parsers.
    rows = [x.replace("</tr>", "").replace("</td>", "") for x in table]

    teamdata = {}
    teamdata["games"] = []

    name, conf = getName(rows[1])
    teamdata["name"] = name
    teamdata["conf"] = conf

    for line in rows[2:]:
        gamedata = getGame(line)
        if gamedata:
            teamdata["games"].append(gamedata)

    # The unreachable call that used to follow this return was removed.
    return teamdata

    
    
def getName(line):
    """Split a table header line into ``(team name, conference)``.

    The text after ``<p align="center">`` is expected to look like
    ``Team Name (Conference)``.  When there is no parenthesis the whole
    text is the name and the conference is ``""`` (the original sliced the
    name with a -1 index in that case and returned garbage).

    Raises
    ------
    ValueError
        When the ``<p align="center">`` marker is missing.
    """
    marker = "<p align=\"center\">"
    pos = line.find(marker)
    if pos == -1:
        print("Could not parse:", line)
        raise ValueError("no team header in line: %r" % (line,))
    name = line[pos+len(marker):]

    pos = name.rfind("(")
    if pos == -1:
        return name.rstrip(), ""
    conf = name[pos:].replace("(", "").replace(")", "")
    name = name[:pos].rstrip()
    return name, conf


def getGame(line):
    """Parse one schedule table row into a game dictionary.

    The cell tags are replaced by ``:`` and the row is split on it, giving
    8 fields (no site), 9 fields (with site) or 10 fields (site and
    comment); the first field is the empty text before the first cell.

    Returns
    -------
    dict with the keys ``date, day, home, opponent, result, score,
    against`` or ``None`` when the result field is empty.  ``home`` is 1
    for ``"vs."``, -1 for ``"@"`` and 0 whenever a site field is present.

    Raises
    ------
    ValueError
        On an unexpected number of fields, an unknown home marker, or
        non-integer scores.
    """
    line = line.replace("<td align=\"right\">", ":")
    line = line.replace("<td>", ":")
    line = line.replace("<tr>", "")
    linevals = line.split(":")

    if len(linevals) not in (8, 9, 10):
        print("SPLIT ERROR:", line)
        raise ValueError("unexpected number of fields (%d) in row" % len(linevals))
    date, day, home, opp, res, score, against = linevals[1:8]
    site = linevals[8] if len(linevals) >= 9 else None

    opp = opp.replace("*", "")
    if home == "vs.":
        home = 1
    elif home == "@":
        home = -1
    else:
        print("ERROR with Home:", home)
        raise ValueError("unknown home marker: %r" % (home,))
    # Any row that carries a site field is flagged with home = 0.
    if site is not None:
        home = 0

    if len(res) == 0:
        return None

    try:
        score = int(score)
        against = int(against)
    except ValueError:
        print("DICT ERROR\t", date, day, home, opp, res, score, against)
        raise

    game = {}
    game['date'] = date
    game['day'] = day
    game['home'] = home
    game['opponent'] = opp
    game['result'] = res
    game['score'] = score
    game['against'] = against
    return game
    
    


##########################################
#
# This is the output line to the csv file
#
##########################################
def writeLine(outfile, Date, Team1, Team2, Score1, Score2, Winner):
    """Write one comma-separated record followed by a newline.

    Every field is converted with ``str``; no quoting or escaping is done.
    """
    fields = (Date, Team1, Team2, Score1, Score2, Winner)
    record = ",".join(str(field) for field in fields)
    outfile.write(record)
    outfile.write("\n")
    


def writeFile(fname, yeardata):
    """Write every game of every team to ``fname`` as CSV via ``writeLine``.

    ``yeardata`` maps a team name to a dict whose ``"games"`` entry is a
    list of game dicts (keys ``date, opponent, score, against, result``).
    Teams are written in sorted order after a header row.  The winner
    column is the opponent for result ``'L'`` and the team itself for
    every other result (including anything that is neither W nor L).
    """
    nline = 0
    # The context manager closes the file even when a game dict is malformed.
    with open(fname, "w") as out:
        writeLine(out, "Date", "Team1", "Team2", "Score1", "Score2", "Winner")
        for team in sorted(yeardata.keys()):
            for game in yeardata[team]["games"]:
                winner = game['opponent'] if game['result'] == 'L' else team
                writeLine(out, game['date'], team, game['opponent'], game['score'], game['against'], winner)
                nline += 1
    print("\tWrote", nline, "lines to", fname)

In [None]:
from lxml import html
import requests
import json
import csv



def TeamConv(team):
    """Map a team alias to the canonical name used by this notebook.

    Names without an entry in the alias table are returned unchanged.
    """
    convs = {
        "Miami (FL)": "Miami (Florida)",
        "Miami (OH)": "Miami (Ohio)",
        "Florida Intl": "Florida International",
        "UCF": "Central Florida",
        "San José State": "San Jose State",
        "San Jos\xe9 State": "San Jose State",
        "UNLV": "Nevada-Las Vegas",
        "Kent State": "Kent",
        "BYU": "Brigham Young",
        "LSU": "Louisiana State",
        "Louisiana Monroe": "Louisiana-Monroe",
        "Louisiana Lafayette": "Louisiana-Lafayette",
        "TCU": "Texas Christian",
        "MTSU": "Middle Tennessee State",
        "Middle Tennessee": "Middle Tennessee State",
        "SMU": "Southern Methodist",
        "UTEP": "Texas-El Paso",
        "Texas San Antonio": "Texas-San Antonio",
        "BGSU": "Bowling Green State",
        "Bowling Green": "Bowling Green State",
        "NCSU": "North Carolina State",
        "NC State": "North Carolina State",
        "USC": "Southern California",
        "Ole Miss": "Mississippi",
        # Lower-division schools
        "Presbyterian College": "Presbyterian",
        "The Citadel": "Citadel",
        "UC Davis": "California-Davis",
        "VMI": "Virginia Military Institute",
        "Stephen F Austin": "Stephen F. Austin",
    }
    canonical = convs.get(team)
    return canonical if canonical else team


##########################################
#
# This is the output line to the csv file
#
##########################################
def writeLine(outfile, Date, Team1, Team2, Score1, Score2, Winner):
    """Write one comma-separated record followed by a newline.

    Every field is converted with ``str``; no quoting or escaping is done.
    """
    fields = (Date, Team1, Team2, Score1, Score2, Winner)
    record = ",".join(str(field) for field in fields)
    outfile.write(record)
    outfile.write("\n")
    


##########################################
#
# Write file based on start/end date
#
##########################################
def writeAbrvFile(fname, abrvdata):
    """Write ``team<TAB>city<TAB>state<TAB>abbreviation`` lines to ``fname``.

    ``abrvdata`` maps a team name to a dict with the keys ``"Abrv"``,
    ``"City"`` and ``"State"``.  Teams are written in sorted order.  A
    ``None`` field is written as an empty column; ``parseLocation`` leaves
    City/State as ``None`` for teams it could not locate, which used to
    raise TypeError during string concatenation here.
    """
    def text(value):
        return "" if value is None else value

    nline = 0
    with open(fname, "w") as out:
        for team in sorted(abrvdata.keys()):
            entry = abrvdata[team]
            abrv = text(entry["Abrv"])
            city = text(entry["City"])
            state = text(entry["State"])
            out.write(team+"\t"+city+"\t"+state+"\t"+abrv+"\n")
            nline += 1
    print("\tWrote", nline, "lines to", fname)

def writeConfFile(fname, yeardata):
    """Write ``team<TAB>conference`` lines to ``fname`` in sorted team order.

    ``yeardata`` maps a team name to a dict with a ``"conf"`` entry.
    """
    nline = 0
    # The context manager closes the file even if a record is malformed.
    with open(fname, "w") as out:
        for team in sorted(yeardata.keys()):
            conf = yeardata[team]["conf"]
            out.write(team+"\t"+conf+"\n")
            nline += 1
    print("\tWrote", nline, "lines to", fname)
    

def writeGraphFile(fname, yeardata):
    """Write one ``team<TAB>opponent`` edge per game to ``fname``.

    ``yeardata`` maps a team name to a dict whose ``"games"`` entry is a
    list of dicts with an ``"opponent"`` key.  Teams are processed in
    sorted order; games keep their listed order.
    """
    nline = 0
    # The context manager closes the file even if a record is malformed.
    with open(fname, "w") as out:
        for team in sorted(yeardata.keys()):
            for game in yeardata[team]["games"]:
                out.write(team+"\t"+game['opponent']+"\n")
                nline += 1
    print("\tWrote", nline, "lines to", fname)


def writeFile(fname, yeardata):
    """Write every game of every team to ``fname`` as CSV via ``writeLine``.

    ``yeardata`` maps a team name to a dict whose ``"games"`` entry is a
    list of game dicts (keys ``date, opponent, score, against, result``).
    Teams are written in sorted order after a header row.  The winner
    column is the opponent for result ``'L'`` and the team itself for
    every other result (including anything that is neither W nor L).
    """
    nline = 0
    # The context manager closes the file even when a game dict is malformed.
    with open(fname, "w") as out:
        writeLine(out, "Date", "Team1", "Team2", "Score1", "Score2", "Winner")
        for team in sorted(yeardata.keys()):
            for game in yeardata[team]["games"]:
                winner = game['opponent'] if game['result'] == 'L' else team
                writeLine(out, game['date'], team, game['opponent'], game['score'], game['against'], winner)
                nline += 1
    print("\tWrote", nline, "lines to", fname)




def getScores(htm, txt):
    """Download the 2015 schedule page and save its text to ``txt``.

    ``htm`` is accepted but not used.

    Raises
    ------
    requests.HTTPError
        When the server answers with an error status, so that an error
        page is never saved as if it were schedule data.
    """
    # requests applies no timeout by default; bound the wait explicitly.
    page = requests.get('http://www.jhowell.net/cf/scores/Sked2015.htm', timeout=30)
    page.raise_for_status()
    with open(txt, "w") as out:
        out.write(page.text)


def getAbrv(htm, txt):
    """Download the Wikipedia list of colloquial university names to ``txt``.

    ``htm`` is accepted but not used.

    Raises
    ------
    requests.HTTPError
        When the server answers with an error status, so that an error
        page is never saved as if it were the list.
    """
    # requests applies no timeout by default; bound the wait explicitly.
    page = requests.get('https://en.wikipedia.org/wiki/List_of_colloquial_names_for_universities_and_colleges_in_the_United_States', timeout=30)
    page.raise_for_status()
    with open(txt, "w") as out:
        out.write(page.text)


def getName(line):
    """Split a table header line into ``(team name, conference)``.

    The text after ``<p align="center">`` is expected to look like
    ``Team Name (Conference)``.  When there is no parenthesis the whole
    text is the name and the conference is ``""`` (the original sliced the
    name with a -1 index in that case and returned garbage).

    Raises
    ------
    ValueError
        When the ``<p align="center">`` marker is missing (the original
        called ``exit()`` here, which stops the whole session).
    """
    marker = "<p align=\"center\">"
    pos = line.find(marker)
    if pos == -1:
        print("Could not parse:", line)
        raise ValueError("no team header in line: %r" % (line,))
    name = line[pos+len(marker):]

    pos = name.rfind("(")
    if pos == -1:
        return name.rstrip(), ""
    conf = name[pos:].replace("(", "").replace(")", "")
    name = name[:pos].rstrip()
    return name, conf


def getGame(line):
    """Parse one schedule table row into a game dictionary.

    The cell tags are replaced by ``:`` and the row is split on it, giving
    8 fields (no site), 9 fields (with site) or 10 fields (site and
    comment); the first field is the empty text before the first cell.
    The 10-field form is accepted here to match the other ``getGame``
    definition in this notebook.

    Returns
    -------
    dict with the keys ``date, day, home, opponent, result, score,
    against`` or ``None`` when the result field is empty.  ``home`` is 1
    for ``"vs."``, -1 for ``"@"`` and 0 whenever a site field is present.

    Raises
    ------
    ValueError
        On an unexpected number of fields, an unknown home marker, or
        non-integer scores (the original called ``exit()``).
    """
    line = line.replace("<td align=\"right\">", ":")
    line = line.replace("<td>", ":")
    line = line.replace("<tr>", "")
    linevals = line.split(":")

    if len(linevals) not in (8, 9, 10):
        print("SPLIT ERROR:", line)
        raise ValueError("unexpected number of fields (%d) in row" % len(linevals))
    date, day, home, opp, res, score, against = linevals[1:8]
    site = linevals[8] if len(linevals) >= 9 else None

    opp = opp.replace("*", "")
    if home == "vs.":
        home = 1
    elif home == "@":
        home = -1
    else:
        print("ERROR with Home:", home)
        raise ValueError("unknown home marker: %r" % (home,))
    # Any row that carries a site field is flagged with home = 0.
    if site is not None:
        home = 0

    if len(res) == 0:
        return None

    try:
        score = int(score)
        against = int(against)
    except ValueError:
        print("DICT ERROR\t", date, day, home, opp, res, score, against)
        raise

    game = {}
    game['date'] = date
    game['day'] = day
    game['home'] = home
    game['opponent'] = opp
    game['result'] = res
    game['score'] = score
    game['against'] = against
    return game


def parseTable(table):
    """Turn the lines of one team ``<table>`` into a team record.

    ``table[0]`` is skipped, ``table[1]`` is handed to ``getName`` for the
    team name and conference, and every later line is handed to
    ``getGame``; falsy results from ``getGame`` are dropped.

    Returns
    -------
    dict with the keys ``"games"`` (list), ``"name"`` and ``"conf"``.
    """
    # Closing tags carry no information for the line-based parsers.
    rows = [x.replace("</tr>", "").replace("</td>", "") for x in table]

    teamdata = {}
    teamdata["games"] = []

    name, conf = getName(rows[1])
    teamdata["name"] = name
    teamdata["conf"] = conf

    for line in rows[2:]:
        gamedata = getGame(line)
        if gamedata:
            teamdata["games"].append(gamedata)

    # The unreachable exit() that used to follow this return was removed.
    return teamdata

    

def parseScores(txt):
    """Parse every ``<table>`` of a saved schedule page.

    Parameters
    ----------
    txt : path of the saved page.

    Returns
    -------
    dict mapping the team name reported by ``parseTable`` to
    ``{'conf': ..., 'games': [...]}``.  The running number of teams is
    printed after each table.
    """
    # Read through a context manager so the handle is always closed.
    with open(txt) as handle:
        fdata = [x.strip('\r\n') for x in handle]

    yeardata = {}
    nlines = len(fdata)
    i = 0
    while i < nlines:
        if fdata[i].find("<table") != -1:
            # Collect the lines up to (not including) the closing tag.  The
            # bounds check keeps an unterminated table at the end of the
            # file from raising IndexError.
            tdata = []
            while i < nlines and fdata[i].find("</table>") == -1:
                tdata.append(fdata[i])
                i += 1

            teamdata = parseTable(tdata)
            yeardata[teamdata['name']] = {}
            yeardata[teamdata['name']]['conf'] = teamdata['conf']
            yeardata[teamdata['name']]['games'] = teamdata['games']
            print(len(yeardata))
        i += 1
    return yeardata


def parseAbrv(txt):
    """Read ``left = right`` pairs from ``txt`` into a dict ``{left: right}``.

    Lines containing ``----`` are treated as separators and skipped, as
    are blank lines (which used to abort the run).

    Raises
    ------
    ValueError
        When a remaining line does not split into exactly two parts on
        ``' = '`` (the original called ``exit()``).
    """
    print(txt)
    with open(txt) as handle:
        fdata = [x.strip('\r\n').strip('\t') for x in handle]

    adata = {}
    for line in fdata:
        if line.find('----') != -1:
            continue
        if not line.strip():
            continue
        vals = line.split(' = ')
        if len(vals) != 2:
            print(line, '\t', vals)
            raise ValueError("expected 'name = value', got: %r" % (line,))
        adata[vals[0]] = vals[1]

    return adata

def parseLocation(locdata, abrvdata):
    """Attach a city and state to every team listed in ``abrvdata``.

    Parameters
    ----------
    locdata : path of a tab-separated file whose columns 0, 2 and 3 are
        read as team name, city and state.
    abrvdata : dict mapping team name to abbreviation.

    Returns
    -------
    dict mapping each team of ``abrvdata`` to
    ``{"Abrv": ..., "City": ..., "State": ...}``.  City and State stay
    ``None`` for teams found neither in the file nor in the built-in
    override table; those teams are printed at the end.

    Teams from the file that are missing in ``abrvdata`` are reported with
    a ready-to-paste assignment template.  Overrides are applied only to
    teams present in ``abrvdata`` (the original raised KeyError when one
    was absent), and lines with fewer than four columns are skipped
    instead of raising IndexError.
    """
    print(locdata)
    with open(locdata) as handle:
        fdata = [x.strip('\r\n').strip('\t') for x in handle]

    print(list(abrvdata.keys()))
    fteam = {}
    for team in abrvdata.keys():
        fteam[team] = {}
        fteam[team]["Abrv"] = abrvdata[team]
        fteam[team]["City"] = None
        fteam[team]["State"] = None

    for line in fdata:
        lvals = line.split('\t')
        if len(lvals) < 4:
            continue
        name = lvals[0]
        city = lvals[2]
        state = lvals[3]
        if state == "Hawai'i":
            state = "Hawaii"

        if fteam.get(name) == None:
            print("#", name, city, state)
            print("            fteam[\""+name+"\"][\"City\"] = ")
            print("            fteam[\""+name+"\"][\"State\"] = ")
        else:
            fteam[name]["City"] = city
            fteam[name]["State"] = state

    print(sorted(fteam.keys()))

    # Locations for teams whose canonical name differs from the name used
    # in the location file: (team, city, state).
    overrides = (
        ("Army", "West Point", "New York"),
        ("Bowling Green State", "Bowling Green", "Ohio"),
        ("Brigham Young", "Provo", "Utah"),
        ("Central Florida", "Orlando", "Florida"),
        ("Kent", "Kent", "Ohio"),
        ("Louisiana State", "Baton Rouge", "Louisiana"),
        ("Miami (Florida)", "Coral Gables", "Florida"),
        ("Middle Tennessee State", "Murfreesboro", "Tennessee"),
        ("Mississippi", "Oxford", "Mississippi"),
        ("North Carolina State", "Raleigh", "North Carolina"),
        ("Northern Illinois", "DeKalb", "Illinois"),
        ("Southern California", "Los Angeles", "California"),
        ("Southern Methodist", "University Park", "Texas"),
        ("Southern Mississippi", "Hattiesburg", "Mississippi"),
        ("Texas Christian", "Fort Worth", "Texas"),
        ("Hawaii", "Honolulu", "Hawaii"),
        ("Miami (Ohio)", "Oxford", "Ohio"),
        ("Texas-El Paso", "El Paso", "Texas"),
        ("Texas-San Antonio", "San Antonio", "Texas"),
        ("Nevada-Las Vegas", "Las Vegas", "Nevada"),
        ("Florida International", "Miami", "Florida"),
    )
    for team, city, state in overrides:
        if team in fteam:
            fteam[team]["City"] = city
            fteam[team]["State"] = state

    # Report teams that still have no location.
    for k, v in fteam.items():
        if v["City"] == None:
            print(k)

    return fteam
            
#    print sorted(states.keys())        
#    print abrvdata

# Driver: build the team location table and the 2015 game table, then
# write them out.  Output files are opened in ``with`` blocks so they are
# flushed and closed before the next step runs.
#getAbrv('', 'Abvr.dat')
abrvdata = parseAbrv('Abrv.txt')
locdata = parseLocation('Location.csv', abrvdata)
with open("Teams.json", "w") as jsonfile:
    json.dump(locdata, jsonfile)

yeardata = parseScores('2015.dat')
with open("2015games.json", "w") as jsonfile:
    json.dump(yeardata, jsonfile)

writeAbrvFile('Teams.csv', locdata)
#writeFile('2015.csv', yeardata)
#writeConfFile('2015.conf', yeardata)
#writeGraphFile('2015.graph', yeardata)



In [None]:

##############################################################################
#
#
# parseScoringSummary()
#
#
##############################################################################
def parseScoringSummary(gameid, scoredata, team1data, team2data, debug):
    """Extract the scoring plays of one game from its scoring-summary HTML.

    Parameters
    ----------
    gameid : accepted but not used.
    scoredata : str, HTML text holding the scoring-summary rows.
    team1data, team2data : indexable; element 0 is the team name, element 1
        must convert to ``int`` and element 3 is used as an additional key
        when looking up the scoring team.
    debug : accepted but not used.

    Returns
    -------
    dict mapping each converted team name to a list of plays
    ``[index, scoretype, quarter, timestamp, awayscore, homescore,
    headline, details]`` where ``quarter`` is 1-4 (5 for overtime) and
    ``details`` is ``[plays, yards, minutes, seconds]`` or four ``-1``.

    Raises
    ------
    ValueError / IndexError
        When drive details or the quarter cannot be parsed.
    KeyError
        When a play belongs to a team that is neither of the two teams.

    Changes from the earlier version: the parsed plays are no longer
    discarded right before being grouped (``summary`` used to be reset to
    an empty list, so the returned lists were always empty), the quarter
    tracker is initialised before use, three overwritten ``split`` calls
    were dropped, and failures raise instead of calling an undefined name.
    """
    def between(text, flag, closer):
        # NOTE(review): when ``flag`` is absent, find() returns -1 and the
        # slice yields unrelated text rather than ""; kept as in the
        # original because downstream checks were written against it.
        start = text.find(flag)
        stop = text.find(closer, start + 1)
        return text[start + len(flag):stop]

    # One chunk per logo cell; the text before the first logo is chunk 0.
    scores = scoredata.split("<tr><td class=\"logo\"><img class=\"team-logo\" ")

    teams = {}
    teams[int(team1data[1])] = tc.TeamConv(team1data[0])
    teams[int(team2data[1])] = tc.TeamConv(team2data[0])
    teams[team1data[3]] = tc.TeamConv(team1data[0])
    teams[team2data[3]] = tc.TeamConv(team2data[0])
    summary = []

    awayteam = None
    hometeam = None
    quarter = None
    tmpquarter = None
    for s in range(len(scores)):
        score = scores[s]
        teamnum, logoaddr = tn.stripTeamNum(score)

        ## Quarter: a header found in this chunk applies to the NEXT chunk,
        ## except for the very first header, which also seeds ``quarter``.
        flag = "<th id=\"quarter-1\" class=\"quarter\" colspan=\"2\">"
        pos1 = score.find(flag)
        pos2 = score.find("</th>", pos1+1)
        if pos1 != -1:
            tmpquarter = score[pos1+len(flag):pos2]
        if quarter == None:
            quarter = tmpquarter

        ## Score type
        scoretype = between(score, "<div class=\"score-type\">", "</div>")

        ## Drive time
        timestamp = between(score, "<div class=\"time-stamp\">", "</div>")

        ## Result
        headline = between(score, "<div class=\"headline\">", "</div>")

        ## Require timestamp and result to continue
        if len(timestamp) == 0 and len(headline) == 0:
            continue

        ## Score
        homescore = int(between(score, "<td class=\"home-score\">", "</td>"))
        awayscore = int(between(score, "<td class=\"away-score\">", "</td>"))

        ## Details
        flag = "class=\"drive-details\">"
        details = [-1, -1, -1, -1]
        if score.find(flag) != -1:
            detail = between(score, flag, "</div>")
            dvals = detail.split(",")
            try:
                plays = dvals[0].replace("plays", "")
                plays = plays.replace("play", "")
                yards = dvals[1].replace("yards", "")
                yards = yards.replace("yard", "")
            except IndexError:
                print("Problem with drive (plays,yards) details")
                print(dvals)
                raise

            if len(dvals) == 3:
                try:
                    ptime = dvals[2].strip()
                    minutes, seconds = ptime.split(":")
                except ValueError:
                    print("Problem with drive (time) details")
                    print(dvals)
                    raise
            else:
                minutes = -1
                seconds = -1

            try:
                details = [int(plays), int(yards), int(minutes), int(seconds)]
            except ValueError:
                print("Problem with drive details: Making ints")
                print(dvals)
                raise

        if hometeam == None:
            hometeam = between(score, "<th class=\"home-team\">", "</th>")
        if awayteam == None:
            awayteam = between(score, "<th class=\"away-team\">", "</th>")

        # A missing quarter header falls through to the error branch below.
        qnum = quarter.title() if quarter is not None else ""
        if qnum.find("First") != -1:
            qnum = 1
        elif qnum.find("Second") != -1:
            qnum = 2
        elif qnum.find("Third") != -1:
            qnum = 3
        elif qnum.find("Fourth") != -1:
            qnum = 4
        elif qnum.find("Overtime") != -1:
            qnum = 5
        else:
            print("Could not extract quarter from", qnum)
            raise ValueError("could not extract quarter from %r" % (qnum,))
        summary.append([teams[teamnum], scoretype, qnum, timestamp, awayscore, homescore, headline, details])

        # Switch to True while debugging to dump every parsed play.
        show = False
        if show:
            print(s, '\t', teams[teamnum], '\t', scoretype, '\t', qnum, '\t', timestamp, '\t', end=' ')
            print(headline, '\t', awayteam, awayscore, '-', homescore, hometeam, '\t', end=' ')
            print(details, '\t', end=' ')
            print(scores[s])

        quarter = tmpquarter

    # Group the plays by team; the play's position in ``summary`` is put in
    # front of each record.
    scores = {}
    scores[tc.TeamConv(team1data[0])] = []
    scores[tc.TeamConv(team2data[0])] = []
    for i in range(len(summary)):
        teamname = tc.TeamConv(summary[i][0])
        if scores.get(teamname) == None:
            print("Key error in scores:", teamname)
            print(list(scores.keys()))
            raise KeyError(teamname)
        ssum = summary[i][1:]
        ssum.insert(0, i)
        scores[teamname].append(ssum)

    return scores

In [None]:


def _takeTeamAndScore(tokens):
    """Consume ``<team words ...> <integer score>`` from the front of ``tokens``.

    Returns ``(team, score, remaining_tokens)``.  ``team`` and ``score`` are
    ``None`` (and ``tokens`` is returned untouched) when no token parses as
    an integer.
    """
    for idx, token in enumerate(tokens):
        try:
            value = int(token)
        except ValueError:
            continue
        return " ".join(tokens[:idx]), value, tokens[idx + 1:]
    return None, None, tokens


def _splitFusedScore(token):
    """Split a token whose score is glued to the name, e.g. ``"Rutgers6"``.

    The longest run of 1-4 trailing digits wins.  Returns
    ``(prefix, score)`` or ``(None, None)`` when the token has no digit tail.
    """
    for width in (4, 3, 2, 1):
        tail = token[-width:]
        if len(tail) == width and all(ch in "0123456789" for ch in tail):
            return token[:-width], int(tail)
    return None, None


def parseHistoricalScores(scorepath, teampath, histjsonfile):
    """Parse the saved yearly team and score listings and dump them as JSON.

    For every year from 1869 through 2019 the files ``<teampath>/<year>.html``
    and ``<scorepath>/<year>.html`` are read when both exist.  The result
    written to ``histjsonfile`` maps the year (as a string) to
    ``{'teams': {team: conference}, 'scores': [record, ...]}`` where each
    record has the keys ``date, team1, conf1, score1, team2, conf2, score2,
    comment``.  Teams that are absent from the team listing are recorded
    with the conference ``"DivII"``.

    Differences from the earlier version of this function:
    * blank or too-short lines are skipped instead of raising IndexError;
    * a score glued to the last token keeps the preceding team words and
      also recognises a single trailing digit;
    * a parsed score of 0 is no longer mistaken for "missing";
    * lines whose scores cannot be found are reported and skipped instead of
      passing an unparsed token list to ``tc.TeamConv``.
    """
    historicaldata = {}
    for year in range(1869, 2020):
        key = str(year)

        teamfile  = teampath + "/" + key + ".html"
        scorefile = scorepath + "/" + key + ".html"
        if not (os.path.exists(teamfile) and os.path.exists(scorefile)):
            continue

        with open(teamfile) as handle:
            teamdata = [x.strip('\r\n') for x in handle]
        with open(scorefile) as handle:
            scoredata = [x.strip('\r\n') for x in handle]

        # The listing is indentation-coded: a letter in column 0 is skipped,
        # a letter in column 1 starts a conference, a letter in column 2 is
        # a team belonging to the current conference.  Slices (not indexes)
        # keep short or blank lines from raising IndexError.
        yearteams = {}
        conference = None
        for team in teamdata[1:]:
            if team[0:1].isalpha():
                continue
            if team[1:2].isalpha():
                conference = team.strip()
                continue
            if team[2:3].isalpha():
                yearteams[tc.TeamConv(team.strip())] = conference

        yearscores = []
        for score in scoredata:
            vals = score.split()
            if not vals:
                continue

            ## Date
            date = vals[0]

            ## Team1 and its score
            team1, score1, rest = _takeTeamAndScore(vals[1:])
            if score1 is None:
                print('\tCould not parse score line:', score)
                continue

            ## Team2 and its score (possibly glued to the last token)
            team2, score2, rest = _takeTeamAndScore(rest)
            if score2 is None and rest:
                prefix, score2 = _splitFusedScore(rest[-1])
                if score2 is not None:
                    team2 = " ".join(rest[:-1] + [prefix]).strip()
                    rest = []
            if score2 is None:
                print('\tCould not parse score line:', score)
                continue

            ## Whatever follows the second score is kept as a comment
            comment = " ".join(rest) if rest else None

            team1 = tc.TeamConv(team1)
            team2 = tc.TeamConv(team2)

            sval = {}
            sval['date']    = date
            sval['team1']   = team1
            sval['conf1']   = yearteams.setdefault(team1, "DivII")
            sval['score1']  = score1
            sval['team2']   = team2
            sval['conf2']   = yearteams.setdefault(team2, "DivII")
            sval['score2']  = score2
            sval['comment'] = comment
            yearscores.append(sval)

        historicaldata[key] = {}
        historicaldata[key]['teams']  = yearteams
        historicaldata[key]['scores'] = yearscores
        print('\t', key)

    with open(histjsonfile, "w") as handle:
        json.dump(historicaldata, handle)
    print('\tWrote', len(historicaldata), "scores to", histjsonfile)

        

def getHistoricalTeams(basepath, force):
    """Download the per-year team listing files (cf<year>tms.txt).

    Every year from 1869 through 2013 is probed with ``web.checkURL``; years
    whose URL check fails are skipped.  A reachable page is stored as
    ``<basepath>/<year>.html`` through ``web.getHTML``.

    Args:
        basepath: Directory that receives the downloaded files; it must exist.
        force: When ``False``, an already existing target file ends the run.

    Returns:
        ``False`` when ``basepath`` does not exist or when a target file is
        already present and ``force`` is ``False``; ``None`` once the loop has
        run to completion.
    """
    for year in range(1869, 2014):
        yearid = str(year)
        url = "http://wilson.engr.wisc.edu/rsfc/history/howell/cf" + yearid + "tms.txt"
        if not web.checkURL(url):
            continue

        if not os.path.exists(basepath):
            print(" ---> No save path {0} \t{1}".format(basepath, yearid))
            return False
        savehtml = basepath + "/" + yearid + ".html"

        # NOTE(review): this leaves the whole function on the first year that
        # is already on disk, so later years are never fetched -- confirm
        # whether skipping just this year (continue) was intended.
        if os.path.exists(savehtml) and force == False:
            print(" ---> Save path exists, but force -> FALSE")
            return False

        if web.getHTML(url, savehtml):
            print(" ---> {0}    in    {1}".format(url, savehtml))
        else:
            print(" Did not download {0}".format(url))



def getHistoricalScores(basepath, force):
    """Download the per-year game result files (cf<year>gms.txt).

    Every year from 1869 through 2013 is probed with ``web.checkURL``; years
    whose URL check fails are skipped.  A reachable page is stored as
    ``<basepath>/<year>.html`` through ``web.getHTML``.

    Args:
        basepath: Directory that receives the downloaded files; it must exist.
        force: When ``False``, an already existing target file ends the run.

    Returns:
        ``False`` when ``basepath`` does not exist or when a target file is
        already present and ``force`` is ``False``; ``None`` once the loop has
        run to completion.
    """
    for year in range(1869, 2014):
        yearid = str(year)
        url = "http://wilson.engr.wisc.edu/rsfc/history/howell/cf" + yearid + "gms.txt"
        if not web.checkURL(url):
            continue

        if not os.path.exists(basepath):
            print(" ---> No save path {0} \t{1}".format(basepath, yearid))
            return False
        savehtml = basepath + "/" + yearid + ".html"

        # NOTE(review): this leaves the whole function on the first year that
        # is already on disk, so later years are never fetched -- confirm
        # whether skipping just this year (continue) was intended.
        if os.path.exists(savehtml) and force == False:
            print(" ---> Save path exists, but force -> FALSE")
            return False

        if web.getHTML(url, savehtml):
            print(" ---> {0}    in    {1}".format(url, savehtml))
        else:
            print(" Did not download {0}".format(url))



def getScores(basepath, savehtml, yearid, force):
    """Download the schedule/score page and store it at ``savehtml``.

    Args:
        basepath: Not used by this function.
        savehtml: Path of the file the page is written to.
        yearid: Not used by this function (see the note on the URL below).
        force: When ``False``, an existing ``savehtml`` is left untouched.

    Returns:
        The (false) result of ``web.checkURL`` when the URL check fails,
        ``False`` when ``savehtml`` exists and ``force`` is ``False``,
        otherwise ``True``.
    """
    # NOTE(review): the season is hard-coded to 2015 and ``yearid`` is ignored
    # -- confirm whether the URL should be built from ``yearid``.
    url = 'http://www.jhowell.net/cf/scores/Sked2015.htm'
    result = web.checkURL(url)
    if result == False:
        print(" ---> {0}  =  {1}".format(url, result))
        return result

    if os.path.exists(savehtml) and force == False:
        print(" ---> Save path exists, but force -> FALSE")
        return False

    if web.getHTML(url, savehtml):
        print(" ---> {0}    in    {1}".format(url, savehtml))
    else:
        print(" Did not download {0}".format(url))

    # NOTE(review): True is returned even when the download above failed --
    # confirm that callers do not rely on this to mean "file is available".
    return True


def parseScores(txt):
    """Parse every ``<table>`` of a saved score page into per-team data.

    Args:
        txt: Path of the file to read.

    Returns:
        Dict keyed by the team name reported by ``parseTable``; each value is
        a dict with the keys ``'conf'`` and ``'games'``.  A file without any
        ``<table`` line yields an empty dict.
    """
    # Read through a context manager so the handle is closed even on errors.
    with open(txt) as infile:
        fdata = [x.strip('\r\n') for x in infile.readlines()]

    yeardata = {}

    i = 0
    while i < len(fdata):
        line = fdata[i]
        if line.find("<table") != -1:
            # Collect the lines from the opening tag up to, but excluding,
            # the line that carries the closing tag.  A table that is never
            # closed runs off the end of the file and raises IndexError.
            tdata = []
            while line.find("</table>") == -1:
                tdata.append(line)
                i += 1
                line = fdata[i]

            teamdata = parseTable(tdata)
            yeardata[teamdata['name']] = {
                'conf': teamdata['conf'],
                'games': teamdata['games'],
            }
        i += 1
    return yeardata



def parseTable(table):
    """Turn the lines of one team table into a team record.

    Args:
        table: List of text lines.  The first line is discarded, the second
            is handed to ``getName`` and every remaining line to ``getGame``.

    Returns:
        Dict with the keys ``'games'`` (list of the truthy ``getGame``
        results), ``'name'`` and ``'conf'``.
    """
    # Strip the closing tags up front so only opening tags act as separators.
    rows = [x.replace("</tr>", "").replace("</td>", "") for x in table]

    rows = rows[1:]
    name, conf = getName(rows[0])

    teamdata = {}
    teamdata["games"] = []
    teamdata["name"] = name
    teamdata["conf"] = conf

    for line in rows[1:]:
        gamedata = getGame(line)
        if gamedata:
            teamdata["games"].append(gamedata)

    return teamdata

    
    
def getName(line):
    """Extract the team name and conference from a table header line.

    The text after ``<p align="center">`` is split at its last ``(``: what
    precedes it is the name, what follows (with all parentheses removed) is
    the conference.

    Args:
        line: Header line of a team table.

    Returns:
        Tuple ``(name, conf)``.  ``conf`` is an empty string when the text
        carries no parenthesised part.
    """
    marker = "<p align=\"center\">"
    pos = line.find(marker)
    if pos == -1:
        print("Could not parse: {0}".format(line))
        # NOTE(review): f is not defined in this part of the file; calling it
        # presumably aborts the run -- confirm.
        f()
    name = line[pos + len(marker):]

    paren = name.rfind("(")
    if paren == -1:
        # No conference given: previously the -1 was used as a slice index,
        # which chopped characters off the name and invented a conference.
        return name, ""

    conf = name[paren:].replace("(", "").replace(")", "")
    # Drop the separating whitespace instead of blindly cutting one character.
    name = name[:paren].rstrip()
    return name, conf


def getGame(line):
    """Parse one table row into a game record.

    The opening ``<td ...>`` tags are turned into ``:`` separators, so a row
    splits into 8 fields (leading empty field, date, day, home marker,
    opponent, result, score, against), optionally followed by a site field
    and a comment field.

    Args:
        line: Table row with the closing ``</td>``/``</tr>`` tags already
            removed.

    Returns:
        ``None`` when the result field is empty, otherwise a dict with the
        keys ``'date'``, ``'day'``, ``'home'`` (1 for "vs.", -1 for "@", 0
        when a site field is present), ``'opponent'``, ``'result'``,
        ``'score'`` and ``'against'`` (both ints).
    """
    line = line.replace("<td align=\"right\">", ":")
    line = line.replace("<td>", ":")
    line = line.replace("<tr>", "")
    linevals = line.split(":")

    if len(linevals) not in (8, 9, 10):
        # Previously an unexpected field count fell through to an
        # UnboundLocalError without any hint about the offending row.
        print("SPLIT ERROR: {0}".format(line))
        # NOTE(review): f is not defined in this part of the file; calling it
        # presumably aborts the run -- confirm.
        f()

    date, day, home, opp, res, score, against = linevals[1:8]
    site = linevals[8] if len(linevals) >= 9 else None

    opp = opp.replace("*", "")
    if home == "vs.":
        home = 1
    elif home == "@":
        home = -1
    else:
        print("ERROR with Home: {0}".format(home))
        f()
    if site is not None:
        # A site field overrides the home/away marker.
        home = 0

    if len(res) == 0:
        return None

    gamedata = {}
    try:
        gamedata['date'] = date
        gamedata['day'] = day
        gamedata['home'] = home
        gamedata['opponent'] = opp
        gamedata['result'] = res
        gamedata['score'] = int(score)
        gamedata['against'] = int(against)
    except ValueError:
        fields = (date, day, home, opp, res, score, against)
        print("DICT ERROR\t{0}".format(" ".join(str(x) for x in fields)))
        f()

    return gamedata
    
    


##########################################
#
# This is the output line to the csv file
#
##########################################
def writeLine(outfile, Date, Team1, Team2, Score1, Score2, Winner):
    """Write one comma-separated record followed by a newline.

    Each of the six values is converted with ``str`` before joining; no
    quoting or escaping is applied.

    Args:
        outfile: Object with a ``write`` method.
        Date, Team1, Team2, Score1, Score2, Winner: Column values, in the
            order they are written.
    """
    columns = (Date, Team1, Team2, Score1, Score2, Winner)
    outfile.write(",".join(str(column) for column in columns))
    outfile.write("\n")
    


def writeFile(fname, yeardata):
    """Write all games of a season to a CSV file.

    A header row is written first, then one row per game of every team, with
    the teams in sorted order.

    Args:
        fname: Path of the file to create (overwritten if present).
        yeardata: Dict mapping a team name to a dict whose ``'games'`` entry
            is a list of game dicts with the keys ``'date'``, ``'opponent'``,
            ``'score'``, ``'against'`` and ``'result'``.
    """
    nline = 0
    # The context manager closes the file even when a record is malformed.
    with open(fname, "w") as outfile:
        writeLine(outfile, "Date", "Team1", "Team2", "Score1", "Score2", "Winner")
        for team in sorted(yeardata.keys()):
            for entry in yeardata[team]["games"]:
                # Only an explicit loss names the opponent as winner; wins and
                # every other result code name the team itself.
                if entry['result'] == 'L':
                    winner = entry['opponent']
                else:
                    winner = team
                writeLine(outfile, entry['date'], team, entry['opponent'],
                          entry['score'], entry['against'], winner)
                nline += 1
    print("\tWrote {0} lines to {1}".format(nline, fname))

In [None]:


def Fix(name, size):
    """Right-pad ``name`` with spaces up to ``size`` characters.

    A name that is already ``size`` characters or longer is returned without
    any padding.
    """
    # A non-positive repeat count yields an empty string, so long names pass
    # through unchanged.
    return name + " " * (size - len(name))
    
def driveScore(result):
    """Map a drive result headline to the points assigned to that drive.

    The headline is compared in title case.  Negative values are used for
    results in which the touchdown or safety came on a turnover or kick.

    Args:
        result: Headline text of the drive (may be empty).

    Returns:
        The point value for a known headline, 0 for an empty headline or a
        headline listed as scoreless.  An unknown headline is reported and
        ``f()`` is called before 0 is returned.
    """
    res = result.title()
    if len(result) == 0:
        return 0

    points = {
        "Safety": -2,
        "End Of Game Touchdown": 6,
        "Touchdown": 7,
        "Touchdown Touchdown": 7,
        "Fumble Touchdown": -7,
        "Fumble Return Touchdown": -7,
        "Punt Touchdown": -7,
        "Punt Return Touchdown": -7,
        "End Of Half Touchdown": -7,
        "Downs Touchdown": -7,
        "Interception Touchdown": -7,
        "Missed Fg Touchdown": -7,
        "Field Goal": 3,
    }
    if res in points:
        return points[res]

    noPoints = ["Kickoff", "Interception", "Punt", "Missed Fg", "Downs",
                "End Of Game", "Fumble", "End Of Half", "Possession (For Ot Drives)"]
    if res in noPoints:
        return 0

    print("-> [{0}]".format(res))
    # Hint when the unknown headline at least contains a known scoreless one.
    # (The old check iterated over single characters and could never match.)
    if any(nop in res for nop in noPoints):
        print("Something is here")
    # NOTE(review): f is not defined in this part of the file; calling it
    # presumably aborts the run -- confirm.
    f()
    return 0

def augmentScore(drives, d, team, augscore):
    """Add ``augscore`` to the running score of one team on drive ``d``.

    The entry ``drives[d]['runningscore'][team]`` is updated in place at
    index 1, and the same ``drives`` object is returned.
    """
    score_entry = drives[d]['runningscore'][team]
    score_entry[1] += augscore
    return drives


def driveFixes(gameid, drives, teams):
    """Apply hand-coded corrections to the drive list of specific games.

    Args:
        gameid: Game identifier as a string.
        drives: List of drive dicts; modified in place.
        teams: Mapping that is indexed here with team labels such as
            ``'MIA'`` to obtain the value stored in a drive's ``'team'``.

    Returns:
        The same ``drives`` list.  Games without an entry below are returned
        unchanged.
    """
    if gameid == "400756912": # Miami vs FAU
        print("Fixing drives for this game")
        drives[6]['team'] = teams['MIA']
        drives[6]['result'] = "Punt"
        print("\tDrive6 -> Punt")
        drives[7]['team'] = teams['FAU']
        drives[7]['result'] = "Fumble"
        print("\tDrive7 -> Fumble")
        drives[8]['team'] = teams['MIA']
    if gameid == "400603869": # Texas A&M vs Ball St
        drives[20]['team'] = teams['BALL']
    if gameid == "400603936": # Texas A&M game (opponent not evident here -- TODO confirm)
        drives[9]['team'] = teams['TA&M']
    if gameid == "400756952":
        augmentScore(drives, 8, 1, 3)
    if gameid == "400787442":
        augmentScore(drives, 2, 1, 7)
    if gameid == "400763533": # Iowa vs Wis
        drives[9]['team'] = teams['IOWA']
        drives[15]['team'] = teams['WIS']
    if gameid == "400763552": # Iowa vs MD
        drives[19]['team'] = teams['MD']
        drives[20]['team'] = teams['MD']
    if gameid == "400787296": # game involving SDSU (old comment read "Iowa vs MD" -- TODO confirm)
        drives[9]['team'] = teams['SDSU']
    if gameid == "400763589":
        augmentScore(drives, 26, 0, 2)
    if gameid == "400763590":
        drives.pop(15)
    if gameid == "400763645":
        # The statements below depend on their exact order: several entries
        # share list objects and drive 31 is replaced before drive 34 is
        # removed.
        drives[30]['team'] = teams['MTSU']
        drives[30]['score'] = 7
        drives[32]['runningscore'] = drives[31]['runningscore']
        drives[32]['team'] = teams['MTSU']
        drives[32]['result'] = "Field Goal"
        drives[32]['score'] = 3
        drives[31] = drives[34]
        drives[31]['result'] = "Touchdown"
        drives[31]['team'] = teams['MRSH']
        drives[31]['score'] = 7
        drives[33]['runningscore'] = drives[32]['runningscore']
        drives[33]['result'] = "Missed Field Goal"
        drives[33]['team'] = teams['MRSH']
        drives[33]['score'] = 0
        drives.pop(34)
    return drives

def analyzeDriveSummary(gameid, drives, awayteam, hometeam):
    """Reconcile per-drive points with the running score of a game.

    Steps, in order:
      1. apply the hand-coded ``driveFixes`` for this game,
      2. drop duplicate touchdown entries that follow a defensive/return
         touchdown,
      3. pull a score change forward when a scoring drive did not update the
         running score,
      4. lower running scores that are higher than a later entry,
      5. replay all drives, adjusting drives whose points do not match the
         running score (missed extra point, two-point conversion, ...).

    Args:
        gameid: Game identifier passed on to ``driveFixes``.
        drives: List of drive dicts with the keys ``'team'``, ``'result'``,
            ``'score'``, ``'summary'`` and ``'runningscore'``; modified in
            place.
        awayteam: Pair ``(team number, team label)`` of the away team.
        hometeam: Pair ``(team number, team label)`` of the home team.

    Returns:
        The corrected ``drives`` list.  When more than three drives remain
        inconsistent, a message is printed and ``f()`` is called first.
    """
    # Two-way lookup: team number -> label and label -> team number.
    teams = {}
    teams[int(awayteam[0])] = awayteam[1]
    teams[awayteam[1]] = int(awayteam[0])
    teams[int(hometeam[0])] = hometeam[1]
    teams[hometeam[1]] = int(hometeam[0])

    ## Some are just totally messed up and require hand coding:
    drives = driveFixes(gameid, drives, teams)

    realDebug = False
    if realDebug:
        print("\t{0} \t# Running Score \t\t\tResult".format(Fix("Driving", 10)))
        for i in range(len(drives)-1):
            drive = drives[i]
            print("{0} \t{1} \t{2} {3} \t{4} \t{5}".format(
                i, Fix(teams[drive['team']], 10), drive['score'],
                drive['runningscore'], drive['summary'], drive['result']))

    ## Check for an extra entry due to Punt Return
    extras = []
    for i in range(len(drives)-1):
        result = drives[i]['result']
        rscore = drives[i]['score']
        nextresult = drives[i+1]['result']
        nextryards = drives[i+1]['summary'][1]
        nextrscore = drives[i+1]['score']
        nextrtime = 60*drives[i+1]['summary'][2] + drives[i+1]['summary'][3]

        # Both entries must be touchdowns.  (The first test used to read
        # ``find(...) -1``, i.e. a subtraction that was almost always truthy.)
        if result.find("Touchdown") != -1 and nextresult.find("Touchdown") != -1:
            if nextryards == 0 and rscore < 0 and nextrscore > 0 and nextrtime == 0:
                extras.append(i+1)
                print("=====================================================")
                print("=====================================================")
                print("{0}".format(drives[i]))
                print("{0}  <--- Removing this guy.".format(drives[i+1]))
                print("=====================================================")
                print("=====================================================")
    # Remove from the back so the indices collected above stay valid.
    for extra in reversed(extras):
        drives.pop(extra)

    debugScore = False
    if debugScore:
        print("#\tPrev\tCurr\tDiff\tDrive\tNext\tDiffNext")
    for i in range(len(drives)-1):
        ## Offense score
        drivingteam = teams[drives[i]['team']]

        if i == 0:
            prevscores = [0, 0]
        else:
            prevscores = [drives[i-1]['runningscore'][0][1], drives[i-1]['runningscore'][1][1]]
        currscores = [drives[i]['runningscore'][0][1], drives[i]['runningscore'][1][1]]
        nextscores = [drives[i+1]['runningscore'][0][1], drives[i+1]['runningscore'][1][1]]
        diffscores = [currscores[0] - prevscores[0], currscores[1] - prevscores[1]]
        diffnextscores = [nextscores[0] - currscores[0], nextscores[1] - currscores[1]]

        # Points of this drive as [away, home]; index 0 is the away team.
        if drivingteam == awayteam[1]:
            drivescore = [drives[i]['score'], 0]
        if drivingteam == hometeam[1]:
            drivescore = [0, drives[i]['score']]

        notes = []
        if debugScore:
            notes.append("{0} \t{1} \t{2} \t{3} \t{4} \t{5} \t{6} \t".format(
                i, prevscores, currscores, diffscores, drivescore, nextscores, diffnextscores))

        # (condition, index of the team that received the points, label):
        # away scored, home gave up defensive points to away, home scored,
        # away gave up defensive points to home.
        creditRules = (
            (drivescore[0] > 0, 0, "Away Score"),
            (drivescore[1] < 0, 0, "Away Score on defense"),
            (drivescore[1] > 0, 1, "Home Score"),
            (drivescore[0] < 0, 1, "Home Score on defense"),
        )
        for triggered, teamid, label in creditRules:
            if triggered and diffscores[teamid] == 0:
                # The running score did not move on a scoring drive: take the
                # value of the following entry.
                if debugScore:
                    notes.append("{0}, but no update. Setting to {1} from {2}".format(
                        label, drives[i+1]['runningscore'][teamid][1],
                        drives[i]['runningscore'][teamid][1]))
                drives[i]['runningscore'][teamid][1] = drives[i+1]['runningscore'][teamid][1]

        ## Strange thing that happens in OT
        if diffnextscores[0] != 0 and diffnextscores[1] != 0:
            if i == len(drives) - 2:
                if debugScore:
                    notes.append("Two scoring changes. Setting to {0} from {1}".format(
                        drives[i]['runningscore'], drives[i+1]['runningscore']))
                drives[i+1]['runningscore'] = drives[i]['runningscore']
        if debugScore:
            print(" ".join(notes))

    ## Check for higher scores incorrectly given
    for t in range(2):
        for i in range(len(drives)-1):
            currscore = drives[i]['runningscore'][t][1]
            j = i+1
            fix = False
            while j < len(drives) - 1:
                nextscore = drives[j]['runningscore'][t][1]
                if nextscore < currscore:
                    k = j-1
                    fix = True
                    break
                j += 1
            if fix:
                # Entries i..k carry a score above the later value: lower them.
                j = i
                while j <= k:
                    print("  Fixing Score on drive {0} \t   {1}  ->  {2}".format(
                        j, drives[j]['runningscore'][t][1], nextscore))
                    drives[j]['runningscore'][t][1] = nextscore
                    j += 1

    nerr = 0
    scores = {}
    scores[awayteam[1]] = 0
    scores[hometeam[1]] = 0
    showResult = False

    if showResult:
        print("# \t{0} \t# Running Score \t\tTeam Score \tResult".format(Fix("Driving", 10)))
    for i in range(len(drives)):
        drive = drives[i]
        drivingteam = teams[drive['team']]
        scoreval = drive['score']
        if drivingteam == awayteam[1]:
            newscore = drive['runningscore'][0][1]
            oteamscore = drive['runningscore'][1][1]
            oteam = hometeam[1]
        else:
            newscore = drive['runningscore'][1][1]
            oteamscore = drive['runningscore'][0][1]
            oteam = awayteam[1]

        ## Special for negative plays: the points go to the other team
        if scoreval < 0:
            if showResult:
                print("Turnover -> {0}  -> ".format(drivingteam))
            if drivingteam == awayteam[1]:
                if showResult:
                    print("\t home team scores {0}    {1}".format(
                        drive['runningscore'][1][1], scores[hometeam[1]]))
                if drive['runningscore'][1][1] != scores[hometeam[1]]:
                    drivingteam = hometeam[1]
                newscore = drive['runningscore'][1][1]
            else:
                if showResult:
                    print("\t home team scores {0}    {1}".format(
                        drive['runningscore'][0][1], scores[awayteam[1]]))
                if drive['runningscore'][0][1] != scores[awayteam[1]]:
                    drivingteam = awayteam[1]
                newscore = drive['runningscore'][0][1]
            if showResult:
                print("{0} Should be the one that got points.".format(drivingteam))
            scoreval *= -1

        err = False
        diff = 0
        if newscore != scores[drivingteam] + scoreval:
            err = True
            diff = scores[drivingteam] + scoreval - newscore
            result = drive['result']
            comment = ""
            ## Missed extra point or two-point conversion
            if diff < -10 and result == "Downs":
                scoreval = 0
                comment = "End of Game"
            if scoreval == 7 and diff == 1:
                scoreval = 6
                comment = "Missed Extra Point"
            if scoreval == 7 and diff == -1:
                scoreval = 8
                comment = "Two Point Conversion"
            if scoreval == 0 and diff == -3 and result == "End of Half":
                scoreval = 3
                comment = "Field Goal"
            if scoreval == 0 and diff == -3 and result == "Fumble":
                scoreval = 3
                comment = "Field Goal"
            if scoreval == 0 and diff == -3 and result == "Downs":
                scoreval = 3
                comment = "Field Goal"
            if scoreval == 0 and diff == -7 and result == "End of Half":
                scoreval = 7
                comment = "Touchdown"
            if scoreval == 0 and diff == -7 and result == "End of Game":
                scoreval = 7
                comment = "Touchdown"

            ## Check if we need to also fix the other teams score
            if scores[oteam] != oteamscore:
                oteamdiff = oteamscore - scores[oteam]
                if oteamdiff == 2:
                    comment += " Defensive PAT"
                    scores[oteam] = oteamscore
                else:
                    print("Something happened to {0} so that they got {1} points.".format(
                        oteam, oteamdiff))

            ## Check if this fixes things
            if newscore == scores[drivingteam] + scoreval:
                err = False
                # NOTE(review): the drive score is set to 6 regardless of the
                # adjusted value (0, 3, 6, 7 or 8) -- confirm this is intended.
                drives[i]['score'] = 6
                drives[i]['edit'] = 1
                drives[i]['result'] += " "+comment
            else:
                nerr += 1

            scores[drivingteam] = scores[drivingteam] + scoreval
        else:
            err = False
            scores[drivingteam] = scores[drivingteam] + scoreval

        if showResult:
            rowtext = "{0} \t{1} \t{2} {3} \t{4} \t{5} \t{6}".format(
                i, Fix(drivingteam, 10), drive['score'], drive['runningscore'],
                scores[drivingteam], drive['summary'], drive['result'])
            if err:
                rowtext += " \t\t{0} \t( {1} )".format(err, diff)
            print(rowtext)

    if nerr > 3:
        print("There were {0} remaining errors in the drive summary".format(nerr))
        # NOTE(review): f is not defined in this part of the file; calling it
        # presumably aborts the run -- confirm.
        f()
    return drives
    

def parseDriveSummary(drive, debug):
    """Parse the HTML fragment of one drive header.

    The fragment is split at ``<span`` and each piece is inspected for the
    ``headline``, ``team-name``, ``team-score``, ``drive-details`` and
    ``home-logo`` classes.

    Args:
        drive: HTML text of the drive header.
        debug: Not used by this function.

    Returns:
        ``None`` when no team number could be read from a logo, otherwise a
        dict with the keys ``'summary'`` (``[plays, yards, minutes,
        seconds]``), ``'team'`` (int), ``'result'`` (headline text),
        ``'score'`` (value from ``driveScore``) and ``'runningscore'`` (list
        of ``[team name, score]`` pairs).  Entries that were not found stay
        ``None``.
    """
    res = {}
    res['summary'] = None
    res['team'] = None
    res['result'] = None
    res['score'] = None
    res['runningscore'] = []

    for val in drive.split("<span"):
        ## Headline
        hline = "class=\"headline\">"
        pos = val.find(hline)
        if pos != -1:
            # 17 == len(hline); the trailing 7 characters are "</span>".
            dresult = val[pos+17:-7]
            res['result'] = dresult
            res['score'] = driveScore(dresult)

        ## Team name; remembered for the team-score piece that follows
        tname = "class=\"team-name\">"
        pos1 = val.find(tname)
        if pos1 != -1:
            pos2 = val.find("</span>", pos1+1)
            teamname = val[pos1+len(tname):pos2]

        ## Team score
        tscore = "class=\"team-score\">"
        pos1 = val.find(tscore)
        if pos1 != -1:
            pos2 = val.find("</span>", pos1+1)
            teamscore = int(val[pos1+len(tscore):pos2])
            res['runningscore'].append([teamname, teamscore])

        ## Details: "<n> plays, <n> yards[, m:ss]"
        details = "class=\"drive-details\">"
        pos1 = val.find(details)
        if pos1 != -1:
            pos2 = val.find("</span>")
            dvals = val[pos1+len(details):pos2].split(",")
            try:
                plays = dvals[0].replace("plays", "").replace("play", "")
                yards = dvals[1].replace("yards", "").replace("yard", "")
            except Exception:
                print("Problem with drive (plays,yards) details")
                print("{0}".format(dvals))
                # NOTE(review): f is not defined in this part of the file;
                # calling it presumably aborts the run -- confirm.
                f()

            if len(dvals) == 3:
                try:
                    ptime = dvals[2].strip()
                    m, s = ptime.split(":")
                except Exception:
                    print("Problem with drive (time) details")
                    print("{0}".format(dvals))
                    f()
            else:
                # No time given.
                m = 0
                s = 0

            try:
                summary = [int(plays), int(yards), int(m), int(s)]
            except Exception:
                print("Problem with drive details: Making ints")
                print("{0}".format(dvals))
                f()
            res['summary'] = summary

        ## Team number, taken from the logo file name ".../<number>.png"
        logo = "class=\"home-logo\">"
        if val.find(logo) != -1:
            pos2 = val.find(".png")
            pos1 = val.find("/", pos2-5)
            if pos1 == -1 or pos2 == -1:
                continue
            teamnum = val[pos1+1:pos2]
            try:
                teamnum = int(teamnum)
            except Exception:
                print("Problem parsing team number: {0}".format(teamnum))
                print("Original value: {0}".format(val[pos1+1:pos2]))
                print("{0}".format(pos1))
                print("{0}".format(pos2))
                f()
            res['team'] = teamnum

    if res['team'] is None:
        return None

    return res


def updateTime(ptime, newval):
    """Add two ``[minutes, seconds]`` values.

    Args:
        ptime: ``[minutes, seconds]`` of the running total.
        newval: ``[minutes, seconds]`` to add.

    Returns:
        New list ``[minutes, seconds]``; a seconds sum of 60 or more is
        carried into the minutes.  Neither argument is modified.
    """
    minval = ptime[0] + newval[0]
    secval = ptime[1] + newval[1]
    # ">=" so that exactly 60 seconds becomes a full minute; divmod also
    # covers sums of 120 seconds and more.
    if secval >= 60:
        carry, secval = divmod(secval, 60)
        minval += carry
    return [minval, secval]

def getDown(down):
    """Convert an ordinal down label to its number.

    Args:
        down: One of ``"1st"``, ``"2nd"``, ``"3rd"`` or ``"4th"``.

    Returns:
        The down as an int (1-4).  For any other label a message is printed,
        ``f()`` is called and the label itself is returned.
    """
    numbers = {"1st": 1, "2nd": 2, "3rd": 3, "4th": 4}
    if down in numbers:
        return numbers[down]

    print("Unknown down {0}".format(down))
    # NOTE(review): f is not defined in this part of the file; calling it
    # presumably aborts the run -- confirm.
    f()
    return down

def parsePlays(gameid, teams, allplays, dsum):
    """Extract the plays of one drive from its list of HTML lines.

    A play is recorded whenever a ``post-play`` span follows a line that
    established the game state (down, distance, field position).

    Args:
        gameid: Not used by this function.
        teams: Lookup table; ``teams[number]`` gives the team name and
            ``teams[name]`` the label compared with the field-position text.
        allplays: HTML lines of the drive; the first line is handed to
            ``tn.stripTeamNum`` to find the driving team.
        dsum: Not used by this function.

    Returns:
        Tuple ``(teamname, plays)``.  ``plays`` is a list of
        ``[state, play text]`` pairs where ``state`` is
        ``[down, to-go, distance]``.  ``(None, [])`` is returned when the
        driving team cannot be determined.
    """
    plays = []
    currstate = None
    showPlay = False

    try:
        teamnum, addr = tn.stripTeamNum(allplays[0])
    except Exception:
        print("Problem with getting driving team! {0}".format(allplays[0]))
        return None, plays
    drivingteam = teams[teams[teamnum]]
    teamname = teams[teamnum]

    def downAndToGo(vals):
        # First token is the down, third token the yards to go (or "Goal").
        down = getDown(vals[0])
        if vals[2] == "Goal":
            togo = "Goal"
        else:
            togo = int(vals[2])
        return down, togo

    for p in range(len(allplays)-2):
        play = allplays[p]

        ## Result of a play: pair it with the state seen before it
        if play == "<span class=\"post-play\">" and allplays[p+2] == "</span>":
            playresult = allplays[p+1]
            if playresult.find("NO PLAY") != -1:
                continue
            if showPlay:
                print("---> {0}".format(playresult))
            if currstate:
                plays.append([currstate, playresult])
                currstate = None
            else:
                print("No information about the drive state!")
                # NOTE(review): f is not defined in this part of the file;
                # calling it presumably aborts the run -- confirm.
                f()

        ## End of quarter / half time: neutral state
        if play in ("<li class=\"end-quarter\">", "<li class=\"half-time\">") and allplays[p+2] == "<p>":
            driveinfo = [0, 0, 0]
            if showPlay:
                print("---> {0}".format(driveinfo))
            currstate = driveinfo

        ## Regular play header, e.g. "<down> and <to go> at <team> <yard line>"
        if play in ("<li class=\"\">", "<li class=\"video\">") and allplays[p+2] == "<p>":
            drivestate = allplays[p+1]
            drivestate = drivestate.replace("<h3>", "")
            drivestate = drivestate.replace("</h3>", "")
            if drivestate == "":
                drivestate = "Kickoff"
            vals = drivestate.split()
            driveinfo = []
            if len(vals) == 1:
                driveinfo = [0, 0, 0]
            elif len(vals) == 6:
                down, togo = downAndToGo(vals)
                # Distance is measured on a 100 yard scale: on the driving
                # team's own side the yard line is subtracted from 100.
                if vals[4] == drivingteam:
                    dist = 100 - int(vals[5])
                else:
                    dist = int(vals[5])
                driveinfo = [down, togo, dist]
            elif len(vals) == 5:
                if vals[0] == "and":
                    driveinfo = [0, "2 PT", int(vals[4])]
                elif vals[4] == "50":
                    # Midfield carries no team label.
                    down, togo = downAndToGo(vals)
                    driveinfo = [down, togo, 50]
                else:
                    print("Can not parse drive state! {0}".format(drivestate))
                    f()
            else:
                print("Can not parse drive state! {0}".format(drivestate))
                f()
            if showPlay:
                print("---> {0} \t\t{1}".format(drivestate, driveinfo))
            currstate = driveinfo

    return teamname, plays

def parsePlayByPlay(gameid, pbp, team1data, team2data, debug):
    """Group the plays of a play-by-play page by driving team.

    Every ``<ul class="drive-list">`` section is handed to ``parsePlays``;
    sections without a team or without plays are skipped.

    Args:
        gameid: Game identifier, passed on to ``parsePlays``.
        pbp: List of HTML lines of the play-by-play page.
        team1data: Sequence for the first team; index 0 is the name given to
            ``tc.TeamConv``, index 1 the team number, index 3 a team label.
        team2data: Same layout for the second team.
        debug: When true, every parsed drive is printed.

    Returns:
        ``None`` when no drive was found, otherwise a dict mapping each
        converted team name to a list of drives.  A drive is a list whose
        first element is its position among all drives of the game and whose
        remaining elements are the plays from ``parsePlays``.
    """
    team1name = tc.TeamConv(team1data[0])
    team2name = tc.TeamConv(team2data[0])

    # Lookups by team number, by label and by converted name.
    teams = {}
    teams[int(team1data[1])] = team1name
    teams[int(team2data[1])] = team2name
    teams[team1data[3]] = team1name
    teams[team2data[3]] = team2name
    teams[team1name] = team1data[3]
    teams[team2name] = team2data[3]

    driveplays = []
    flag = "<ul class=\"drive-list\">"
    for j, line in enumerate(pbp):
        if line.find(flag) == -1:
            continue

        # The section runs up to (excluding) the next "</ul>" line, or to the
        # end of the page when it is never closed.
        k = j+1
        try:
            while pbp[k].find("</ul>") == -1:
                k += 1
        except IndexError:
            k = len(pbp)

        # Drive summaries are not parsed here, so no summary is passed on.
        teamname, plays = parsePlays(gameid, teams, pbp[j:k], None)
        if teamname is None or len(plays) == 0:
            continue
        plays.insert(0, teamname)
        driveplays.append(plays)
        if debug:
            print("{0} \t{1}".format(len(plays), plays))

    if len(driveplays) == 0:
        print("No drives found for this game.")
        return None

    summary = {}
    summary[team1name] = []
    summary[team2name] = []

    for i in range(len(driveplays)):
        team = driveplays[i][0]
        if team not in summary:
            print("Error paring drive plays for {0}".format(team))
            print("{0}".format(list(summary.keys())))
            # NOTE(review): f is not defined in this part of the file; calling
            # it presumably aborts the run -- confirm.
            f()
        plays = driveplays[i][1:]
        plays.insert(0, i)

        summary[team].append(plays)

    return summary

In [None]:

##############################################################################
#
#
# parseMatchup()
#
#
##############################################################################

def getAttr(mdata, key, i):
    """Read one statistics row starting at line ``i``.

    Lines are collected until one containing ``</tr>`` is met; lines that
    contain ``td>`` are ignored.  When four values were collected the first
    one is dropped, leaving the label and one value per team.

    The two values are converted together, depending on which separator both
    of them contain (later rules win):
      * none  -> ``int``
      * ``-`` -> ``[int, int]`` (a value starting with ``-`` becomes an int)
      * ``.`` -> ``float``
      * ``:`` -> ``[int, int]``

    Args:
        mdata: List of text lines.
        key: Not used by this function.
        i: Index of the first line of the row.

    Returns:
        ``[label, value1, value2]``.
    """
    cells = []
    for j in range(i, len(mdata)):
        # Safety net: a row should never span more than 20 lines.
        if j > i + 20:
            f()
        row = mdata[j]
        if row.find("</tr>") != -1:
            break
        if row.find("td>") == -1:
            cells.append(row.strip())

    if len(cells) == 4:
        cells = cells[1:]
    name = cells[0]
    raw1 = cells[1]
    raw2 = cells[2]

    def inBoth(token):
        return token in raw1 and token in raw2

    form = "int"
    if inBoth("-"):
        form = "dash"
    if inBoth("."):
        form = "float"
    if inBoth(":"):
        form = "clock"

    def convert(text):
        if form == "int":
            return int(text)
        if form == "float":
            return float(text)
        if form == "dash":
            pieces = text.split("-")
            # An empty first piece means a leading minus sign.
            if len(pieces[0]) == 0:
                return int(text)
            numbers = [int(x) for x in pieces]
            return [numbers[0], numbers[1]]
        numbers = [int(x) for x in text.split(":")]
        return [numbers[0], numbers[1]]

    return [name, convert(raw1), convert(raw2)]


def parseMatchup(mdata, team1data, team2data, debug):
    """Extract per-team stats from the lines of a matchup page.

    Parameters:
        mdata:     list of HTML lines of the matchup page.
        team1data: team record; element 0 is the team name, 1 the team
                   number and 3 the abbreviation (as indexed below).
        team2data: same layout for the second team.
        debug:     unused in this function.

    Returns:
        ``None`` when the two teams could not be identified after the
        ``<th>Matchup</th>`` line; otherwise a dict
        ``{team: {stat: value}}`` for both teams.  For a stat that was found
        the inner key is the label returned by ``getAttr`` (not the
        ``data-stat-attr`` name); for a stat that was not found the inner
        key is the ``data-stat-attr`` name and the value is ``None``.
    """
    # Teams can be looked up by number or by abbreviation.
    teams = {}
    teams[int(team1data[1])] = tc.TeamConv(team1data[0])
    teams[int(team2data[1])] = tc.TeamConv(team2data[0])
    teams[team1data[3]] = tc.TeamConv(team1data[0])
    teams[team2data[3]] = tc.TeamConv(team2data[0])

    teamorder = [None, None]
    # "interceptions" was listed twice originally; once is enough for a dict.
    keys = ["firstDowns", "thirdDownEff", "fourthDownEff",
            "totalYards", "netPassingYards",
            "yardsPerPass", "completionAttempts", "interceptions",
            "rushingYards", "rushingAttempts", "yardsPerRushAttempt",
            "totalPenaltiesYards", "turnovers", "fumblesLost",
            "possessionTime"]
    values = {}
    for key in keys:
        values[key] = None

    for i in range(len(mdata)):
        line = mdata[i]
        if line == "<th>Matchup</th>":
            # The first two logo images after the header give the column order.
            j = i + 1
            while j < len(mdata):
                if mdata[j].find("<img src=") != -1:
                    teamnum, addr = tn.stripTeamNum(mdata[j])
                    if teamorder[0] is None:
                        teamorder[0] = teams[teamnum]
                    elif teamorder[1] is None:
                        teamorder[1] = teams[teamnum]
                        break
                j += 1

        for key in keys:
            if line.find("data-stat-attr=\"" + key + "\"") != -1:
                values[key] = getAttr(mdata, key, i)
                break

    if teamorder[0] is None or teamorder[1] is None:
        return None

    retvals = {}
    retvals[teamorder[0]] = {}
    retvals[teamorder[1]] = {}
    for k, v in values.items():
        if v is None:
            retvals[teamorder[0]][k] = None
            retvals[teamorder[1]][k] = None
        else:
            # v is [label, first-column value, second-column value].
            retvals[teamorder[0]][v[0]] = v[1]
            retvals[teamorder[1]][v[0]] = v[2]
    return retvals

In [None]:


    
##############################################################################
#
#
# parseTeamData()
#
#
##############################################################################    
def _convertBoxValue(text):
    """Convert one box-score cell: decimal -> float, "a/b" -> [int, int],
    otherwise int; anything that does not parse is returned as ``str``."""
    try:
        if text.find(".") != -1:
            return float(text)
        if text.find("/") != -1:
            return [int(x) for x in text.split("/")]
        return int(text)
    except ValueError:
        return str(text)


def parseTeamData(line, teams, showData):
    """Parse one box-score table (caption + header row + data rows).

    Parameters:
        line:     HTML fragment containing ``<thead><tr>``; the text just
                  before it (after the last ``>``, with ``</caption>``
                  removed) is taken as the table name.
        teams:    dict mapping a team-name spelling to the canonical team
                  name; the first spelling found inside the table name
                  selects the team.
        showData: when true, print intermediate results.

    Returns:
        ``(teamname, field, teamvals)`` where ``field`` is the table name
        with ``teamname`` removed and ``teamvals`` maps the first column of
        each row to ``{column: converted value}``.  If a row without the
        expected cell class contains "No", rows from that point on are
        ignored and a ``"None"`` entry with zero for every column is added.

    Any structural problem prints a diagnostic and aborts through ``f()``.
    """
    pos = line.find("<thead><tr>")
    if pos == -1:
        print("Problem parseing team data {0}".format(line))
        f()

    ## Get name
    caption = line[:pos].replace("</caption>", "")
    dataname = caption[caption.rfind(">") + 1:]
    teamname = None
    for alias, tconv in teams.items():
        if dataname.find(alias) != -1:
            teamname = tconv
            break

    if teamname is None:
        print("Could not find team in {0}".format(dataname))
        print("Expected: {0}".format(teams))
        f()

    ## Split the table into rows; each piece starts right after "<tr".
    rows = line[pos:].replace("<thead>", "").split("<tr")
    if len(rows[0]) == 0:
        rows.pop(0)
    for r in range(len(rows)):
        if rows[r].startswith('>'):
            rows[r] = rows[r][1:]
        rows[r] = rows[r].strip()

    ## Header row: the class attribute of every <th> is the column key.
    keys = []
    for header in rows[0].split("<th"):
        header = header.replace("class=\"", "").replace("</th>", "").strip()
        if header == "":
            continue
        cut = header.find("\">")
        if cut != -1:
            header = header[:cut]
        keys.append(header)

    if showData: print("keys ---> {0}".format(keys))
    rows.pop(0)
    if showData: print(rows)
    if showData: print('')

    teamvals = {}
    noInfo = False
    for row in rows:
        cells = row.split("</td")

        vals = []
        for k in range(len(keys)):
            if cells[k].find("class=\"" + keys[k] + "\"") == -1:
                if cells[k].find("No") == -1:
                    print("Problem parsing box score")
                    print(keys[k])
                    print(keys)
                    print(cells)
                    f()
                else:
                    # Placeholder row; the flag stays set for the
                    # remaining rows as well.
                    noInfo = True
                    break

            cell = cells[k].replace("</a>", "").strip()
            vals.append(cell[cell.rfind(">") + 1:])

        if not noInfo:
            teamvals[vals[0]] = vals[1:]
            if showData: print("\t{0} {1}".format(vals[0], teamvals[vals[0]]))

    if noInfo:
        teamvals["None"] = ['0'] * (len(keys) - 1)

    ## Convert the raw strings; the first key is the row-name column.
    keys = keys[1:]
    for name in list(teamvals.keys()):
        raw = teamvals[name]
        data = {}
        for i in range(len(keys)):
            data[keys[i]] = _convertBoxValue(raw[i])
        teamvals[name] = data

    # NOTE(review): the canonical name is stripped here, not the spelling
    # that matched above; if they differ the team name stays in `field` --
    # confirm this is intended.
    try:
        field = dataname.replace(teamname, "").strip()
    except Exception:
        print("Problem parsing data field name {0}".format(dataname))
        f()

    if showData: print("{0} \t{1} \t{2}".format(teamname, field, teamvals))
    return teamname, field, teamvals


    
    
##############################################################################
#
#
# parseBoxScore()
#
#
##############################################################################
def parseBoxScore(bdata, team1data, team2data, debug):
    """Extract the individual-stat tables for both teams from a box score.

    Parameters:
        bdata:     list of HTML lines of the box-score page.
        team1data: team record; element 0 is the team name, 1 the team
                   number and 3 the abbreviation (as indexed below).
        team2data: same layout for the second team.
        debug:     when true, print the tables as they are parsed.

    Returns:
        ``{team: {field: table}}`` for both (converted) team names, where
        ``field`` and ``table`` come from ``parseTeamData``.  A table whose
        team is not one of the two aborts through ``f()``.
    """
    # Not read below; kept so that a non-numeric team number still fails
    # here exactly as it did before.
    teams = {}
    teams[int(team1data[1])] = tc.TeamConv(team1data[0])
    teams[int(team2data[1])] = tc.TeamConv(team2data[0])
    teams[team1data[3]] = tc.TeamConv(team1data[0])
    teams[team2data[3]] = tc.TeamConv(team2data[0])

    values = {}
    values[tc.TeamConv(team1data[0])] = {}
    values[tc.TeamConv(team2data[0])] = {}

    # Spellings under which a team may appear in a table caption.
    tconvs = {}
    for team in values.keys():
        tconvs[team] = team
        invconv = tc.InvTeamConv(team)
        if invconv != team:
            tconvs[invconv] = team

    # Also accept "X" for "X State" unless that spelling is already taken.
    for team in values.keys():
        if team.find("State") != -1:
            tstate = team.replace("State", "").strip()
            if tconvs.get(tstate) is None:
                tconvs[tstate] = team

    showData = debug

    for line in bdata:
        if line.find("class=\"boxscore-tabs game-package-box-score ") == -1:
            continue
        for val in line.split("<div class=\"col column-"):
            # Only column chunks that carry a team logo hold a stat table.
            if val.find(".png") == -1:
                continue
            teamnum, addr = tn.stripTeamNum(val)
            if showData: print('')
            if showData: print(teamnum)
            teamname, field, teamdata = parseTeamData(val, tconvs, showData)
            if values.get(teamname) is None:
                print("Did not recognize team {0}".format(teamname))
                print("Should be {0}".format(list(values.keys())))
                f()
            values[teamname][field] = teamdata
            if showData: print("{0} \t{1} \t{2}".format(teamname, field, teamdata))
            if showData: print('')

    if showData: print(values)

    return values

In [None]:
# This Python file uses the following encoding: utf-8
import argparse

import os, sys
import glob
import requests
from collections import Counter
import json
import datetime

import parsePlayByPlay
import parseScoringSummary
import parseMatchup
import parseBoxScore
import parseTeam
import parseScores
import parseSFref

import TeamConv as tc
import teamNum as tn

# Python 2 only: reloading ``sys`` makes ``setdefaultencoding`` callable again
# so implicit str/unicode conversions use UTF-8.  Python 3 has neither the
# ``reload`` builtin nor ``sys.setdefaultencoding`` (its default is already
# UTF-8), so the hack is skipped there.
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding('utf8')


def f():
    """Abort the current operation.

    Used throughout this file as a hard stop after a diagnostic has been
    printed; always raises ``Exception("Found exit()")``.
    """
    raise Exception("Found exit()")
    
def Fix(name, size):
    """Return ``name`` right-padded with spaces to at least ``size``
    characters; names that are already long enough come back unchanged."""
    shortfall = size - len(name)
    if shortfall > 0:
        return name + " " * shortfall
    return name



def TestTime(time1, time2):
    """Return True when ``time1`` equals ``time2`` or differs from it by
    exactly one day in either direction, False otherwise."""
    day = datetime.timedelta(days=1)
    candidates = (time2, time2 + day, time2 - day)
    return any(time1 == candidate for candidate in candidates)

def getdTimeYear(date, year):
    """Build a ``datetime`` from a ``"M/D"`` string and a year.

    Parameters:
        date: month and day separated by a single "/", e.g. ``"9/5"``.
        year: the year, anything ``format`` can render (``2015``/``"2015"``).

    Returns:
        ``datetime.datetime`` for that day at midnight.

    Raises:
        ValueError if ``date`` does not split into exactly two integers;
        whatever ``f()`` raises if the resulting date is not a valid one.
    """
    month, day = date.split('/')
    # The original built a zero-padded string here but overwrote it (first
    # with "/"+d, then with date + "/" + year); pad for real this time.
    dateyear = "{0:02d}/{1:02d}/{2}".format(int(month), int(day), year)
    try:
        gametime = datetime.datetime.strptime(dateyear, "%m/%d/%Y")
    except ValueError:
        print("Could not convert date: {0}".format(dateyear))
        f()
    return gametime

def getdTime(date):
    """Parse a date written like ``"November 28, 2015"`` (``%B %d, %Y``).

    Returns the matching ``datetime.datetime``; if the text cannot be
    parsed a diagnostic is printed and the parse is aborted through
    ``f()``.
    """
    try:
        gametime = datetime.datetime.strptime(date, "%B %d, %Y")
    except (ValueError, TypeError):
        print("Could not convert date: {0}".format(date))
        f()
    return gametime


    




##############################################################################
#
#
# parseTitle()
#
#
##############################################################################
def parseTitle(game, debug):
    """Split a page title into its two team names and the date.

    The title must consist of four parts separated by ``" - "``; the first
    part is ``"<team1> vs. <team2>"`` and the third part is the date text.

    Parameters:
        game:  the title string (it is echoed to stdout).
        debug: unused in this function.

    Returns:
        ``(team1, team2, date)``.  A title that does not match prints a
        diagnostic and aborts through ``f()``.
    """
    print(game)
    vals = game.split(" - ")
    if len(vals) != 4:
        print("Can not parse title: {0}".format(game))
        f()

    try:
        team1, team2 = vals[0].split(" vs. ")
    except ValueError:
        # Not exactly one " vs. " separator in the first part.
        print("Problem with parsing title: {0}".format(vals))
        f()

    return team1, team2, vals[2]






##############################################################################
#
#
# parseLogo()
#
#
##############################################################################
def parseLogo(logo, debug):
    """Extract both teams' name, number, logo address and abbreviation.

    The line is split on ``<div``.  A team number / logo address is taken
    from the first chunk for which ``tn.getTeamNum`` returns something other
    than -1; the next chunk containing ``class="team-name"`` supplies the
    long name and abbreviation, after which the search starts over for the
    second team.

    Parameters:
        logo:  the HTML line holding the team logos.
        debug: when true, print every chunk and every value found.

    Returns:
        ``(team1, team2)``, each ``[name, teamnum, logoaddr, abbreviation]``.
        Aborts through ``f()`` if fewer than two teams are found.
    """
    longmarker = "<span class=\"long-name\">"
    team1 = None
    team2 = None
    teamnum = -1
    logoaddr = None
    if debug: print("=============== logo ===============")
    for div in logo.split("<div"):
        if debug: print("\t->   {0}".format(div))

        if teamnum == -1:
            teamnum, logoaddr = tn.getTeamNum(div)
        if teamnum != -1:
            if debug: print("---> {0} <---".format(logoaddr))

        if div.find("class=\"team-name\"") != -1:
            pos1 = div.find(longmarker)
            pos2 = div.find("</span>", pos1 + 1)
            team = div[pos1 + len(longmarker):pos2]
            if debug: print("---> Team Long:   {0} <---".format(team))

            abrvtxt = "<span class=\"abbrev\" title=\"" + team + "\">"
            pos1 = div.find(abrvtxt)
            pos2 = div.find("</span>", pos1 + 1)
            abrv = div[pos1 + len(abrvtxt):pos2]
            if debug: print("---> Team Short:  {0} <---".format(abrv))

            if team1 is None:
                team1 = [team, teamnum, logoaddr, abrv]
            else:
                team2 = [team, teamnum, logoaddr, abrv]
            # Start looking for the next team's number.
            teamnum = -1

    if debug: print(team1)
    if debug: print(team2)

    if team1 is None or team2 is None:
        print("Could not find teams: {0} {1}".format(team1, team2))
        f()

    return team1, team2






##############################################################################
#
#
# compareGames()
#
#
##############################################################################
def compareGames(fbsgames, fbsscores, teamnums, year=2015):
    """Attach the parsed game details to every entry of the score list.

    For each team in ``fbsscores`` the games in ``fbsgames`` that involve
    that team are collected; each scored game is then matched to the first
    of those whose date is the same day or one day off (``TestTime``), and
    the match is stored under ``'teamdata'`` in the score entry.

    Parameters:
        fbsgames:  ``{gamekey: {team: details}}``; ``details['date']`` is
                   parsed with ``getdTime``.
        fbsscores: ``{team: {'games': [{'date': 'M/D', 'opponent': ...}]}}``;
                   modified in place.
        teamnums:  unused in this function.
        year:      season year used to complete the ``'M/D'`` score dates
                   (defaults to 2015, the value that used to be hard-coded).

    Returns:
        ``fbsscores`` (the same object).  A team without any parsed game or
        a score without a matching game aborts through ``f()``.

    NOTE(review): matching is by date only; the opponent is not compared.
    """
    for k in list(fbsscores.keys()):
        v = fbsscores[k]
        team1 = k
        print("===== {0} =====".format(Fix(k, 25)))

        # Parsed games in which this team takes part.
        testgames = {}
        for k2, v2 in fbsgames.items():
            for team in v2.keys():
                if team1 == tc.TeamConv(team):
                    testgames[k2] = v2
                    break

        ngames = len(testgames)
        games = v['games']
        if ngames < 1:
            print("")
            print("")
            print("===== {0} =====   ---> FBS Games: {1}".format(k, ngames))
            f()

        for i in range(len(games)):
            game = games[i]

            dtime = getdTimeYear(game['date'], year)
            team2 = game['opponent']

            gamekey = None
            fbsgame = None
            for key, v2 in testgames.items():
                side = list(v2.keys())[0]
                dtime2 = getdTime(v2[side]['date'])
                if TestTime(dtime, dtime2):
                    gamekey = key
                    if fbsgames.get(gamekey):
                        fbsgame = fbsgames[gamekey]
                    else:
                        print("Error in key! {0}".format(gamekey))
                        f()
                    break

            if gamekey is None or fbsgame is None:
                print("  ---> {0} vs. {1}  \t on {2} \tCould not find fbs game key.".format(
                    team1, team2, game['date']))
                for key, v2 in testgames.items():
                    sides = list(v2.keys())
                    dtime2 = getdTime(v2[sides[0]]['date'])
                    print("{0} \t{1} {2} {3}".format(key, sides[0], sides[1], dtime2))
                f()

            fbsscores[k]['games'][i]['teamdata'] = fbsgame
            print("\t{0} \t{1} \t{2}".format(Fix(game['date'], 5), Fix(team2, 25), gamekey))

    return fbsscores
    


def testStats(fullgames):
    """Check that the play-by-play summary scores agree with the score list.

    For every game of every team, ``game['score']`` / ``game['against']``
    are compared with ``details[team]['summary']['score']`` and
    ``details[opp]['summary']['score']`` taken from
    ``game['play-by-play']``.  A missing team key or a mismatch prints the
    offending data and aborts through ``f()``.

    This is a diagnostic routine: it ends with an unconditional ``f()``, so
    it always raises, even when every game checks out.
    """
    for team, v in fullgames.items():
        print('')
        print("==============================================================")
        for game in v['games']:
            date = game['date']
            opp = game['opponent']
            score = game['score']
            oppscore = game['against']
            # Written without a newline; the verdict is appended below.
            sys.stdout.write("\t{0} \t{1} \t{2} \t -->  {3}  vs. {4} ".format(
                date, score, oppscore, team, opp))
            details = game['play-by-play']
            try:
                teamdetails = details[team]
                oppdetails = details[opp]
            except KeyError:
                print("Error with key in fullgames{} in testStats()")
                print("{0} {1}".format(opp, details))
                f()

            if teamdetails['summary']['score'] != score or oppdetails['summary']['score'] != oppscore:
                print("ERROR")
                print("\t\t{0} \t{1}".format(teamdetails['summary']['score'],
                                             oppdetails['summary']['score']))
                print(teamdetails)
                print(oppdetails)
                f()
            else:
                print("")
        print('')
        print("==============================================================")

    f()



def _readHtmlLines(path):
    """Read ``path`` and return its lines without line endings or
    leading/trailing tabs; the file is closed before returning."""
    with open(path) as handle:
        return [x.strip('\r\n').strip('\t') for x in handle.readlines()]


def _requireTeams(table, label, team1, team2):
    """Abort through ``f()`` unless both team names have an entry in
    ``table``; ``label`` names the table in the diagnostic."""
    for team in (team1, team2):
        if table.get(team) is None:
            print("Could not find {0} in {1} {2}".format(team, label, list(table.keys())))
            f()


def parseGame(gameid, gamehtml, fbsteams, debug, checkformissing):
    """Parse one downloaded game (play-by-play, matchup and box-score pages).

    Parameters:
        gameid:          game identifier handed on to the sub-parsers.
        gamehtml:        path of the play-by-play page; its name must
                         contain "Plays" -- the matchup and box-score paths
                         are derived by replacing that word.
        fbsteams:        dict of known FBS team names.
        debug:           passed to the sub-parsers; also enables a few
                         status prints here.
        checkformissing: when true, only report whether the page contains a
                         scoring summary and skip all parsing.

    Returns:
        ``(status, details)`` where ``status`` is one of
          * ``"NoScore"``      -- matchup or box-score file missing, or (in
                                  ``checkformissing`` mode) no scoring summary;
          * ``"NoFBSTeam"``    -- neither team is in ``fbsteams``;
          * ``"NotPlayedYet"`` -- the game date lies in the future;
          * ``"Good"``         -- parsed (``details`` is ``None`` in
                                  ``checkformissing`` mode).
        ``details`` maps each team name to a dict with the keys ``logo``,
        ``date``, ``drives``, ``scores``, ``teamstats`` and ``indivstats``.

    Inconsistent data prints a diagnostic and aborts through ``f()``.
    """
    fdata = _readHtmlLines(gamehtml)

    matchuphtml = gamehtml.replace("Plays", "Matchup")
    if matchuphtml == gamehtml:
        print("Problem parsing matchup html {0}".format(gamehtml))
        f()
    if not os.path.exists(matchuphtml):
        return "NoScore", None
    mdata = _readHtmlLines(matchuphtml)

    boxscorehtml = matchuphtml.replace("Matchup", "BoxScore")
    if boxscorehtml == matchuphtml:
        print("Problem parsing box score html {0}".format(matchuphtml))
        f()
    if not os.path.exists(boxscorehtml):
        return "NoScore", None
    bdata = _readHtmlLines(boxscorehtml)

    gametitle = "<meta name=\"title\" content=\""
    logo = "<img class=\"team-logo\""
    pbp = "<div id=\"gamepackage-play-by-play\" data-module=\"playbyplay\">"
    pbpend = "<div id=\"gamepackage-scoring-wrap\""
    scoresum = "<div class=\"scoring-summary\">"

    gdata = {"title": None, "logo": None, "plays": []}
    i = 0
    while i < len(fdata):
        line = fdata[i]

        ## Game title (first occurrence only); the slice drops the tag
        ## prefix and the last three characters of the line.
        if line.find(gametitle) != -1 and gdata["title"] is None:
            gdata["title"] = line[len(gametitle):-3]

        ## Team logo (first occurrence only)
        if line.find(logo) != -1 and gdata["logo"] is None:
            gdata["logo"] = line

        ## Game play-by-play: collect lines up to the scoring wrapper
        if line.find(pbp) != -1:
            while line.find(pbpend) == -1:
                gdata["plays"].append(line)
                i += 1
                if i >= len(fdata):
                    break
                line = fdata[i]

        ## Scoring summary
        if line.find(scoresum) != -1:
            gdata["scores"] = line

        i += 1

    if checkformissing:
        if gdata.get('scores') is None:
            return "NoScore", None
        return "Good", None

    if gdata["title"] is None:
        print("Could not find game title in {0}".format(gamehtml))
        f()
    team1A, team2A, date = parseTitle(gdata["title"], debug)
    if fbsteams.get(tc.TeamConv(team1A)) is None and fbsteams.get(tc.TeamConv(team2A)) is None:
        if debug: print("No FBS team here. Moving on.")
        return "NoFBSTeam", None

    gametime = getdTime(date)
    if gametime > datetime.datetime.today():
        if debug: print("Game has not happened yet")
        return "NotPlayedYet", None

    if gdata["logo"] is None:
        print("Could not find team logos in {0}".format(gamehtml))
        f()
    team1Data, team2Data = parseLogo(gdata["logo"], debug)
    if team1Data[0] != team1A or team2Data[0] != team2A:
        print("Problem with teams")
        print("{0} <--> {1}".format(team1Data[0], team1A))
        print("{0} <--> {1}".format(team2Data[0], team2A))
        f()

    drives = parsePlayByPlay.parsePlayByPlay(gameid, gdata["plays"], team1Data, team2Data, debug)

    if gdata.get('scores'):
        scores = parseScoringSummary.parseScoringSummary(gameid, gdata['scores'], team1Data, team2Data, debug)
    else:
        scores = None

    ##  Check for matchup information
    matchup = parseMatchup.parseMatchup(mdata, team1Data, team2Data, debug)

    ##  Check for boxscore information
    boxscore = parseBoxScore.parseBoxScore(bdata, team1Data, team2Data, debug)

    # From here on the teams are known by their converted names.
    team1Data[0] = tc.TeamConv(team1Data[0])
    team2Data[0] = tc.TeamConv(team2Data[0])
    name1 = team1Data[0]
    name2 = team2Data[0]

    gamedetails = {}
    gamedetails[name1] = {}
    gamedetails[name2] = {}
    print("\t{0}".format(" vs. ".join(gamedetails.keys())))

    for teamData in (team1Data, team2Data):
        entry = gamedetails[teamData[0]]
        entry['logo'] = teamData[1:]
        entry["date"] = date
        entry['drives'] = None
        entry['scores'] = None
        entry["teamstats"] = None
        entry["indivstats"] = None

    if matchup is not None:
        _requireTeams(matchup, "matchup", name1, name2)
        gamedetails[name1]["teamstats"] = matchup[name1]
        gamedetails[name2]["teamstats"] = matchup[name2]

    if boxscore is not None:
        # The original diagnostics said "in matchup" here (copy-paste slip).
        _requireTeams(boxscore, "boxscore", name1, name2)
        gamedetails[name1]["indivstats"] = boxscore[name1]
        gamedetails[name2]["indivstats"] = boxscore[name2]

    if scores:
        _requireTeams(scores, "score", name1, name2)
        gamedetails[name1]['scores'] = scores[name1]
        gamedetails[name2]['scores'] = scores[name2]

    if drives:
        _requireTeams(drives, "drives", name1, name2)
        gamedetails[name1]['drives'] = drives[name1]
        gamedetails[name2]['drives'] = drives[name2]

    return "Good", gamedetails



############################################################
##
##  Main()
##
############################################################
def _loadJson(path):
    """Load and return the JSON document stored at ``path``."""
    with open(path) as handle:
        return json.load(handle)


def _saveJson(data, path):
    """Write ``data`` as JSON to ``path`` (the file is closed afterwards)."""
    with open(path, "w") as handle:
        json.dump(data, handle)


def main(args):
    """Run the action selected on the command line.

    Exactly one action runs per call, checked in this order: ``spref``,
    ``gethistoricalscores``, ``merge``, ``getscores``, ``getgame``,
    ``getteams``, ``getgames``; with none of them set, every
    ``Games/Plays/*.html`` file is parsed and the result is written to the
    season data file.  With ``missing`` set the files are only checked for
    a scoring summary and games without one are downloaded again.

    Parameters:
        args: the ``argparse`` namespace built in the ``__main__`` section.
    """
    yearid = "2015"

    # NOTE(review): machine-specific absolute path; every location below
    # hangs off it.
    footballdir  = "/Users/tgadfort/Dropbox/Football"
    gamebase     = os.path.abspath(footballdir + "/Games")
    teambase     = os.path.abspath(footballdir + "/Games/Teams")
    schedulebase = os.path.abspath(footballdir + "/Games/Teams/Schedule")
    spteampath   = os.path.abspath(footballdir + "/Games/Teams/SFref")
    spyearpath   = os.path.abspath(footballdir + "/Games/Teams/SFref/Historical")
    scorebase    = os.path.abspath(footballdir + "/Scores")
    gamesdir     = os.path.relpath("Games/Plays")   # relative to the cwd
    datadir      = os.path.abspath(footballdir + "/Data")
    sprefdir     = os.path.abspath(footballdir + "/SPref")

    historicalscorebase = scorebase
    historicalteambase  = os.path.abspath(footballdir + "/Games/Teams/Historical")
    historicaljsonfile  = os.path.abspath(datadir + "/" + "HistoricalTeamScores.json")

    fbsgamesfile   = os.path.abspath(datadir + "/" + yearid + "Data.json")
    teamdbfile     = os.path.abspath(datadir + "/" + "TeamDB.json")

    scorehtmlfile  = os.path.abspath(scorebase + "/" + yearid + ".html")
    scorejsonfile  = os.path.abspath(datadir + "/" + yearid + "Scores.json")
    scorecsvfile   = os.path.abspath(datadir + "/" + yearid + "Scores.csv")

    fullgamefile   = os.path.abspath(datadir + "/" + "Merged" + yearid + ".json")
    teamdb = _loadJson(teamdbfile)
    fbsteams = teamdb['names']

    sprefhtml             = os.path.abspath(sprefdir + "/" + "index.html")
    sprefteamsjsonfile    = os.path.abspath(datadir + "/" + "SPrefTeams.json")
    sprefgamelogsjsonfile = os.path.abspath(datadir + "/" + "SPrefGameLogs.json")
    sprefteamyearjsonfile = os.path.abspath(datadir + "/" + "SPrefTeamYear.json")

    #################################
    # If we need the SFref data
    #################################
    if args.spref:
        parseSFref.getSFref(sprefhtml, args.force)
        parseSFref.parseSFref(sprefhtml, sprefteamsjsonfile)
        parseSFref.getSFrefTeams(spteampath, sprefteamsjsonfile, args.force)
        parseSFref.parseSFrefTeams(spteampath, sprefteamsjsonfile, args.force)
        parseSFref.getSFrefTeamYears(spyearpath, sprefteamsjsonfile, args.force)
        parseSFref.parseSFrefTeamYears(spyearpath, sprefteamyearjsonfile, args.force)
        parseSFref.parseSFrefTeamGameLog(spyearpath, sprefgamelogsjsonfile, args.force)
        return

    #################################
    # If we need to get historical scores
    # (only the parse step runs; the download calls were disabled)
    #################################
    if args.gethistoricalscores:
        parseScores.parseHistoricalScores(historicalscorebase,
                                          historicalteambase,
                                          historicaljsonfile)
        return

    #################################
    # If we need to merge scores and data
    #################################
    if args.merge:
        fbsgames  = _loadJson(fbsgamesfile)
        fbsscores = _loadJson(scorejsonfile)
        fullgames = compareGames(fbsgames, fbsscores, teamdb)
        print("Writing {0} full games to {1}".format(len(fullgames), fullgamefile))
        _saveJson(fullgames, fullgamefile)
        return

    #################################
    # If we need to download scores
    #################################
    if args.getscores:
        scoreid = yearid
        parseScores.getScores(scorebase, scorehtmlfile, scoreid, args.force)
        scores = parseScores.parseScores(scorehtmlfile)
        if scores:
            parseScores.writeFile(scorecsvfile, scores)
            _saveJson(scores, scorejsonfile)
            print("\tWrote {0} lines to {1}".format(len(scores), scorejsonfile))
        return

    #################################
    # If we need to redownload a single game
    #################################
    if args.getgame:
        gameid = args.getgame[0]
        parseTeam.getGame(gameid, gamebase, test=False, force=True)
        return

    #################################
    # If we need to redownload teams
    #################################
    if args.getteams:
        for k in teamdb['nums'].keys():
            parseTeam.getTeam(k, teambase, test=False, force=args.force)
            parseTeam.getHistoricalSchedule(schedulebase, k, args.force)
        return

    #################################
    # If we need to redownload games
    # (an older per-team download path followed an unconditional
    #  `continue` here and could never run; it has been removed)
    #################################
    if args.getgames:
        for k in teamdb['nums'].keys():
            result = parseTeam.getTeamHistoricalGames(schedulebase, k)
            for gameid, gamenum in result.items():
                parseTeam.getGame(gameid, gamebase, test=False, force=args.force)
        return

    #################################
    # Default: parse every downloaded game
    #################################
    debug = False
    games = glob.glob(gamesdir + "/*.html")
    gamedb = {}
    fbsgames = {}
    missing = []
    for g in range(len(games)):
        print("===================")
        game = games[g]
        key = os.path.basename(game).split(".")[0]
        # No newline: whatever parseGame prints continues on this line.
        sys.stdout.write("Game {0} / {1} \t{2}    ".format(g + 1, len(games), key))
        gamestatus, gameresults = parseGame(key, game, fbsteams, debug, args.missing)
        if args.missing:
            if gamestatus != "Good":
                missing.append([gamestatus, key])
            continue
        if gamestatus != "Good":
            print("\t--> {0}".format(gamestatus))
            missing.append([gamestatus, key])
            continue
        if gameresults is None:
            print("\t--> {0}".format(gamestatus))
            continue

        if gamedb.get(key):
            print("Already parsed this game [ {0} ]!".format(key))
            continue
        fbsgames[key] = gameresults
        teams = list(gameresults.keys())
        if fbsteams.get(tc.TeamConv(teams[0])) and fbsteams.get(tc.TeamConv(teams[1])):
            gamedb[key] = gameresults
        else:
            print("Not keeping game for later downloads by gameDB because one team is not FBS")

    print("Done parsing {0} games.".format(len(games)))
    if not args.missing:
        # In -missing mode nothing is parsed, so writing here would replace
        # the season data file with an empty dict.
        print("FBS Games: {0}".format(len(fbsgames)))
        print("Writing {0} games to {1}".format(len(fbsgames), fbsgamesfile))
        _saveJson(fbsgames, fbsgamesfile)

    #################################
    # If we need to get missing games
    #################################
    print('----- Missing -----')
    for game in missing:
        print("\t---> {0}".format(game))
    print('-------------------')

    if args.missing:
        for gamestatus, gameid in missing:
            if gamestatus == "NoScore":
                parseTeam.getGame(gameid, gamebase, test=False, force=args.force)
        return
    
    
 

##################################################################
##
## main
##
##################################################################
def buildArgParser():
    """Build the command-line parser; each flag selects one action of
    ``main`` (``-force`` and ``-missing`` modify the selected action)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-spref', action="store_true", dest='spref', help='Download and parse the SFref data.')
    parser.add_argument('-merge', action="store_true", dest='merge', help='Merge scores and game data.')
    parser.add_argument('-get-game', nargs=1, dest='getgame', help='Download a single game by its id.')
    parser.add_argument('-get-games', action="store_true", dest='getgames', default=False, help='Download the games of every team.')
    parser.add_argument('-get-teams', action="store_true", dest='getteams', default=False, help='Download team pages and historical schedules.')
    parser.add_argument('-missing', action="store_true", dest='missing', default=False, help='Look for missing games.')
    parser.add_argument('-force', action="store_true", dest='force', default=False, help='Force downloads.')
    parser.add_argument('-get-scores', action="store_true", dest='getscores', default=False, help='Get only scores.')
    parser.add_argument('-get-historical-scores', action="store_true", dest='gethistoricalscores', default=False, help='Parse historical scores.')
    return parser


# NOTE(review): inside a notebook kernel ``__name__`` is also "__main__", so
# running this cell parses the kernel's own argv -- confirm that is intended.
if __name__ == "__main__":
    args = buildArgParser().parse_args()

    main(args)
    