To run this notebook, please visit https://colab.research.google.com/drive/1uT1AB6gl5Q08ba3uYiobQZHTJtjFnXoM

In [None]:
# Install the async HTTP client libraries imported by the scraping code below.
!pip install aiohttp async_timeout



In [None]:
# Download the two data files read later in this notebook
# (saved as teamsgames.pkl and prev_and_new_rtgs.csv).
!gdown --id 1nuSfGzNECJAuma3DRjZ0ddqjSZ5Efa9g
!gdown --id 1FoeAfnlrhFqKSGPESBaw0xjBv_lYhjqK

Downloading...
From: https://drive.google.com/uc?id=1nuSfGzNECJAuma3DRjZ0ddqjSZ5Efa9g
To: /content/teamsgames.pkl
100% 1.50M/1.50M [00:00<00:00, 99.5MB/s]
Downloading...
From: https://drive.google.com/uc?id=1FoeAfnlrhFqKSGPESBaw0xjBv_lYhjqK
To: /content/prev_and_new_rtgs.csv
100% 940k/940k [00:00<00:00, 61.3MB/s]


In [None]:
import asyncio, aiohttp, async_timeout, json, datetime as dt, itertools, re, copy, pickle, math, scipy
from bs4 import BeautifulSoup, SoupStrainer
from time import strptime
from statistics import mean
from scipy.stats.mstats import gmean
 
 
async def dictyieldvalues(dictionary):
    """Async generator over the values of ``dictionary``.

    Lets the values be consumed with ``async for`` (for example inside an
    async list comprehension).
    """
    values = dictionary.values()
    for value in values:
        yield value
 
async def gather_with_concurrency(n, *tasks):
    """Await all ``tasks`` with at most ``n`` of them running at once.

    Results come back as a list in the same order as ``tasks``, exactly as
    ``asyncio.gather`` returns them.
    """
    limiter = asyncio.Semaphore(n)

    async def run_limited(awaitable):
        # Hold one semaphore slot for the whole lifetime of the awaitable.
        async with limiter:
            result = await awaitable
            return result

    limited = [run_limited(awaitable) for awaitable in tasks]
    return await asyncio.gather(*limited)
 
 
class Team:
    """A team plus the set of games it has played, with record and RPI helpers.

    The methods only rely on each game exposing ``hometeam``, ``awayteam``,
    ``neutralsite``, ``gameid``, ``winner()``, ``loser()`` and
    ``offensiveratings()``.
    """

    def __init__(self, teamid, fullname, isd1=False, conference=None):
        self.teamid = teamid          # identifier; also the key into per-game rating dicts
        self.fullname = fullname      # display name, used in messages
        self.isd1 = isd1              # flag consulted by every D1-only statistic below
        self.conference = conference  # conference label, or None
        self.games = set()            # games this team appears in; filled in by the caller

    def winslosses(self):
        """Return ``[wins, losses]``, two lists of games split by result.

        A game in which this team is neither ``winner()`` nor ``loser()``
        (for example a tie) lands in neither list.
        """
        wins, losses = [], []
        for game in self.games:
            if game.winner() is self:
                wins.append(game)
            elif game.loser() is self:
                losses.append(game)
        return [wins, losses]

    def opponent(self, game):
        """Return the other team in ``game``.

        When this team is not in the game a message is printed and ``None``
        is returned implicitly.
        """
        if game.hometeam is self:
            return game.awayteam
        elif game.awayteam is self:
            return game.hometeam
        else:
            print(f'{self.fullname} did not play in game {game.gameid}!')

    def D1winslosses(self):
        """Like ``winslosses`` but restricted to games against ``isd1`` opponents."""
        wins, losses = self.winslosses()
        return [[win for win in wins if self.opponent(win).isd1],
                [loss for loss in losses if self.opponent(loss).isd1]]

    def record(self):
        """Return the overall record formatted as ``'W-L'``."""
        wins, losses = self.winslosses()
        return f'{len(wins)}-{len(losses)}'

    def D1record(self):
        """Return the record against D1 opponents formatted as ``'W-L'``."""
        D1wins, D1losses = self.D1winslosses()
        return f'{len(D1wins)}-{len(D1losses)}'

    def winpct(self):
        """Return wins / decided games, or ``0.`` when there are none."""
        wins, losses = self.winslosses()
        decided = len(wins)+len(losses)
        if not decided:
            return 0.
        return len(wins)/decided

    def D1winpct(self):
        """Return the winning percentage against D1 opponents (``0.`` if none)."""
        D1wins, D1losses = self.D1winslosses()
        decided = len(D1wins)+len(D1losses)
        if not decided:
            return 0.
        return len(D1wins)/decided

    def _sitecounts(self, gamelist):
        """Count the games in ``gamelist`` as ``(home, away, neutral)``.

        Neutral-site games are counted only as neutral, whichever side this
        team is listed on.
        """
        home = away = neutral = 0
        for game in gamelist:
            if game.neutralsite:
                neutral += 1
            elif game.hometeam is self:
                home += 1
            elif game.awayteam is self:
                away += 1
        return home, away, neutral

    def D1weightedwinpct(self):
        """Return the site-weighted winning percentage against D1 opponents.

        Away wins and home losses weigh 1.4, home wins and away losses 0.6,
        neutral-site results 1. Returns ``0.`` with no D1 results.

        Each game is assigned to exactly one site bucket. The previous code
        rebuilt the home lists without excluding neutral-site games, which
        counted those games twice.
        """
        D1wins, D1losses = self.D1winslosses()
        if not D1wins and not D1losses:
            return 0.
        homewins, awaywins, neutralwins = self._sitecounts(D1wins)
        homelosses, awaylosses, neutrallosses = self._sitecounts(D1losses)
        weightedwins = 1.4*awaywins+neutralwins+0.6*homewins
        weightedlosses = 1.4*homelosses+neutrallosses+0.6*awaylosses
        return weightedwins/(weightedwins+weightedlosses)

    def D1winpctwithoutopponent(self, opponent):
        """Return the D1 winning percentage ignoring all games against ``opponent``."""
        D1wins, D1losses = self.D1winslosses()
        winswithout = [win for win in D1wins if self.opponent(win) is not opponent]
        losseswithout = [loss for loss in D1losses if self.opponent(loss) is not opponent]
        decided = len(winswithout)+len(losseswithout)
        if not decided:
            return 0.
        return len(winswithout)/decided

    def opponents(self):
        """Return the set of teams faced in ``self.games``."""
        return {self.opponent(game) for game in self.games}

    def D1opponents(self):
        """Return the set of ``isd1`` teams faced in ``self.games``."""
        return {self.opponent(game) for game in self.games if self.opponent(game).isd1}

    def OWP(self):
        """Return the opponents' winning percentage.

        This is the mean, over distinct D1 opponents, of each opponent's D1
        winning percentage excluding its games against this team. Returns
        ``0.`` with no D1 opponents, matching ``winpct``.
        """
        D1opponents = self.D1opponents()
        if not D1opponents:
            return 0.
        return mean([opponent.D1winpctwithoutopponent(self) for opponent in D1opponents])

    def OOWP(self):
        """Return the mean ``OWP`` of the distinct D1 opponents (``0.`` if none)."""
        D1opponents = self.D1opponents()
        if not D1opponents:
            return 0.
        return mean([opponent.OWP() for opponent in D1opponents])

    def RPI(self):
        """Return 0.25*D1 win pct + 0.5*OWP + 0.25*OOWP."""
        return 0.25*self.D1winpct()+0.5*self.OWP()+0.25*self.OOWP()

    def weightedRPI(self):
        """Return the RPI computed with the site-weighted winning percentage."""
        return 0.25*self.D1weightedwinpct()+0.5*self.OWP()+0.25*self.OOWP()

    def offensiverating(self):
        """Return the mean of this team's per-game offensive ratings.

        Games whose ``offensiveratings()`` is falsy are skipped. Raises
        ``statistics.StatisticsError`` when no game provides ratings.
        """
        ratings = []
        for game in self.games:
            gameratings = game.offensiveratings()
            if gameratings:
                ratings.append(gameratings[self.teamid])
        return mean(ratings)

    def defensiverating(self):
        """Return the mean of the opponents' per-game offensive ratings.

        Same skipping and error behaviour as ``offensiverating``.
        """
        ratings = []
        for game in self.games:
            gameratings = game.offensiveratings()
            if gameratings:
                ratings.append(gameratings[self.opponent(game).teamid])
        return mean(ratings)
 
 
class Game:
    """One game: participants, final score, site flag and optional box score."""

    def __init__(self, gameid, time=None, hometeam=None, homescore=0, awayteam=None, awayscore=0, ots=0, neutralsite=False, boxscore=None):
        self.gameid = gameid
        self.time = time
        self.hometeam = hometeam
        self.homescore = homescore
        self.awayteam = awayteam
        self.awayscore = awayscore
        self.neutralsite = neutralsite
        self.ots = ots
        # A falsy box score (None or empty) is stored as a fresh empty dict;
        # possessions() indexes a non-empty one by team id.
        self.boxscore = boxscore if boxscore else dict()

    def winner(self):
        """Return the team with the higher score, or ``None`` when scores are equal."""
        if self.homescore > self.awayscore:
            return self.hometeam
        elif self.awayscore > self.homescore:
            return self.awayteam
        return None

    def loser(self):
        """Return the team with the lower score, or ``None`` when scores are equal."""
        if self.homescore > self.awayscore:
            return self.awayteam
        elif self.awayscore > self.homescore:
            return self.hometeam
        return None

    def pointtotal(self):
        """Return the combined score of both teams."""
        return self.homescore+self.awayscore

    def pointdifferential(self):
        """Return the absolute margin of victory."""
        return abs(self.homescore-self.awayscore)

    def possessions(self):
        """Estimate the game's possessions from the box score.

        Returns ``None`` when the game has no box score. Otherwise reads the
        ``fg``, ``ft``, ``oreb``, ``dreb`` and ``to`` entries of both teams
        and passes them to ``totalpossessions``.
        """
        if not self.boxscore:
            return None
        hometeaminfo = self.boxscore[self.hometeam.teamid]
        awayteaminfo = self.boxscore[self.awayteam.teamid]
        homefg, homefga = hometeaminfo['fg']
        homefta = hometeaminfo['ft'][1]
        homeorb = hometeaminfo['oreb']
        homedrb = hometeaminfo['dreb']
        hometov = hometeaminfo['to']
        awayfg, awayfga = awayteaminfo['fg']
        awayfta = awayteaminfo['ft'][1]
        awayorb = awayteaminfo['oreb']
        awaydrb = awayteaminfo['dreb']
        awaytov = awayteaminfo['to']
        return totalpossessions(homefga, homefta, homeorb, homedrb, homefg, hometov, awayfga, awayfta, awayorb, awaydrb, awayfg, awaytov)

    def offensiveratings(self):
        """Return ``{teamid: points per 100 possessions}`` for both teams.

        Returns ``None`` when no possession estimate is available (no box
        score, or an estimate of zero), so callers can skip the game instead
        of hitting a division error.
        """
        poss = self.possessions()
        if not poss:
            return None
        ratings = dict()
        ratings[self.hometeam.teamid] = 100*self.homescore/poss
        ratings[self.awayteam.teamid] = 100*self.awayscore/poss
        return ratings
 
 
def totalpossessions(teamfga, teamfta, teamorb, teamdrb, teamfg, teamtov, oppfga, oppfta, opporb, oppdrb, oppfg, opptov):
    """Estimate the possessions in a game as the mean of both teams' estimates.

    Each side's estimate is
    ``FGA + 0.4*FTA - 1.07*ORB%*(FGA - FG) + TOV`` where ``ORB%`` is the
    side's offensive rebounds divided by (its offensive rebounds + the other
    side's defensive rebounds). When that denominator is zero the rebound
    rate is taken as 0 instead of raising ``ZeroDivisionError``.
    """
    def sidepossessions(fga, fta, orb, fg, tov, otherdrb):
        # Possession estimate for one side of the game.
        available = orb+otherdrb
        orbrate = orb/available if available else 0.
        return fga+0.4*fta-1.07*orbrate*(fga-fg)+tov

    teamposs = sidepossessions(teamfga, teamfta, teamorb, teamfg, teamtov, oppdrb)
    oppposs = sidepossessions(oppfga, oppfta, opporb, oppfg, opptov, teamdrb)
    return 0.5*(teamposs+oppposs)
 
def savedata(teams, games, savefile):
    """Pickle the teams and games to ``savefile`` as plain lists.

    Teams are stored as ``[teamid, fullname, isd1, conference]`` and games as
    ``[gameid, time, home teamid, homescore, away teamid, awayscore, ots,
    neutralsite, boxscore]`` so the file does not depend on the classes
    themselves. The file is opened in a ``with`` block so it is flushed and
    closed even if pickling fails.
    """
    saveteams = [[team.teamid, team.fullname, team.isd1, team.conference] for team in teams.values()]
    savegames = [[game.gameid, game.time, game.hometeam.teamid, game.homescore, game.awayteam.teamid, game.awayscore, game.ots, game.neutralsite, game.boxscore] for game in games.values()]
    with open(savefile, 'wb') as f:
        pickle.dump([saveteams, savegames], f)
 
def recalldata(savefile):
    """Rebuild ``(teams, games)`` from a file written by ``savedata``.

    Returns two dictionaries keyed by team id and game id. Every game is
    re-linked to its ``Team`` objects and added to both teams' ``games`` sets.

    NOTE(review): ``pickle.load`` can execute arbitrary code; only use this on
    files from a trusted source.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(savefile, 'rb') as f:
        teamsgames = pickle.load(f)
    recallteams = {team[0] : Team(team[0], team[1], isd1=team[2], conference=team[3]) for team in teamsgames[0]}
    recallgames = {game[0] : Game(game[0], time=game[1], hometeam=recallteams[game[2]], homescore=game[3], awayteam=recallteams[game[4]], awayscore=game[5], ots=game[6], neutralsite=game[7], boxscore=game[8]) for game in teamsgames[1]}
    for game in recallgames.values():
        game.hometeam.games.add(game)
        game.awayteam.games.add(game)
    return recallteams, recallgames
 
 
async def getD1teams(session, who='women'):
    """Scrape ESPN's team index and return ``{teamid: Team}``.

    ``who`` selects the ``{who}s-college-basketball`` section of the site.
    Every team found is created with ``isd1=True`` and the name of the group
    it is listed under as its conference.
    """
    url = f'https://www.espn.com/{who}s-college-basketball/teams'
    isstatescript = lambda string: string.startswith("window['__espnfitt__']")
    async with session.get(url) as resp:
        html = await resp.text()
        teamsoup = BeautifulSoup(html, 'lxml', parse_only=SoupStrainer(text=isstatescript))
    # The slice strips 23 leading characters and the final one before JSON
    # decoding -- presumably the ``window['__espnfitt__']=`` prefix and a
    # trailing ';' (TODO confirm against the live page).
    pagestate = json.loads(teamsoup.text[23:-1])
    columns = pagestate['page']['content']['leagueTeams']['columns']
    teams = dict()
    for column in columns:
        for group in column['groups']:
            conference = group['nm']
            for teamjson in group['tms']:
                teamid = int(teamjson['id'])
                teams[teamid] = Team(teamid, teamjson['n'], isd1=True, conference=conference)
    return teams
 
async def getD1teamschedulejson(teamid, session, who='women', season=2020):
    """Download a team's ESPN schedule page and return ``(teamid, scheduleData)``.

    ``season`` is the value placed in the ``/season/`` part of the URL
    (default 2020, the value that used to be hard-coded).

    The request is retried without limit until a non-empty ``scheduleData``
    object is decoded. A body that takes longer than 10 seconds or arrives
    truncated triggers a retry; previously a timeout escaped uncaught even
    though the "still waiting" message was written for it.
    """
    print(f'getting team {teamid}')
    url = f'https://www.espn.com/{who}s-college-basketball/team/schedule/_/id/{teamid}/season/{season}'
    schedulejson = None
    while not schedulejson:
        async with session.get(url) as resp:
            try:
                # wait_for alone enforces the 10 s limit; the nested synchronous
                # ``with async_timeout.timeout(...)`` duplicated it.
                html = await asyncio.wait_for(resp.text(), timeout=10.0)
            except (asyncio.TimeoutError, aiohttp.client_exceptions.ClientPayloadError):
                print(f'still waiting for team {teamid}, restarting')
                continue
            try:
                schedulesoup = BeautifulSoup(html, 'lxml', parse_only=SoupStrainer(text=lambda string: string.startswith("window['__espnfitt__']")))
                schedulejson = json.loads(schedulesoup.text[23:-1])['page']['content']['scheduleData']
            except json.decoder.JSONDecodeError:
                # schedulejson stays None, so the loop requests the page again.
                print(f'problem with team {teamid}')
    print(f'done getting team {teamid}')
    return teamid, schedulejson
 
def processschedulejson(teams, team, schedulejson):
    """Turn one team's schedule JSON into ``{gameid: Game}``.

    Only the ``events.post`` entries of each ``teamSchedule`` element are
    used. Each resulting ``Game`` carries just its id, a UTC-aware start time
    and the neutral-site flag. ``teams`` and ``team`` are accepted but not
    used.
    """
    events = [event for schedule in schedulejson['teamSchedule'] for event in schedule['events']['post']]
    games = dict()
    for event in events:
        # The game id is whatever follows the first '=' of the link.
        gameid = int(event['time']['link'].split('=')[1])
        naive = dt.datetime.strptime(event['date']['date'], '%Y-%m-%dT%H:%MZ')
        starttime = naive.replace(tzinfo=dt.timezone.utc)
        neutralsite = event['opponent']['neutralSite']
        games[gameid] = Game(gameid, time=starttime, neutralsite=neutralsite)
    return games
 
def processboxscore(div):
    """Extract one team's totals from its box-score ``div``.

    The row containing a ``TEAM`` cell is parsed first. If converting one of
    its cells raises ``ValueError``, the totals are rebuilt instead by summing
    every row that is not marked ``highlight``. Cells are matched to
    statistics through their first CSS class; classes that are not keys of
    ``emptybox`` are ignored.
    """
    rows = [tr for tbody in div.find_all('tbody') for tr in tbody.find_all('tr')]
    try:
        totals = dict(emptybox)
        teamrow = [row for row in rows if row.find('td', text='TEAM')][0]
        for cell in teamrow.find_all('td'):
            stat = cell.get('class')[0]
            if stat in totals:
                totals[stat] = datamaps[stat](cell.text)
        return totals
    except ValueError:
        pass
    # Fallback: add up the individual rows.
    totals = dict(emptybox)
    for row in rows:
        rowclasses = row.get('class')
        if rowclasses and 'highlight' in rowclasses:
            continue
        for cell in row.find_all('td'):
            stat = cell.get('class')[0]
            if stat in totals:
                totals[stat] = dataadds[stat](totals[stat], datamaps[stat](cell.text))
    return totals
 
async def getgameinfo(gameid, session, who='women'):
    """Fetch and parse ESPN's box-score page for one game.

    Returns ``(gameid, gameinfo)``. ``gameinfo`` is a dict holding
    ``hometeamid``/``awayteamid`` (when found in the page script),
    ``hometeamname``/``awayteamname``, ``ots`` and ``homebox``/``awaybox``.
    It is ``None`` when a team's box score cannot be parsed.

    The download is retried without limit after a timeout, a truncated
    payload, or a page lacking the ``competitors`` block.
    """
    print(f'getting game {gameid}')
    url = f'https://www.espn.com/{who}s-college-basketball/boxscore?gameId={gameid}'
    gameinfo = dict()
    while not gameinfo:
        async with session.get(url) as resp:
            try:
                # wait_for alone enforces the 10 s limit; the nested synchronous
                # ``with async_timeout.timeout(...)`` duplicated it.
                html = await asyncio.wait_for(resp.text(), timeout=10.0)
            except (asyncio.TimeoutError, aiohttp.client_exceptions.ClientPayloadError):
                print(f'still waiting for game {gameid}, restarting')
                continue
            # Raw string: '\d' in a normal literal is an invalid escape sequence.
            for i in re.findall(r'espn\.gamepackage\.(?:home|away)TeamId = "\d+"', html):
                if 'home' in i:
                    gameinfo['hometeamid'] = int(i.split('"')[1])
                if 'away' in i:
                    gameinfo['awayteamid'] = int(i.split('"')[1])
            soup = BeautifulSoup(html, 'lxml')
            teamsdiv = soup.find('div', {'class': 'competitors'})
            try:
                teams = teamsdiv.find_all('div', {'class': 'team'}, recursive=False)
            except Exception:
                # ``except Exception`` rather than a bare except, so task
                # cancellation and KeyboardInterrupt are not swallowed.
                print(f'problem with {gameid}, trying again')
                gameinfo = dict()
                continue
            for team in teams:
                nameparts = {span.get('class')[0]: span.text for span in team.find_all('span') if span.get('class')[0] in ['long-name', 'short-name']}
                if 'home' in team.get('class'):
                    gameinfo['hometeamname'] = ' '.join([nameparts['long-name'], nameparts['short-name']])
                if 'away' in team.get('class'):
                    gameinfo['awayteamname'] = ' '.join([nameparts['long-name'], nameparts['short-name']])
            status = teamsdiv.find('span', {'class': 'game-time status-detail'}).text
            if 'OT' in status:
                # Text between '/' and 'OT' is the overtime count; empty means one.
                num = status.split('/')[1].split('OT')[0]
                if not num:
                    gameinfo['ots'] = 1
                else:
                    gameinfo['ots'] = int(num)
            else:
                gameinfo['ots'] = 0
            boxscores = soup.find('div', id='gamepackage-boxscore-module').div.find_all('div', recursive=False)
            for div in boxscores:
                if 'gamepackage-home-wrap' in div.get('class'):
                    try:
                        gameinfo['homebox'] = processboxscore(div)
                    except Exception:
                        print(f'problem with {gameid}')
                        return gameid, None
                if 'gamepackage-away-wrap' in div.get('class'):
                    try:
                        gameinfo['awaybox'] = processboxscore(div)
                    except Exception:
                        print(f'problem with {gameid}')
                        return gameid, None
    print(f'done getting game {gameid}')
    return gameid, gameinfo
 
# Box-score columns, keyed by the CSS class of the table cell.
# "made-attempted" pairs come first, then plain integer counters.
_PAIRSTATS = ('fg', '3pt', 'ft')
_COUNTSTATS = ('oreb', 'dreb', 'reb', 'ast', 'stl', 'blk', 'to', 'pf', 'pts')


def _parsepair(text):
    # 'made-attempted' text -> [made, attempted]
    return [int(part) for part in text.split('-')]


def _addpairs(first, second):
    # Element-wise sum of two [made, attempted] pairs.
    return [first[0]+second[0], first[1]+second[1]]


def _addcounts(first, second):
    return first+second


# Cell text -> parsed value.
datamaps = {**{stat: _parsepair for stat in _PAIRSTATS},
            **{stat: int for stat in _COUNTSTATS}}

# (running total, parsed value) -> new running total.
dataadds = {**{stat: _addpairs for stat in _PAIRSTATS},
            **{stat: _addcounts for stat in _COUNTSTATS}}

# Starting totals for one team's box score.
emptybox = {**{stat: [0, 0] for stat in _PAIRSTATS},
            **{stat: 0 for stat in _COUNTSTATS}}
 
async def getD1teamsgames(who='women', savefile=None):
    """Scrape all D1 teams, their schedules and every game's box score.

    Returns ``(teams, games)`` dictionaries keyed by id. Opponents missing
    from the D1 list are added as non-D1 teams, games whose page could not be
    parsed are dropped, and teams left without games are removed. When
    ``savefile`` is given the result is also written with ``savedata``.
    """
    async with aiohttp.ClientSession() as session:
        teams = await getD1teams(session, who=who)
        schedulerequests = [getD1teamschedulejson(team.teamid, session, who=who) async for team in dictyieldvalues(teams)]
        schedulejsons = await gather_with_concurrency(16, *schedulerequests)

    games = dict()
    for teamid, schedulejson in schedulejsons:
        games.update(processschedulejson(teams, teams[teamid], schedulejson))

    async with aiohttp.ClientSession() as session:
        inforequests = [getgameinfo(game.gameid, session, who=who) async for game in dictyieldvalues(games)]
        gameinfos = await gather_with_concurrency(16, *inforequests)

    def lookupteam(gameinfo, idkey, namekey):
        # Return (teamid, team), registering a new non-D1 team when unknown.
        teamid = gameinfo[idkey]
        team = teams.get(teamid)
        if not team:
            team = Team(teamid, fullname=gameinfo[namekey])
            teams[teamid] = team
        return teamid, team

    for gameid, gameinfo in gameinfos:
        if not gameinfo:
            del games[gameid]
            continue
        game = games[gameid]
        hometeamid, hometeam = lookupteam(gameinfo, 'hometeamid', 'hometeamname')
        awayteamid, awayteam = lookupteam(gameinfo, 'awayteamid', 'awayteamname')
        game.hometeam = hometeam
        game.awayteam = awayteam
        homebox = gameinfo['homebox']
        awaybox = gameinfo['awaybox']
        # The final score is moved out of the box score into the game itself.
        game.homescore = homebox.pop('pts')
        game.awayscore = awaybox.pop('pts')
        game.boxscore = {hometeamid: homebox, awayteamid: awaybox}
        hometeam.games.add(game)
        awayteam.games.add(game)

    idle = [teamid for teamid, team in teams.items() if not team.games]
    for teamid in idle:
        del teams[teamid]

    if savefile:
        savedata(teams, games, savefile)
    return teams, games
 
#KRACH rankings
def cleanexistingties(teamsin, gamesin):
    """Copy the teams and games, dropping every game that has no winner.

    New ``Team`` and ``Game`` objects are built so the inputs are not
    modified (box-score dicts are passed through, not copied). Teams left
    without games are removed. The copy now carries ``neutralsite`` over;
    it used to be reset to ``False``.

    Returns ``(teams, games)`` dictionaries keyed by team id and game id.
    """
    teams = {team.teamid: Team(team.teamid, team.fullname, isd1=team.isd1, conference=team.conference) for team in teamsin.values()}
    games = {
        game.gameid: Game(game.gameid, time=game.time,
                          hometeam=teams[game.hometeam.teamid], homescore=game.homescore,
                          awayteam=teams[game.awayteam.teamid], awayscore=game.awayscore,
                          ots=game.ots, neutralsite=game.neutralsite, boxscore=game.boxscore)
        for game in gamesin.values() if game.winner()
    }
    for game in list(games.values()):
        game.hometeam.games.add(game)
        game.awayteam.games.add(game)
    for teamid, team in list(teams.items()):
        if not team.games:
            del teams[teamid]
    return teams, games
 
def cleannonD1(teamsin, gamesin):
    """Copy only the D1 teams and the decided games between two D1 teams.

    New ``Team`` and ``Game`` objects are built so the inputs are not
    modified (box-score dicts are passed through, not copied). The copy now
    carries ``neutralsite`` over; it used to be reset to ``False``.

    Returns ``(teams, games)`` dictionaries keyed by team id and game id.
    """
    teams = {team.teamid: Team(team.teamid, team.fullname, isd1=team.isd1, conference=team.conference) for team in teamsin.values() if team.isd1}
    games = {
        game.gameid: Game(game.gameid, time=game.time,
                          hometeam=teams[game.hometeam.teamid], homescore=game.homescore,
                          awayteam=teams[game.awayteam.teamid], awayscore=game.awayscore,
                          ots=game.ots, neutralsite=game.neutralsite, boxscore=game.boxscore)
        for game in gamesin.values()
        if game.winner() and game.winner().isd1 and game.loser().isd1
    }
    for game in list(games.values()):
        game.hometeam.games.add(game)
        game.awayteam.games.add(game)
    return teams, games
 
def cleanbeforetime(time, teamsin, gamesin):
    """Copy the teams and keep only games played at or after ``time``.

    Games without a truthy ``time`` are dropped. New ``Team`` and ``Game``
    objects are built so the inputs are not modified (box-score dicts are
    passed through, not copied). The copy now carries ``neutralsite`` over;
    it used to be reset to ``False``.

    Returns ``(teams, games)`` dictionaries keyed by team id and game id.
    """
    teams = {team.teamid: Team(team.teamid, team.fullname, isd1=team.isd1, conference=team.conference) for team in teamsin.values()}
    games = {
        game.gameid: Game(game.gameid, time=game.time,
                          hometeam=teams[game.hometeam.teamid], homescore=game.homescore,
                          awayteam=teams[game.awayteam.teamid], awayscore=game.awayscore,
                          ots=game.ots, neutralsite=game.neutralsite, boxscore=game.boxscore)
        for game in gamesin.values() if game.time and game.time >= time
    }
    for game in list(games.values()):
        game.hometeam.games.add(game)
        game.awayteam.games.add(game)
    return teams, games
 
def cleanaftertime(time, teamsin, gamesin):
    """Copy the teams and keep only games played strictly before ``time``.

    Games without a truthy ``time`` are dropped. New ``Team`` and ``Game``
    objects are built so the inputs are not modified (box-score dicts are
    passed through, not copied). The copy now carries ``neutralsite`` over;
    it used to be reset to ``False``.

    Returns ``(teams, games)`` dictionaries keyed by team id and game id.
    """
    teams = {team.teamid: Team(team.teamid, team.fullname, isd1=team.isd1, conference=team.conference) for team in teamsin.values()}
    games = {
        game.gameid: Game(game.gameid, time=game.time,
                          hometeam=teams[game.hometeam.teamid], homescore=game.homescore,
                          awayteam=teams[game.awayteam.teamid], awayscore=game.awayscore,
                          ots=game.ots, neutralsite=game.neutralsite, boxscore=game.boxscore)
        for game in gamesin.values() if game.time and game.time < time
    }
    for game in list(games.values()):
        game.hometeam.games.add(game)
        game.awayteam.games.add(game)
    return teams, games
 
def numplayed(team1, team2):
    """Return how many games appear in both teams' ``games`` sets."""
    shared = team1.games.intersection(team2.games)
    return len(shared)
 
def sos(krachratings, team):
    """Return the geometric mean of the ratings of ``team``'s D1 opponents.

    One entry is taken per game, so an opponent faced twice counts twice.
    """
    opponentratings = []
    for game in team.games:
        opponent = team.opponent(game)
        if opponent.isd1:
            opponentratings.append(krachratings[opponent.teamid])
    return gmean(opponentratings)
 
def oocsos(krachratings, team):
    """Return the geometric mean rating of ``team``'s out-of-conference D1 opponents.

    One entry is taken per game against a D1 opponent whose conference
    differs from ``team.conference``.
    """
    opponentratings = []
    for game in team.games:
        opponent = team.opponent(game)
        if opponent.isd1 and team.opponent(game).conference != team.conference:
            opponentratings.append(krachratings[opponent.teamid])
    return gmean(opponentratings)
 
'''def sos(krachratings, team):
    return sum(krachratings[opponent.teamid]*numplayed(team, opponent)/(krachratings[opponent.teamid]+krachratings[team.teamid]) for opponent in team.D1opponents())/sum(numplayed(team, opponent)/(krachratings[opponent.teamid]+krachratings[team.teamid]) for opponent in team.D1opponents())'''
 
def conferencestrength(krachratings, teams, conference):
    """Return the geometric mean rating of the teams whose conference is ``conference``."""
    memberratings = []
    for team in teams.values():
        if team.conference == conference:
            memberratings.append(krachratings[team.teamid])
    return gmean(memberratings)
 
def victorypoints(team, alpha):
    """Return ``team``'s margin-weighted win total over its D1 games.

    Each game contributes a logistic function of
    ``pointdifferential / (alpha * pointtotal)``: above 0.5 for a win, below
    0.5 for a loss, approaching 1 and 0 as the relative margin grows.
    """
    D1wins, D1losses = team.D1winslosses()
    points = []
    for game in D1wins:
        relativemargin = game.pointdifferential()/(alpha*game.pointtotal())
        points.append(1/(1+math.exp(-relativemargin)))
    for game in D1losses:
        relativemargin = game.pointdifferential()/(alpha*game.pointtotal())
        points.append(1/(1+math.exp(relativemargin)))
    return sum(points)
 
def rrwp(krachratings, team):
    """Return ``team``'s round-robin winning percentage.

    This is the average of ``rating / (rating + opponent rating)`` over every
    other team in ``krachratings``.
    """
    teamrating = krachratings[team.teamid]
    winprobabilities = []
    for opponentid in krachratings:
        if opponentid == team.teamid:
            continue
        winprobabilities.append(teamrating/(teamrating+krachratings[opponentid]))
    return sum(winprobabilities)/(len(krachratings)-1)
 
def rrwpkrach(krachratings, rating):
    """Return the average of ``rating / (rating + r)`` over every rating ``r`` in ``krachratings``.

    This is the round-robin winning percentage a hypothetical team rated
    ``rating`` would have against the whole field.
    """
    winprobabilities = []
    for opprating in krachratings.values():
        winprobabilities.append(rating/(rating+opprating))
    return sum(winprobabilities)/len(krachratings)
 
def krachadj(krachratings):
    """Return the rating whose round-robin winning percentage against the field is 0.5.

    Solved numerically with ``fsolve`` starting from 100. The caller uses the
    result to rescale the ratings so that this break-even rating becomes 100.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.optimize`` has been loaded.
    import scipy.optimize
    return scipy.optimize.fsolve(lambda rating: rrwpkrach(krachratings, rating)-0.5, 100)[0]
 
def calckrachratings(teamsin, gamesin, vpalpha=5, goaldelta=1e-10, time=None, sincetime=None, savefile=None, calcteamsos=True, verbose=True):
    """Iteratively compute KRACH-style ratings for the D1 teams.

    Parameters:
        teamsin, gamesin: team and game dictionaries; they are copied, not modified.
        vpalpha: margin-sensitivity constant; it is divided by the mean point
            total of D1-vs-D1 games before being passed to ``victorypoints``.
        goaldelta: iteration stops once the summed absolute rating change
            falls below ``goaldelta`` times the geometric mean rating.
        time: if given, only games before this time are used.
        sincetime: if given, only games at or after this time are used.
        savefile: if given, the result is pickled there.
        calcteamsos: also compute each team's strength of schedule.
        verbose: print the iteration number and the change after each pass
            (default True, the previous unconditional behaviour).

    Returns ``(krachratings, teamsos)`` when ``calcteamsos`` is true, otherwise
    just ``krachratings``; both are dictionaries keyed by team id. Ratings
    are scaled so that the break-even rating from ``krachadj`` equals 100.

    Raises ``statistics.StatisticsError`` when there is no D1-vs-D1 game, and
    ``ZeroDivisionError`` when a D1 team has no D1 opponent.
    """
    teams, games = cleanexistingties(teamsin, gamesin)
    if time:
        teams, games = cleanaftertime(time, teams, games)
    if sincetime:
        teams, games = cleanbeforetime(sincetime, teams, games)
    D1teams = {team.teamid: team for team in teams.values() if team.isd1}

    krachratings = {teamid: 100. for teamid in D1teams}

    alphaadj = vpalpha/mean([game.pointtotal() for game in games.values() if game.hometeam.isd1 and game.awayteam.isd1])

    # These quantities do not change between iterations, so compute them once.
    victorypoints_dict = dict()
    D1opponents_dict = dict()
    numplayed_dict = dict()
    for team in D1teams.values():
        victorypoints_dict[team] = victorypoints(team, alphaadj)
        D1opponents = team.D1opponents()
        D1opponents_dict[team] = D1opponents
        for opponent in D1opponents:
            numplayed_dict[frozenset({team, opponent})] = numplayed(team, opponent)

    iterations = 0
    while True:
        iterations += 1
        if verbose:
            print(f'Iteration {iterations}')
        # Every new rating is computed from the previous pass's ratings only.
        newkrachratings = dict(krachratings)
        delta = 0.

        for team in D1teams.values():
            newkrachratings[team.teamid] = victorypoints_dict[team]/sum(numplayed_dict[frozenset({team, opponent})]/(krachratings[team.teamid]+krachratings[opponent.teamid]) for opponent in D1opponents_dict[team])
            delta += abs(newkrachratings[team.teamid]-krachratings[team.teamid])

        krachratings = newkrachratings

        if verbose:
            print(delta)
        if delta < goaldelta*gmean(list(krachratings.values())):
            break

    adj = krachadj(krachratings)
    krachratings = {k: v*100/adj for k, v in krachratings.items()}

    if calcteamsos:
        teamsos = {team.teamid: sos(krachratings, team) for team in D1teams.values()}
    if savefile:
        # ``with`` closes the file even if pickling fails.
        with open(savefile, 'wb') as f:
            if calcteamsos:
                pickle.dump([krachratings, teamsos], f)
            else:
                pickle.dump(krachratings, f)
    if calcteamsos:
        return krachratings, teamsos
    else:
        return krachratings

In [None]:
# Rebuild the saved teams/games, then keep only the D1 teams and the games
# between two of them.
allteams, allgames = recalldata('teamsgames.pkl')
teams, games = cleannonD1(allteams, allgames)

# Per-team offensive and defensive ratings over the remaining games.
ortgs, drtgs = dict(), dict()
for teamid, team in teams.items():
    ortgs[teamid] = team.offensiverating()
    drtgs[teamid] = team.defensiverating()

krachrtgs, teamsos = calckrachratings(teams, games)

# Express the ratings as log base 100 minus one, so a rating of 100 maps to 0.
log100 = math.log(100)


def _log100centered(ratings):
    return {i: math.log(r)/log100-1 for i, r in ratings.items()}


logkrach = _log100centered(krachrtgs)
logsos = _log100centered(teamsos)

Iteration 1
8271.407311614666
Iteration 2
4507.002997421938
Iteration 3
3275.106184432617
Iteration 4
2692.0815965509732
Iteration 5
2336.7223583588957
Iteration 6
2086.38307589521
Iteration 7
1895.3342868150055
Iteration 8
1740.290496157891
Iteration 9
1610.5343068250033
Iteration 10
1498.458013735543
Iteration 11
1398.20524315828
Iteration 12
1307.4924354766038
Iteration 13
1224.7711073863964
Iteration 14
1148.697864629292
Iteration 15
1079.2279082672424
Iteration 16
1014.9084073478934
Iteration 17
955.1296736084084
Iteration 18
899.5520821504015
Iteration 19
847.5680581404207
Iteration 20
798.9813033637348
Iteration 21
753.4239006545422
Iteration 22
710.6556231419436
Iteration 23
670.507900542357
Iteration 24
632.7280862114449
Iteration 25
597.1579503146157
Iteration 26
563.6533460108016
Iteration 27
532.1727899220632
Iteration 28
502.49220626485913
Iteration 29
474.49689015832735
Iteration 30
448.0857438046376
Iteration 31
423.16477797162094
Iteration 32
399.66613034256017
Iteratio

In [None]:
import torch

class Net(torch.nn.Module):
    """Fully connected regressor: four hidden layers and a two-value output.

    A ReLU follows every linear layer, including the last one, so both
    outputs are non-negative.
    """

    def __init__(self, input_size, hidden1_size, hidden2_size, hidden3_size, hidden4_size):
        super().__init__()
        self.input_size = input_size
        self.hidden1_size = hidden1_size
        self.hidden2_size = hidden2_size
        self.hidden3_size = hidden3_size
        self.hidden4_size = hidden4_size
        self.relu = torch.nn.ReLU()
        # Layers are created in order fc1..fc5; the attribute names are also
        # the parameter names stored in the state dict.
        self.fc1 = torch.nn.Linear(input_size, hidden1_size)
        self.fc2 = torch.nn.Linear(hidden1_size, hidden2_size)
        self.fc3 = torch.nn.Linear(hidden2_size, hidden3_size)
        self.fc4 = torch.nn.Linear(hidden3_size, hidden4_size)
        self.fc5 = torch.nn.Linear(hidden4_size, 2)

    def forward(self, x):
        """Apply ``fc1``..``fc5`` in order, each followed by ReLU."""
        activation = x
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
            activation = self.relu(layer(activation))
        return activation
 
def evaluate_regression(regressor, X, y, samples = 100, std_multiplier = 2):
    """Measure how often ``y`` lies inside an interval built from repeated predictions.

    ``regressor(X)`` is called ``samples`` times; the interval is the
    per-element mean plus/minus ``std_multiplier`` standard deviations of
    those predictions.

    Returns three scalar tensors: the fraction of elements inside the
    interval, the fraction at or below the upper bound, and the fraction at
    or above the lower bound.
    """
    stacked = torch.stack([regressor(X) for _ in range(samples)])
    center = stacked.mean(dim=0)
    spread = std_multiplier * stacked.std(dim=0)
    ci_upper = center + spread
    ci_lower = center - spread
    below_upper = ci_upper >= y
    above_lower = ci_lower <= y
    inside = (above_lower * below_upper).float().mean()
    return inside, below_upper.float().mean(), above_lower.float().mean()

In [None]:
import csv, numpy

# Fixed seed so the train/test split is the same on every run.
SPLIT_SEED = 0


def priorpointtotals(teams, teamid, beforetime):
    """Return ``(points scored, points allowed)`` by team ``teamid`` in its games before ``beforetime``."""
    scored = 0
    allowed = 0
    for game in teams[teamid].games:
        if game.time < beforetime:
            if teamid == game.hometeam.teamid:
                scored += game.homescore
                allowed += game.awayscore
            else:
                scored += game.awayscore
                allowed += game.homescore
    return scored, allowed


# One feature row and one [home score, away score] target per usable game.
# Named ``features``/``targets`` so the builtin ``input`` is not shadowed.
features = []
targets = []
with open('prev_and_new_rtgs.csv','r') as f:
    reader = csv.DictReader(f)
    for row in reader:
        gameid = int(row["Game ID"])
        rowgame = games[gameid]
        homeprevgames = int(row["Home Team # Previous Games"])
        awayprevgames = int(row["Away Team # Previous Games"])
        # A team without earlier games has no averages to feed the model.
        if homeprevgames == 0 or awayprevgames == 0:
            continue
        hometeamid = int(row["Home Team ID"])
        awayteamid = int(row["Away Team ID"])
        homescore = int(row["Home Team Score"])
        awayscore = int(row["Away Team Score"])
        totalhomepoints, totalhomepointsagainst = priorpointtotals(teams, hometeamid, rowgame.time)
        totalawaypoints, totalawaypointsagainst = priorpointtotals(teams, awayteamid, rowgame.time)
        avghomepoints = totalhomepoints/homeprevgames
        avghomepointsagainst = totalhomepointsagainst/homeprevgames
        avgawaypoints = totalawaypoints/awayprevgames
        avgawaypointsagainst = totalawaypointsagainst/awayprevgames
        homeortg = float(row["Home Team Previous ORtg"])
        homedrtg = float(row["Home Team Previous DRtg"])
        awayortg = float(row["Away Team Previous ORtg"])
        awaydrtg = float(row["Away Team Previous DRtg"])
        features.append([logkrach[hometeamid]-logkrach[awayteamid], logsos[hometeamid], logsos[awayteamid], homeortg-100, homedrtg-100, avghomepoints, avghomepointsagainst, awayortg-100, awaydrtg-100, avgawaypoints, avgawaypointsagainst, float(rowgame.neutralsite)])
        targets.append([homescore, awayscore])

# Hold out a random tenth of the rows as the test set.
numpoints = len(features)
rng = numpy.random.default_rng(SPLIT_SEED)
testindices = set(rng.choice(numpoints, numpoints//10, replace=False))
traininput = []
trainoutput = []
testinput = []
testoutput = []
for i, (x, y) in enumerate(zip(features, targets)):
    if i in testindices:
        testinput.append(x)
        testoutput.append(y)
    else:
        traininput.append(x)
        trainoutput.append(y)
assert len(traininput)+len(testinput) == numpoints

traininput = torch.FloatTensor(traininput).cuda()
trainoutput = torch.FloatTensor(trainoutput).cuda()
testinput = torch.FloatTensor(testinput).cuda()
testoutput = torch.FloatTensor(testoutput).cuda()

In [None]:
# Hyper-parameters of the network and of the optimisation.
NUM_FEATURES = 12
HIDDEN_SIZE = 200
BATCH_SIZE = 16
LEARNING_RATE = 0.0001

model = Net(NUM_FEATURES, HIDDEN_SIZE, HIDDEN_SIZE, HIDDEN_SIZE, HIDDEN_SIZE)
model.cuda()
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Both loaders draw shuffled mini-batches from their tensors.
ds_train = torch.utils.data.TensorDataset(traininput, trainoutput)
ds_test = torch.utils.data.TensorDataset(testinput, testoutput)
dataloader_train, dataloader_test = (
    torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
    for dataset in (ds_train, ds_test)
)

In [None]:
# Show the GPU attached to this runtime.
!nvidia-smi

Fri Dec 11 22:09:57 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.45.01    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    39W / 300W |   1243MiB / 16130MiB |      5%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import os

NUM_EPOCHS = 5000

# Train with mini-batch Adam. The loss is reported once per epoch (as the
# sample-weighted mean over its batches) instead of once per batch, which
# flooded the cell output.
for epoch in range(NUM_EPOCHS):
    epochloss = 0.
    numsamples = 0
    for inputs, outputs in dataloader_train:
        optimizer.zero_grad()
        preds = model(inputs)
        loss = criterion(preds, outputs)
        loss.backward()
        optimizer.step()
        epochloss += loss.item()*len(inputs)
        numsamples += len(inputs)
    if numsamples:
        print(f'Epoch {epoch+1}: mean training loss {epochloss/numsamples}')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Loss: 77.53054809570312
Loss: 76.7471923828125
Loss: 157.39248657226562
Loss: 93.74910736083984
Loss: 91.66502380371094
Loss: 91.173095703125
Loss: 96.04794311523438
Loss: 69.84100341796875
Loss: 95.44548797607422
Loss: 54.71886444091797
Loss: 168.2933349609375
Loss: 100.37088775634766
Loss: 99.5069580078125
Loss: 88.17253112792969
Loss: 117.19338989257812
Loss: 56.9421272277832
Loss: 71.79550170898438
Loss: 55.70951843261719
Loss: 93.82089233398438
Loss: 121.93627166748047
Loss: 60.03762435913086
Loss: 104.3819351196289
Loss: 94.35770416259766
Loss: 104.64956665039062
Loss: 93.69073486328125
Loss: 79.60482788085938
Loss: 79.49046325683594
Loss: 67.81388854980469
Loss: 119.761962890625
Loss: 84.82363891601562
Loss: 125.47686767578125
Loss: 106.4569320678711
Loss: 87.31344604492188
Loss: 76.5800552368164
Loss: 130.6563262939453
Loss: 122.4669189453125
Loss: 54.91923904418945
Loss: 143.8178253173828
Loss: 131.25991821289062

KeyboardInterrupt: ignored

In [None]:
# Mean squared error on the held-out games. Gradients are not needed for
# evaluation, so skip building the autograd graph.
with torch.no_grad():
    testloss = criterion(model(testinput), testoutput)
testloss

tensor(111.6408, device='cuda:0', grad_fn=<MseLossBackward>)

In [None]:
# Save only the learned parameters (the state dict), not the whole model object.
torch.save(model.state_dict(), 'model2.torch')