<a href="https://colab.research.google.com/github/kalebr/carybball/blob/master/CaryBBall.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#@title
import requests
import urllib.request
import time
from bs4 import BeautifulSoup
import re
from collections import namedtuple

# download the table of scores
schedule_url = "https://www.teamsideline.com/sites/cary/schedule/274051/Youth-Basketball-Boys-11-12-West"
response = requests.get(schedule_url)
soup = BeautifulSoup(response.text, "html.parser")

# parse data
table = soup.find("table", {"id": "ctl00_ContentPlaceHolder1_ScheduleGrid_ctl00"})
rows = table.findAll("tr")

# t1 is home team, t2 is away team
Game = namedtuple('Game', 'weeknum, gamenum, team1, team2')
Team = namedtuple('Team', 'name, score, elo, winchnc, delo')

def calc_r(elo):
  """ returns tranformed rating baed on elo"""
  return 10.0**(elo/400.0)

def calc_e(r1, r2):
  """ return expected reult for r1 """
  return r1 / (r1 + r2)

def calc_win_chance(helo, aelo):
  """ return home win chance"""
  return round(1 / (1 + (10**((helo-aelo)/400.0))), 2)

def calc_mov_mod(helo, aelo, hcore, acore):
  n = (abs(hcore - acore) + 3.0)**.8
  dh = 7.5 + (.006* (helo-aelo))
  da = 7.5 + (.006* (aelo-helo))

  return n/dh, n/da

def parseGame(r, use_mov):
  """ take row of data, calculate and print reult """
  global gamenum
  global weeknum

  try:
    if "rgGroupHeader" in r.get("class"):
      weeknum = int((r.find("p").text.split(" ")[1]))
      gamenum = 0
  except:
    pass


  # if row i not a game, skip
  if not r.find("span", {"id":re.compile('HomeLabel')}):
    return
  
  # get relevant field from data row
  home_team = r.find("span", {"id":re.compile('HomeLabel')}).text
  home_team_score = r.find("span", {"id":re.compile('HomeScoreLabel')}).text
  away_team = r.find("span", {"id":re.compile('AwayLabel')}).text
  away_team_score = r.find("span", {"id":re.compile('AwayScoreLabel')}).text

  # if game han't been played yet, skip
  if home_team_score == "":
    return

  gamenum += 1

  # core hould be int 
  home_team_score = int(home_team_score)
  away_team_score = int(away_team_score)
  
  # get elo or et up new team
  home_team_elo = 1500
  if home_team in teams:
    home_team_elo = teams[home_team]
  else:
    teams[home_team] = home_team_elo

  away_team_elo = 1500
  if away_team in teams:
    away_team_elo = teams[away_team]
  else:
    teams[away_team] = away_team_elo
  
  # calculate r'
  rh = calc_r(home_team_elo)
  ra = calc_r(away_team_elo)

  # calculate e'
  eh = calc_e(rh, ra)
  ea = calc_e(ra, rh)

  # determine winner and set ', assume home team won, else change it
  sh = 1 if home_team_score > away_team_score else 0
  sa = 1 if home_team_score < away_team_score else 0

  # calculate win chance baed on current elo
  home_win_chnc = calc_win_chance(away_team_elo, home_team_elo)
  away_win_chnc = 1 - home_win_chnc

  # calculate margin of victory mod baed on 538' formulate
  hwinmod = 1
  awinmod = 1
  if use_mov:
    hwinmod, awinmod = calc_mov_mod(home_team_elo, away_team_elo, home_team_score, 
                                  away_team_score)

  # set new elo
  home_elo_new = int(home_team_elo + ((k*(sh-eh)) * hwinmod))
  away_elo_new = int(away_team_elo + ((k*(sa-ea)) * awinmod))
  
  teams[home_team] = home_elo_new
  teams[away_team] = away_elo_new

  # add game to games list
  hometeam = Team(home_team, home_team_score, home_team_elo, home_win_chnc, home_elo_new - home_team_elo)
  awayteam = Team(away_team, away_team_score, away_team_elo, away_win_chnc, away_elo_new - away_team_elo)
  games.append(Game(weeknum, gamenum, hometeam, awayteam)) 

def print_game_info(g):
  try:
    win_team = g.team1 if g.team1.score > g.team2.score else g.team2
    lose_team = g.team2 if g.team1.score > g.team2.score else g.team1
    # print(win_team.name, win_team.score, lose_team.name, lose_team.score)
    print( 
        f'{g.weeknum:4} '
        f'{g.gamenum:4} '
        f'  '
        f'{win_team.name:12.10} '
        f'{win_team.score:2d} '
        f'{win_team.elo} '
        f'{win_team.winchnc:5.2f} '
        f'{win_team.delo:5d} '
        f'  '
        f'{lose_team.name:12.10} '
        f'{lose_team.score:2d} '
        f'{lose_team.elo} '
        f'{lose_team.winchnc:5.2f} '
        f'{lose_team.delo:5d}'
    )
  except:
    print("ERROR!!!!")
    print(g)

def print_headers():
  print()
  print("Week Game   WTeam    WScore WElo WChnc WdElo   LTeam    LScore LElo LChnc LdElo")

def print_elo_rank():
  # print rankings by ELO
  print()
  print("--- ELO ---")
  i = 1
  for k, v in sorted(teams.items(), key=lambda x: x[1], reverse=True):
    print(i, ". ", k, round(v))
    i += 1


In [0]:
#@title
# do all the calculations and create the game instances
teams = {}
k = 40.0
use_mov = True
games = []
weeknum = 0
gamenum = 0


# parse results of each game
for r in rows:
  parseGame(r, use_mov)


In [0]:
#@title
# print elo rankings
print_elo_rank()


--- ELO ---
1 .  Paladins 1633
2 .  Skyhawks 1622
3 .  Longhorns 1621
4 .  Huskies 1581
5 .  Monarchs 1555
6 .  Sun Devils 1548
7 .  Spiders 1545
8 .  Racers 1524
9 .  Buccaneers 1506
10 .  Nittany Lions 1500
11 .  Blue Raiders 1476
12 .  Thundering Herd 1466
13 .  Hokies 1458
14 .  Wolverines 1423
15 .  Ducks 1396
16 .  Hawkeyes 1357
17 .  Gauchos 1336
18 .  Roadrunners 1335


In [0]:
#@title
# print all games in order
print( "*** ALL GAMES ***")
print_headers()
for g in games:
  print_game_info(g)

*** ALL GAMES ***

Week Game   WTeam    WScore WElo WChnc WdElo   LTeam    LScore LElo LChnc LdElo
   1    1   Spiders      40 1500  0.50    39   Hawkeyes     14 1500  0.50   -40
   1    2   Sun Devils   38 1500  0.50    40   Roadrunner   11 1500  0.50   -41
   1    3   Skyhawks     31 1500  0.50    28   Blue Raide   15 1500  0.50   -29
   1    4   Huskies      39 1500  0.50    28   Hokies       23 1500  0.50   -29
   1    5   Thundering   26 1500  0.50     8   Buccaneers   25 1500  0.50    -9
   1    6   Paladins     37 1500  0.50    23   Racers       25 1500  0.50   -24
   1    7   Longhorns    42 1500  0.50    47   Gauchos       8 1500  0.50   -48
   1    8   Wolverines   28 1500  0.50    20   Ducks        18 1500  0.50   -21
   1    9   Nittany Li   43 1500  0.50    33   Monarchs     22 1500  0.50   -34
   2    1   Hokies       26 1471  0.52    23   Roadrunner   13 1459  0.48   -24
   2    2   Blue Raide   28 1471  0.40    12   Spiders      26 1539  0.60   -11
   2    3   Sun Devil

{'tags': ['hide_input']}

In [0]:
#@title
# print highest games with highest scores
print("*** HIGHEST SCORES ***")
print_headers()
for g in sorted(games, key=lambda x: max(x.team1.score, x.team2.score), reverse=True)[0:10]:
  print_game_info(g)

*** HIGHEST SCORES ***

Week Game   WTeam    WScore WElo WChnc WdElo   LTeam    LScore LElo LChnc LdElo
   2   11   Huskies      56 1519  0.63    36   Ducks        18 1429  0.37   -42
   2    7   Racers       48 1476  0.50    49   Ducks        13 1479  0.50   -50
   3   10   Sun Devils   48 1521  0.63    31   Ducks        16 1425  0.37   -37
   2   13   Blue Raide   47 1483  0.56    28   Hawkeyes     27 1443  0.44   -30
   3    3   Paladins     47 1549  0.48    45   Sun Devils   18 1566  0.52   -45
   5    6   Nittany Li   47 1488  0.40    12   Sun Devils   45 1559  0.60   -11
   3   17   Huskies      46 1575  0.63    20   Nittany Li   29 1481  0.37   -24
   3   12   Paladins     44 1594  0.64    20   Hokies       26 1495  0.36   -24
   4    4   Nittany Li   44 1457  0.60    31   Roadrunner   16 1390  0.40   -36
   5    8   Spiders      44 1514  0.44    31   Racers       29 1554  0.56   -30


In [0]:
#@title
# Lowest Scores
print("*** LOWEST SCORES*** ")
print_headers()
for g in sorted(games, key=lambda x: min(x.team1.score, x.team2.score))[0:10]:
  print_game_info(g)

*** LOWEST SCORES*** 

Week Game   WTeam    WScore WElo WChnc WdElo   LTeam    LScore LElo LChnc LdElo
   3   16   Monarchs     30 1510  0.52    34   Blue Raide    7 1498  0.48   -36
   1    7   Longhorns    42 1500  0.50    47   Gauchos       8 1500  0.50   -48
   5    3   Thundering   20 1451  0.64    15   Roadrunner    8 1354  0.36   -19
   2    6   Monarchs     39 1466  0.44    49   Thundering   10 1508  0.56   -47
   1    2   Sun Devils   38 1500  0.50    40   Roadrunner   11 1500  0.50   -41
   3    1   Racers       30 1546  0.55    27   Nittany Li   11 1511  0.45   -30
   4    3   Longhorns    24 1625  0.72    12   Blue Raide   11 1462  0.28   -16
   3    6   Huskies      38 1555  0.71    20   Gauchos      12 1402  0.29   -27
   3    7   Thundering   22 1450  0.39    27   Wolverines   12 1531  0.61   -24
   3   14   Roadrunner   17 1376  0.50    14   Gauchos      12 1375  0.50   -15


In [0]:
#@title
print(" *** UPSETS ***")
print_headers()
upset_threshold = .45
f = list(filter(lambda x: ((x.team1.score > x.team2.score) and (x.team1.winchnc < upset_threshold))
                        or ((x.team2.score > x.team1.score) and (x.team2.winchnc < upset_threshold)), games))
for g in sorted(f, key=lambda x: min(x.team1.winchnc, x.team2.winchnc)):
  print_game_info(g)

 *** UPSETS ***

Week Game   WTeam    WScore WElo WChnc WdElo   LTeam    LScore LElo LChnc LdElo
   5    2   Buccaneers   29 1486  0.30    20   Longhorns    25 1637  0.70   -16
   4    6   Ducks        37 1388  0.38    25   Hokies       28 1471  0.62   -23
   3    7   Thundering   22 1450  0.39    27   Wolverines   12 1531  0.61   -24
   2    2   Blue Raide   28 1471  0.40    12   Spiders      26 1539  0.60   -11
   5    6   Nittany Li   47 1488  0.40    12   Sun Devils   45 1559  0.60   -11
   3    4   Monarchs     43 1487  0.42    23   Spiders      34 1540  0.58   -22
   2    6   Monarchs     39 1466  0.44    49   Thundering   10 1508  0.56   -47
   2   15   Nittany Li   32 1500  0.44    11   Wolverines   30 1542  0.56   -11
   5    8   Spiders      44 1514  0.44    31   Racers       29 1554  0.56   -30
   3    9   Buccaneers   16 1466  0.44    13   Blue Raide   13 1511  0.56   -13


In [0]:
#@title
print(" *** ELO HISTORY ***")
print_headers()
team = "Buccaneers"
for g in list(filter(lambda x: x.team1.name == team or x.team2.name == team, games)):
  print_game_info(g)

 *** ELO HISTORY ***

Week Game   WTeam    WScore WElo WChnc WdElo   LTeam    LScore LElo LChnc LdElo
   1    5   Thundering   26 1500  0.50     8   Buccaneers   25 1500  0.50    -9
   2    4   Skyhawks     17 1528  0.55     9   Buccaneers   14 1491  0.45   -11
   2   12   Spiders      29 1528  0.57    12   Buccaneers   23 1480  0.43   -14
   3    9   Buccaneers   16 1466  0.44    13   Blue Raide   13 1511  0.56   -13
   3   15   Buccaneers   24 1479  0.62    15   Hawkeyes     13 1395  0.38   -19
   4    2   Monarchs     24 1544  0.57     6   Buccaneers   23 1494  0.43    -8
   5    2   Buccaneers   29 1486  0.30    20   Longhorns    25 1637  0.70   -16
