In [101]:
import json
from bs4 import BeautifulSoup
from urllib.request import urlopen
from html.parser import HTMLParser
import numpy as np
import pandas as pd

In [102]:
url = 'https://www.hockey-reference.com/leagues/NHL_2019_standings.html'
soup = BeautifulSoup(urlopen(url), "html.parser")

In [103]:
# Find the proper table using the caption tag
for caption in soup.find_all('caption'):
    if caption.get_text() == 'Expanded Standings Table':
        table = caption.find_parent('table', {"id":"standings"})

In [104]:
#Table data into one large array
all_teams = []
#Grab all the data
for row in table.find_all('tr'):
    for cell in row.find_all('td'):
        all_teams.append(cell.text)

In [105]:
#divide the all_teams data into their own arrays
def teamsplitter(l, n):
    # For item i in a range that is a length of l,
    for i in range(0, len(l), n):
        # Create an index range for l of n items:
        yield l[i:i+n]

In [106]:
#21 columns in chart
all_teams = list(teamsplitter(all_teams,21))
#Data->numpy
all_teams = np.array(all_teams)
#numpy-> Pandas DataFrame
teamtable = pd.DataFrame(all_teams)
teamtable.head()


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
0,Tampa Bay Lightning,62-16-4,6-1,7-3,32-7-2,30-9-2,44-8-0,18-8-4,23-5-0,21-3-0,...,11-4-1,24-3-4,30-8-0,8-2-1,10-5-0,13-0-1,6-4-0,12-1-2,10-3-0,3-1-0
1,Calgary Flames,50-25-7,0-4,5-3,26-10-5,24-15-2,21-7-4,29-18-3,8-5-3,13-2-1,...,16-11-2,16-8-7,22-6-0,7-5-1,8-4-1,9-3-2,9-1-1,8-3-2,8-7-0,1-2-0
2,Boston Bruins,49-24-9,2-3,9-6,29-9-3,20-15-6,29-19-4,20-5-5,16-10-2,13-9-2,...,12-2-2,21-6-9,23-11-0,7-3-2,7-4-2,7-7-0,6-3-3,11-0-2,9-6-0,2-1-0
3,Washington Capitals,48-26-8,4-1,5-7,24-11-6,24-15-2,33-14-5,15-12-3,14-7-3,19-7-2,...,10-5-1,19-5-8,16-14-0,5-3-2,10-4-1,9-4-0,3-6-3,9-4-1,11-3-1,1-2-0
4,New York Islanders,48-27-7,5-5,6-2,24-13-4,24-14-3,30-17-5,18-10-2,12-8-4,18-9-1,...,9-6-1,19-6-7,19-12-0,6-4-1,6-5-2,9-4-1,8-2-1,8-4-2,9-7-0,2-1-0


In [107]:
#clean data & rename headers
teamtable = teamtable.drop([4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],axis=1)
teamtable = teamtable.rename(columns={0:"team",1:"Overall",2:"Shootout",3:"Overtime"})

teamtable.head()

Unnamed: 0,team,Overall,Shootout,Overtime
0,Tampa Bay Lightning,62-16-4,6-1,7-3
1,Calgary Flames,50-25-7,0-4,5-3
2,Boston Bruins,49-24-9,2-3,9-6
3,Washington Capitals,48-26-8,4-1,5-7
4,New York Islanders,48-27-7,5-5,6-2


In [108]:
# Striping out the strings of the records

teamname = teamtable.drop(["Overall","Shootout","Overtime"],axis=1)
Overall_math = teamtable.Overall.str.split("-",expand=True).rename(columns={0:"wins",1:"loss",2:"OT_L"}).astype(int)
Shootout_math = teamtable.Shootout.str.split("-",expand=True).drop([1],axis=1).rename(columns={0:"SOWins"}).astype(int)
Overtime_math = teamtable.Overtime.str.split("-",expand=True).drop([1],axis=1).rename(columns={0:"OTWins"}).astype(int)

In [109]:
#Join math tables
pts_math = teamname.join(Overall_math)
pts_math = pts_math.join(Shootout_math)
pts_math = pts_math.join(Overtime_math)

pts_math.head()

Unnamed: 0,team,wins,loss,OT_L,SOWins,OTWins
0,Tampa Bay Lightning,62,16,4,6,7
1,Calgary Flames,50,25,7,0,5
2,Boston Bruins,49,24,9,2,9
3,Washington Capitals,48,26,8,4,5
4,New York Islanders,48,27,7,5,6


In [110]:
# Math to get all the pts totals...(Keeping to show the work)
pts_math['OT_W'] = pts_math.SOWins + pts_math.OTWins
pts_math['true_Wins'] = pts_math.wins - pts_math.OT_W
pts_math['True_Wins_Pts'] = pts_math.true_Wins*3
pts_math['OT_W_Pts'] = pts_math.OT_W*2
pts_math['OT_L_Pts'] = pts_math.OT_L*1

pts_math.head()

Unnamed: 0,team,wins,loss,OT_L,SOWins,OTWins,OT_W,true_Wins,True_Wins_Pts,OT_W_Pts,OT_L_Pts
0,Tampa Bay Lightning,62,16,4,6,7,13,49,147,26,4
1,Calgary Flames,50,25,7,0,5,5,45,135,10,7
2,Boston Bruins,49,24,9,2,9,11,38,114,22,9
3,Washington Capitals,48,26,8,4,5,9,39,117,18,8
4,New York Islanders,48,27,7,5,6,11,37,111,22,7


In [111]:
# THE NEW TOTAL POINTS!!
pts_math['new_Record']= pts_math.true_Wins.astype(str).str.cat([pts_math.OT_W.astype(str),pts_math.OT_L.astype(str),Overall_math.loss.astype(str)],sep='-')
pts_math['PTS_Total']= pts_math.True_Wins_Pts+pts_math.OT_W_Pts+pts_math.OT_L_Pts

pts_math['current_Points']=(pts_math.wins*2)+(pts_math.OT_L)

pts_math['current_Rank'] = pts_math['current_Points'].rank(ascending=False)
pts_math['new_Rank'] = pts_math['current_Points'].rank(ascending=False)


pts_math.head()

Unnamed: 0,team,wins,loss,OT_L,SOWins,OTWins,OT_W,true_Wins,True_Wins_Pts,OT_W_Pts,OT_L_Pts,new_Record,PTS_Total,current_Points,current_Rank,new_Rank
0,Tampa Bay Lightning,62,16,4,6,7,13,49,147,26,4,49-13-4-16,177,128,1.0,1.0
1,Calgary Flames,50,25,7,0,5,5,45,135,10,7,45-5-7-25,152,107,2.5,2.5
2,Boston Bruins,49,24,9,2,9,11,38,114,22,9,38-11-9-24,145,107,2.5,2.5
3,Washington Capitals,48,26,8,4,5,9,39,117,18,8,39-9-8-26,143,104,4.0,4.0
4,New York Islanders,48,27,7,5,6,11,37,111,22,7,37-11-7-27,140,103,5.0,5.0


In [112]:
# clean for export
pts_math = pts_math.drop(['wins','loss','OT_L','SOWins','OT_W','OTWins','true_Wins','OT_W', 'True_Wins_Pts', 'OT_W_Pts', 'OT_L_Pts'], axis=1)
pts_math = pts_math.rename(columns={"PTS_Total":"points","team":"Teams","new_Record":"Record","points":"3-2-1 Points","current_Points":"Current Points","current_Rank":"Current Rank","current_Points":"Current Points","new_Rank":"3-2-1 Rank"})

pts_math = pts_math.sort_values(by=["points"],ascending=False)

pts_math.head()

Unnamed: 0,Teams,Record,points,Current Points,Current Rank,3-2-1 Rank
0,Tampa Bay Lightning,49-13-4-16,177,128,1.0,1.0
1,Calgary Flames,45-5-7-25,152,107,2.5,2.5
2,Boston Bruins,38-11-9-24,145,107,2.5,2.5
3,Washington Capitals,39-9-8-26,143,104,4.0,4.0
4,New York Islanders,37-11-7-27,140,103,5.0,5.0


In [14]:
team_props = pd.read_json('teams.json',typ='frame')

In [113]:
finish_table = pts_math.join(team_props,on='Teams')
finish_table = finish_table.set_index('abb')


print(finish_table)

                     Teams       Record  points  Current Points  Current Rank  \
abb                                                                             
TB     Tampa Bay Lightning   49-13-4-16     177             128           1.0   
CAL         Calgary Flames    45-5-7-25     152             107           2.5   
BOS          Boston Bruins   38-11-9-24     145             107           2.5   
WAS    Washington Capitals    39-9-8-26     143             104           4.0   
NYI     New York Islanders   37-11-7-27     140             103           5.0   
TOR    Toronto Maple Leafs    40-6-8-28     140             100           8.0   
SJ         San Jose Sharks    38-8-9-27     139             101           6.0   
NAS    Nashville Predators    38-9-6-29     138             100           8.0   
CAR    Carolina Hurricanes    39-7-7-29     138              99          11.0   
PIT    Pittsburgh Penguins   37-7-12-26     137             100           8.0   
WPG          Winnipeg Jets  