In [1]:
import sqlite3

from gazpacho import get, Soup
import pandas as pd

### Money

In [2]:
# Cap archive page for 2022, downloaded as raw HTML with gazpacho's get().
# NOTE: `url` is reassigned in the Standings section further down.
url = 'https://www.capfriendly.com/archive/2022'
html = get(url)

In [3]:
# Wrap the downloaded HTML in a gazpacho Soup so it can be queried with .find().
soup = Soup(html)

In [4]:
# Look up the <table> whose id is "ich"; the cap figures are read from its rows below.
table = soup.find('table', {'id': 'ich'})

In [5]:
str(table)[:100]

'<table id="ich" class="sortablex tblcf tbl index"><thead><tr class="column_head"><th class="left" st'

In [6]:
# Collect the team rows. The rows carry classes such as "odd tmx_16", so this
# selector only works if 'tmx' is matched as a substring of the class value
# (presumably gazpacho's default partial matching — confirm against its docs).
trs = table.find('tr', {'class': 'tmx'})

In [7]:
# Take the first matched row as a sample for prototyping the parsing in the next cells.
tr = trs[0]

In [8]:
tr.find('a', mode='first').text

'Montreal Canadiens'

In [9]:
print(tr)

<tr class="odd tmx_16"><td class="tmx left" data-team="16" data-label="TEAM ▾"><span><a href="/teams/cap-tracker/canadiens/2022"><img class="im_mid" style="height:20px;width:20px;margin-right:5px;margin-top:-2px;" alt="Logo of the Montreal Canadiens" src="https://capfriendly-wlb8ng5.stackpathdns.com/assets/images/logos/montreal_canadiens.svg">Montreal Canadiens</a></span></td><td data-label="FINAL CAP HIT" class="tmx_s left"><span class="num" data-num="92493917">$92,493,917</span></td><td data-label="LTIR USED" class="tmx_s left"><span class="num" data-num="10993917">$10,993,917</span></td><td data-label="FINAL CAP SPACE" class="tmx_s left"><span class="num" data-num="0">$0</span></td></tr>


In [10]:
tr.find('td', {'data-label': 'FINAL CAP HIT'}, partial=False).text

'$92,493,917'

In [11]:
def parse_tr(tr):
    """Pull the team name and final cap hit out of a single table row.

    Parameters
    ----------
    tr
        Row element exposing a gazpacho-style ``find`` method whose results
        have a ``text`` attribute.

    Returns
    -------
    tuple
        ``(team, cap)``: ``team`` is the text of the first ``<a>`` in the row,
        ``cap`` is the 'FINAL CAP HIT' cell converted to a float.

    Raises
    ------
    AttributeError
        If either lookup yields ``None`` (no ``text`` attribute).
    ValueError
        If the cap text is not numeric once '$' and ',' are removed.
    """
    name = tr.find('a', mode='first').text
    # partial=False is passed so that data-label has to match the full string.
    cell = tr.find('td', {'data-label': 'FINAL CAP HIT'}, partial=False)
    amount = cell.text
    # Strip the currency formatting, e.g. '$92,493,917' -> '92493917'.
    for symbol in ('$', ','):
        amount = amount.replace(symbol, '')
    return name, float(amount)

In [12]:
# Run parse_tr over every matched row, giving a list of (team, cap) tuples.
cap_hits = [parse_tr(tr) for tr in trs]

In [13]:
# Tabulate the tuples; the cap value is stored under the column name 'spend'.
# Note that `cap_hits` is rebound here from a list of tuples to a DataFrame.
cap_hits = pd.DataFrame(cap_hits, columns=['Team', 'spend'])

### Standings

In [32]:
# Standings page for the same season; this rebinds `url` from the Money section.
url = 'https://www.hockey-reference.com/leagues/NHL_2022.html'

In [33]:
# Fetch and parse the page once, then pick out the first two tables. Calling
# pd.read_html(url) separately for each table downloads and parses the page twice.
tables = pd.read_html(url)
df_a = tables[0]
df_b = tables[1]
# Stack the two tables into one frame; each keeps its own row labels.
df = pd.concat([df_a, df_b])

In [34]:
# Clean the stacked standings in a single chain:
#   1. convert PTS% to numbers in one vectorised call (values that are not
#      numeric become NaN),
#   2. drop the rows where PTS% could not be parsed,
#   3. name the team column 'Team' so it lines up with cap_hits,
#   4. remove literal '*' characters from the team names (regex=False),
#   5. order by PTS%, best first.
df = (
    df
    .assign(**{'PTS%': pd.to_numeric(df['PTS%'], errors='coerce')})
    .dropna(subset=["PTS%"])
    .rename(columns={'Unnamed: 0': 'Team'})
    .assign(Team=lambda frame: frame["Team"].str.replace("*", "", regex=False))
    .sort_values("PTS%", ascending=False)
)

In [35]:
df

Unnamed: 0,Team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,RW,RgRec,RgPt%
1,Florida Panthers,82,58,18,6,122,0.744,340,246,1.07,-0.08,0.713,42,42-18-22,0.646
1,Colorado Avalanche,82,56,19,7,119,0.726,312,234,0.91,-0.04,0.671,46,46-19-17,0.665
10,Carolina Hurricanes,82,54,20,8,116,0.707,278,202,0.88,-0.05,0.665,47,47-20-15,0.665
2,Toronto Maple Leafs,82,54,21,7,115,0.701,315,253,0.69,-0.06,0.646,45,45-21-16,0.646
2,Minnesota Wild,82,53,22,7,113,0.689,310,253,0.68,-0.02,0.64,37,37-22-23,0.591
10,Calgary Flames,82,50,21,11,111,0.677,293,208,0.99,-0.05,0.61,44,44-21-17,0.64
11,New York Rangers,82,52,24,6,110,0.671,254,207,0.54,-0.03,0.628,44,44-24-14,0.622
3,Tampa Bay Lightning,82,51,23,8,110,0.671,287,233,0.64,-0.02,0.64,39,39-23-20,0.598
3,St. Louis Blues,82,49,22,11,109,0.665,311,242,0.79,-0.05,0.604,43,43-22-17,0.628
4,Boston Bruins,82,51,26,5,107,0.652,255,220,0.38,-0.05,0.622,40,40-26-16,0.585


In [36]:
# Discard any 'spend'/'efficiency' columns left over from an earlier run of this
# cell. Without this, running the cell a second time merges onto a frame that
# already has 'spend', yielding 'spend_x'/'spend_y' and a KeyError on df["spend"].
df = pd.merge(
    df.drop(columns=['spend', 'efficiency'], errors='ignore'),
    cap_hits,
    on='Team',
    how='left',
)
# Left join: a team with no identically spelled name in cap_hits keeps NaN spend,
# and therefore NaN efficiency.
# efficiency = spend in millions of dollars divided by PTS%.
df["efficiency"] = df["spend"] / 1_000_000 / df["PTS%"]

In [37]:
df.sort_values("efficiency")

Unnamed: 0,Team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,RW,RgRec,RgPt%,spend,efficiency
0,Florida Panthers,82,58,18,6,122,0.744,340,246,1.07,-0.08,0.713,42,42-18-22,0.646,82930085.0,111.465168
6,New York Rangers,82,52,24,6,110,0.671,254,207,0.54,-0.03,0.628,44,44-24-14,0.622,75108450.0,111.935097
1,Colorado Avalanche,82,56,19,7,119,0.726,312,234,0.91,-0.04,0.671,46,46-19-17,0.665,81739413.0,112.588723
4,Minnesota Wild,82,53,22,7,113,0.689,310,253,0.68,-0.02,0.64,37,37-22-23,0.591,79996390.0,116.105065
2,Carolina Hurricanes,82,54,20,8,116,0.707,278,202,0.88,-0.05,0.665,47,47-20-15,0.665,83866550.0,118.623126
3,Toronto Maple Leafs,82,54,21,7,115,0.701,315,253,0.69,-0.06,0.646,45,45-21-16,0.646,83761840.0,119.489073
15,Nashville Predators,82,45,30,7,97,0.591,266,252,0.19,0.02,0.537,35,35-30-17,0.53,71216151.0,120.501102
5,Calgary Flames,82,50,21,11,111,0.677,293,208,0.99,-0.05,0.61,44,44-21-17,0.64,81707950.0,120.691211
9,Boston Bruins,82,51,26,5,107,0.652,255,220,0.38,-0.05,0.622,40,40-26-16,0.585,80829990.0,123.972377
8,St. Louis Blues,82,49,22,11,109,0.665,311,242,0.79,-0.05,0.604,43,43-22-17,0.628,82634124.0,124.261841


### Saving results

In [38]:
# Write the merged table to CSV without the row index. This happens before the
# 'date_fetched' column is added in a later cell, so the CSV does not include it.
df.to_csv('../data/efficiency.csv', index=False)

In [39]:
df.head()

Unnamed: 0,Team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,RW,RgRec,RgPt%,spend,efficiency
0,Florida Panthers,82,58,18,6,122,0.744,340,246,1.07,-0.08,0.713,42,42-18-22,0.646,82930085.0,111.465168
1,Colorado Avalanche,82,56,19,7,119,0.726,312,234,0.91,-0.04,0.671,46,46-19-17,0.665,81739413.0,112.588723
2,Carolina Hurricanes,82,54,20,8,116,0.707,278,202,0.88,-0.05,0.665,47,47-20-15,0.665,83866550.0,118.623126
3,Toronto Maple Leafs,82,54,21,7,115,0.701,315,253,0.69,-0.06,0.646,45,45-21-16,0.646,83761840.0,119.489073
4,Minnesota Wild,82,53,22,7,113,0.689,310,253,0.68,-0.02,0.64,37,37-22-23,0.591,79996390.0,116.105065


In [40]:
# Stamp every row with the same fetch timestamp (a single value broadcast to the column).
df['date_fetched'] = pd.Timestamp('today')

In [41]:
df.head()

Unnamed: 0,Team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,RW,RgRec,RgPt%,spend,efficiency,date_fetched
0,Florida Panthers,82,58,18,6,122,0.744,340,246,1.07,-0.08,0.713,42,42-18-22,0.646,82930085.0,111.465168,2022-07-11 14:08:41.556671
1,Colorado Avalanche,82,56,19,7,119,0.726,312,234,0.91,-0.04,0.671,46,46-19-17,0.665,81739413.0,112.588723,2022-07-11 14:08:41.556671
2,Carolina Hurricanes,82,54,20,8,116,0.707,278,202,0.88,-0.05,0.665,47,47-20-15,0.665,83866550.0,118.623126,2022-07-11 14:08:41.556671
3,Toronto Maple Leafs,82,54,21,7,115,0.701,315,253,0.69,-0.06,0.646,45,45-21-16,0.646,83761840.0,119.489073,2022-07-11 14:08:41.556671
4,Minnesota Wild,82,53,22,7,113,0.689,310,253,0.68,-0.02,0.64,37,37-22-23,0.591,79996390.0,116.105065,2022-07-11 14:08:41.556671


In [42]:
# sqlite3 is imported in the notebook's first cell with the other imports.
# The connection is left open on purpose: the next cell queries through `con`.
con = sqlite3.connect('../data/efficiency.db')

# Append this snapshot to the 'teams' table. Existing rows are kept, so running
# this cell again adds the same rows a second time.
df.to_sql('teams', con, index=False, if_exists='append')

32

In [43]:
# Read back the rows with efficiency below 150, lowest value first. Because the
# 'teams' table is appended to on every run, this can return rows from more than
# one fetch.
# NOTE(review): 150 is an unexplained threshold — consider a named constant.
pd.read_sql('''
    select 
    * 
    from teams 
    where efficiency < 150
    order by efficiency
''', con)

Unnamed: 0,Team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,RW,RgRec,RgPt%,spend,efficiency,date_fetched
0,Florida Panthers,82,58,18,6,122,0.744,340,246,1.07,-0.08,0.713,42,42-18-22,0.646,82930085.0,111.465168,2022-07-11 14:08:41.556671
1,New York Rangers,82,52,24,6,110,0.671,254,207,0.54,-0.03,0.628,44,44-24-14,0.622,75108450.0,111.935097,2022-07-11 14:08:41.556671
2,Colorado Avalanche,82,56,19,7,119,0.726,312,234,0.91,-0.04,0.671,46,46-19-17,0.665,81739413.0,112.588723,2022-07-11 14:08:41.556671
3,Minnesota Wild,82,53,22,7,113,0.689,310,253,0.68,-0.02,0.64,37,37-22-23,0.591,79996390.0,116.105065,2022-07-11 14:08:41.556671
4,Carolina Hurricanes,82,54,20,8,116,0.707,278,202,0.88,-0.05,0.665,47,47-20-15,0.665,83866550.0,118.623126,2022-07-11 14:08:41.556671
5,Toronto Maple Leafs,82,54,21,7,115,0.701,315,253,0.69,-0.06,0.646,45,45-21-16,0.646,83761840.0,119.489073,2022-07-11 14:08:41.556671
6,Nashville Predators,82,45,30,7,97,0.591,266,252,0.19,0.02,0.537,35,35-30-17,0.53,71216151.0,120.501102,2022-07-11 14:08:41.556671
7,Calgary Flames,82,50,21,11,111,0.677,293,208,0.99,-0.05,0.61,44,44-21-17,0.64,81707950.0,120.691211,2022-07-11 14:08:41.556671
8,Boston Bruins,82,51,26,5,107,0.652,255,220,0.38,-0.05,0.622,40,40-26-16,0.585,80829990.0,123.972377,2022-07-11 14:08:41.556671
9,St. Louis Blues,82,49,22,11,109,0.665,311,242,0.79,-0.05,0.604,43,43-22-17,0.628,82634124.0,124.261841,2022-07-11 14:08:41.556671
