In [1]:
from gazpacho import get, Soup
import pandas as pd

### Money

In [2]:
# Address of the 2022 archive page on CapFriendly; `get` fetches it and the
# result is handed to Soup in the next cell.
url = 'https://www.capfriendly.com/archive/2022'
html = get(url)

In [3]:
# Wrap the fetched markup in a gazpacho Soup object so it can be searched with .find().
soup = Soup(html)

In [4]:
# Select the <table> whose id is 'ich'; the team rows are pulled from it below.
table = soup.find('table', {'id': 'ich'})

In [5]:
# Peek at the first 100 characters of the table's markup to check the right element was matched.
str(table)[:100]

'<table id="ich" class="sortablex tblcf tbl index"><thead><tr class="column_head"><th class="left" st'

In [6]:
# Collect the table's <tr> elements selected by class 'tmx'.
# NOTE(review): the sample row printed further down has class "odd tmx_16", so this
# appears to rely on gazpacho's partial attribute matching — confirm.
trs = table.find('tr', {'class': 'tmx'})

In [7]:
# Take the first matched row as a sample for prototyping the extraction.
tr = trs[0]

In [8]:
# Text of the first <a> element in the row — this is the team name.
tr.find('a', mode='first').text

'Montreal Canadiens'

In [9]:
# Print the sample row's raw HTML to see which cells and attributes it carries.
print(tr)

<tr class="odd tmx_16"><td class="tmx left" data-team="16" data-label="TEAM ▾"><span><a href="/teams/cap-tracker/canadiens/2022"><img class="im_mid" style="height:20px;width:20px;margin-right:5px;margin-top:-2px;" alt="Logo of the Montreal Canadiens" src="https://capfriendly-wlb8ng5.stackpathdns.com/assets/images/logos/montreal_canadiens.svg">Montreal Canadiens</a></span></td><td data-label="FINAL CAP HIT" class="tmx_s left"><span class="num" data-num="92493917">$92,493,917</span></td><td data-label="LTIR USED" class="tmx_s left"><span class="num" data-num="10993917">$10,993,917</span></td><td data-label="FINAL CAP SPACE" class="tmx_s left"><span class="num" data-num="0">$0</span></td></tr>


In [10]:
# Pick the <td> whose data-label is exactly 'FINAL CAP HIT' (partial matching disabled);
# its text is the formatted dollar amount.
tr.find('td', {'data-label': 'FINAL CAP HIT'}, partial=False).text

'$92,493,917'

In [11]:
def parse_tr(tr):
    """Pull the team name and final cap hit out of one table row.

    Args:
        tr: A row element exposing a gazpacho-style ``find`` method whose
            results have a ``text`` attribute.

    Returns:
        tuple: ``(team, cap)`` where ``team`` is the text of the row's first
        ``<a>`` element and ``cap`` is the 'FINAL CAP HIT' cell converted to a
        float after removing the ``$`` sign and thousands separators.
    """
    team_name = tr.find('a', mode='first').text
    cap_cell = tr.find('td', {'data-label': 'FINAL CAP HIT'}, partial=False)
    # Strip the currency formatting ('$' and ',') in a single pass before converting.
    digits = cap_cell.text.translate(str.maketrans('', '', '$,'))
    return team_name, float(digits)

In [12]:
# Run parse_tr over every matched row to get a list of (team, cap) tuples.
cap_hits = list(map(parse_tr, trs))

In [13]:
# Tabulate the (team, cap) tuples: 'Team' is the key later used to merge with the
# standings, 'spend' holds the cap hit. Note this rebinds cap_hits from a list to a DataFrame.
cap_hits = pd.DataFrame(cap_hits, columns=['Team', 'spend'])

### Standings

In [14]:
# Hockey-Reference page for the 2022 NHL season; rebinds the `url` name used earlier for the cap page.
url = 'https://www.hockey-reference.com/leagues/NHL_2022.html'

In [15]:
# read_html returns a list of the tables found on the page; only the first one is kept.
# NOTE(review): the to_sql call further down reports 16 rows written — confirm the first
# table covers every team this analysis is meant to include.
df = pd.read_html(url)[0]

In [16]:
# Clean the standings: keep only rows with a numeric PTS% and rank them best-first.
# pd.to_numeric is applied to the whole column at once (vectorized) rather than
# element by element through Series.apply; values that cannot be parsed become NaN
# and are then dropped (presumably header/divider rows inside the scraped table —
# TODO confirm). The chain builds a new frame instead of mutating the one from read_html.
df = (
    df
    .assign(**{'PTS%': pd.to_numeric(df['PTS%'], errors='coerce')})
    .dropna(subset=['PTS%'])
    # The team names arrive in an unnamed first column; give it the merge key name.
    .rename(columns={'Unnamed: 0': 'Team'})
    .sort_values('PTS%', ascending=False)
)

In [17]:
# Attach each team's cap spend to the standings. how='left' keeps every standings
# row; a team with no matching name in cap_hits gets NaN for 'spend'.
# Dropping any previously derived 'spend'/'efficiency' columns first keeps this cell
# safe to re-run: otherwise a second merge would yield 'spend_x'/'spend_y' and the
# df["spend"] lookup below would raise KeyError. On a first run the drop is a no-op.
df = pd.merge(
    df.drop(columns=['spend', 'efficiency'], errors='ignore'),
    cap_hits,
    on='Team',
    how='left',
)
# Efficiency = cap spend in millions of dollars divided by points percentage.
df["efficiency"] = df["spend"] / 1_000_000 / df["PTS%"]

In [18]:
# Show the teams ordered from lowest to highest efficiency value.
# Display only: the sorted copy is not assigned back, so df keeps its own order.
df.sort_values("efficiency")

Unnamed: 0,Team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,RW,RgRec,RgPt%,spend,efficiency
0,Florida Panthers,82,58,18,6,122,0.744,340,246,1.07,-0.08,0.713,42,42-18-22,0.646,82930085.0,111.465168
4,New York Rangers,82,52,24,6,110,0.671,254,207,0.54,-0.03,0.628,44,44-24-14,0.622,75108450.0,111.935097
1,Carolina Hurricanes,82,54,20,8,116,0.707,278,202,0.88,-0.05,0.665,47,47-20-15,0.665,83866550.0,118.623126
2,Toronto Maple Leafs,82,54,21,7,115,0.701,315,253,0.69,-0.06,0.646,45,45-21-16,0.646,83761840.0,119.489073
5,Boston Bruins,82,51,26,5,107,0.652,255,220,0.38,-0.05,0.622,40,40-26-16,0.585,80829990.0,123.972377
3,Tampa Bay Lightning,82,51,23,8,110,0.671,287,233,0.64,-0.02,0.64,39,39-23-20,0.598,89214115.0,132.956952
6,Pittsburgh Penguins,82,46,25,11,103,0.628,272,229,0.49,-0.04,0.585,37,37-25-20,0.573,83539221.0,133.024237
7,Washington Capitals,82,44,26,12,100,0.61,275,245,0.35,-0.02,0.524,35,35-26-21,0.555,84839289.0,139.080802
9,Columbus Blue Jackets,82,37,38,7,81,0.494,262,300,-0.44,0.02,0.445,26,26-38-18,0.427,70736371.0,143.191034
10,Buffalo Sabres,82,32,39,11,75,0.457,232,290,-0.69,0.02,0.39,25,25-39-18,0.415,67574592.0,147.865628


### Saving results

In [19]:
# Write the merged table to CSV without the index column. This runs before
# 'date_fetched' is added below, so the CSV does not contain that column.
df.to_csv('../data/efficiency.csv', index=False)

In [20]:
# Preview the first rows of the frame that was just saved.
df.head()

Unnamed: 0,Team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,RW,RgRec,RgPt%,spend,efficiency
0,Florida Panthers,82,58,18,6,122,0.744,340,246,1.07,-0.08,0.713,42,42-18-22,0.646,82930085.0,111.465168
1,Carolina Hurricanes,82,54,20,8,116,0.707,278,202,0.88,-0.05,0.665,47,47-20-15,0.665,83866550.0,118.623126
2,Toronto Maple Leafs,82,54,21,7,115,0.701,315,253,0.69,-0.06,0.646,45,45-21-16,0.646,83761840.0,119.489073
3,Tampa Bay Lightning,82,51,23,8,110,0.671,287,233,0.64,-0.02,0.64,39,39-23-20,0.598,89214115.0,132.956952
4,New York Rangers,82,52,24,6,110,0.671,254,207,0.54,-0.03,0.628,44,44-24-14,0.622,75108450.0,111.935097


In [21]:
# Stamp the frame with the time of this run; the single timestamp is broadcast to every row.
df['date_fetched'] = pd.Timestamp('today')

In [22]:
# Preview again to confirm the new 'date_fetched' column is present.
df.head()

Unnamed: 0,Team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,RW,RgRec,RgPt%,spend,efficiency,date_fetched
0,Florida Panthers,82,58,18,6,122,0.744,340,246,1.07,-0.08,0.713,42,42-18-22,0.646,82930085.0,111.465168,2022-06-05 11:12:39.430977
1,Carolina Hurricanes,82,54,20,8,116,0.707,278,202,0.88,-0.05,0.665,47,47-20-15,0.665,83866550.0,118.623126,2022-06-05 11:12:39.430977
2,Toronto Maple Leafs,82,54,21,7,115,0.701,315,253,0.69,-0.06,0.646,45,45-21-16,0.646,83761840.0,119.489073,2022-06-05 11:12:39.430977
3,Tampa Bay Lightning,82,51,23,8,110,0.671,287,233,0.64,-0.02,0.64,39,39-23-20,0.598,89214115.0,132.956952,2022-06-05 11:12:39.430977
4,New York Rangers,82,52,24,6,110,0.671,254,207,0.54,-0.03,0.628,44,44-24-14,0.622,75108450.0,111.935097,2022-06-05 11:12:39.430977


In [23]:
import sqlite3

# Open (or create) the SQLite file that accumulates snapshots across runs.
# The connection is left open because the query cell below reuses `con`.
con = sqlite3.connect('../data/efficiency.db')

# Append this run's rows to the 'teams' table without writing the index.
# Because of if_exists='append', re-running this cell adds the same rows again.
df.to_sql(name='teams', con=con, index=False, if_exists='append')

16

In [24]:
# Read back every stored row (all snapshots in the 'teams' table) whose efficiency
# is below 150, ordered from lowest to highest efficiency.
efficient_teams_sql = '''
    select 
    * 
    from teams 
    where efficiency < 150
    order by efficiency
'''
pd.read_sql(efficient_teams_sql, con)

Unnamed: 0,Team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,RW,RgRec,RgPt%,spend,efficiency,date_fetched
0,Toronto Maple Leafs,22,16,4,2,34,0.773,78,55,0.85,-0.2,0.727,14,14-4-4,0.727,82812722.0,107.131594,2021-03-01 06:40:37.743832
1,Florida Panthers,20,13,4,3,29,0.725,65,57,0.29,-0.11,0.675,9,9-4-7,0.625,78373558.0,108.101459,2021-03-01 06:40:37.743832
2,Florida Panthers,82,58,18,6,122,0.744,340,246,1.07,-0.08,0.713,42,42-18-22,0.646,82930085.0,111.465168,2022-06-05 11:12:39.430977
3,Vegas Golden Knights,17,12,4,1,25,0.735,51,36,0.72,-0.16,0.735,10,10-4-3,0.676,82163798.0,111.78748,2021-03-01 06:40:37.743832
4,New York Rangers,82,52,24,6,110,0.671,254,207,0.54,-0.03,0.628,44,44-24-14,0.622,75108450.0,111.935097,2022-06-05 11:12:39.430977
5,Philadelphia Flyers,18,11,4,3,25,0.694,61,54,0.35,-0.04,0.667,9,9-4-5,0.639,80375446.0,115.814764,2021-03-01 06:40:37.743832
6,Boston Bruins,19,12,5,2,26,0.684,59,50,0.53,0.05,0.579,7,7-5-7,0.553,79332810.0,115.98364,2021-03-01 06:40:37.743832
7,Carolina Hurricanes,20,13,6,1,27,0.675,69,56,0.85,0.2,0.55,8,8-6-6,0.55,78830512.0,116.785944,2021-03-01 06:40:37.743832
8,Carolina Hurricanes,82,54,20,8,116,0.707,278,202,0.88,-0.05,0.665,47,47-20-15,0.665,83866550.0,118.623126,2022-06-05 11:12:39.430977
9,Toronto Maple Leafs,82,54,21,7,115,0.701,315,253,0.69,-0.06,0.646,45,45-21-16,0.646,83761840.0,119.489073,2022-06-05 11:12:39.430977
