In [1]:
from gazpacho import get, Soup
import pandas as pd

### Money

In [2]:
url = 'https://www.capfriendly.com/archive/2021'
html = get(url)

In [3]:
soup = Soup(html)

In [4]:
table = soup.find('table', {'id': 'ich'})

In [5]:
str(table)[:100]

'<table id="ich" class="sortablex tblcf tbl index"><thead><tr class="column_head"><th align="left" st'

In [6]:
trs = table.find('tr', {'class': 'tmx'})

In [7]:
tr = trs[0]

In [8]:
tr.find('a', mode='first').text

'Winnipeg Jets'

In [9]:
print(tr)

<tr class="odd tmx_30"><td class="tmx" data-team="30" data-label="TEAM ▾" align="left"><span><a href="/teams/cap-tracker/jets/2021"><img class="im_mid" style="height:20px;width:20px;margin-right:5px;margin-top:-2px;" alt="Winnipeg Jets" src="https://capfriendly-wlb8ng5.stackpathdns.com/assets/images/logos/winnipeg_jets.svg">Winnipeg Jets</a></span></td><td data-label="FINAL CAP HIT" class="tmx_s" align="left"><span class="num" data-num="84372152">$84,372,152</span></td><td data-label="LTIR USED" class="tmx_s" align="left"><span class="num" data-num="2872151">$2,872,151</span></td><td data-label="FINAL CAP SPACE" class="tmx_s" align="left"><span class="num" data-num="0">$0</span></td></tr>


In [10]:
tr.find('td', {'data-label': 'FINAL CAP HIT'}, partial=False).text

'$84,372,152'

In [11]:
def parse_tr(tr):
    team = tr.find('a', mode='first').text
    cap = tr.find('td', {'data-label': 'FINAL CAP HIT'}, partial=False).text
    cap = float(cap.replace(',', '').replace('$', ''))
    return team, cap

In [12]:
cap_hits = [parse_tr(tr) for tr in trs]

In [13]:
cap_hits = pd.DataFrame(cap_hits, columns=['Team', 'spend'])

### Standings

In [14]:
url = 'https://www.hockey-reference.com/leagues/NHL_2021.html'

In [15]:
df = pd.read_html(url)[0]

In [16]:
df['PTS%'] = df['PTS%'].apply(pd.to_numeric, errors='coerce')
df = df.dropna(subset=["PTS%"])
df = df.rename(columns={'Unnamed: 0': 'Team'})
df = df.sort_values("PTS%", ascending = False)

In [17]:
df = pd.merge(df, cap_hits, on='Team', how='left')
df["efficiency"] = df["spend"] / 1_000_000 / df["PTS%"]

In [18]:
df.sort_values("efficiency")

Unnamed: 0,Team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,RW,RgRec,RgPt%,spend,efficiency
15,New York Rangers,56,27,23,6,60,0.536,177,157,0.31,-0.05,0.491,24,24-23-9,0.509,76041380.0,141.868246
18,Philadelphia Flyers,56,25,23,8,58,0.518,163,201,-0.59,0.08,0.455,17,17-23-16,0.446,80631201.0,155.658689
16,Dallas Stars,56,23,19,14,60,0.536,158,154,0.06,-0.01,0.446,17,17-19-20,0.482,84690956.0,158.005515
22,Ottawa Senators,56,23,28,5,51,0.455,157,190,-0.51,0.08,0.402,18,18-28-10,0.411,72262575.0,158.818846
20,Calgary Flames,56,26,27,3,55,0.491,156,161,-0.08,0.01,0.464,22,22-27-7,0.455,80767183.0,164.495281
24,Los Angeles Kings,56,21,28,7,49,0.438,143,170,-0.42,0.06,0.375,19,19-28-9,0.42,72360188.0,165.205909
21,Arizona Coyotes,56,24,26,6,54,0.482,153,176,-0.36,0.05,0.42,19,19-26-11,0.438,80575270.0,167.16861
27,Detroit Red Wings,56,19,27,10,48,0.429,127,171,-0.69,0.1,0.348,16,16-27-13,0.402,73888274.0,172.233739
19,Chicago Blackhawks,56,24,25,7,55,0.491,161,186,-0.39,0.06,0.429,15,15-25-16,0.411,85938963.0,175.028438
28,New Jersey Devils,56,19,30,7,45,0.402,145,194,-0.77,0.11,0.384,15,15-30-11,0.366,70916114.0,176.408244


### Saving results

In [19]:
df.to_csv('../data/efficiency.csv', index=False)

In [20]:
df.head()

Unnamed: 0,Team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,RW,RgRec,RgPt%,spend,efficiency
0,Colorado Avalanche*,56,39,13,4,82,0.732,197,133,1.0,-0.14,0.705,35,35-13-8,0.696,,
1,Vegas Golden Knights*,56,40,14,2,82,0.732,191,124,1.05,-0.15,0.723,30,30-14-12,0.643,,
2,Carolina Hurricanes*,56,36,12,8,80,0.714,179,136,0.67,-0.1,0.625,27,27-12-17,0.634,,
3,Florida Panthers*,56,37,14,5,79,0.705,189,153,0.56,-0.08,0.67,26,26-14-16,0.607,,
4,Pittsburgh Penguins*,56,37,16,3,77,0.688,196,156,0.62,-0.09,0.643,29,29-16-11,0.616,,


In [21]:
df['date_fetched'] = pd.Timestamp('today')

In [22]:
df.head()

Unnamed: 0,Team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,RW,RgRec,RgPt%,spend,efficiency,date_fetched
0,Colorado Avalanche*,56,39,13,4,82,0.732,197,133,1.0,-0.14,0.705,35,35-13-8,0.696,,,2021-10-14 13:26:49.786453
1,Vegas Golden Knights*,56,40,14,2,82,0.732,191,124,1.05,-0.15,0.723,30,30-14-12,0.643,,,2021-10-14 13:26:49.786453
2,Carolina Hurricanes*,56,36,12,8,80,0.714,179,136,0.67,-0.1,0.625,27,27-12-17,0.634,,,2021-10-14 13:26:49.786453
3,Florida Panthers*,56,37,14,5,79,0.705,189,153,0.56,-0.08,0.67,26,26-14-16,0.607,,,2021-10-14 13:26:49.786453
4,Pittsburgh Penguins*,56,37,16,3,77,0.688,196,156,0.62,-0.09,0.643,29,29-16-11,0.616,,,2021-10-14 13:26:49.786453


In [23]:
import sqlite3

con = sqlite3.connect('../data/efficiency.db')

df.to_sql('teams', con, index=False, if_exists='append')

In [24]:
pd.read_sql('''
    select 
    * 
    from teams 
    where efficiency < 150
    order by efficiency
''', con)

Unnamed: 0,Team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,RW,RgRec,RgPt%,spend,efficiency,date_fetched
0,Toronto Maple Leafs,22,16,4,2,34,0.773,78,55,0.85,-0.2,0.727,14,14-4-4,0.727,82812722.0,107.131594,2021-03-01 06:40:37.743832
1,Florida Panthers,20,13,4,3,29,0.725,65,57,0.29,-0.11,0.675,9,9-4-7,0.625,78373558.0,108.101459,2021-03-01 06:40:37.743832
2,Vegas Golden Knights,17,12,4,1,25,0.735,51,36,0.72,-0.16,0.735,10,10-4-3,0.676,82163798.0,111.78748,2021-03-01 06:40:37.743832
3,Philadelphia Flyers,18,11,4,3,25,0.694,61,54,0.35,-0.04,0.667,9,9-4-5,0.639,80375446.0,115.814764,2021-03-01 06:40:37.743832
4,Boston Bruins,19,12,5,2,26,0.684,59,50,0.53,0.05,0.579,7,7-5-7,0.553,79332810.0,115.98364,2021-03-01 06:40:37.743832
5,Carolina Hurricanes,20,13,6,1,27,0.675,69,56,0.85,0.2,0.55,8,8-6-6,0.55,78830512.0,116.785944,2021-03-01 06:40:37.743832
6,Minnesota Wild,18,12,6,0,24,0.667,57,44,0.73,0.01,0.667,8,8-6-4,0.556,80900154.0,121.289586,2021-03-01 06:40:37.743832
7,Winnipeg Jets,20,13,6,1,27,0.675,69,53,0.55,-0.25,0.675,9,9-6-5,0.575,83532360.0,123.751644,2021-03-01 06:40:37.743832
8,Colorado Avalanche,18,11,6,1,23,0.639,55,42,0.82,0.1,0.611,10,10-6-2,0.611,80849033.0,126.524308,2021-03-01 06:40:37.743832
9,Washington Capitals,21,12,5,4,28,0.667,73,69,0.03,-0.16,0.595,10,10-5-6,0.619,85516834.0,128.211145,2021-03-01 06:40:37.743832
