In [1]:
from gazpacho import get, Soup
import pandas as pd

### Money: projected cap hit per team (CapFriendly)

In [2]:
# CapFriendly landing page; the table with id 'ich' is extracted from it further down.
url = 'https://www.capfriendly.com'
# gazpacho's `get` downloads the page; the result is handed to Soup in the next cell.
html = get(url)

In [3]:
# Wrap the downloaded markup in a gazpacho Soup so elements can be looked up with .find().
soup = Soup(html)

In [4]:
# Pick out the <table> whose id is 'ich'.
# NOTE(review): gazpacho's find() matches attributes partially by default and, in its
# automatic mode, only returns a single element when exactly one tag matches — confirm
# that no other table id contains 'ich'.
table = soup.find('table', {'id': 'ich'})

In [5]:
# Sanity check: show only the first 100 characters of the table markup.
str(table)[:100]

'<table id="ich" class="sortablex tblcf index tbl sortable"><thead><tr class="column_head"><th class='

In [6]:
# Collect every <tr> whose class contains 'tmx' (partial attribute matching is the default).
# mode='all' forces find() to always return a list; the default automatic mode would
# return None for no match or a bare element for a single match, which would break the
# indexing (trs[0]) and the iteration over trs done in later cells.
trs = table.find('tr', {'class': 'tmx'}, mode='all')

In [7]:
# Use the first row as a sample while working out how to parse a row.
tr = trs[0]

In [8]:
# The text of the first <a> in the row is the team name.
tr.find('a', mode='first').text

'Tampa Bay Lightning'

In [9]:
# Dump the raw row markup to inspect which attributes and cells are available.
print(tr)

<tr class="odd tmx_26" data-sort-f="45029166" data-sort-d="24600000" data-sort-g="10800000" data-sort-inj2="17712500" data-sort-tch="98141666" data-sort-team="Tampa Bay Lightning" data-sort-rs="20" data-sort-spc="45" data-sort-inj="4" data-sort-ch="98196125" data-sort-ch2="0" data-sort-ch3="1070500"><td class="tmx left" data-team="26" data-label="TEAM ▾"><span><img class="im_mid" style="height:15px;width:15px;margin-right:5px;margin-top:-2px;" alt="Tampa Bay Lightning" src="https://capfriendly-wlb8ng5.stackpathdns.com/assets/images/logos/tampa_bay_lightning.svg"><a href="/teams/lightning">Tampa Bay Lightning</a></span><div class="r"><a title="Daily Cap Tracker" href="/teams/lightning/cap-tracker"><img title="Daily Tracker" alt="Daily Tracker" style="margin-bottom:-3px;width:15px;height:15px" width="13" height="15" src="https://capfriendly-wlb8ng5.stackpathdns.com/assets/images/icons/teams/cal.svg" onerror="this.src='https://capfriendly-wlb8ng5.stackpathdns.com/assets/images/icons/teams

In [10]:
# partial=False requires data-label to equal 'PROJECTED CAP HIT' exactly.
tr.find('td', {'data-label': 'PROJECTED CAP HIT'}, partial=False).text

'$98,196,125'

In [11]:
def parse_tr(tr):
    """Pull the team name and projected cap hit out of one table row.

    Parameters
    ----------
    tr
        Row element exposing a gazpacho-style ``find`` method.

    Returns
    -------
    tuple
        ``(team, cap)``: ``team`` is the text of the first ``<a>`` in the row,
        ``cap`` is the text of the cell labelled 'PROJECTED CAP HIT' converted
        to ``float`` after removing ``,`` and ``$``.
    """
    team = tr.find('a', mode='first').text
    raw_amount = tr.find('td', {'data-label': 'PROJECTED CAP HIT'}, partial=False).text
    # Delete thousands separators and the currency sign in a single pass.
    digits = raw_amount.translate(str.maketrans('', '', ',$'))
    return team, float(digits)

In [12]:
# One (team, cap) tuple per table row.
cap_hits = list(map(parse_tr, trs))

In [13]:
# Turn the (team, cap) tuples into a two-column frame.
# Note: this rebinds `cap_hits` from a list to a DataFrame.
cap_hits = pd.DataFrame(cap_hits, columns=['Team', 'spend'])

### Standings

In [14]:
# Standings page for the 2021 NHL season; note this rebinds `url` from the CapFriendly address.
url = 'https://www.hockey-reference.com/leagues/NHL_2021.html'

In [15]:
# read_html returns one DataFrame per table it finds on the page; only the first is kept.
df = pd.read_html(url)[0]

In [16]:
# Convert PTS% to numbers in one vectorized call; anything unparsable becomes NaN.
df['PTS%'] = pd.to_numeric(df['PTS%'], errors='coerce')
# Rows without a numeric PTS% are discarded (presumably repeated header / division
# rows inside the table body — confirm against the page).
df = df.dropna(subset=["PTS%"])
# The team names arrive in an unnamed first column.
df = df.rename(columns={'Unnamed: 0': 'Team'})
# NOTE(review): standings pages can mark playoff-clinched teams with a trailing '*'.
# Strip it so the names still match the cap-hit table in the later left merge;
# an unmatched name would silently end up with NaN spend.
df = df.assign(Team=df['Team'].str.rstrip('*'))
df = df.sort_values("PTS%", ascending = False)

In [17]:
# Make the cell safe to re-run: without this, a second execution merges onto a frame
# that already has 'spend', yielding 'spend_x'/'spend_y' and a KeyError below.
df = df.drop(columns=['spend', 'efficiency'], errors='ignore')
# Left merge keeps every standings row; teams missing from cap_hits get NaN spend.
df = pd.merge(df, cap_hits, on='Team', how='left')
# Spend in millions divided by points percentage: lower means less spend per share of points.
df["efficiency"] = df["spend"] / 1_000_000 / df["PTS%"]

In [18]:
# Display the teams ordered by ascending efficiency (the sorted copy is not stored back in df).
df.sort_values("efficiency")

Unnamed: 0,Team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,RW,RgRec,RgPt%,spend,efficiency
0,Toronto Maple Leafs,22,16,4,2,34,0.773,78,55,0.85,-0.2,0.727,14,14-4-4,0.727,82812722.0,107.131594
3,Florida Panthers,20,13,4,3,29,0.725,65,57,0.29,-0.11,0.675,9,9-4-7,0.625,78373558.0,108.101459
2,Vegas Golden Knights,17,12,4,1,25,0.735,51,36,0.72,-0.16,0.735,10,10-4-3,0.676,82163798.0,111.78748
4,Philadelphia Flyers,18,11,4,3,25,0.694,61,54,0.35,-0.04,0.667,9,9-4-5,0.639,80375446.0,115.814764
5,Boston Bruins,19,12,5,2,26,0.684,59,50,0.53,0.05,0.579,7,7-5-7,0.553,79332810.0,115.98364
6,Carolina Hurricanes,20,13,6,1,27,0.675,69,56,0.85,0.2,0.55,8,8-6-6,0.55,78830512.0,116.785944
8,Minnesota Wild,18,12,6,0,24,0.667,57,44,0.73,0.01,0.667,8,8-6-4,0.556,80900154.0,121.289586
7,Winnipeg Jets,20,13,6,1,27,0.675,69,53,0.55,-0.25,0.675,9,9-6-5,0.575,83532360.0,123.751644
10,Colorado Avalanche,18,11,6,1,23,0.639,55,42,0.82,0.1,0.611,10,10-6-2,0.611,80849033.0,126.524308
9,Washington Capitals,21,12,5,4,28,0.667,73,69,0.03,-0.16,0.595,10,10-5-6,0.619,85516834.0,128.211145


### Saving results

In [19]:
# Write the merged table to CSV without the positional index.
# Note: this runs before the date_fetched column is added, so the CSV carries no timestamp.
df.to_csv('../data/efficiency.csv', index=False)

In [20]:
# Preview the first five rows of what was just written.
df.head()

Unnamed: 0,Team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,RW,RgRec,RgPt%,spend,efficiency
0,Toronto Maple Leafs,22,16,4,2,34,0.773,78,55,0.85,-0.2,0.727,14,14-4-4,0.727,82812722.0,107.131594
1,Tampa Bay Lightning,19,14,4,1,29,0.763,69,39,1.53,-0.05,0.737,13,13-4-2,0.737,98196125.0,128.697412
2,Vegas Golden Knights,17,12,4,1,25,0.735,51,36,0.72,-0.16,0.735,10,10-4-3,0.676,82163798.0,111.78748
3,Florida Panthers,20,13,4,3,29,0.725,65,57,0.29,-0.11,0.675,9,9-4-7,0.625,78373558.0,108.101459
4,Philadelphia Flyers,18,11,4,3,25,0.694,61,54,0.35,-0.04,0.667,9,9-4-5,0.639,80375446.0,115.814764


In [21]:
# Stamp every row with the same current timestamp (no timezone information attached),
# so snapshots appended to the database can be told apart.
df['date_fetched'] = pd.Timestamp('today')

In [22]:
# Confirm the date_fetched column is now present.
df.head()

Unnamed: 0,Team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,RW,RgRec,RgPt%,spend,efficiency,date_fetched
0,Toronto Maple Leafs,22,16,4,2,34,0.773,78,55,0.85,-0.2,0.727,14,14-4-4,0.727,82812722.0,107.131594,2021-03-01 06:40:37.743832
1,Tampa Bay Lightning,19,14,4,1,29,0.763,69,39,1.53,-0.05,0.737,13,13-4-2,0.737,98196125.0,128.697412,2021-03-01 06:40:37.743832
2,Vegas Golden Knights,17,12,4,1,25,0.735,51,36,0.72,-0.16,0.735,10,10-4-3,0.676,82163798.0,111.78748,2021-03-01 06:40:37.743832
3,Florida Panthers,20,13,4,3,29,0.725,65,57,0.29,-0.11,0.675,9,9-4-7,0.625,78373558.0,108.101459,2021-03-01 06:40:37.743832
4,Philadelphia Flyers,18,11,4,3,25,0.694,61,54,0.35,-0.04,0.667,9,9-4-5,0.639,80375446.0,115.814764,2021-03-01 06:40:37.743832


In [23]:
import sqlite3

# Open (or create) the SQLite database file; `con` is reused by the query cell below
# and is not closed anywhere in this notebook.
con = sqlite3.connect('../data/efficiency.db')

# Append the current snapshot to the 'teams' table; every run of this cell adds
# another full set of rows, distinguished only by date_fetched.
df.to_sql('teams', con, index=False, if_exists='append')

In [24]:
# Read back the teams with efficiency below 150, lowest first.
# There is no filter on date_fetched, so rows from every appended snapshot are returned.
pd.read_sql('''
    select 
    * 
    from teams 
    where efficiency < 150
    order by efficiency
''', con)

Unnamed: 0,Team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,RW,RgRec,RgPt%,spend,efficiency,date_fetched
0,Toronto Maple Leafs,22,16,4,2,34,0.773,78,55,0.85,-0.2,0.727,14,14-4-4,0.727,82812722.0,107.131594,2021-03-01 06:40:37.743832
1,Florida Panthers,20,13,4,3,29,0.725,65,57,0.29,-0.11,0.675,9,9-4-7,0.625,78373558.0,108.101459,2021-03-01 06:40:37.743832
2,Vegas Golden Knights,17,12,4,1,25,0.735,51,36,0.72,-0.16,0.735,10,10-4-3,0.676,82163798.0,111.78748,2021-03-01 06:40:37.743832
3,Philadelphia Flyers,18,11,4,3,25,0.694,61,54,0.35,-0.04,0.667,9,9-4-5,0.639,80375446.0,115.814764,2021-03-01 06:40:37.743832
4,Boston Bruins,19,12,5,2,26,0.684,59,50,0.53,0.05,0.579,7,7-5-7,0.553,79332810.0,115.98364,2021-03-01 06:40:37.743832
5,Carolina Hurricanes,20,13,6,1,27,0.675,69,56,0.85,0.2,0.55,8,8-6-6,0.55,78830512.0,116.785944,2021-03-01 06:40:37.743832
6,Minnesota Wild,18,12,6,0,24,0.667,57,44,0.73,0.01,0.667,8,8-6-4,0.556,80900154.0,121.289586,2021-03-01 06:40:37.743832
7,Winnipeg Jets,20,13,6,1,27,0.675,69,53,0.55,-0.25,0.675,9,9-6-5,0.575,83532360.0,123.751644,2021-03-01 06:40:37.743832
8,Colorado Avalanche,18,11,6,1,23,0.639,55,42,0.82,0.1,0.611,10,10-6-2,0.611,80849033.0,126.524308,2021-03-01 06:40:37.743832
9,Washington Capitals,21,12,5,4,28,0.667,73,69,0.03,-0.16,0.595,10,10-5-6,0.619,85516834.0,128.211145,2021-03-01 06:40:37.743832
