In [1]:
from io import StringIO

from gazpacho import get, Soup

In [2]:
# Download the CapFriendly 2020 archive page; `html` is handed to Soup in the next cell.
url = 'https://www.capfriendly.com/archive/2020'
html = get(url)

In [3]:
# Wrap the downloaded page in a gazpacho Soup so elements can be looked up with .find().
soup = Soup(html)

In [4]:
# Pick out the <table> element with id "ich"; the team rows are pulled from it below.
table = soup.find('table', {'id': 'ich'})

In [5]:
# Sanity check: show only the first 100 characters of the table markup.
str(table)[:100]

'<table id="ich" class="sortablex tblcf tbl index"><thead><tr class="column_head"><th align="left" st'

In [6]:
# Collect the team rows of the table. The row printed further down carries
# class="odd tmx_2", so this lookup appears to rely on find() matching attribute
# values partially (parse_tr passes partial=False where an exact match is wanted)
# — TODO confirm against gazpacho's find() documentation.
trs = table.find('tr', {'class': 'tmx'})

In [7]:
# Keep the first row around as a sample for working out the parsing steps.
tr = trs[0]

In [8]:
# The first <a> in the sample row holds the team name (see the output below).
tr.find('a', mode='first').text

'Arizona Coyotes'

In [9]:
# Dump the sample row's full markup to see which cells and data-label attributes exist.
print(tr)

<tr class="odd tmx_2"><td class="tmx" data-team="2" data-label="TEAM ▾" align="left"><span><img class="im_mid" style="height:20px;width:20px;margin-right:5px;margin-top:-2px;" alt="Arizona Coyotes" src="https://capfriendly-wlb8ng5.stackpathdns.com/assets/images/logos/arizona_coyotes.svg"><a href="/teams/coyotes/cap-tracker/2020">Arizona Coyotes</a></span></td><td data-label="FINAL CAP HIT" class="tmx_s" align="left"><span class="num" data-num="85093608">$85,093,608</span></td><td data-label="LTIR USED" class="tmx_s" align="left"><span class="num" data-num="3593608">$3,593,608</span></td><td data-label="FINAL CAP SPACE" class="tmx_s" align="left"><span class="num" data-num="0">$0</span></td></tr>


In [10]:
# Read the text of the cell labelled 'FINAL CAP HIT'. partial=False is passed
# explicitly — presumably to require an exact match on the data-label value; confirm
# against gazpacho's find() documentation.
tr.find('td', {'data-label': 'FINAL CAP HIT'}, partial=False).text

'$85,093,608'

In [11]:
def parse_tr(tr):
    """Pull the team name and final cap hit out of one table row.

    Parameters
    ----------
    tr : row object exposing ``find(tag, attrs, partial=..., mode=...)`` whose
        results have a ``.text`` attribute (the notebook passes gazpacho rows).

    Returns
    -------
    tuple
        ``(team, cap)`` where ``team`` is the text of the row's first ``<a>``
        and ``cap`` is the 'FINAL CAP HIT' cell as a float, with the ``$`` sign
        and thousands separators removed.
    """
    team_name = tr.find('a', mode='first').text
    cap_cell = tr.find('td', {'data-label': 'FINAL CAP HIT'}, partial=False)
    # e.g. '$85,093,608' -> '85093608' before the float conversion
    digits = cap_cell.text.replace('$', '').replace(',', '')
    return team_name, float(digits)

In [12]:
# Run the row parser over every team row, giving a list of (team, cap) tuples.
cap_hits = list(map(parse_tr, trs))

In [13]:
# Second source: the Hockey-Reference 2020 NHL standings page.
# NOTE: this rebinds `url` and `html` from the CapFriendly fetch earlier in the notebook.
url = 'https://www.hockey-reference.com/leagues/NHL_2020_standings.html'
html = get(url)

In [14]:
# Re-parse: `soup` now refers to the standings page, no longer the CapFriendly one.
soup = Soup(html)

In [15]:
import pandas as pd

In [16]:
# Look the page's tables up once; the first two are read as the `east` and `west` frames.
standings_tables = soup.find('table')
# Newer pandas deprecates passing literal HTML strings to read_html, so each table's
# markup is wrapped in a file-like StringIO. read_html returns a list of frames;
# each snippet holds a single table, hence the trailing [0].
east = pd.read_html(StringIO(str(standings_tables[0])))[0]
west = pd.read_html(StringIO(str(standings_tables[1])))[0]

In [17]:
# Stack the east and west tables, keep only the team-name, games-played and wins
# columns, and renumber the rows from 0.
standings_columns = ['Unnamed: 0', 'GP', 'W']
df = (
    pd.concat([east, west])
    .loc[:, standings_columns]
    .reset_index(drop=True)
)

In [18]:
# Convert the wins column in one vectorized call. Entries that are not numbers
# (presumably the division header rows embedded in the scraped tables — confirm)
# become NaN instead of raising.
df['W'] = pd.to_numeric(df['W'], errors='coerce')
# Drop every row containing a NaN, then give the unnamed team column a real name.
wins = df.dropna().rename(columns={'Unnamed: 0': 'Team'})

In [19]:
# Strip the "*" marker from team names so they can be joined on 'Team' later.
# "*" is a regex metacharacter, so request a literal match explicitly instead of
# relying on the version-dependent default of `regex`.
wins["Team"] = wins['Team'].str.replace("*", "", regex=False)

In [20]:
# Turn the (team, cap) tuples into a frame and attach each team's spend to its win total.
# The left join keeps every row of `wins`; teams without a matching name get NaN spend.
cap_hits = pd.DataFrame(cap_hits, columns=['Team', 'spend'])
df = wins.merge(cap_hits, on='Team', how='left')
# mpw = spend per win, in millions, rounded to two decimals.
df['mpw'] = (df['spend'] / df['W'] / 1_000_000).round(2)
# Display only: the sorted view is not assigned back to `df`.
df.sort_values('mpw')

Unnamed: 0,Team,GP,W,spend,mpw
17,Colorado Avalanche,70,42.0,76170877.0,1.81
0,Boston Bruins,70,44.0,81661144.0,1.86
1,Tampa Bay Lightning,70,43.0,80228851.0,1.87
16,St. Louis Blues,71,42.0,83225503.0,1.98
8,Washington Capitals,69,41.0,81562757.0,1.99
9,Philadelphia Flyers,69,41.0,82862898.0,2.02
10,Pittsburgh Penguins,69,40.0,81786587.0,2.04
19,Winnipeg Jets,71,37.0,77781416.0,2.1
14,New York Rangers,70,37.0,77613088.0,2.1
23,Vegas Golden Knights,71,39.0,81857661.0,2.1


### Saving results

In [21]:
# Persist the merged frame as CSV without the index column.
# NOTE: rows are written in merge order (the sorted view above was display-only), and
# `date_fetched` is added in a later cell, so it is not part of this file.
df.to_csv('../data/mpw.csv', index=False)

In [22]:
# Preview the first rows of the frame that was just written out.
df.head()

Unnamed: 0,Team,GP,W,spend,mpw
0,Boston Bruins,70,44.0,81661144.0,1.86
1,Tampa Bay Lightning,70,43.0,80228851.0,1.87
2,Toronto Maple Leafs,70,36.0,95178332.0,2.64
3,Florida Panthers,69,35.0,81300139.0,2.32
4,Montreal Canadiens,71,31.0,77065314.0,2.49


In [23]:
# Stamp every row with the time this cell ran; the database query output at the end of
# the notebook shows this value differing between runs.
# NOTE(review): the timestamps in the output carry no timezone — presumably local time; confirm.
df['date_fetched'] = pd.Timestamp('today')

In [24]:
# Confirm the new `date_fetched` column is present.
df.head()

Unnamed: 0,Team,GP,W,spend,mpw,date_fetched
0,Boston Bruins,70,44.0,81661144.0,1.86,2020-10-29 15:57:54.511429
1,Tampa Bay Lightning,70,43.0,80228851.0,1.87,2020-10-29 15:57:54.511429
2,Toronto Maple Leafs,70,36.0,95178332.0,2.64,2020-10-29 15:57:54.511429
3,Florida Panthers,69,35.0,81300139.0,2.32,2020-10-29 15:57:54.511429
4,Montreal Canadiens,71,31.0,77065314.0,2.49,2020-10-29 15:57:54.511429


In [25]:
# Remove the games-played column before the frame is written to the database.
# errors='ignore' keeps the cell re-runnable: without it a second execution raises
# KeyError because 'GP' has already been dropped from `df`.
df = df.drop(columns='GP', errors='ignore')

In [26]:
import sqlite3

# Open the SQLite database file (sqlite3 creates it if it does not exist yet).
# `con` is reused by the query in the next cell and is not closed in the cells shown.
con = sqlite3.connect('../data/mpw.db')

# Append this run's rows to the `teams` table rather than replacing it.
# NOTE(review): every execution adds another full set of rows — the query output below
# lists the same team once per `date_fetched` value — so re-running this cell
# duplicates data. Rows appended after 'GP' was dropped show an empty GP there.
df.to_sql('teams', con, index=False, if_exists='append')

In [27]:
# Read back the stored rows with mpw above 2 and fewer than 41 wins, highest mpw first.
# Because the table is appended to on every run, a team appears once per fetch.
pd.read_sql('''
    select 
    * 
    from teams 
    where mpw > 2 and W < 41
    order by mpw desc
''', con)

Unnamed: 0,Team,GP,W,spend,mpw,date_fetched
0,Detroit Red Wings,71,17.0,79968736.0,4.70,2020-08-11 15:14:34.722507
1,Detroit Red Wings,71,17.0,79968736.0,4.70,2020-08-12 13:00:12.128263
2,Detroit Red Wings,,17.0,79969005.0,4.70,2020-10-29 15:57:31.665946
3,Detroit Red Wings,,17.0,79969005.0,4.70,2020-10-29 15:57:54.511429
4,Ottawa Senators,71,25.0,74319369.0,2.97,2020-08-11 15:14:34.722507
...,...,...,...,...,...,...
95,Vegas Golden Knights,,39.0,81857661.0,2.10,2020-10-29 15:57:54.511429
96,Pittsburgh Penguins,69,40.0,81786224.0,2.04,2020-08-11 15:14:34.722507
97,Pittsburgh Penguins,69,40.0,81786224.0,2.04,2020-08-12 13:00:12.128263
98,Pittsburgh Penguins,,40.0,81786587.0,2.04,2020-10-29 15:57:31.665946
