In [1]:
# Library imports

# web Scraping
import requests
import httplib2
from bs4 import BeautifulSoup, SoupStrainer

# data processing
import pandas as pd
import re

# time processing
from datetime import datetime

# system settings
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

In [2]:
# convert dollar figures to integer values
def dollars_to_int(dollars):
    """Convert a formatted dollar figure such as ``'$1,234,567'`` to an int.

    Parameters
    ----------
    dollars : str or number
        A dollar string, optionally wrapped in parentheses to mark a negative
        amount (accounting style, e.g. ``'($69,690,461)'``) and optionally
        surrounded by whitespace. Numeric values are accepted as well so the
        function can be used with ``Series.apply`` on partially parsed columns.

    Returns
    -------
    int
        The whole-dollar amount. A NaN input is returned unchanged so that a
        later ``fillna`` can still deal with it.

    Raises
    ------
    ValueError
        If the text left after removing ``$`` and ``,`` is not an integer.
    """
    if not isinstance(dollars, str):
        # NaN is the only value that is not equal to itself; pass it through
        # instead of crashing on a missing table cell.
        if dollars != dollars:
            return dollars
        return int(dollars)

    text = dollars.strip()

    # Accounting notation: "($1,000)" means -1000.
    negative = text.startswith('(') and text.endswith(')')
    if negative:
        text = text[1:-1]

    text = text.replace('$', '').replace(',', '').strip()
    amount = int(text)

    return -amount if negative else amount

In [26]:
# convert name string into a standardized form
def parse_name(name):
    """Normalise a player name so it can be matched against URL-derived names.

    The name is split on single spaces; inside every piece each hyphen,
    apostrophe and period is turned into a space and the piece is trimmed.
    The pieces are then re-joined with single spaces.
    """
    # One translation table swaps all three punctuation marks for a space.
    punctuation_to_space = str.maketrans("-'.", "   ")

    cleaned_parts = [
        part.translate(punctuation_to_space).strip()
        for part in name.split(' ')
    ]

    return ' '.join(cleaned_parts)

In [4]:
# Take dollars and convert to simplified string

def parse_dollars(dollars):
    """Format a dollar amount as a compact string such as ``'$18.9M'``.

    Parameters
    ----------
    dollars : str or number
        A figure like ``'$18,900,000'``, ``18900000`` or ``18900000.0``.
        Parentheses (``'($1,000,000)'``) or a leading minus sign mark a
        negative amount.

    Returns
    -------
    str
        * ``'$X.XXM'`` for amounts of at least 1,000,000 (rounded to 2 places)
        * ``'$XXXK'`` for amounts of at least 100,000 (truncated to thousands)
        * ``'$XXXXX'`` otherwise
        Negative amounts get a leading ``'-'`` in front of the dollar sign.

    Raises
    ------
    ValueError
        If the input cannot be read as a number.
    """
    text = str(dollars).strip()

    # Accounting notation: "($1,000)" means -1000.
    negative = text.startswith('(') and text.endswith(')')
    if negative:
        text = text[1:-1]

    text = text.replace('$', '').replace(',', '').strip()

    try:
        amount = int(text)
    except ValueError:
        # Figures coming out of float-typed columns look like "13500000.0".
        amount = int(float(text))

    if amount < 0:
        negative = True
        amount = -amount

    # Thresholds are inclusive so that exactly 1,000,000 reads "$1.0M"
    # rather than "$1000K".
    if amount >= 1_000_000:
        body = str(round(amount / 1_000_000, 2)) + "M"
    elif amount >= 100_000:
        body = str(int(amount / 1000)) + "K"
    else:
        body = str(amount)

    sign = '-' if negative and amount else ''

    return sign + '$' + body

In [5]:
# Simplify contract to rounded figures

def parse_contract(contract):
    """Shorten a contract string to rounded total and per-year figures.

    The input is split on single spaces. The first piece is the total value
    and the second piece, with its opening parenthesis removed, is the
    per-year figure. Both go through ``parse_dollars`` and come back as
    ``'<total> (<apy> APY)'``.
    """
    pieces = contract.split(' ')

    total_text = parse_dollars(pieces[0])
    apy_text = parse_dollars(pieces[1].replace('(', ''))

    return "{} ({} APY)".format(total_text, apy_text)

In [6]:
# Simplify draft capital into more compact view

def parse_draft(entry):
    """Condense a draft-entry description into a short label.

    A drafted entry such as ``'2016 Draft, Round 2, #45 overall'`` becomes
    ``'2016 #2.45'``. Any other entry becomes ``'<year> UDFA'``, using the
    first four-digit number found in the text. An AttributeError is raised
    when the text contains no four-digit number either.
    """
    draft_reg = r'([0-9]{4}) Draft, Round (.*), #([0-9]{1,}) overall'

    drafted = re.search(draft_reg, entry)

    if drafted is not None:
        year, draft_round, pick = drafted.group(1), drafted.group(2), drafted.group(3)
        return year + " #" + draft_round + "." + pick

    # Not a drafted player: keep only the year he entered the league.
    return re.search('([0-9]{4})', entry).group(1) + " UDFA"

In [7]:
# Load the reference CSV of team metadata. The functions in this notebook read
# these columns from it: team_abbrev, team_name, nickname, url, image_url and
# primary_color.
# NOTE(review): this is an absolute, machine-specific Windows path with mixed
# separators, so the notebook only runs where this exact file exists. Consider
# a path relative to the notebook or a configurable data directory.

filepath = r"C:/users/jordan\desktop/overthecap_bot/team_info.csv"

team_info = pd.read_csv(filepath)

In [8]:
# Returns pandas table that shows cap situations for all 32 teams

def get_league_cap():
    """Fetch the league-wide salary-cap table from overthecap.com.

    Reads the first HTML table on the salary-cap-space page, gives its six
    columns fixed snake_case names and lower-cases the team column.

    Returns
    -------
    pandas.DataFrame
        Columns: team, cap_space, effective_cap_space, #,
        active_cap_spending, dead_money.
    """
    column_names = ['team', 'cap_space', 'effective_cap_space',
                    '#', 'active_cap_spending', 'dead_money']

    cap_table = pd.read_html('https://overthecap.com/salary-cap-space/')[0]
    cap_table.columns = column_names

    # Lower-case team names so callers can compare them with lower-case text.
    return cap_table.assign(team=cap_table['team'].str.lower())

In [9]:
# Preview the league-wide cap table (this fetches the page from overthecap.com).
get_league_cap()

Unnamed: 0,team,cap_space,effective_cap_space,#,active_cap_spending,dead_money
0,jaguars,"$82,022,150","$82,022,150",62,"$117,788,344","$4,208,098"
1,jets,"$67,948,314","$67,948,314",58,"$124,033,656","$14,927,076"
2,patriots,"$62,211,837","$62,211,837",62,"$136,427,431","$865,060"
3,colts,"$43,635,239","$43,635,239",65,"$144,116,640","$237,143"
4,broncos,"$42,350,603","$42,350,603",56,"$154,448,525","$602,118"
5,washington,"$38,207,074","$38,207,074",67,"$152,584,002","$4,864,097"
6,bengals,"$37,665,630","$37,005,630",50,"$150,938,218","$2,363,427"
7,dolphins,"$31,884,465","$31,884,465",57,"$158,494,825","$4,615,085"
8,panthers,"$29,680,936","$29,680,936",57,"$132,304,072","$22,672,549"
9,chargers,"$23,801,246","$21,821,246",48,"$164,343,044","$58,009"


In [10]:
# Returns a table with contract information for a given team

def get_team_cap(team):
    """Return the first table from a team's overthecap.com page.

    Parameters
    ----------
    team : str
        Value to look up in the ``team_abbrev`` column of ``team_info``
        (for example ``'NO'``). It must match exactly one row.

    Returns
    -------
    pandas.DataFrame
        The first table on the team page, with the second level of its
        column MultiIndex dropped.
    """
    is_team = team_info['team_abbrev'] == team
    team_url = team_info.loc[is_team, 'url'].values.item()

    contracts = pd.read_html(team_url)[0]
    # The page's table has a two-level header; keep only the top level.
    contracts.columns = contracts.columns.droplevel(1)

    return contracts

In [11]:
# Returns a list with the positional spending for a given team

def get_cap_liabilities(team):
    """Scrape the positional spending summary from a team's page.

    Parameters
    ----------
    team : str
        Value to look up in the ``team_abbrev`` column of ``team_info``.
        It must match exactly one row.

    Returns
    -------
    list of str
        The text of the ``<span>`` inside each ``<li>`` of the element with
        class ``positional-cap-number``.

    Raises
    ------
    requests.HTTPError
        If the team page responds with an HTTP error status.
    requests.Timeout
        If the server does not answer within 30 seconds.
    AttributeError
        If the page has no ``positional-cap-number`` element, or one of its
        list items has no ``<span>``.
    """
    url = team_info.loc[team_info['team_abbrev'] == team]['url'].values.item()

    # requests has no default timeout; without one a stalled server would
    # hang the notebook indefinitely.
    page = requests.get(url, timeout=30)
    # Fail on 4xx/5xx here rather than on a confusing parse error below.
    page.raise_for_status()

    soup = BeautifulSoup(page.content, 'html.parser')
    positional_spending = soup.find(class_='positional-cap-number')

    return [item.span.text for item in positional_spending.find_all('li')]

In [12]:
# Returns a JSON-serializable dict for a team with its name, cap space, active cap spending, dead money, positional spending, and the first 5 players listed in the team's cap table

def get_team_spending(team):
    """Assemble a summary dict of a team's salary-cap situation.

    Combines the league cap table, the team's contract table, the team's
    positional spending list and the static ``team_info`` reference row.

    Parameters
    ----------
    team : str
        Value to look up in the ``team_abbrev`` column of ``team_info``.

    Returns
    -------
    dict
        Keys, in order: ``nickname`` (holds the value of the ``team_name``
        column), ``cap_space``, ``active_cap_spending``, ``dead_money``,
        ``players`` (first five rows of the team table, with ``CapNumber``
        shortened by ``parse_dollars``), ``positional_spending`` (label ->
        shortened amount), ``image_url``, ``url``, ``primary_color`` (the hex
        colour as an int) and ``positional_breakdown`` (list of single-entry
        dicts with the unshortened amounts).
    """
    league_cap = get_league_cap()

    # Every static attribute comes from the single reference row for this team.
    team_row = team_info.loc[team_info['team_abbrev'] == team]
    team_name = team_row['team_name'].values.item()
    nickname = team_row['nickname'].values.item()

    roster_cap = get_team_cap(team)
    raw_positional = get_cap_liabilities(team)

    # The league table is keyed by the lower-cased nickname.
    league_row = league_cap.loc[league_cap['team'] == nickname]

    team_dict = {
        'nickname': team_name,
        'cap_space': league_row['cap_space'].item(),
        'active_cap_spending': league_row['active_cap_spending'].item(),
        'dead_money': league_row['dead_money'].item(),
        'players': roster_cap[['Player', 'CapNumber',
                               'Guaranteed Salary']].head().to_dict(orient='records'),
        # Placeholder so the key keeps its position; replaced further down.
        'positional_spending': raw_positional,
        'image_url': team_row['image_url'].values.item(),
        'url': team_row['url'].values.item(),
        # "#c6a876"-style hex string -> integer colour value.
        'primary_color': int(
            team_row['primary_color'].values.item().replace('#', ''), 16),
        'positional_breakdown': [],
    }

    for player in team_dict['players']:
        player['CapNumber'] = parse_dollars(player['CapNumber'])

    # Each scraped entry looks like "<label>: <amount>".
    breakdown = team_dict['positional_breakdown']
    for entry_text in raw_positional:
        parts = entry_text.split(': ')
        breakdown.append({parts[0]: parts[1]})

    merged = {label: amount
              for entry in breakdown
              for label, amount in entry.items()}

    team_dict['positional_spending'] = {
        label: parse_dollars(amount) for label, amount in merged.items()}

    return team_dict

In [14]:
# Example: cap summary for the team whose team_abbrev is 'NO' (performs live requests).
get_team_spending('NO')

{'nickname': 'New Orleans Saints',
 'cap_space': '($69,690,461)',
 'active_cap_spending': '$252,532,363',
 'dead_money': '$1,290,287',
 'players': [{'Player': 'Cameron Jordan',
   'CapNumber': '$18.9M',
   'Guaranteed Salary': '$0'},
  {'Player': 'Michael Thomas',
   'CapNumber': '$18.8M',
   'Guaranteed Salary': '$3,000,000'},
  {'Player': 'Terron Armstead',
   'CapNumber': '$16.23M',
   'Guaranteed Salary': '$0'},
  {'Player': 'Taysom Hill',
   'CapNumber': '$16.16M',
   'Guaranteed Salary': '$7,159,000'},
  {'Player': 'Janoris Jenkins',
   'CapNumber': '$14.2M',
   'Guaranteed Salary': '$1,200,000'}],
 'positional_spending': {'Offense': '$128.04M',
  'Defense': '$118.08M',
  'Special': '$11.04M'},
 'image_url': 'https://overthecap.com/Logos/Saints.gif',
 'url': 'https://overthecap.com/salary-cap/new-orleans-saints/',
 'primary_color': 13019254,
 'positional_breakdown': [{'Offense': '$128,035,968'},
  {'Defense': '$118,080,895'},
  {'Special': '$11,035,500'}]}

In [15]:
def retrieve_players():
    """Build a lookup table of player pages linked from the contracts page.

    Downloads https://overthecap.com/contracts/, collects every anchor href
    containing the text ``player`` and derives name and id columns from it.

    Returns
    -------
    pandas.DataFrame
        Columns: player_name (URL name with hyphens turned into spaces),
        player_url_name, player_id (first run of digits in the href),
        url (absolute) and url_root (the href as found on the page).
    """
    client = httplib2.Http()
    _status, body = client.request("https://overthecap.com/contracts/")

    # Parse only <a> tags; nothing else on the page is needed.
    anchors = BeautifulSoup(body, 'html.parser', parse_only=SoupStrainer('a'))
    hrefs = [anchor['href'] for anchor in anchors if anchor.has_attr('href')]
    player_paths = [href for href in hrefs if 'player' in href]

    player_df = pd.DataFrame()
    player_df['url_root'] = player_paths

    url_root = player_df['url_root']
    player_df['player_id'] = url_root.str.extract(r"(\d{1,})")
    player_df['player_url_name'] = url_root.str.extract(r"/.*/(.*)/.*/")
    player_df['player_name'] = player_df['player_url_name'].str.replace("-", " ")
    player_df['url'] = 'https://overthecap.com' + url_root

    column_order = ['player_name', 'player_url_name', 'player_id', 'url', 'url_root']

    return player_df[column_order]


In [45]:
def get_player_contract(player):
    """Scrape a player's bio and current-year contract figures.

    Parameters
    ----------
    player : str
        Player name in any casing, e.g. ``'derrick henry'``. It is lower-cased
        and normalised with ``parse_name`` before being matched against the
        ``player_name`` column returned by ``retrieve_players``.

    Returns
    -------
    dict
        The ``label: value`` pairs found in the page's ``player-bio-new``
        element, plus ``FA`` (True when the current year appears in the
        ``Free Agency`` entry), ``Position`` and ``URL``. ``Entry`` is
        shortened by ``parse_draft`` and ``Contract Ranking`` is cut to the
        part before ``' at '``. When ``FA`` is False the dict also gets
        ``Dead Cap`` and ``Current Year Salary``, and ``Fully Guaranteed
        Money`` / ``Contract Value`` are shortened.

    Raises
    ------
    ValueError
        If the name matches no player or more than one distinct player page,
        or if the contract table does not have exactly one row for the
        current year.
    requests.HTTPError
        If the player page responds with an HTTP error status.
    """
    current_year = str(datetime.today().year)

    players_df = retrieve_players()
    parsed_player = parse_name(player.lower())

    # The same player page may be linked more than once, so de-duplicate
    # before insisting on a single match.
    urls = players_df.loc[players_df['player_name'] == parsed_player,
                          'url'].drop_duplicates()
    if urls.empty:
        raise ValueError(
            f"No player named {parsed_player!r} found on overthecap.com")
    if len(urls) > 1:
        raise ValueError(
            f"Player name {parsed_player!r} is ambiguous: {urls.tolist()}")
    url = urls.iloc[0]

    # requests has no default timeout; without one a stalled server would
    # hang the notebook indefinitely.
    page = requests.get(url, timeout=30)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    bio = {}

    for s in soup.find(class_='player-bio-new').stripped_strings:
        # Split on the first ': ' only, so values may contain ': ' themselves;
        # strings without a separator are not label/value pairs and are skipped.
        key, separator, value = s.partition(': ')
        if not separator:
            continue
        bio[key] = value

    bio['FA'] = current_year in bio['Free Agency']

    # 'Contract Ranking' has the form "<rank> at <position>".
    ranking_parts = bio['Contract Ranking'].split(" at ")
    bio['Position'] = ranking_parts[1]
    bio['Entry'] = parse_draft(bio['Entry'])
    bio['Contract Ranking'] = ranking_parts[0]
    bio['URL'] = url

    if not bio['FA']:
        tables = pd.read_html(url)[0]

        # .copy() gives an independent frame, so the column assignments below
        # do not write into a slice of the parsed table.
        tables = tables[tables['Year'] != 'Total'].copy()

        tables['Year'] = tables['Year'].str.extract('([0-9]{4})', expand=False)

        money_columns = ['Base Salary', 'Prorated Bonus',
                         'Guaranteed Salary', 'CapNumber']
        for column in money_columns:
            # Fill blanks first: dollars_to_int expects a dollar string.
            tables[column] = tables[column].fillna('$0').apply(dollars_to_int)

        tables = tables.fillna(0)

        # Reverse cumulative sums: each row holds the total from that year
        # through the end of the contract (the result is index-aligned).
        tables['cumsum'] = tables['Prorated Bonus'][::-1].cumsum()
        tables['cumguaranteed'] = tables['Guaranteed Salary'][::-1].cumsum()
        tables['dead_cap'] = tables['cumsum'] + tables['cumguaranteed']

        # 'Year' holds exactly four digits after the extract above, so an
        # equality test is enough and is safe for rows whose year was missing.
        current_rows = tables[tables['Year'] == current_year]
        if len(current_rows) != 1:
            raise ValueError(
                f"Expected exactly one {current_year} row in the contract "
                f"table at {url}, found {len(current_rows)}")

        ### Add to bio
        bio['Dead Cap'] = current_rows['dead_cap'].item()
        bio['Current Year Salary'] = current_rows['CapNumber'].item()

        ### Reformat values
        bio['Fully Guaranteed Money'] = parse_dollars(
            bio['Fully Guaranteed Money'])
        bio['Contract Value'] = parse_contract(bio['Contract Value'])
        bio['Dead Cap'] = parse_dollars(bio['Dead Cap'])
        bio['Current Year Salary'] = parse_dollars(bio['Current Year Salary'])

    return bio

In [46]:
# Example: bio and contract summary for one player (performs live requests).
get_player_contract('derrick henry')

{'Age': '27',
 'Free Agency': '2024 (UFA)',
 'Accrued Seasons': '5',
 'Height': '6-3',
 'Weight': '247',
 'College': 'Alabama',
 'Entry': '2016 #2.45',
 '2021 Salary Cap Charge': '$13,500,000 (7.35% of cap)',
 '2021 Cash Payout': '$10,500,000 (7.02% of spending)',
 '2021 Cash to Cap Ratio': '0.78',
 'Contract Value': '$50.0M ($12.5M APY)',
 'Fully Guaranteed Money': '$25.5M',
 'Contract Ranking': '6/174',
 '2020 OTC Valuation': '$15,686,000',
 'FA': False,
 'Position': 'RB',
 'URL': 'https://overthecap.com/player/derrick-henry/4758/',
 'Dead Cap': '$19.5M',
 'Current Year Salary': '$13.5M'}