In [1]:
# Header block to include all modules that must be imported ahead of time
# Only needs to be run once per session, and each time a new module is added

import glob
import json
import os

import pandas as pd # this will need to be installed via command line first
import requests
# Lift pandas' display limits so DataFrames are shown with every column and every row
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
# Method to get the salary cap hits for every player currently signed with one NFL team
# Creates a csv (no header row) with columns player name, team location abbreviation, salary cap hit, and key

def GetTeamSalary(team: list): # accepts a list ['team-name', 'LOC']
    """Scrape one team's contracted-player cap hits from overthecap.com into a CSV.

    Parameters
    ----------
    team : list
        Two-element list ``['team-name', 'LOC']``: the hyphenated team name used
        in the overthecap.com URL and the team's location abbreviation.

    Returns
    -------
    int
        0 once the CSV has been written, -1 if the page could not be fetched
        (network error or a non-200 status code).

    Side effects
    ------------
    Writes ``./data/team-salary-lists/{LOC}-Player-Salary-List.csv`` with no
    header row and the columns Player, Team, Salary, Key (in that order).
    """
    # Use requests to get the raw HTML response from overthecap.com.
    # A timeout keeps one unresponsive request from hanging the whole batch.
    try:
        OTCResponse = requests.get(f'https://overthecap.com/salary-cap/{team[0]}', timeout=30)
    except requests.RequestException as error:
        print(f"Request failed with error: {error}")
        return -1

    # If we don't get a normal response, stop scraping
    if OTCResponse.status_code != 200:
        print(f"Request failed with status code {OTCResponse.status_code}")
        return -1

    print(f"Request succeeded with status code {OTCResponse.status_code}")
    TeamNameCapitalized = (team[0].replace("-", " ")).title()
    print(f"Getting cap hits for all players who are currently signed with the {TeamNameCapitalized}:")

    # Pandas read_html method allows the table contents to be put into a DataFrame
    TableTag = 'salary-cap-table contracted-players'
    DataFrames = pd.read_html(OTCResponse.content, attrs = {'class': TableTag})

    # Select the specific parts of the dataframe we want
    SalariesDF = DataFrames[0][['Player', 'Cap Number']]

    # Row label 51 is discarded (presumably the site's "top 51" separator row --
    # TODO confirm). Check that the label exists: a table with exactly 51 rows
    # only has labels 0..50, and dropping a missing label raises KeyError.
    if 51 in SalariesDF.index:
        SalariesDF = SalariesDF.drop(51, axis=0)

    # Rows without a player name cannot be keyed, so leave them out
    SalariesDF = SalariesDF.dropna(subset=['Player'])

    # Add team location abv to each player
    SalariesDF = SalariesDF.assign(TeamLOC = team[1])

    # Change the order of columns to make more sense semantically
    SalariesDF = SalariesDF.loc[:, ['Player', 'TeamLOC', 'Cap Number']]
    SalariesDF.columns = ['Player', 'Team', 'Salary']

    # Sort dataframe by player name
    SalariesDF = SalariesDF.sort_values(by=['Player'], ignore_index=True)

    # Create a unique key for each row that represents the player name and team location.
    # A plain list (rather than DataFrame.apply) also works when the table is empty.
    SalariesDF['Key'] = [
        _CreatePlayerKey(PlayerName, TeamLOC)
        for PlayerName, TeamLOC in zip(SalariesDF['Player'], SalariesDF['Team'])
    ]

    # Create a .csv file with all the player names and their cap hits
    SalariesDF.to_csv(f'./data/team-salary-lists/{team[1]}-Player-Salary-List.csv', index = False, header = False)

    return 0


def _CreatePlayerKey(player: str, team: str) -> str:
    """Build a row key: first initial + second name token + team abbreviation.

    Any tokens after the second (e.g. a suffix) are ignored. A one-word name is
    used whole, and an empty name yields just the team abbreviation, so the
    function never raises on short names.
    """
    # Split player name into first, last name, possible suffix
    NameList = str(player).split()
    if not NameList:
        return f'{team}'
    if len(NameList) == 1:
        return f'{NameList[0]}{team}'
    return f'{NameList[0][0]}{NameList[1]}{team}'


In [3]:
# Method to combine the per-team salary csvs into one master csv of all NFL players and their salary cap hits
# Relevant player data is from OverTheCap.com and Wikipedia.

def CombineSalaryLists(SalaryDataPath: str = "./data/team-salary-lists/"):
    """Merge every per-team salary CSV in a directory into one master CSV.

    Parameters
    ----------
    SalaryDataPath : str
        Directory holding the per-team ``*-Player-Salary-List.csv`` files.
        Defaults to the project's ``./data/team-salary-lists/`` directory.

    Returns
    -------
    int
        0 once the master list has been written, -1 if the directory contains
        no team CSVs to combine.

    Side effects
    ------------
    Writes ``zMaster-Player-Salary-List.csv`` into ``SalaryDataPath`` with no
    header row and the columns Name, Team, Salary, Key.
    """
    # The "z" prefix keeps the master list at the end of a sorted directory listing
    MasterFileName = 'zMaster-Player-Salary-List.csv'

    # glob returns paths in arbitrary order, and on a first run the master list
    # does not exist yet, so exclude it by name rather than by position.
    # Sorting makes the row order of the master list deterministic.
    SalaryCSVList = sorted(
        file
        for file in glob.glob(os.path.join(SalaryDataPath, "*.csv"))
        if os.path.basename(file) != MasterFileName
    )

    # pd.concat raises on an empty list, so report the problem instead
    if not SalaryCSVList:
        print(f"No team salary lists found in {SalaryDataPath}")
        return -1

    # Read each team file into its own dataframe
    SalariesDFList = [
        pd.read_csv(file, header = None, names = ['Name', 'Team', 'Salary', 'Key'])
        for file in SalaryCSVList
    ]

    # Combine all the dataframes into one
    SalariesDF = pd.concat(SalariesDFList, ignore_index=True)

    # Create the master .csv next to the team files
    SalariesDF.to_csv(os.path.join(SalaryDataPath, MasterFileName), index = False, header = False)

    return 0

In [4]:
# Method to get the salary cap hits for every player currently signed on every NFL team
# Reads in the team list csv, and calls the GetTeamSalary method for each team
def GetAllTeamSalary():
    """Scrape the salary list of every team named in the team list CSV.

    Reads ``./data/team-attribute-lists/Teams-List.csv`` (no header row; columns
    TeamName, Abbreviation, Conference, Division) and calls ``GetTeamSalary``
    once per team. Teams whose scrape did not return 0 are reported in a single
    summary line at the end, so stale per-team files do not go unnoticed.

    Returns
    -------
    None
    """
    # Read in the full team list csv
    FullTeamListCSV = pd.read_csv('./data/team-attribute-lists/Teams-List.csv', header = None)
    FullTeamListCSV.columns = ['TeamName', 'Abbreviation', 'Conference', 'Division']

    # Create a list of all the team names and abbreviations as a list of lists
    TeamList = FullTeamListCSV[['TeamName', 'Abbreviation']].values.tolist()

    # Get the salary cap hits for each team, remembering which ones failed
    FailedTeams = []
    for TeamName, Abbreviation in TeamList:
        # format the team name to be used in the URL
        URLTeamName = TeamName.replace(" ", "-").lower()
        if GetTeamSalary([URLTeamName, Abbreviation]) != 0: # team = ['team-name', 'LOC']
            FailedTeams.append(str(Abbreviation))

    if FailedTeams:
        print(f"Salary data was not updated for {len(FailedTeams)} team(s): {', '.join(FailedTeams)}")

# Refresh every team's salary csv first, then merge the per-team files into the master list
GetAllTeamSalary()
CombineSalaryLists()

Request succeeded with status code 200
Getting cap hits for all players who are currently signed with the Arizona Cardinals:
Request succeeded with status code 200
Getting cap hits for all players who are currently signed with the Atlanta Falcons:
Request succeeded with status code 200
Getting cap hits for all players who are currently signed with the Baltimore Ravens:
Request succeeded with status code 200
Getting cap hits for all players who are currently signed with the Buffalo Bills:
Request succeeded with status code 200
Getting cap hits for all players who are currently signed with the Carolina Panthers:
Request succeeded with status code 200
Getting cap hits for all players who are currently signed with the Chicago Bears:
Request succeeded with status code 200
Getting cap hits for all players who are currently signed with the Cincinnati Bengals:
Request succeeded with status code 200
Getting cap hits for all players who are currently signed with the Cleveland Browns:
Request suc

0