In [3]:
# Header block to include all modules that must be imported ahead of time
# Only needs to be run once per session, and each time a new module is added

import requests
import json
import pandas as pd # this will need to be installed via command line first

In [89]:
# Method to get all the revelent player attributes for every player currently signed on every NFL team
# The CBS Sports website is used as the source

def GetPlayterAttributes(team: list): # accepts a list ['team-name', 'LOC']
    # Use requests to get the raw HTML response from cbssports.com
    CBSResponse = requests.get(f'https://www.cbssports.com/nfl/teams/{team[1]}/{team[0]}/roster/')
    
    # If we get a normal response proceed with data scraping
    if CBSResponse.status_code == 200:
                 
        # Confirm the request was successful
        print(f"Request succeeded with status code {CBSResponse.status_code}")
        
        # Use Pandas to read the HTML content and put it into a DataFrame
        TableTag = 'TableBase-table'
        DataFrames = pd.read_html(CBSResponse.content, attrs={'class': TableTag})
        
        # Select the relevant data from the DataFrame(offense, defense, and special teams)
        OffenseDF = DataFrames[0][['Player', 'POS', 'HT', 'WT', 'DOB (AGE)', 'EXP']]
        DefenseDF = DataFrames[1][['Player', 'POS', 'HT', 'WT', 'DOB (AGE)', 'EXP']]
        SpecialDF = DataFrames[2][['Player', 'POS', 'HT', 'WT', 'DOB (AGE)', 'EXP']]
        
        # Combine the three DataFrames into one
        FullTeamDF = pd.concat([OffenseDF, DefenseDF, SpecialDF], axis = 0, ignore_index = True)
        # print(FullTeamDF.head(20))
        
        # Method to clean the player names since CBS puts player news directly in the name cell
        def CleanNameCell(NameText):
            # Split the player name into initial, first-last name string, and last name string
            NameText = NameText.split(' ')
            
            # If the name cell doesn't contain player news and no name postfix
            if len(NameText) == 3:
                # Remove the first initial by selecting the rest of the elements
                NameText = NameText[1:]
                # Extract the first name from the first-last name string
                NameText[0] = NameText[0][len(NameText[1]):]
                
                print(NameText[1] + "1", " " in NameText)
                # Create new string with first and last name
                NameText = ' '.join(NameText)
                
            # The name cell contains a name postfix
            elif len(NameText) == 5:
                # Create new list to store the frist name, last name, postfix
                NameTextPostfix = []
                # Extract the first name from the first-suffix name string, add it to the list
                NameTextPostfix.append(NameText[2][len(NameText[4]):])
                # Add the last name to the list
                NameTextPostfix.append(NameText[1])
                # Add the postfix to the list
                NameTextPostfix.append(NameText[4])
                # Turn the list into a string
                NameText = ' '.join(NameTextPostfix)

            # Otherwise the player name cell contains player news, injury status
            else:
                # Select only the capitilized words from the player cell
                NameTextCapitalized = [s for s in NameText if s and not s[0].islower()]
                # Select the first inital which is always the first item in NameText when there is news, injury status
                First = NameTextCapitalized[0][0]
                # Select the last which is always the second item in NameText when there is news, injury status
                LastName =  NameTextCapitalized[1]
                # Create postfix varaible in case player name has it
                Postfix = ''
                PostfixList = {'Jr.', 'II','III', 'IV'} # set of possible postfixes

                # Iterate through the items in NameTextCapitalzied from the third item to the end
                for s in range(3, (len(NameTextCapitalized) - 1)):
                    # Make sure we're always comparing to the first letter of current item
                    FirstTemp = NameTextCapitalized[s][0]
                    # If the current element first char is the first initial and the next element is last name
                    if NameTextCapitalized[s][0] == FirstTemp and NameTextCapitalized[s + 1].replace(":", "") == LastName:
                        # If we haven't reached the second to last elementand the next next element is a postfix
                        if NameTextCapitalized[s + 2] in PostfixList:
                            # Set the postfix variable to the next next element
                            Postfix = NameTextCapitalized[s + 2].replace(".", "") # remove the period from the postfix

                        # Set the first and last name 
                        First = NameTextCapitalized[s]
                        LastName = NameTextCapitalized[s + 1].replace(":", "") # remove the possible colon from the last name
                        # No break point because the first instance of the full name might not contain a postfix
                        
                # Recreats NameText with the first name, last name, and possible postfix
                NameText = [First, LastName, Postfix]
                NameText = ' '.join(NameText)        

            
            return NameText
        

        # Apply the method to the player names column
        FullTeamDF['Player'] = FullTeamDF['Player'].apply(CleanNameCell)
        print(FullTeamDF.head(30))
               
        """
        # Create a .csv file with all the player names and their cap hits
        SalariesDF.to_csv(f'{TeamNameCaptalized}-Player-Salary-List.csv', index = False, header = False)
        
        # Read the csv back for testing purposes
        SalariesCSV = pd.read_csv(f'{TeamNameCaptalized}-Player-Salary-List.csv', header = None)
        SalariesCSV.columns = ['Player', 'CapNumber', 'TeamLOC']
        print(SalariesCSV)
        """

    # If we don't get a normal reponse, stop scraping
    else:
        print(f"Request failed with status code {CBSResponse.status_code}")
        return -1
    
GetPlayterAttributes(['seattle-seahawks', 'SEA'])

Request succeeded with status code 200
Howell1 False
Smith1 False
Charbonnet1 False
Holani1 False
Lewis1 False
McIntosh1 False
Bobo1 False
Eskridge1 False
Hatten1 False
Lockett1 False
Metcalf1 False
White1 False
Williams1 False
Winston1 False
Young1 False
Brown1 False
Fant1 False
Mabry1 False
Russell1 False
Westover1 False
Harris1 False
Novitsky1 False
Oluwatimi1 False
Cross1 False
Fant1 False
Forsythe1 False
Greenfield1 False
Jerrell1 False
O'Neal1 False
Pircher1 False
Laumea1 False
Anchrum1 False
Curtis1 False
Eiland1 False
Haynes1 False
Tomlinson1 False
Jones1 False
Williams1 False
Adams1 False
Hankins1 False
Levelston1 False
Pickering1 False
Young1 False
Gotel1 False
Anderson1 False
Ceaser1 False
Dodson1 False
Gibbs1 False
Hall1 False
Knight1 False
Mafe1 False
Nwosu1 False
O'Connell1 False
Onujiogu1 False
Rhattigan1 False
Richardson1 False
Taylor1 False
Boykin1 False
Brown1 False
Bryant1 False
Burns1 False
Jackson1 False
James1 False
Pritchett1 False
Witherspoon1 False
Woolen1 Fals

In [3]:
# Method to get the revelent attributes for every player currently signed on every NFL team
# Reads in the team list csv, and calls the GetTeamSalaryCapHits method for each team
def GetAllPlayerAttributes():
    # Read in the full team list csv
    FullTeamListCSV = pd.read_csv('./data/team-attribute-lists/Teams-List.csv', header = None)
    FullTeamListCSV.columns = ['TeamName', 'Abbreviation', 'Conference', 'Division']
    
    # Create a list of all the team names and abbreviations as a list of lists
    TeamList = []
    # Take the first two columns of each row, convert to a list, append to TeamList
    FullTeamListCSV.apply(lambda row: TeamList.append(row[:2].tolist()), axis = 1)
    
    # Get the salary cap hits for each team
    for team in TeamList:
        # format the team name to be used in the URL
        team[0] = team[0].replace(" ", "-").lower()
        GetPlayterAttributes(team)
    

GetAllPlayerAttributes()
"""
# A list of links to Over The Cap Salary Cap pages for various teams
NFLTeams = [['buffalo-bills', 'BUF', 4], ['miami-dolphins', 'MIA', 20], ['new-england-patriots', 'NE0', 22], ['new-york-jets', 'NYJ', 25],
            ['baltimore-ravens', 'BAL', 3], ['cincinnati-bengals', 'CIN', 7], ['cleveland-browns', 'CLE', 8], ['pittsburgh-steelers', 'PIT', 27],
            ['houston-texans', 'HOU', 13], ['indianapolis-colts', 'IND', 14], ['jacksonville-jaguars', 'JAX', 15], ['tennessee-titans', 'TEN', 31], 
            ['denver-broncos', 'DEN', 10], ['kansas-city-chiefs', 'KC0', 16], ['las-vegas-raiders', 'LV0', 17], ['los-angeles-chargers', 'LAC', 18],
            ['dallas-cowboys', 'DAL', 9], ['new-york-giants', 'NYG', 24], ['philadelphia-eagles', 'PHI', 26], ['washington-commanders', 'WSH', 32],
            ['chicago-bears', 'CHI', 6], ['detroit-lions', 'DET', 11], ['green-bay-packers', 'GB0', 12], ['minnesota-vikings', 'MIN', 21],
            ['atlanta-falcons', 'ATL', 2], ['carolina-panthers', 'CAR', 5], ['new-orleans-saints', 'NO0', 23], ['tampa-bay-buccaneers', 'TB0', 30],
            ['arizona-cardinals', 'ARI', 1], ['los-angeles-rams', 'LAR', 19], ['san-francisco-49ers', 'SF0', 28], ['seattle-seahawks', 'SEA', 29]
           ]

GetTeamRoster(['buffalo-bills', 'BUF', 4])
"""

buffalo-bills BUF 4


AttributeError: 'str' object has no attribute 'capabilities'