In [10]:
# Header block to include all modules that must be imported ahead of time
# Only needs to be run once per session, and each time a new module is added

import requests
import json
import pandas as pd # this will need to be installed via command line first
import lxml # this will need to be installed via command line first(as well)
import html5lib # this will need to be installed via command line first
import numpy as np
import re

In [7]:
# Method to retrieve all team names, confrences, and divisions 

def GetAllTeamNamesConfrencesDivisions():
    # Use requests to get the raw HTML response from overthecap.com
    CBSResponse = requests.get(f'https://www.cbssports.com/nfl/teams/')
    
    # If we get a normal response, proceed with data scraping
    if CBSResponse.status_code == 200:
        
        # Confirm the request was successful
        print(f"Request succeeded with status code {CBSResponse.status_code}")

        # Use Pandas to read the HTML content and put it into a DataFrame
        TableTag = 'TableBase-table'
        DataFrames = pd.read_html(CBSResponse.content, attrs={'class': TableTag})
        
        # Select the specific parts of the dataframe we want 
        TeamsDFAFC = DataFrames[0][['East']]
        TeamsDFNFC = DataFrames[1][['East']]
        
        # Insert a new row at the top with column name
        TeamsDFAFC.insert(0, 'Conference', 'AFC')
        TeamsDFNFC.insert(0, 'Conference', 'NFC')
        
        # Generate a list of divisions in the order the appear
        DivisionsList = ['East', TeamsDFAFC.iloc[4, 1], TeamsDFAFC.iloc[9, 1], TeamsDFAFC.iloc[14, 1]]
        
        # The CBS table includes division labels as rows so they need to be removed
        TeamsDFAFC.drop(labels = [4, 9, 14], inplace = True)
        TeamsDFNFC.drop(labels = [4, 9, 14], inplace = True)
        
        # Rename the column with all the teamnames, concatinate the dataframes together
        TeamsDFAFC.rename(columns = {'East': 'TeamName'}, inplace = True)
        TeamsDFNFC.rename(columns = {'East': 'TeamName'}, inplace = True)
        TeamsDF = pd.concat([TeamsDFAFC, TeamsDFNFC], ignore_index = True)
        
        # Create list to add each division to its respective team
        DivisionsFullList = []
        for i in range(2):
            for division in DivisionsList:
                for j in range(4):
                    DivisionsFullList.append(division)
        
        # Add the new column to the teams dataframe
        TeamsDF['Division'] = DivisionsFullList
        
        # Change the order of columns to match the database schema  
        new_order = ['TeamName','Conference', 'Division']
        TeamsDF = TeamsDF.loc[:, new_order]
        
        # Sort the teams dataframe by team name
        TeamsDF.sort_values(by = ['TeamName'], inplace = True)
        
        # Turn the DataFrame into a numpy array and sort it by team name
        TeamsArray = TeamsDF.to_numpy()
        print(TeamsArray)
        
        """
        # Read the csv back for testing purposes
        TeamsDF = pd.read_csv(f'./Team_Lists/Teams-List.csv', header = None)
        TeamsDF.columns = ['TeamName','Conference', 'Division']
        print(TeamsDF)
        """

    # If we don't get a normal reponse, stop scraping
    else:
        print(f"Request failed with status code {CBSResponse.status_code}")
        return -1


Request succeeded with status code 200
[['Arizona Cardinals' 'NFC' 'West']
 ['Atlanta Falcons' 'NFC' 'South']
 ['Baltimore Ravens' 'AFC' 'North']
 ['Buffalo Bills' 'AFC' 'East']
 ['Carolina Panthers' 'NFC' 'South']
 ['Chicago Bears' 'NFC' 'North']
 ['Cincinnati Bengals' 'AFC' 'North']
 ['Cleveland Browns' 'AFC' 'North']
 ['Dallas Cowboys' 'NFC' 'East']
 ['Denver Broncos' 'AFC' 'West']
 ['Detroit Lions' 'NFC' 'North']
 ['Green Bay Packers' 'NFC' 'North']
 ['Houston Texans' 'AFC' 'South']
 ['Indianapolis Colts' 'AFC' 'South']
 ['Jacksonville Jaguars' 'AFC' 'South']
 ['Kansas City Chiefs' 'AFC' 'West']
 ['Las Vegas Raiders' 'AFC' 'West']
 ['Los Angeles Chargers' 'AFC' 'West']
 ['Los Angeles Rams' 'NFC' 'West']
 ['Miami Dolphins' 'AFC' 'East']
 ['Minnesota Vikings' 'NFC' 'North']
 ['New England Patriots' 'AFC' 'East']
 ['New Orleans Saints' 'NFC' 'South']
 ['New York Giants' 'NFC' 'East']
 ['New York Jets' 'AFC' 'East']
 ['Philadelphia Eagles' 'NFC' 'East']
 ['Pittsburgh Steelers' 'AFC' 'N

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TeamsDFAFC.drop(labels = [4, 9, 14], inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TeamsDFNFC.drop(labels = [4, 9, 14], inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TeamsDFAFC.rename(columns = {'East': 'TeamName'}, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-cop

In [19]:
# Method to get all team abbreviations
def GetAllTeamAbbrvs():
   # Define the API endpoint and parameters
    endpoint = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "format": "json",
        "titles": "Wikipedia:WikiProject_National_Football_League/National_Football_League_team_abbreviations",
        "prop": "revisions",
        "rvprop": "content"
    }

    # Make the request to the Wikipedia API
    WikiResponse = requests.get(endpoint, params=params)
    
    # If we get a normal response, proceed with data scraping
    if WikiResponse.status_code == 200:
        
        # Confirm the request was successful
        print(f"Request succeeded with status code {WikiResponse.status_code}")
        
        # Extract the JSON data from the response
        WikiData = WikiResponse.json()
        PageContent = WikiData['query']['pages']['53669795']['revisions'][0]['*']
        
        # Sort the team abbreviations into a list, select only the abbreviations we want
        AllAbbeviations = re.findall(r'\b[A-Z]{3}\b', PageContent)
        TeamAbbrviations = AllAbbeviations[4::2] # This selects every other element in the list from index 4
        print(TeamAbbrviations)
        
        
    else:
        print("Error fetching data from the Wikipedia API.")


Request succeeded with status code 200
['ARI', 'ATL', 'BAL', 'BUF', 'CAR', 'CHI', 'CIN', 'CLE', 'DAL', 'DEN', 'DET', 'HOU', 'IND', 'JAC', 'LAC', 'MIA', 'MIN', 'NYG', 'NYJ', 'PHI', 'PIT', 'SEA', 'TEN', 'WAS']


In [3]:
# Method to get the salary cap hits for every player currently signed on an NFL team
# Creates a csv with columns player name, salary cap hit, and team location abbrviation 

def GetTeamSalaryCapHits(team):
    # Use requests to get the raw HTML response from overthecap.com
    OTCResponse = requests.get(f'https://overthecap.com/salary-cap/{team[0]}')
        
    # If we get a normal response proceed with data scraping
    if OTCResponse.status_code == 200:
        print(f"Request succeeded with status code {OTCResponse.status_code}")
        TeamNameCaptalized = (team[0].replace("-", " ")).title()
        print(f"Here are the Salary Cap hits for all players who are currently signed with the {TeamNameCaptalized}:")

        # Pandas read_html method allows the table contents to be put into a DataFrame
        TableTag = 'salary-cap-table contracted-players'
        DataFrames = pd.read_html(OTCResponse.content, attrs = {'class': TableTag})
        
        # Select the specific parts of the dataframe we want 
        SalariesDF = DataFrames[0][['Player', 'Cap Number']]
        if len(SalariesDF) >= 51:
            SalariesDF = SalariesDF.drop(51, axis=0)
        # Add team location abv to each player
        SalariesDF = SalariesDF.assign(TeamLOC = team[1])
        SalariesDF = SalariesDF.assign(TeamName = (team[0].replace("-", " ")).title())
        
        
        # Change the order of columns to make more sense semantically 
        new_order = ['Player', 'TeamLOC', 'TeamName', 'Cap Number']
        SalariesDF = SalariesDF.loc[:, new_order]
        
        # Specify filename and path
        csvPathName = f'./Salary_Lists/{TeamNameCaptalized}-Player-Salary-List.csv'
        
        # Create a .csv file with all the player names and their cap hits
        SalariesDF.to_csv(csvPathName, index = False, header = False)
        
        
    
        """
        # Read the csv back for testing purposes
        SalariesCSV = pd.read_csv(f'./Salary_Lists/{TeamNameCaptalized}-Player-Salary-List.csv', header = None)
        SalariesCSV.columns = ['Player', 'TeamLOC', 'TeamName', 'Cap Number']
        print(SalariesCSV)
        """
    # If we don't get a normal reponse, stop scraping
    else:
        print(f"Request failed with status code {OTCResponse.status_code}")
        return -1


In [4]:
# This particular cell will be used to call all other methods
        
GetAllTeamNamesConfrencesDivisions()     
GetAllTeamAbbrvs()

# A list of links to Over The Cap Salary Cap pages for various teams
NFLTeams = [['buffalo-bills', 'BUF'], ['miami-dolphins', 'MIA'], ['new-england-patriots', 'NE'], ['new-york-jets', 'NYJ'],
            ['baltimore-ravens', 'BAL'], ['cincinnati-bengals', 'CIN'], ['cleveland-browns', 'CLE'], ['pittsburgh-steelers', 'PIT'],
            ['houston-texans', 'HOU'], ['indianapolis-colts', 'IND'], ['jacksonville-jaguars', 'JAX'], ['tennessee-titans', 'TEN'], 
            ['denver-broncos', 'DEN'], ['kansas-city-chiefs', 'KC'], ['las-vegas-raiders', 'LV'], ['los-angeles-chargers', 'LAC'],
            ['dallas-cowboys', 'DAL'], ['new-york-giants', 'NYG'], ['philadelphia-eagles', 'PHI'], ['washington-commanders', 'WSH'],
            ['chicago-bears', 'CHI'], ['detroit-lions', 'DET'], ['green-bay-packers', 'GB'], ['minnesota-vikings', 'MIN'],
            ['atlanta-falcons', 'ATL'], ['carolina-panthers', 'CAR'], ['new-orleans-saints', 'NO'], ['tampa-bay-buccaneers', 'TB'],
            ['arizona-cardinals', 'ARI'], ['los-angeles-rams', 'LAR'], ['san-francisco-49ers', 'SF'], ['seattle-seahawks', 'SEA']
           ]

# Call the method defined above to get all the player data
def GetMultipleTeamSalaryCapHits(TeamList):
    for team in TeamList:
        GetTeamSalaryCapHits(team)

# GetMultipleTeamSalaryCapHits(NFLTeams)
GetTeamSalaryCapHits(['san-francisco-49ers', 'SF'])

Request succeeded with status code 200
Here are the Salary Cap hits for all players who are currently signed with the San Francisco 49Ers:


OSError: Cannot save file into a non-existent directory: 'Salary_Lists'