In [10]:
# Header block to include all modules that must be imported ahead of time
# Only needs to be run once per session, and each time a new module is added

import requests
import json
import numpy as np
import re
import pandas as pd # this will need to be installed via command line first

In [11]:
# Method to retrieve all team names, confrences, and divisions 

def GetAllTeamNamesConfrencesDivisions():
    # Use requests to get the raw HTML response from cbssports.com
    CBSResponse = requests.get(f'https://www.cbssports.com/nfl/teams/') 
    
    # If we get a normal response, proceed with data scraping
    if CBSResponse.status_code == 200:
        
        # Confirm the request was successful
        print(f"Request succeeded with status code {CBSResponse.status_code}")

        # Use Pandas to read the HTML content and put it into a DataFrame
        TableTag = 'TableBase-table'
        DataFrames = pd.read_html(CBSResponse.content, attrs={'class': TableTag})
        
        # Select the specific parts of the dataframe we want 
        TeamsDFAFC = DataFrames[0][['East']]
        TeamsDFNFC = DataFrames[1][['East']]
        
        # Insert a new row at the top with column name
        TeamsDFAFC.insert(0, 'Conference', 'AFC')
        TeamsDFNFC.insert(0, 'Conference', 'NFC')
        
        # Generate a list of divisions in the order the appear
        DivisionsList = ['East', TeamsDFAFC.iloc[4, 1], TeamsDFAFC.iloc[9, 1], TeamsDFAFC.iloc[14, 1]]
        
        # The CBS table includes division labels as rows so they need to be removed
        TeamsDFAFC.drop(labels = [4, 9, 14], inplace = True)
        TeamsDFNFC.drop(labels = [4, 9, 14], inplace = True)
        
        # Rename the column with all the teamnames, concatinate the dataframes together
        TeamsDFAFC.rename(columns = {'East': 'TeamName'}, inplace = True)
        TeamsDFNFC.rename(columns = {'East': 'TeamName'}, inplace = True)
        TeamsDF = pd.concat([TeamsDFAFC, TeamsDFNFC], ignore_index = True)
        
        # Create list to add each division to its respective team
        DivisionsFullList = []
        for i in range(2):
            for division in DivisionsList:
                for j in range(4):
                    DivisionsFullList.append(division)
        
        # Add the new column to the teams dataframe
        TeamsDF['Division'] = DivisionsFullList
        
        # Change the order of columns to match the database schema  
        new_order = ['TeamName','Conference', 'Division']
        TeamsDF = TeamsDF.loc[:, new_order]
        
        # Sort the teams dataframe by team name
        TeamsDF.sort_values(by = ['TeamName'], inplace = True)
        
        # Turn the DataFrame into a numpy array and sort it by team name
        TeamsArray = TeamsDF.to_numpy().tolist()
        
        return TeamsArray

    # If we don't get a normal reponse, stop scraping
    else:
        print(f"Request failed with status code {CBSResponse.status_code}")
        return -1

In [12]:
# Method to get all team abbreviations
def GetAllTeamAbbrvs():
    # Define the API endpoint and parameters
    endpoint = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "format": "json",
        "titles": "Wikipedia:WikiProject_National_Football_League/National_Football_League_team_abbreviations",
        "prop": "revisions",
        "rvprop": "content"
    }

    # Make the request to the Wikipedia API
    WikiResponse = requests.get(endpoint, params=params)
    
    # If we get a normal response, proceed with data scraping
    if WikiResponse.status_code == 200:
        
        # Confirm the request was successful
        print(f"Request succeeded with status code {WikiResponse.status_code}")
        
        # Extract the JSON data from the response
        WikiData = WikiResponse.json()
        PageContent = WikiData['query']['pages']['53669795']['revisions'][0]['*']
        
        # Sort the team abbreviations into a list, select only the abbreviations we want
        AllAbbeviations = re.findall(r'\b[A-Z]{2,3}\b', PageContent)
        TeamAbbrviations = AllAbbeviations[4::2] # This selects every other element in the list from index 4
        
        return TeamAbbrviations

    # If we don't get a normal reponse, stop scraping
    else:
        print("Error fetching data from the Wikipedia API.")
        return -1


In [13]:
# Method to contruct a list of all team names, abbreviations, confrences, divisions
# Creates the Complete Team List as a csv

def CreateCompleteTeamList():
    # Get all team names, confrences, and divisions
    TeamNamesConfrencesDivisions = GetAllTeamNamesConfrencesDivisions()
    
    # Get all team abbreviations
    TeamAbbrvs = GetAllTeamAbbrvs()
    
    # Concatinate the two lists
    CompleteTeamsList = []
    for i in range(len(TeamNamesConfrencesDivisions)):
        CompleteTeamsList.append([TeamNamesConfrencesDivisions[i][0], TeamAbbrvs[i], TeamNamesConfrencesDivisions[i][1], TeamNamesConfrencesDivisions[i][2]])

    # Create a dataframe from the list
    CompleteTeamsDF = pd.DataFrame(CompleteTeamsList)
    
    # Add the column names
    CompleteTeamsDF.columns = ['TeamName', 'TeamAbbrv', 'Conference', 'Division']
    
    # Create a .csv file with all the team names, abbreviations, confrences, and divisions
    csvPathName = f'./data/team-attribute-lists/Teams-List.csv'
    pd.DataFrame(CompleteTeamsDF).to_csv(csvPathName, index = False, header = False)

    """
    # Read the csv back for testing purposes
    TestDF = pd.read_csv(f'./data/team-attribute-lists/Teams-List.csv', header = None)
    TestDF.columns = ['TeamName', 'TeamAbbrv', 'Conference', 'Division']
    print(TestDF)
    """
    
CreateCompleteTeamList()

Request succeeded with status code 200
Request succeeded with status code 200
                 TeamName TeamAbbrv Conference Division
0       Arizona Cardinals       ARI        NFC     West
1         Atlanta Falcons       ATL        NFC    South
2        Baltimore Ravens       BAL        AFC    North
3           Buffalo Bills       BUF        AFC     East
4       Carolina Panthers       CAR        NFC    South
5           Chicago Bears       CHI        NFC    North
6      Cincinnati Bengals       CIN        AFC    North
7        Cleveland Browns       CLE        AFC    North
8          Dallas Cowboys       DAL        NFC     East
9          Denver Broncos       DEN        AFC     West
10          Detroit Lions       DET        NFC    North
11      Green Bay Packers        GB        NFC    North
12         Houston Texans       HOU        AFC    South
13     Indianapolis Colts       IND        AFC    South
14   Jacksonville Jaguars       JAC        AFC    South
15     Kansas City Chiefs 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TeamsDFAFC.drop(labels = [4, 9, 14], inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TeamsDFNFC.drop(labels = [4, 9, 14], inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  TeamsDFAFC.rename(columns = {'East': 'TeamName'}, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-cop