In [33]:
# Header block to include all modules that must be imported ahead of time
# Only needs to be run once per session, and each time a new module is added

import requests
import json
import pandas as pd # this will need to be installed via command line first
import glob
import os

# Lift pandas' display limits so rendered DataFrames are never truncated:
# None means "no cap" for both the number of columns and the number of rows.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [35]:
# Function to create a DataFrame of all NFL players and their salary cap hits
# Relevant player data is from OverTheCap.com and Wikipedia.

def CombineSalaryLists(SalaryDataPath="./data/team-salary-lists/"):
    """Load every salary CSV in a directory into a single DataFrame.

    Each ``*.csv`` file is read without a header row and its columns are
    labelled 'Name', 'Team' and 'Salary'.

    Parameters
    ----------
    SalaryDataPath : str, optional
        Directory to scan for ``*.csv`` files. Defaults to
        "./data/team-salary-lists/".

    Returns
    -------
    pandas.DataFrame
        The rows of all files stacked in sorted-path order, with a fresh
        0..n-1 index.

    Raises
    ------
    ValueError
        If no ``*.csv`` file is found in ``SalaryDataPath``.
    """
    # os.path.join copes with a trailing separator on the directory, and
    # sorting makes the row order reproducible (glob order is arbitrary).
    SalaryCSVList = sorted(glob.glob(os.path.join(SalaryDataPath, "*.csv")))

    # Fail with an actionable message instead of letting pd.concat complain
    # about an empty list.
    if not SalaryCSVList:
        raise ValueError(f"No salary CSV files found in '{SalaryDataPath}'")

    # Read each file into its own dataframe
    SalariesDFList = [
        pd.read_csv(CSVPath, header=None, names=['Name', 'Team', 'Salary'])
        for CSVPath in SalaryCSVList
    ]

    # Combine all the dataframes into one
    return pd.concat(SalariesDFList, ignore_index=True)


In [36]:
# Function to create a DataFrame of all NFL players and relevant attributes
# Relevant player data is from CBS Sports and Wikipedia.

def CombineAttributesLists(AttributesDataPath="./data/player-attributes-lists/"):
    """Load every player-attributes CSV in a directory into a single DataFrame.

    Each ``*.csv`` file is read without a header row and its columns are
    labelled 'Name', 'Team', 'POS', 'HT', 'WT', 'DOB', 'AGE' and 'EXP'.

    Parameters
    ----------
    AttributesDataPath : str, optional
        Directory to scan for ``*.csv`` files. Defaults to
        "./data/player-attributes-lists/".

    Returns
    -------
    pandas.DataFrame
        The rows of all files stacked in sorted-path order, with a fresh
        0..n-1 index.

    Raises
    ------
    ValueError
        If no ``*.csv`` file is found in ``AttributesDataPath``.
    """
    # os.path.join copes with a trailing separator on the directory, and
    # sorting makes the row order reproducible (glob order is arbitrary).
    AttributesCSVList = sorted(glob.glob(os.path.join(AttributesDataPath, "*.csv")))

    # Fail with an actionable message instead of letting pd.concat complain
    # about an empty list.
    if not AttributesCSVList:
        raise ValueError(f"No attributes CSV files found in '{AttributesDataPath}'")

    # Read each file into its own dataframe
    AttributesDFList = [
        pd.read_csv(
            CSVPath,
            header=None,
            names=['Name', 'Team', 'POS', 'HT', 'WT', 'DOB', 'AGE', 'EXP'],
        )
        for CSVPath in AttributesCSVList
    ]

    # Combine all the dataframes into one
    return pd.concat(AttributesDFList, ignore_index=True)


In [37]:
# Method to create a master player list
# Combines the salary and attributes dataframes together

def _BuildPlayerKey(PlayersDF):
    """Return one merge key per row: first initial + second name token + team."""
    # Split every name on whitespace; the .str accessor works row by row,
    # whereas indexing the Series directly (Series[0]) selects a single row.
    NameParts = PlayersDF['Name'].str.split()
    FirstInitial = NameParts.str[0].str[0]
    # Single-word names have no second token; contribute '' rather than NaN
    SecondToken = NameParts.str[1].fillna('')
    return FirstInitial + SecondToken + PlayersDF['Team']


def CreateMasterPlayerList(SalariesDF=None, AttributesDF=None):
    """Merge the salary and attributes tables into one master player table.

    A 'Key' column is derived for each table from the player's name and team
    (first initial + second whitespace-separated name token + team), and the
    two tables are inner-joined on it.

    Parameters
    ----------
    SalariesDF : pandas.DataFrame, optional
        Table with at least 'Name' and 'Team' columns. When omitted it is
        loaded with ``CombineSalaryLists()``.
    AttributesDF : pandas.DataFrame, optional
        Table with at least 'Name' and 'Team' columns. When omitted it is
        loaded with ``CombineAttributesLists()``.

    Returns
    -------
    pandas.DataFrame
        One row per key present in both tables. Columns shared by both inputs
        carry the suffixes '_sal' and '_attr'; the 'Key' column is retained.
    """
    # Get the salary and attributes dataframes
    if SalariesDF is None:
        SalariesDF = CombineSalaryLists()
    if AttributesDF is None:
        AttributesDF = CombineAttributesLists()

    # Attach the keys on copies (assign) so caller-supplied frames are left
    # untouched. Rows whose key could not be built (missing name or team) are
    # dropped: pandas would otherwise match NaN keys with each other.
    SalariesKeyed = SalariesDF.assign(Key=_BuildPlayerKey(SalariesDF)).dropna(subset=['Key'])
    AttributesKeyed = AttributesDF.assign(Key=_BuildPlayerKey(AttributesDF)).dropna(subset=['Key'])

    # Merge the dataframes using the created keys, use suffixes to differentiate the similar columns
    MasterDF = pd.merge(
        SalariesKeyed,
        AttributesKeyed,
        on='Key',
        how='inner',
        suffixes=('_sal', '_attr'),
    )

    return MasterDF

# Build the merged player table. As the last expression in the cell, the
# returned DataFrame is what the notebook renders as this cell's output.
CreateMasterPlayerList()