In [2]:
# Header block to include all modules that must be imported ahead of time
# Only needs to be run once per session, and each time a new module is added

import requests
import json
import pandas as pd # this will need to be installed via command line first
# Display settings: None removes pandas' display limit, so DataFrames are
# shown with every column and every row instead of being truncated
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [5]:
# Method to create a master player list
# Combines the salary and attributes dataframes together

def CreateMasterPlayerList(
    SalariesPath = './data/team-salary-lists/zMaster-Player-Salary-List.csv',
    AttributesPath = './data/player-attributes-lists/zMaster-Player-Attributes-List.csv',
    OutputPath = './data/Master-Player-List.csv',
):
    """Merge the salary and attributes lists into one master player CSV.

    Both inputs are read as header-less CSVs, inner-joined on their key
    column, and written to ``OutputPath`` with the columns PlayerID, Name,
    Team, Position, Height, Weight, DOB, Age, YrsExp, CapHit. Name and Team
    are taken from the salary list.

    Args:
        SalariesPath: Header-less CSV with the columns Name, Team, Salary, Key.
        AttributesPath: Header-less CSV with the columns Name, Team, Position,
            Height, Weight, DOB, Age, Experience, Key.
        OutputPath: Where the master list CSV is written (without the index).

    Returns:
        Always 0.
    """
    # Get the salary and attributes dataframes and set their column names
    SalariesDF = pd.read_csv(SalariesPath, header = None)
    SalariesDF.columns = ['Name', 'Team', 'Salary', 'Key']
    AttributesDF = pd.read_csv(AttributesPath, header = None)
    AttributesDF.columns = ['Name', 'Team', 'Position', 'Height', 'Weight', 'DOB', 'Age', 'Experience', 'Key']

    # Name and Team are kept from the salary list only, so drop the attribute
    # copies before merging instead of juggling suffixes afterwards
    AttributesDF = AttributesDF.drop(columns = ['Name', 'Team'])
    MasterDF = pd.merge(SalariesDF, AttributesDF, on = 'Key', how = 'inner')

    # Rename by name (not by position) so the result does not depend on the
    # column order the merge happens to produce, then re-order for readability
    MasterDF = MasterDF.rename(columns = {'Salary': 'CapHit', 'Key': 'PlayerID', 'Experience': 'YrsExp'})
    NewColumnOrder = ['PlayerID', 'Name', 'Team', 'Position', 'Height', 'Weight', 'DOB', 'Age', 'YrsExp', 'CapHit']
    MasterDF = MasterDF[NewColumnOrder]

    # A key that appears twice in both inputs yields a 4 row group after the
    # merge; the 1st and 4th rows of such a group are the correct rows to keep.
    # Drop every row that is neither the first nor the last of its PlayerID
    # group (a no-op when there are no duplicates).
    # NOTE(review): groups of any other size also keep only their first and
    # last rows - confirm that is the intended result for those cases.
    NotFirstInGroup = MasterDF.duplicated(subset = ['PlayerID'], keep = 'first')
    NotLastInGroup = MasterDF.duplicated(subset = ['PlayerID'], keep = 'last')
    MasterDF = MasterDF[~(NotFirstInGroup & NotLastInGroup)]

    # Create a csv file of the master player list
    MasterDF.to_csv(OutputPath, index = False)

    return 0

# Build the master list now: writes ./data/Master-Player-List.csv and returns 0
CreateMasterPlayerList()

0