In [1]:
import pandas as pd

##### Functions for recommendation system

In [3]:
# Helper for the recommendation system: validates the stat categories typed by the user
def check_categories(cat_list, features_list):
    """Validate the category selections typed into the recommender.

    Every entry may be a menu number ('1'-'11'), the alias 'reb' (stored as
    'trb', total rebounds) or a feature name contained in ``features_list``.
    An entry that is none of these is re-prompted with ``input`` until the
    user supplies a valid one, a blank answer (the entry is skipped) or 'q'
    (quit). Every answer, first or re-prompted, goes through the same checks.

    Parameters
    ----------
    cat_list : list of str
        Raw entries, e.g. the result of splitting the user's answer on ','.
    features_list : list of str
        Feature names accepted when an entry is given by name.

    Returns
    -------
    list of str
        The selected feature names in the order given (duplicates are kept),
        or ``['q']`` as soon as the user asks to quit.
    """
    # map a menu number to a category
    cat_dict = {'1': 'pts',
                '2': 'trb',
                '3': 'ast',
                '4': 'stl',
                '5': 'blk',
                '6': 'tov',
                '7': 'fg_pct',
                '8': 'fg2_pct',
                '9': 'fg3_pct',
                '10': 'ft_pct',
                '11': 'ws'}

    # container to hold the validated categories
    feats = []

    for cat in cat_list:
        cat = cat.strip()

        # keep asking about this entry until it is resolved, skipped or quit
        while True:
            if cat == '':
                break  # blank entry: nothing to add

            lowered = cat.lower()
            if lowered == 'q':
                return ['q']

            try:
                number = int(cat)
            except ValueError:
                number = None  # not a menu number, treat it as a name

            if number is not None:
                # str(int(...)) normalises inputs such as '03' or '+3' so the
                # lookup cannot fail on a value that int() accepted
                key = str(number)
                if key in cat_dict:
                    feats.append(cat_dict[key])
                    break
                cat = input(f'Category {cat} does not exist. \nPlease select category:\n').strip()
                continue

            # if reb is provided change to trb (total rebounds)
            if lowered == 'reb':
                feats.append('trb')
                break
            if cat in features_list:
                feats.append(cat)
                break
            # accept names typed in a different case, e.g. 'PTS'
            if lowered in features_list:
                feats.append(lowered)
                break

            cat = input(f'Category {cat} does not exist. \nPlease replace with a valid category:\n').strip()

    return feats

In [4]:
# Helper for the recommendation system: maps a position menu number to its abbreviation
def check_position(pos):
    """Translate a position menu number into its abbreviation.

    ``pos`` is the menu choice as a string, '1' through '5'; the matching
    abbreviation ('PG', 'SG', 'SF', 'PF' or 'C') is returned. Any other key
    raises ``KeyError``.
    """
    abbreviations = ('PG', 'SG', 'SF', 'PF', 'C')
    # menu numbering starts at 1 and the keys are strings, as typed by the user
    lookup = {str(number): abbreviation
              for number, abbreviation in enumerate(abbreviations, start=1)}
    return lookup[pos]

In [2]:
def get_projected_ws(neighbors, projections_path='win_share_projections.pickle'):
    """Attach projected win shares to a frame of players.

    Parameters
    ----------
    neighbors : pandas.DataFrame
        Must contain a 'player' column; names are matched against the
        title-cased names of the projections.
    projections_path : str or path-like, default 'win_share_projections.pickle'
        Pickled DataFrame with at least the columns 'player' and
        'projected_ws'.

    Returns
    -------
    pandas.DataFrame
        ``neighbors`` inner-joined with 'projected_ws' on 'player'; players
        without a projection are therefore dropped.

    Raises
    ------
    FileNotFoundError
        If ``projections_path`` does not exist.
    """
    # NOTE: unpickling can execute arbitrary code; only load a trusted file.
    projections = pd.read_pickle(projections_path)
    # .str.title() leaves missing names as NaN instead of raising on them
    projections = projections.assign(player=projections['player'].str.title())
    return neighbors.merge(projections[['player', 'projected_ws']], on=['player'])

In [7]:
# Recommendation system 
def rec_similar_players(given_df=None, season=2019):
    """Interactively recommend the players most similar to a chosen player.

    Prompts (through ``input``) for an optional list of stat categories, a
    player name and an optional position. Players of ``season`` are ranked by
    the sum of absolute differences between their selected stats and those of
    the chosen player. Entering 'q' at any prompt exits.

    Parameters
    ----------
    given_df : pandas.DataFrame, optional
        Player statistics. Column names are matched case-insensitively and
        must include 'player', 'pos', 'pid', 'age', 'g', 'tm', 'season' and
        the selected stat columns. When omitted, the frame pickled in
        'final_df.pickle' is loaded at call time. The frame is not modified.
    season : int, default 2019
        Value of the 'season' column that selects the players to compare.

    Returns
    -------
    pandas.DataFrame or str
        * 'Exit Recommender' when the user quits.
        * Without a player name: the top 20 players sorted by the selected
          categories, or (when 'ws' is among the features) the top 20 by win
          share joined with their projected win shares.
        * Otherwise: the chosen player's closest matches (at most 11 rows,
          optionally restricted to one position) with projected win shares.

    Raises
    ------
    TypeError
        If ``given_df`` is neither ``None`` nor a pandas DataFrame.
    """
    if given_df is None:
        # Loaded on call instead of as a default argument, so merely defining
        # the function does not read the disk.
        # NOTE: unpickling can execute arbitrary code; only load a trusted file.
        given_df = pd.read_pickle('final_df.pickle')
    if not isinstance(given_df, pd.DataFrame):
        raise TypeError('Given data is not a pandas dataframe.')

    # DataFrame setup: work on a copy so the caller's columns are not renamed
    data = given_df.copy()
    data.columns = [col.lower() for col in data.columns]

    # Rows sharing the same pid and age are dropped and only their 'TOT' row is
    # kept (presumably the combined season line of a player with several teams).
    dupes = data[data.duplicated(['pid', 'age'], keep=False)].sort_values(by=['player', 'g'], ascending=False)
    data = data.drop(dupes.index)
    season_totals = dupes[dupes['tm'] == 'TOT']
    if not season_totals.empty:
        data = pd.concat([data, season_totals])

    # rename() ignores labels that are absent, so no error handling is needed
    data = data.rename(columns={'3p': 'fg3',
                                '3pa': 'fg3a',
                                '2p': 'fg2',
                                '2pa': 'fg2a',
                                '3p_pct': 'fg3_pct',
                                '2p_pct': 'fg2_pct'})

    # Features of interest
    default_features = ['mp', 'fg_pct', 'fg2_pct', 'fg3_pct', 'ft_pct',
                        'pts', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'ws']
    features = default_features

    print('Possible categories: \n 1. Points (pts) \n 2. Rebounds (trb) \n 3. Assists (ast) \n 4. Steals (stl) \n 5. Blocks (blk) \n 6. Turnovers (tov) \n 7. Field goal percentage (fg_pct) \n 8. Mid-range Field goal percentage (fg2_pct) \n 9. 3-point percentage (fg3_pct) \n 10: Free-throw percentage (ft_pct) \n 11: Win Share (ws)')

    # ask for categories
    categories = input('\n(Optional) \nPlease select categories, ordered by importance (Separated by commas(,)): \n').split(',')

    # create a quit option
    if categories == ['q']:
        return 'Exit Recommender'

    # check if categories were given
    if categories != ['']:
        categories = check_categories(categories, default_features)
        if categories == ['q']:
            return 'Exit Recommender'
        # dict.fromkeys removes repeats while keeping the order of importance;
        # if nothing valid was selected, fall back to every feature
        features = list(dict.fromkeys(categories)) or default_features

    cols = ['player', 'pos'] + features
    stat_cols = cols[2:]

    # Setting up df to have only players of the requested season
    # (.copy() so the title-casing below writes to an independent frame)
    main_df = data.loc[data['season'] == season, cols].copy()
    main_df['player'] = main_df['player'].str.title()

    # ask for player name that you want to compare
    name = input('Please input player name: \n').strip().title()
    print('')

    if name == 'Q':
        return 'Exit Recommender'

    # if no name is provided, just give top players based on categories
    if name == '':
        # if 'ws' is not among the features, sort the players by the categories
        if 'ws' not in features:
            print('No player selected! Here are the top 20 players based on the categories you selected: ')
            return main_df.sort_values(by=features, ascending=False).reset_index(drop=True).iloc[:20]

        # otherwise rank by win share and attach the projections
        # NOTE(review): the final sort is ascending, as in the original; confirm
        # whether descending was intended for a "top 20" listing.
        print('No player selected! Here are the top 20 players by projected win share: ')
        top = main_df.sort_values(by='ws', ascending=False).reset_index(drop=True).iloc[:20]
        return get_projected_ws(top).sort_values(by='projected_ws')

    # check to see if player exists in dataframe; 'q' leaves the loop
    known_players = set(main_df['player'])
    while name not in known_players:
        name = input('Player is not in current database. \nPlease input player name: \n').strip().title()
        print('')
        if name == 'Q':
            return 'Exit Recommender'

    print('Possible positions: \n 1. PG (Point Guard) \n 2. SG (Shooting Guard)\n 3. SF (Small Forward)\n 4. PF (Power Forward)\n 5. C  (Center)')

    # ask for a position to recommend
    position = input('\n(Optional) \nPlease specify position: \n').strip()

    while True:
        if position == 'q':
            return 'Exit Recommender'
        try:
            number = int(position)
        except ValueError:
            break  # blank, or an abbreviation such as 'pg' that is used as typed
        if 1 <= number <= 5:
            position = check_position(str(number))
            break
        # numbers outside the menu (0, negatives, > 5) are asked again
        position = input('Please specify a position: \n').strip()

    # stats of the chosen player (first matching row)
    target = main_df[main_df['player'] == name].iloc[0]

    # subtract the player's stats from every player's stats and aggregate the
    # absolute differences into a 'distance'; the smallest distance comes first
    distances = (main_df[stat_cols] - target[stat_cols].astype(float)).abs().sum(axis=1)
    ordered_index = distances.sort_values().index

    # select the players from the main dataframe, skipping incomplete rows
    neighbors = main_df.loc[ordered_index].dropna(subset=stat_cols)

    if position != '':
        neighbors = neighbors[neighbors['pos'] == position.upper()]

    # return the closest 11 rows (the player included when it passes the filters)
    closest = neighbors.iloc[:11].reset_index(drop=True)
    return get_projected_ws(closest).sort_values(by='projected_ws')

In [8]:
# Launch the interactive recommender with its default data; it prompts for
# categories, a player name and a position.
rec_similar_players()

Possible categories: 
 1. Points (pts) 
 2. Rebounds (trb) 
 3. Assists (ast) 
 4. Steals (stl) 
 5. Blocks (blk) 
 6. Turnovers (tov) 
 7. Field goal percentage (fg_pct) 
 8. Mid-range Field goal percentage (fg2_pct) 
 9. 3-point percentage (fg3_pct) 
 10: Free-throw percentage (ft_pct) 
 11: Win Share (ws)

(Optional) 
Please select categories, ordered by importance (Separated by commas(,)): 

Please input player name: 


No player selected! Here are the top 20 players by projected win share: 


FileNotFoundError: [Errno 2] No such file or directory: 'win_share_projections.pickle'