In [1]:
import time
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import re
from scipy.spatial.distance import cdist
from datetime import date
from numpy import asarray

pd.options.mode.chained_assignment = None  # default='warn'

In [2]:
#Import Datasets
# Reads the two CSV inputs used by every later cell:
#   season_df - the season-level table (file name says 2013 to 2022)
#   draft_df  - the draft-class table (file name says 1994 to 2022)
# NOTE(review): both paths are absolute paths on the author's machine; the notebook
# cannot be re-run elsewhere until they point at a local copy of the files.

season_df = pd.read_csv('/Users/yushunli/Documents/Data Science/jupyter_notebook/Fantasy Football/2013_to_2022_data.csv')
draft_df = pd.read_csv('/Users/yushunli/Documents/Data Science/jupyter_notebook/Fantasy Football/1994_to_2022_draftclass.csv')

In [3]:
# Rank players within each (position, season) group by fantasy points.
# Rank 1 is the highest scorer; tied players share the lowest rank of the tie.
season_df['Pos_Rank'] = (
    season_df
    .groupby(['Pos', 'Season'])['Fantasy_Points']
    .rank(ascending=False, method='min')
)

## Calculating Similarity:
- The similarity score is an equal-weight (50/50) average of (a) the absolute percentage difference in fantasy points and (b) the Euclidean distance between min-max-scaled feature vectors (targets, receptions, yards and TDs)

In [4]:
def find_peers(season_df, target):
    """Return the season rows that are comparable to ``target``.

    A row is comparable when it has the same position as the target's first
    row in ``season_df`` and an age inside the target's observed age span
    (both ends inclusive). The target's own rows are part of the result.

    Parameters
    ----------
    season_df : pandas.DataFrame
        Season table with at least ``Player``, ``Pos`` and ``Age`` columns.
    target : str
        Player name to look up in the ``Player`` column.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``season_df`` with their original index.

    Raises
    ------
    KeyError
        If ``target`` has no rows in ``season_df``.
    """
    target_rows = season_df.loc[season_df.Player == target]
    if target_rows.empty:
        # Previously this surfaced as an opaque ``KeyError: 0`` from ``Pos[0]``.
        raise KeyError(f"Player {target!r} not found in season_df")

    # Positional access: no need to reset the index of a slice in place.
    position = target_rows.Pos.iloc[0]
    min_age = target_rows.Age.min()
    max_age = target_rows.Age.max()

    in_age_span = (season_df.Age >= min_age) & (season_df.Age <= max_age)
    same_position = season_df.Pos == position
    return season_df.loc[in_age_span & same_position]

In [5]:
def abs_difference(peer_df, target):
    """Absolute relative fantasy-point gap between each peer and ``target``, per age.

    Steps:
      1. Keep the first row for every (Player, Age) pair.
      2. Pivot to a Player x Age table of ``Fantasy_Points`` and drop every
         player that is missing a value at any age.
      3. For every age compute ``|peer - target| / target``, rounded to 2 decimals.
      4. Append ``Avg``, the row mean rounded to 2 decimals.

    Parameters
    ----------
    peer_df : pandas.DataFrame
        Rows with ``Player``, ``Age`` and ``Fantasy_Points`` columns.
    target : str
        Player used as the reference row.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Player`` (target excluded), columns ``Age_<n>`` plus
        ``Avg``, sorted by ``Avg`` ascending.
    """
    unique_seasons = peer_df.drop_duplicates(subset=['Player', 'Age'], keep='first')
    points_by_age = unique_seasons.pivot(
        index='Player', columns='Age', values='Fantasy_Points'
    ).dropna(axis=0)

    # Raises IndexError if the target was dropped for having an incomplete row.
    target_points = points_by_age.loc[points_by_age.index == target].iloc[0]

    pct_gap = ((points_by_age - target_points) / target_points).abs().round(2)
    pct_gap.columns = 'Age_' + pct_gap.columns.astype(int).astype(str)
    pct_gap['Avg'] = pct_gap.mean(axis=1).round(2)

    peers_only = pct_gap.loc[pct_gap.index != target]
    # Alphabetical order first, then the similarity sort, as before.
    return peers_only.sort_index().sort_values(by='Avg', ascending=True)

In [6]:
def euclid_rank(df, target_player, age):
    """Euclidean distance from every other player to ``target_player``.

    Distances are computed over all columns whose name ends in ``_Scaled``
    and rounded to 2 decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to compare (the caller passes the rows of a single age). Must
        contain ``Player`` and at least one ``*_Scaled`` column.
    target_player : str
        Player whose feature vector is the reference point.
    age : int or str
        Only used to label the output column as ``Age_<age>``.

    Returns
    -------
    pandas.DataFrame
        One column ``Age_<age>``, indexed by ``Player`` (target excluded).

    Raises
    ------
    ValueError
        If ``target_player`` has no row in ``df``.
    """
    # Keep one row per player, using the same keep-first rule as abs_difference.
    # With duplicate target rows cdist returns several columns, which cannot be
    # stored in the single output column; duplicate peers would yield a
    # non-unique index that breaks the later merge on 'Player'.
    seasons = df.drop_duplicates(subset='Player', keep='first')

    scaled_cols = seasons.columns[seasons.columns.str.endswith("_Scaled")]
    is_target = seasons.Player == target_player

    target_features = seasons.loc[is_target, scaled_cols]
    if target_features.empty:
        raise ValueError(
            "Player {!r} has no row to compare against for Age_{}".format(target_player, age)
        )

    others = seasons.loc[~is_target]
    distances = cdist(others[scaled_cols], target_features, 'euclidean').round(decimals=2)

    return pd.DataFrame(
        data=distances,
        index=pd.Index(others.Player, name='Player'),
        columns=['Age_{}'.format(str(age))],
    )

In [7]:
def euclid_compare(peer_df, target):
    """Per-age Euclidean distance of every peer to ``target``, plus the average.

    For each distinct value of ``peer_df.Age`` the rows of that age are passed
    to ``euclid_rank``. The resulting one-column frames are inner-joined on
    ``Player`` in ascending age order, so only players with a row at every age
    remain.

    Parameters
    ----------
    peer_df : pandas.DataFrame
        Peer rows containing ``Player``, ``Age`` and the ``*_Scaled`` columns
        that ``euclid_rank`` reads.
    target : str
        Player to measure distances against.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Player``; one ``Age_<n>`` column per age and an ``Avg``
        column (row mean, rounded to 2 decimals), sorted by ``Avg`` ascending.

    Raises
    ------
    ValueError
        If ``peer_df`` contains no ages.
    """
    # A plain dict keyed by whole-number age holds the per-age frames. Writing
    # them through ``vars()[name] = ...`` only works on interpreters where
    # locals() returns the same dict every call; from Python 3.13 each call
    # returns an independent snapshot, so the stored frames would be lost.
    distance_by_age = {}
    for raw_age in peer_df.Age.unique():
        # int() handles both float ages (23.0) and integer ages (23); slicing
        # the last two characters off str(age) only worked for the former.
        age = int(raw_age)
        distance_by_age[age] = euclid_rank(
            df=peer_df.loc[peer_df.Age == raw_age],
            target_player=target,
            age=age,
        )

    ages = sorted(distance_by_age)
    if not ages:
        raise ValueError("peer_df contains no ages to compare")

    # Start from the youngest age and join the remaining ages onto it.
    base_df = distance_by_age[ages[0]]
    for age in ages[1:]:
        base_df = pd.merge(base_df, distance_by_age[age], how='inner', on='Player')

    base_df['Avg'] = round(base_df.mean(axis=1), 2)
    return base_df.sort_values(by='Avg', ascending=True)

### Draft Similarity
- Find the relative draft similarity of the peers
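- Calculate a Pick Score: the average of two weights, one from the absolute difference in overall draft pick and one from the absolute difference in positional draft pick (each weight is 1 minus the normalized difference from the target player)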
- Apply the Pick Score to the seasonal differences but weighted by seasons played
    - The more seasons played, the less pick score affects the similarity, and vice versa

In [8]:
def draft_position(output_df, draft_df, target):
    """Select the draft rows of the target and of every player in ``output_df``.

    Parameters
    ----------
    output_df : pandas.DataFrame
        Frame whose index holds the peer player names.
    draft_df : pandas.DataFrame
        Draft table with a ``Player`` column.
    target : str
        Target player's name, added to the list of names to keep.

    Returns
    -------
    pandas.DataFrame
        Rows of ``draft_df`` whose ``Player`` is one of those names.
    """
    wanted_players = [*output_df.index, target]
    return draft_df.loc[draft_df.Player.isin(wanted_players)]

In [9]:
def draft_similarity(peer_draft, draft_df, target=None, total_picks=32 * 7):
    """Score how close each player's draft slot is to the target's.

    Two weights are computed per player, each rounded to 2 decimals:

    * ``1 - |Pick - target Pick| / total_picks``
    * ``1 - |Position_Pick - target Position_Pick| / avg_drafted``, where
      ``avg_drafted`` is the rounded mean number of players drafted per season
      at the most common position in ``peer_draft``.

    ``Pick_Score`` is the mean of the two weights, rounded to 2 decimals.

    Parameters
    ----------
    peer_draft : pandas.DataFrame
        Draft rows of the target and the peers, with ``Player``, ``Pos``,
        ``Pick`` and ``Position_Pick`` columns. It is not modified.
    draft_df : pandas.DataFrame
        Full draft table with ``Season``, ``Pos`` and ``Player`` columns.
    target : str, optional
        Target player's name. When omitted, the module-level ``target``
        variable is used, which is what this function always read before.
    total_picks : int, optional
        Denominator for the overall pick difference (default ``32 * 7``).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Player`` with one ``Pick_Score`` column, sorted
        descending. The target's own row is included.

    Raises
    ------
    NameError
        If ``target`` is omitted and no module-level ``target`` exists.
    IndexError
        If the target has no row in ``peer_draft``.
    """
    if target is None:
        # Backward compatibility with two-argument callers.
        if 'target' not in globals():
            raise NameError("name 'target' is not defined; pass target=...")
        target = globals()['target']

    # Identify the target player's draft position.
    target_rows = peer_draft.loc[peer_draft.Player == target]
    if target_rows.empty:
        raise IndexError(f"Player {target!r} has no row in peer_draft")
    target_draft = target_rows.iloc[0]

    # Average number of players drafted per season, for each position.
    avg_drafted = (
        draft_df.groupby(['Season', 'Pos'])['Player'].count()
        .groupby(level='Pos').mean()
        .round(0)
    )
    position = peer_draft.Pos.mode().iloc[0]
    # Label lookup: positional ``series[0]`` on a label index is deprecated.
    pos_pick_num = avg_drafted.loc[position]

    pick_diff_abs = (peer_draft['Pick'] - target_draft['Pick']).abs()
    pos_pick_diff_abs = (peer_draft['Position_Pick'] - target_draft['Position_Pick']).abs()
    pick_diff_weight = (1 - pick_diff_abs / total_picks).round(2)
    pos_pick_diff_weight = (1 - pos_pick_diff_abs / pos_pick_num).round(2)
    pick_score = ((pos_pick_diff_weight + pick_diff_weight) / 2).round(2)

    # Build a fresh frame so the caller's peer_draft is left untouched.
    peer_score = (
        peer_draft[['Player']]
        .assign(Pick_Score=pick_score)
        .sort_values(by='Pick_Score', ascending=False)
        .set_index('Player')
    )
    return peer_score

In [10]:
def draft_score_weighting(output_df, peer_score):
    """Blend the draft Pick Score into the per-age similarity table.

    Each row of ``output_df`` is divided by that player's ``Pick_Score`` and
    rounded to 2 decimals. The result is averaged with the original values,
    giving the originals a weight equal to the number of seasons played
    (column count minus the ``Avg`` column) and the divided values a weight
    of one. The more seasons played, the less impact the draft similarity has.

    Parameters
    ----------
    output_df : pandas.DataFrame
        Similarity table indexed by ``Player`` with ``Age_<n>`` columns and ``Avg``.
    peer_score : pandas.DataFrame
        Frame indexed by ``Player`` with a ``Pick_Score`` column.

    Returns
    -------
    pandas.DataFrame
        Same shape as ``output_df``, rounded to 2 decimals and sorted by
        ``Avg`` ascending.
    """
    n_seasons = output_df.shape[1] - 1

    # Line up each player's Pick_Score with the rows of output_df.
    pick_scores = output_df.join(peer_score)['Pick_Score']
    draft_adjusted = output_df.div(pick_scores, axis=0).round(2)

    blended = ((output_df * n_seasons + draft_adjusted) / (n_seasons + 1)).round(2)
    return blended.sort_values(by='Avg', ascending=True)

In [11]:
def calculate_similarities(target, season_df, draft_df):
    """Run the full similarity pipeline for one player.

    1. ``find_peers`` selects the comparable season rows.
    2. ``abs_difference`` and ``euclid_compare`` each produce a per-age table;
       the two are averaged element-wise.
    3. The 25 rows with the lowest ``Avg`` are kept.
    4. ``draft_position``, ``draft_similarity`` and ``draft_score_weighting``
       fold the draft Pick Score into the table.
    5. Rows with a missing ``Avg`` or an ``Avg`` of 1 or more are removed.

    Parameters
    ----------
    target : str
        Player to find comparables for.
    season_df : pandas.DataFrame
        Season table.
    draft_df : pandas.DataFrame
        Draft table.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Player`` with ``Age_<n>`` columns and ``Avg``.
    """
    peer_df = find_peers(season_df=season_df, target=target)

    # Metric 1: absolute % difference in fantasy points per age.
    fantasy_points = abs_difference(peer_df=peer_df, target=target)

    # Metric 2: Euclidean distance per age, ordered the same way as metric 1.
    euclid = (
        euclid_compare(peer_df=peer_df, target=target)
        .sort_index()
        .sort_values(by='Avg', ascending=True)
    )

    # Equal-weight average of the two metrics; keep the 25 closest players.
    output_df = (
        ((fantasy_points + euclid) / 2)
        .sort_values(by='Avg', ascending=True)
        .head(25)
    )

    # NOTE: draft_similarity is called without a player name and reads the
    # notebook-level `target` variable, so that variable must hold the same
    # value as the `target` argument of this function.
    peer_draft = draft_position(output_df, draft_df, target)
    peer_score = draft_similarity(peer_draft, draft_df)
    final_output = draft_score_weighting(output_df, peer_score)

    final_output = final_output.dropna(subset=['Avg'])
    return final_output.loc[final_output.Avg < 1]

In [12]:
# Find the closest comparables for one player; the table is sorted by Avg ascending.
# `target` must remain a notebook-level variable: draft_similarity reads it as a
# global rather than receiving it as an argument.
target = 'Christian Watson'
output = calculate_similarities(target = target, season_df = season_df, draft_df = draft_df)
output

Age,Age_23,Avg
Player,Unnamed: 1_level_1,Unnamed: 2_level_1
Curtis Samuel,0.06,0.06
Zay Jones,0.14,0.14
Tyler Lockett,0.14,0.14
Gabriel Davis,0.15,0.15
Brandon Aiyuk,0.15,0.15
Amari Cooper,0.16,0.16
Diontae Johnson,0.17,0.17
Marquise Brown,0.17,0.17
DeVonta Smith,0.19,0.19
Deebo Samuel,0.19,0.19


## Project Future Stats

In [13]:
# Names of the comparable players, in the order of the similarity table.
player_list = list(output.index)

In [14]:
# The second-to-last column is the final "Age_<n>" column (the last one is "Avg");
# strip the prefix to get the age as a float.
age = float(output.columns[-2].replace("Age_", ""))

In [15]:
# Seasons of the comparable players at ages above `age`.
projection_stats = season_df.loc[
    season_df.Player.isin(player_list) & (season_df.Age > age)
]

In [16]:
# Player x Age table of fantasy points for the later seasons, in similarity order.
# Missing seasons are filled with 0.
proj_points = (
    projection_stats
    .pivot(index='Player', columns='Age', values='Fantasy_Points')
    .reindex(output.index)
    .fillna(value=0)
)
# Keep only players whose points across all ages sum to more than zero.
proj_points = proj_points.loc[proj_points.sum(axis=1) > 0]
proj_points

Age,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Curtis Samuel,172.0,7.0,144.0,0.0,0.0,0.0,0.0,0.0,0.0
Zay Jones,35.0,28.0,82.0,155.0,0.0,0.0,0.0,0.0,0.0
Tyler Lockett,104.0,96.0,188.0,192.0,215.0,203.0,199.0,0.0,0.0
Brandon Aiyuk,189.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Amari Cooper,178.0,207.0,189.0,168.0,209.0,0.0,0.0,0.0,0.0
Diontae Johnson,176.0,219.0,132.0,0.0,0.0,0.0,0.0,0.0,0.0
Marquise Brown,177.0,120.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
DeVonta Smith,209.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Deebo Samuel,64.0,292.0,138.0,0.0,0.0,0.0,0.0,0.0,0.0
Christian Kirk,120.0,168.0,200.0,0.0,0.0,0.0,0.0,0.0,0.0


In [121]:
# Turn each comparable's Avg into a weight: flip it (1 - Avg) so a lower Avg
# weighs more, then normalise so the weights sum to 1 (rounded to 4 decimals).
weight = (
    output[['Avg']]
    .reindex(proj_points.index)
    .assign(flip_avg=lambda w: 1 - w['Avg'])
    .assign(weight=lambda w: (w['flip_avg'] / w['flip_avg'].sum()).round(4))
)
weight

Age,Avg,flip_avg,weight
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Amari Cooper,0.19,0.81,0.0617
Marquise Brown,0.2,0.8,0.0609
Curtis Samuel,0.23,0.77,0.0586
Stefon Diggs,0.24,0.76,0.0579
Christian Kirk,0.24,0.76,0.0579
Kenny Stills,0.29,0.71,0.0541
Jordan Matthews,0.3,0.7,0.0533
Robert Woods,0.31,0.69,0.0526
Tyler Boyd,0.31,0.69,0.0526
Jarvis Landry,0.32,0.68,0.0518


In [122]:
# Weighted sum of the comparables' age-25 fantasy points.
# NOTE(review): proj_points was filled with 0 for seasons that have no data, so
# players without an age-25 season contribute 0 while still holding part of the
# weight; this pulls the figure down. Consider restricting to players with an
# age-25 value and renormalising the weights - confirm the intended behaviour.
proj_points[25.0].multiply(weight['weight']).sum()

135.1832

In [76]:
# Player x Age table of positional rank for the later seasons, in similarity order.
proj_rank = (
    projection_stats
    .pivot(index='Player', columns='Age', values='Pos_Rank')
    .reindex(output.index)
)
proj_rank

Age,25.0,26.0,27.0,28.0,29.0,30.0
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Amari Cooper,9.0,17.0,26.0,9.0,,
Marquise Brown,47.0,,,,,
Rueben Randle,,,,,,
Curtis Samuel,186.0,33.0,,,,
Stefon Diggs,11.0,21.0,3.0,7.0,4.0,
Christian Kirk,26.0,11.0,,,,
Kenny Stills,25.0,53.0,59.0,135.0,160.0,
Jordan Matthews,103.0,102.0,182.0,,,
Robert Woods,31.0,10.0,16.0,13.0,49.0,59.0
Tyler Boyd,23.0,35.0,32.0,38.0,,
