In [205]:
%matplotlib inline
import numpy as np
import sklearn as sk
import scipy as sp
import requests
from bs4 import BeautifulSoup
import pandas as pd
import sqlite3
from sqlite_api import *
from sklearn import linear_model
from sklearn import cluster
from sklearn.preprocessing import StandardScaler
import sklearn as sk
from math import sqrt
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

## Common Drafting Strategies

Three Tiers of Priority:
 
    1. RB, WR
    2. QB, TE
    3. DEF, K

RB and WR are prioritized extremely heavily, often filling the starting roster and getting two benched per position before even dipping into the top QBs and TEs. DEF and K are always picked last and do not have as much of an impact of the other positions. For this reason, the main challenge when creating a realistic drafting AI is deciding when to start prioritizing the second tier above the current first tier available positions.

Ideal Bench Distribution:
    
    6 Total
    0-1 QBs
    3-5 RBs
    3-5 WRs
    0-1 TEs
    0 K
    0 DEF


## AI Algorithm

The AI will always pick the player with the most predicted fantasy points in his respective position, so the AI really just needs to be able to figure out what position is most needed and cross-reference that with the players available in those respective positions to make its choice.

The AI will primarily use three levels of data to make its decision. The first is a result of clustering the available players to draft for every position

In [206]:
start_nums = {
    'QB': 1,
    'RB': 2,
    'WR': 2,
    'TE': 1,
    'DEF': 1,
    'K': 1,
    'FLEX': 1
}

In [207]:
def is_done(X, labels, limit):
    prev_group = labels[0]
    prev_n = X[0]
    for idx in range(1, len(X)):
        cur_group = labels[idx]
        cur_n = X[idx]
        if prev_n - cur_n > limit:
            if cur_group == prev_group:
                return False
        
        prev_group = cur_group
        prev_n = cur_n
    
    return True

In [226]:
# keep creating more clusters until a min dist of ?

def cluster_players(json_df, limit_per_player=6, num_players=6):
    df = json_df
    X = np.array(df['PredFantasyPoints']).reshape(len(df),1)
    X = X[:limit_per_player*num_players]
    
    ac = cluster.AgglomerativeClustering(n_clusters=2, affinity='euclidean', linkage='average')
    ac.fit(X)
    
    n_clust = 2

    while not is_done(X, ac.labels_, sqrt(X.std())):
        n_clust += 1
        ac = cluster.AgglomerativeClustering(n_clusters=n_clust, affinity='euclidean', linkage='average')
        ac.fit(X)
        
    tier_df = pd.DataFrame(columns=['Player', 'PredFantasyPoints', 'Tier'])
    
    cur_tier = 1
    prev_group = ac.labels_[0]
    for idx in range(len(X)):
        row = df.iloc[idx]
        if ac.labels_[idx] != prev_group:
            cur_tier += 1
        
        prev_group = ac.labels_[idx]
        tier_df = tier_df.append({'Player':row.Player, 'PredFantasyPoints': row.PredFantasyPoints, 
                                  'Tier': cur_tier}, ignore_index=True)
    
    print(str(X[idx])+': '+str(ac.labels_[idx]))
    return tier_df

In [760]:
def tier_players(json_df, limit_per_player=6, num_participants=6, min_tiers=4):
    tier_counts = list()
    dist_between_tiers = sqrt(json_df['PredFantasyPoints'].std())

    while (len(tier_counts) < min_tiers):
        tier_counts = list()
        cur_tier = 1
        prev_points = 999
        
        count = 0
        for cur_points in json_df['PredFantasyPoints']:
            if prev_points - cur_points > dist_between_tiers:
                cur_tier += 1
                tier_counts.append(1)
            else:
                tier_counts[cur_tier-2] += 1
            prev_points = cur_points
        
            count += 1
            if count >= num_participants*2:
                break

        # if not enough tiers, make a stricter metric
        dist_between_tiers *= 0.9

    return tier_counts

In [791]:
QB = pd.read_json('QB2017.json', orient='index')[['Player', 'PredFantasyPoints']].sort('PredFantasyPoints', ascending=False)
RB = pd.read_json('RB2017.json', orient='index')[['Player', 'PredFantasyPoints']].sort('PredFantasyPoints', ascending=False)
WR = pd.read_json('WR2017.json', orient='index')[['Player', 'PredFantasyPoints']].sort('PredFantasyPoints', ascending=False)
TE = pd.read_json('TE2017.json', orient='index')[['Player', 'PredFantasyPoints']].sort('PredFantasyPoints', ascending=False)
K = pd.read_json('K2017.json', orient='index')[['Player', 'PredFantasyPoints']].sort('PredFantasyPoints', ascending=False)
DEF = pd.read_json('DF2017.json', orient='index')[['Team', 'PredFantasyPoints']].sort('PredFantasyPoints', ascending=False)
WR = WR[WR.PredFantasyPoints > 0]

In [792]:
QB['Position'] = 'QB'
RB['Position'] = 'RB'
WR['Position'] = 'WR'
TE['Position'] = 'TE'
DEF['Position'] = 'DEF'
K['Position'] = 'K'
DEF.columns = ['Player', 'PredFantasyPoints', 'Position']

In [793]:
All = QB.append(RB, ignore_index=True)
All = All.append(WR, ignore_index=True)
All = All.append(TE, ignore_index=True)
All = All.append(DEF, ignore_index=True)
All = All.append(K, ignore_index=True)
All = All.sort('PredFantasyPoints', ascending=False)

In [794]:
TiersQB = tier_players(QB)
TiersRB = tier_players(RB)
TiersWR = tier_players(WR)
TiersTE = tier_players(TE)
TiersK = tier_players(K)
TiersDEF = tier_players(DEF)

In [795]:
Tiers = {
    'QB': TiersQB,
    'RB': TiersRB,
    'WR': TiersWR,
    'TE': TiersTE,
    'K': TiersK,
    'DEF': TiersDEF
}

In [796]:
def pos_needed(roster_dist, ideal_dist):
    needed_dist = dict()
    for pos in roster_dist:
        needed_dist[pos] = ideal_dist[pos] - roster_dist[pos]
    return needed_dist

In [797]:
def calc_tier_skewness(pos_tiers, num_participants = 6):
    residuals = 0
    full_dist = list()
    for tier in range(len(pos_tiers)):
        for i in range(pos_tiers[tier]):
            full_dist.append(tier+1)
    
    mean_tier = np.mean(full_dist)
    
    for cur_tier in full_dist:
        residuals += (cur_tier - mean_tier)**3
        
    return residuals/(np.std(full_dist)**3)

In [798]:
'''
Calculates skewness of the tier distribution, but emphasizes how much it is skewed right '''
def calc_right_skewness(pos_tiers):
    lst = pos_tiers.copy()
    score = 0
    for t in range(len(Tiers['TE'])-1):
        score += calc_tier_skewness(lst) * sum(lst)
        lst = lst[:len(lst)-1]
    return score

In [799]:
def pos_pick(Tiers, needed_dist, ideal_dist, draft_priority):
    pos_metrics = dict()
    n_players = sum(Tiers['QB'])/2
    
    # calculate tier skews
    max_skew = -999
    min_skew = 999
    
    tier_skews = dict()
    for pos in needed_dist:
        #if pos == 'DEF': continue
        tier_skews[pos] = calc_right_skewness(Tiers[pos])
        if tier_skews[pos] < min_skew:
            min_skew = tier_skews[pos]
        if tier_skews[pos] > max_skew:
            max_skew = tier_skews[pos]
    
    # normalize tier skews
    for pos in tier_skews:
        norm_skew = tier_skews[pos]
        norm_skew += min_skew
        norm_skew /= (max_skew - min_skew)
        tier_skews[pos] = 1 - norm_skew
        
    for pos in tier_skews:
        pos_metrics[pos] = (needed_dist[pos]/ideal_dist[pos])*(draft_priority[pos]) * tier_skews[pos]

    return pos_metrics
        

In [950]:
def draft_pick(players, roster_dist, ideal_dist, draft_priority, z_scales, positions=['QB','RB','TE','WR','DEF','K']):
    needed_dist = pos_needed(roster_dist, ideal_dist)
    df = players.copy()
    
    # calculate tier dist for every position
    tiers = dict()
    means = dict()
    stds = dict()
    for pos in positions:
        pos_df = players[players.Position == pos]
        means[pos] = pos_df['PredFantasyPoints'][:round(2*len(pos_df)/3)].mean()
        stds[pos] = pos_df['PredFantasyPoints'][:round(2*len(pos_df)/3)].std()
        tiers[pos] = tier_players(pos_df)
    
    # calculate metrics for how much algorithm wants to draft every position
    pos_metrics = pos_pick(tiers, needed_dist, ideal_dist, draft_priority)
    
    print(means)
    print(stds)
    # normalize predicted points across positions to z-scores
    def z_scorify(row, means, stds):
        return (row['PredFantasyPoints'] - means[row['Position']])/stds[row['Position']]
    df['Z'] = df.apply(z_scorify, args=(means, stds,), axis=1)
    
    # extremify z scores based on z_scales dict
    def z_extremify(row, z_scales):
        return row['Z']*z_scales[row['Position']]
    df['ModZ'] = df.apply(z_extremify, args=(z_scales,), axis=1)
    
    # scale player dist by position metrics to calc overall value
    def mod_points(row, pos_metrics):
        return row['ModZ'] * pos_metrics[row['Position']]
    df['DraftValue'] = df.apply(mod_points, args=(pos_metrics,), axis=1)
    
    top3 = []
    for row in df.sort('DraftValue', ascending=False)[:3].iterrows():
        top3.append(row)
        
    return top3

In [951]:
z_scales = {
    'QB': 1.85,
    'RB': 1.05,
    'TE': 1.08,
    'WR': 1.17,
    'K': 1.0,
    'DEF': 1.0
}

In [953]:
test = draft_pick(All, start_roster, ideal_dist, draft_priority, z_scales)

{'WR': 73.78179837064167, 'DEF': 117.95805245168097, 'RB': 70.47804491125585, 'K': 111.5913865155, 'QB': 175.96331357723483, 'TE': 68.00411417329198}
{'WR': 30.827956462456964, 'DEF': 2.324975785696157, 'RB': 40.2005563996368, 'K': 4.830183761938088, 'QB': 91.13082084945853, 'TE': 23.09056884888942}


In [954]:
test

[(235, Player               Julian Edelman
  PredFantasyPoints           145.504
  Position                         WR
  Z                           2.32652
  ModZ                        2.72203
  DraftValue                  37.8924
  Name: 235, dtype: object), (69, Player               Le'Veon Bell
  PredFantasyPoints         176.624
  Position                       RB
  Z                         2.64042
  ModZ                      2.77244
  DraftValue                36.9836
  Name: 69, dtype: object), (70, Player               Ezekiel Elliott
  PredFantasyPoints            169.257
  Position                          RB
  Z                            2.45714
  ModZ                            2.58
  DraftValue                   34.4165
  Name: 70, dtype: object)]

In [887]:
test['AbsZ'] = abs(test['Z'])
test[['Position', 'AbsZ']].groupby('Position').std()

Unnamed: 0_level_0,AbsZ
Position,Unnamed: 1_level_1
DEF,1.186954
K,4.238982
QB,0.510024
RB,0.547378
TE,0.581201
WR,0.602643


In [917]:
test[test.Position == 'DEF'].sort('PredFantasyPoints', ascending=False).head()

Unnamed: 0,Player,PredFantasyPoints,Position,Z,ModZ,DraftValue
535,MIN,123.275034,DEF,2.286898,2.286898,5.950733
536,ARI,122.011733,DEF,1.743537,1.743537,4.536854
537,DEN,121.640761,DEF,1.583977,1.583977,4.121665
538,HOU,119.707939,DEF,0.752647,0.752647,1.958462
539,SEA,119.518787,DEF,0.671291,0.671291,1.746765


In [919]:
test.sort('DraftValue', ascending=False)

Unnamed: 0,Player,PredFantasyPoints,Position,Z,ModZ,DraftValue
235,Julian Edelman,145.503579,WR,2.326518,2.722026,37.892367
69,Le'Veon Bell,176.624349,RB,2.640419,2.772440,36.983627
70,Ezekiel Elliott,169.256531,RB,2.457142,2.579999,34.416522
236,T.Y. Hilton,138.147442,WR,2.087898,2.442841,34.005940
237,Julio Jones,135.057889,WR,1.987679,2.325585,32.373654
238,Terrelle Pryor Sr.,130.622312,WR,1.843798,2.157243,30.030231
239,Odell Beckham Jr,127.862909,WR,1.754288,2.052517,28.572371
240,DeAndre Hopkins,127.830272,WR,1.753229,2.051278,28.555128
71,DeMarco Murray,151.503950,RB,2.015542,2.116319,28.231146
72,David Johnson,151.073639,RB,2.004838,2.105080,28.081217


In [869]:
test[['Position', 'Z']].groupby('Position').max()

Unnamed: 0_level_0,Z
Position,Unnamed: 1_level_1
DEF,2.286898
K,2.374839
QB,1.497758
RB,2.640419
TE,2.217575
WR,2.326518


In [806]:
test_roster = {
    'QB': 1,
    'RB': 3,
    'WR': 2,
    'TE': 1,
    'DEF': 0,
    'K': 0
}

In [807]:
# total count should be 14

start_roster = {
    'QB': 0,
    'RB': 0,
    'WR': 0,
    'TE': 0,
    'DEF': 0,
    'K': 0
}

# custom for players
ideal_dist = {
    'QB': 1,
    'RB': 5,
    'WR': 5,
    'TE': 1,
    'DEF': 1,
    'K': 1
}

# 1 is max, 0 is lowest
draft_priority = {
    'QB': 3.0,
    'RB': 5.0,
    'WR': 5.0,
    'TE': 3.0,
    'DEF': 1.0,
    'K': 1.0
}

In [692]:
def mock_draft(Tiers, start_roster, ideal_dist, draft_priority):
    for draft_round in range(13):
        pick_dist = pos_pick(Tiers, pos_needed(start_roster, ideal_dist), ideal_dist, draft_priority)
        best_score = 0
        best_pos = ''
        print(pick_dist)
        for pos in pick_dist:
            if pick_dist[pos] > best_score:
                best_score = pick_dist[pos]
                best_pos = pos
                
        print('Drafting '+str(best_pos))
        start_roster[best_pos] = start_roster[best_pos] + 1
    
    return start_roster

In [693]:
mock_draft(Tiers, start_roster, ideal_dist, draft_priority)

{'WR': 13.920651117636007, 'DEF': 2.6020985734576829, 'K': 2.1652475555848723, 'RB': 13.339740571956895, 'QB': 7.7978485245705942, 'TE': 5.352390670581606}
Drafting WR
{'WR': 11.136520894108806, 'DEF': 2.6020985734576829, 'K': 2.1652475555848723, 'RB': 13.339740571956895, 'QB': 7.7978485245705942, 'TE': 5.352390670581606}
Drafting RB
{'WR': 11.136520894108806, 'DEF': 2.6020985734576829, 'K': 2.1652475555848723, 'RB': 10.671792457565516, 'QB': 7.7978485245705942, 'TE': 5.352390670581606}
Drafting WR
{'WR': 8.3523906705816042, 'DEF': 2.6020985734576829, 'K': 2.1652475555848723, 'RB': 10.671792457565516, 'QB': 7.7978485245705942, 'TE': 5.352390670581606}
Drafting RB
{'WR': 8.3523906705816042, 'DEF': 2.6020985734576829, 'K': 2.1652475555848723, 'RB': 8.0038443431741371, 'QB': 7.7978485245705942, 'TE': 5.352390670581606}
Drafting WR
{'WR': 5.5682604470544028, 'DEF': 2.6020985734576829, 'K': 2.1652475555848723, 'RB': 8.0038443431741371, 'QB': 7.7978485245705942, 'TE': 5.352390670581606}
Draf

{'DEF': 1, 'K': 0, 'QB': 1, 'RB': 5, 'TE': 1, 'WR': 5}

In [None]:
class StartRoster:
    

In [3]:
for pos in ['QB', 'RB', 'WR', 'TE', 'K', 'DEF']:   
    url = 'https://football.fantasysports.yahoo.com/f1/draftanalysis?tab=SD&pos='+pos+'&sort=DA_AP'
    r  = requests.get(url, headers={'User-agent': 'Mozilla/5.0'})
    contents = BeautifulSoup(r.text, 'lxml')