# User Inputs

In [1]:
#==========
# General Settings
#==========

# root folder of the project; the database and helper scripts are located relative to it
path = '/Users/Mark/Documents/Github/Fantasy_Football/'

# SQLite file the simulation tables are read from
db_name = 'Simulation_Inputs.sqlite3'

# season used as the suffix of the simulation table names
year = 2018

# roster slots to fill, in order: QB, RB, WR, TE, FLEX
pos_require = [1, 0, 0, 1, 6]

# budget the optimized lineup must stay within
salary_cap = 290

# multiplier applied to every listed salary
inflation = 1.15

# keeper players mapped to a number (presumably their keeper price -- TODO confirm)
my_keepers = {'Alvin Kamara': 31, 'Nick Chubb': 18}

# Load Packages

In [14]:
# core packages
import pandas as pd
import numpy as np
import os
import sqlite3
import random

# linear optimization
from cvxopt import matrix
from cvxopt.glpk import ilp
from scipy.stats import skewnorm

# jupyter specifications
pd.options.mode.chained_assignment = None
from IPython.core.interactiveshell import InteractiveShell
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# plotting functions
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# change directory temporarily to helper scripts
# NOTE(review): this cell never changes the directory back, so any relative
# path used later resolves against Helper_Scripts.
os.chdir(path + 'Scripts/Analysis/Helper_Scripts')

# load custom plot functions
from my_plot import PrettyPlot
PrettyPlot(plt)

# load custom helper functions
# NOTE(review): the star import hides which helper names the notebook relies on.
from helper_functions import *;

# NEW

In [4]:
# open the simulation database and read one table per position for the chosen season
conn = sqlite3.connect(path + 'Data/' + db_name)
sim_query = 'SELECT * FROM {}_Sim_' + str(year)
qb = pd.read_sql_query(sim_query.format('QB'), con=conn)
rb = pd.read_sql_query(sim_query.format('RB'), con=conn)
wr = pd.read_sql_query(sim_query.format('WR'), con=conn)
te = pd.read_sql_query(sim_query.format('TE'), con=conn)

# FLEX candidates: the RB, WR and TE rows stacked on top of each other
flex = pd.concat([rb, wr, te], axis=0)

In [5]:
# read the salary sheet and keep only the columns that contain no missing values
salary_data = pd.read_csv(
    '/Users/Mark/Desktop/Jupyter Projects/Fantasy Football/Projections/salaries.csv'
).dropna(axis=1)

In [6]:
def long_to_wide(df):
    """Pivot long-format simulation rows into one row per player.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'player' column and a 'pred' column, one row per
        (player, simulated draw).

    Returns
    -------
    pandas.DataFrame
        Indexed by player, with one column per draw number (0, 1, 2, ...);
        the column axis is named 'idx'. Players with fewer draws than the
        longest one are padded with NaN.

    The draw number is derived per player instead of assuming exactly 10000
    contiguous rows for every player, and the input frame is left untouched
    (no 'idx' column is written back to the caller's frame).
    """
    # number each player's rows 0, 1, 2, ... in their existing order;
    # .values sidesteps index alignment when the frame has duplicate index labels
    draw_number = df.groupby('player').cumcount().values

    # pivot a copy carrying the draw number to wide format: players on rows, draws on columns
    wide = df.assign(idx=draw_number).pivot(index='player', columns='idx', values='pred')

    return wide

In [7]:
def df_shuffle(df):
    """Return a copy of ``df`` with the values of every row shuffled independently.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose rows should be permuted; it is not modified.

    Returns
    -------
    pandas.DataFrame
        Same index as ``df``; columns are renumbered 0..n-1 because the
        values no longer belong to their original column labels.
    """
    # store the index before converting to numpy
    idx = df.index

    # work on an explicit copy: df.values may share memory with the frame
    # (or be read-only), and shuffling it in place would corrupt or reject the input
    values = np.array(df.values)

    # shuffle each row separately, in place on the copy
    for row in values:
        np.random.shuffle(row)

    return pd.DataFrame(values, index=idx)

In [8]:
def random_select(all_df):
    """Draw one simulated outcome for every player in every position frame.

    A single column position between 0 and 9999 is picked and used for all
    frames. Each frame also gets one skew-normal draw that scales all of its
    salaries by the same percentage.

    Returns the player names (list), the negated points (numpy array) and
    the perturbed salaries (list), concatenated in the order of ``all_df``.
    """
    # one shared simulation column for every position frame
    sim_col = random.randint(0, 9999)

    names, points, salaries = [], [], []

    for pos_df in all_df:
        draw = pos_df.iloc[:, sim_col]
        names.extend(draw.index)
        points.extend(draw.values)

        # one noise draw per frame, applied to every salary in that frame
        base_salary = pos_df.loc[:, 'salary']
        salaries.extend(base_salary + base_salary*(skewnorm.rvs(2, size=1)*5/100))

    # negate the points for the caller
    points = np.array(points)*-1

    return names, points, salaries

In [9]:
def A_matrix(all_df, vec):
    """Build the equality-constraint matrix for the lineup optimization.

    A flat list is assembled and handed to cvxopt, which fills the
    ``len(vec)`` x ``total rows`` matrix column by column, so each copy of
    ``vec`` becomes one column (one player). After every group except the
    last a single 0 is appended, which shifts all following columns down by
    one row; with ``vec = [1, 0, 0, 0, 0]`` the players of group k therefore
    carry their 1 in row k. The last group contributes one copy of ``vec``
    fewer plus a closing 1. The flat list has exactly
    ``len(vec) * total rows`` entries when ``all_df`` has as many frames as
    ``vec`` has entries.
    """
    # number of players (rows) in each position frame
    row_counts = [frame.shape[0] for frame in all_df]

    # first group: the pattern once per player, then the one-row shift
    flat = row_counts[0]*vec
    flat.append(0)

    # inner groups follow the same recipe
    for count in row_counts[1:-1]:
        flat += count*vec
        flat.append(0)

    # last group: one pattern fewer, closed by the final 1
    flat += (row_counts[-1]-1)*vec
    flat.append(1)

    # hand the flat list to cvxopt as a dense matrix of doubles
    return matrix(flat, size=(len(vec), np.sum(row_counts)), tc='d')

In [19]:
def run_opt(A, points, salaries, salary_cap, pos_require):
    """Solve the binary lineup selection problem with GLPK.

    Minimizes ``points . x`` subject to ``salaries . x <= salary_cap`` and
    ``A x = pos_require``, with every entry of ``x`` binary.

    Parameters
    ----------
    A : cvxopt matrix with the position equality constraints.
    points : objective coefficients, one per player.
    salaries : salary of each player, same order as ``points``.
    salary_cap : upper bound on the total salary.
    pos_require : right-hand side of the equality constraints.

    Returns
    -------
    cvxopt matrix with the 0/1 selection for every player.

    Raises
    ------
    RuntimeError
        If the solver returns no solution (for example an infeasible
        problem). Previously ``None`` was returned and the failure only
        surfaced later as an unrelated indexing error.
    """
    c = matrix(points, tc='d')
    G = matrix(salaries, tc='d').T
    h = matrix(salary_cap, size=(1,1), tc='d')
    b = matrix(pos_require, size=(len(pos_require), 1), tc='d')

    # B marks every variable as binary
    (status, x) = ilp(c, G, h, A=A, b=b, B=set(range(len(points))))

    # surface solver failures here, with the status, instead of passing None on
    if x is None:
        raise RuntimeError('lineup optimization returned no solution (status: ' + str(status) + ')')

    return x

In [20]:
def pull_results(x, names, points, salaries, name_results, point_results, salary_results):
    """Append the selected players of one solution to the running result lists.

    ``x`` is a column of 0/1 flags; entries equal to 1 mark selected players.
    The names, points and salaries at those positions are appended, each as
    one list, to the matching accumulator. The accumulators are modified in
    place and also returned.
    """
    # boolean mask of the players the solution selected
    chosen = np.array(x)[:, 0]==1

    pairs = ((names, name_results), (points, point_results), (salaries, salary_results))
    for values, bucket in pairs:
        bucket.append(list(np.array(values)[chosen]))

    return name_results, point_results, salary_results

In [21]:
#==========
# Prepare DataFrames for Simulation
#==========

# build one wide frame per roster slot: QB, RB, WR, TE, FLEX
all_df = []
for pos_df in [qb, rb, wr, te, flex]:

    # long -> wide: one row per player, one column per simulated draw
    wide = long_to_wide(pos_df)

    # temporary way of attaching salaries until a better one is worked out;
    # the inner join drops players that have no salary entry
    wide = pd.merge(wide, salary_data, how='inner', left_index=True, right_on='player')
    wide = wide.sort_values('salary', ascending=False).set_index('player', drop=True)

    # scale the listed salaries by the inflation setting
    wide['salary'] = wide.salary*inflation

    all_df.append(wide)

In [22]:
# position-constraint matrix shared by every simulation
A = A_matrix(all_df, [1, 0, 0, 0, 0])

# accumulators; each simulation appends one lineup to each of them
name_results, point_results, salary_results = [], [], []

# optimize 1000 randomly drawn scenarios
for sim_num in range(0, 1000):

    # one random draw of points and salaries for every player
    names, points, salaries = random_select(all_df)

    # best lineup for this draw under the salary cap and position requirements
    x = run_opt(A, points, salaries, salary_cap, pos_require)

    # keep the names, points and salaries of the selected players
    name_results, point_results, salary_results = pull_results(
        x, names, points, salaries, name_results, point_results, salary_results)

In [23]:
# turn the per-simulation lists into frames; points were stored negated, so flip the sign back
# NOTE: this cell rebinds its own inputs, so running it twice flips the point sign again
name_results = pd.DataFrame(name_results)
point_results = pd.DataFrame(point_results)*-1
salary_results = pd.DataFrame(salary_results)

# lineup totals per simulation
total_points = point_results.sum(axis=1)
total_salary = salary_results.sum(axis=1)

# layout: names | total points | total salary | per-player points | per-player salaries
results = pd.concat([name_results, total_points, total_salary, point_results, salary_results], axis=1)
results.columns = range(0, results.shape[1])


In [24]:
# The total-points column sits directly after the player-name columns, so its
# position equals the number of name columns (8 for the current roster size).
# Deriving it keeps the sort correct if the roster size changes.
total_points_col = name_results.shape[1]
results = results.sort_values(by=total_points_col, ascending=False).reset_index(drop=True)
results

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,Drew Brees,David Njoku,Ezekiel Elliott,Leonard Fournette,Alex Collins,JuJu Smith-Schuster,Tevin Coleman,David Njoku,2636.176417,280.745127,...,295.695641,282.601649,4.601870,3.467854,109.070161,90.505028,32.488984,25.527059,11.603209,3.480963
1,Tom Brady,Jesse James,Le'Veon Bell,Josh Gordon,Jarvis Landry,JuJu Smith-Schuster,Duke Johnson,Tevin Coleman,2613.326280,285.786624,...,242.477687,259.414839,20.088676,2.559992,117.345304,45.041632,43.856326,26.076734,18.964898,11.853061
2,Drew Brees,Ben Watson,Le'Veon Bell,Mike Evans,Josh Gordon,JuJu Smith-Schuster,Peyton Barber,Ben Watson,2610.303454,282.448103,...,344.112969,258.028139,4.902903,2.305947,120.038265,77.600495,46.075294,26.675170,2.425015,2.425015
3,Cam Newton,David Njoku,Todd Gurley,Melvin Gordon,Dion Lewis,Corey Davis,Sammy Watkins,Chris Carson,2602.580647,280.694799,...,272.743590,299.598068,9.226764,3.705349,114.418417,89.647419,28.309711,14.154856,14.154856,7.077428
4,Drew Brees,Jordan Reed,Todd Gurley,Julio Jones,JuJu Smith-Schuster,Rex Burkhead,Tevin Coleman,Chris Ivory,2560.529847,289.104161,...,263.203900,307.422175,4.558954,6.921879,115.083174,104.405354,26.101339,17.796367,11.864245,2.372849
5,Cam Newton,Evan Engram,Kareem Hunt,Dalvin Cook,Jay Ajayi,Chris Hogan,Tarik Cohen,Jamaal Williams,2546.598655,283.236538,...,345.196675,299.342724,9.765689,15.597574,96.412080,94.088898,31.362966,23.231827,6.969548,5.807957
6,Russell Wilson,Jack Doyle,Todd Gurley,Melvin Gordon,Alshon Jeffery,Robert Woods,DeVante Parker,Isaiah Crowell,2542.089036,287.857013,...,299.406281,308.727907,8.069234,3.492118,114.532817,89.737052,38.964773,24.795764,5.903753,2.361501
7,Russell Wilson,Jimmy Graham,Leonard Fournette,Christian McCaffrey,Tyreek Hill,Sammy Watkins,Marquise Goodwin,Jimmy Graham,2540.792263,282.926462,...,267.618394,304.641568,8.618530,10.310355,91.114208,82.937292,53.734020,14.017570,11.681309,10.513178
8,Russell Wilson,Jordan Reed,Le'Veon Bell,Christian McCaffrey,Larry Fitzgerald,Devontae Booker,Isaiah Crowell,Marlon Mack,2539.573336,288.495967,...,268.637745,299.425561,8.612214,7.590968,115.695218,82.973338,65.443759,3.505916,2.337277,2.337277
9,Tom Brady,Jesse James,Kareem Hunt,Christian McCaffrey,Jordan Howard,Jordan Reed,Peyton Barber,Jesse James,2538.160740,280.380995,...,326.848505,276.876717,19.049243,2.358420,100.914491,86.324444,59.576025,7.295023,2.431674,2.431674


# OLD

In [None]:
# add the rookie running backs to the RB pool and renumber the rows
# (pd.concat replaces DataFrame.append, which pandas 2.0 removed)
# NOTE(review): rookie_rb is not defined anywhere in the visible notebook.
rb = pd.concat([rb, rookie_rb]).reset_index(drop=True)

In [None]:
# FLEX pool = running backs followed by wide receivers, rows renumbered
# (pd.concat replaces DataFrame.append, which pandas 2.0 removed)
flex = pd.concat([rb, wr]).reset_index(drop=True)

In [None]:
# scale every salary by the notebook-wide inflation setting instead of a
# second hard-coded copy of the same factor
for pos_df in (qb, rb, wr, te, flex):
    pos_df['salary'] = pos_df.salary * inflation

In [None]:
# players kept by teams in the league; they are removed from every position pool
# ('Josh Gordon' was listed twice; the duplicate entry is dropped)
keepers = ['Christian McCaffrey',
           'Michael Thomas',
           'Josh Gordon',
           'Larry Fitzgerald',
           'Deshaun Watson',
           'Travis Kelce',
           'Russell Wilson',
           'Todd Gurley',
           'Devin Funchess',
           'David Johnson',
           'Alex Collins',
           'Ezekiel Elliott',
           'Kareem Hunt',
           'Jerick McKinnon',
           'Allen Robinson',
           'Julian Edelman',
           'Tyreek Hill',
           'Marvin Jones']

def drop_keepers(df, keepers):
    """Return ``df`` without the rows whose ``player`` is one of ``keepers``.

    Parameters
    ----------
    df : pandas.DataFrame with a 'player' column; it is not modified.
    keepers : iterable of player names to remove. Names that do not occur
        in ``df`` are ignored.

    Returns
    -------
    pandas.DataFrame with the keeper rows removed and the index renumbered
    from 0. With no keepers the frame is returned unchanged.
    """
    keepers = list(keepers)

    # nothing to remove: hand the frame back untouched
    if not keepers:
        return df

    # one vectorized filter instead of a drop and reindex per keeper
    return df[~df.player.isin(keepers)].reset_index(drop=True)

# strip the keepers out of every position pool
rb, wr, flex, te, qb = (drop_keepers(pool, keepers) for pool in (rb, wr, flex, te, qb))

# Functions

In [None]:
# histogram of the salary adjustment (same skew-normal noise as random_select) for a salary of 80
salary_noise = skewnorm.rvs(2, size=1000)*5/100
plt.hist(80*salary_noise)

In [None]:
from scipy.stats import skewnorm

# one sample of a salary of 80 after adding skew-normal noise
base_salary = 80
base_salary + base_salary*(skewnorm.rvs(1, size=1)*4/100)

In [None]:

def grab_player(df):
    """Pick a random player from ``df`` and sample points and a price for them.

    Column positions are fixed: 0 holds the name, 1 the salary, 3 a value
    added to the sampled points, and 4..1003 the values to sample from.
    The salary is perturbed by one skew-normal draw.

    Returns a ``(name, points, money)`` tuple.
    """
    # random row (player) and random sample column
    row = random.randint(0, df.shape[0]-1)
    sim_col = random.randint(4, 1003)

    # listed salary plus a skew-normal percentage of it (array of length 1)
    listed = df.iloc[row, 1]
    money = listed + listed*(skewnorm.rvs(.5, size=1)*4/100)

    # sampled value plus the player's column-3 value
    points = df.iloc[row, sim_col] + df.iloc[row, 3]

    return (df.iloc[row, 0], points, money[0])

In [None]:
def pull_specific_player(name):
    """Split the global ``flex`` pool into a general pool and a forced-pick pool.

    The two hard-coded keepers (Alvin Kamara, Dalvin Cook) are always removed
    from the pool first.

    Parameters
    ----------
    name : str or None
        Player that must be picked, or None to force nobody.

    Returns
    -------
    (tmp_flex, specific_flex)
        ``tmp_flex`` is the pool other players are drawn from. When ``name``
        is None, ``specific_flex`` is that same pool; otherwise it is a
        two-row frame holding the named player twice, and the player is
        removed from ``tmp_flex``.

    Raises
    ------
    IndexError
        If ``name`` is given but not found in the pool.
    """
    # specify keepers for team
    alvin_idx = flex[flex.player == 'Alvin Kamara'].index
    dalvin_idx = flex[flex.player == 'Dalvin Cook'].index

    # create temporary flex dataframe that does not contain keepers
    tmp_flex = flex.drop(alvin_idx, axis=0)
    tmp_flex = tmp_flex.drop(dalvin_idx, axis=0).reset_index(drop=True)

    if name is None:
        # set specific flex to tmp_flex if no specific player should be pulled
        specific_flex = tmp_flex
    else:
        # locate the requested player; fail with a readable message if absent
        matches = tmp_flex[tmp_flex.player == name].index
        if len(matches) == 0:
            raise IndexError('player not found in flex pool: ' + str(name))
        idx = matches[0]

        # the index was reset above, so the label doubles as the row position
        specific_flex = pd.DataFrame([tmp_flex.iloc[idx, :], tmp_flex.iloc[idx, :]]).reset_index(drop=True)

        # drop specific player from tmp_flex that other players are drawn from
        tmp_flex = tmp_flex.drop(idx, axis=0).reset_index(drop=True)

    return tmp_flex, specific_flex

In [None]:
def sim_par(n):
    """Simulate one random lineup and return it if it passes the filters.

    Reads the module-level frames ``qb``, ``rb``, ``te``, ``tmp_flex`` and
    ``specific_flex``. The two keepers are taken from fixed rows of ``rb``
    (rows 1 and 44) at fixed prices; every other slot is drawn with
    ``grab_player``.

    Parameters
    ----------
    n : int
        Iteration number; only used for the progress message printed every
        500000 iterations.

    Returns
    -------
    list or None
        ``None`` when the lineup costs more than 310, less than 250, or
        scores below 115. Otherwise the player names ordered by descending
        price, then the score, the total salary, and the prices in the same
        order as the names.
    """
    # local import: the notebook never imports datetime at top level, which made
    # the progress message below raise NameError
    import datetime

    # NOTE(review): grab_player samples columns 4..1003; this range stops at 1002 -- confirm which is intended
    rcol = random.randint(4, 1002)
    alvin_pts = rb.iloc[1, rcol] + rb.iloc[1, 3]
    dalvin_pts = rb.iloc[44, rcol] + rb.iloc[44, 3]

    if (n+1) % 500000 == 0:
        print(str(datetime.datetime.now())[:-7])
        print('Completed ' + str(n+1) + ' iterations')

    qb_name, qb_pts, qb_money = grab_player(qb)

    rb1_name, rb1_pts, rb1_money = 'Alvin Kamara', alvin_pts, 20
    rb2_name, rb2_pts, rb2_money = 'Dalvin Cook', dalvin_pts, 31

    wr1_name, wr1_pts, wr1_money = grab_player(tmp_flex)
    wr2_name, wr2_pts, wr2_money = grab_player(tmp_flex)

    te_name, te_pts, te_money = grab_player(te)

    flex1_name, flex1_pts, flex1_money = grab_player(tmp_flex)
    flex2_name, flex2_pts, flex2_money = grab_player(specific_flex)

    salary = qb_money + rb1_money + rb2_money + wr1_money + wr2_money + te_money + flex1_money + flex2_money

    # total points minus an exponential penalty that grows once the salary passes 293
    y = qb_pts + rb1_pts + rb2_pts + wr1_pts + wr2_pts + te_pts + flex1_pts + flex2_pts - np.exp(.1*(salary-293))

    # discard lineups outside the salary window or below the score threshold
    if salary > 310 or salary < 250 or y < 115:
        return

    sort_money = pd.Series([qb_money, rb1_money, rb2_money, wr1_money, wr2_money, te_money, flex1_money, flex2_money],
                           index=[qb_name, rb1_name, rb2_name, wr1_name, wr2_name, te_name, flex1_name, flex2_name]).sort_values(ascending=False)

    # 1. extend list w/ player names, 2. append total points, 3. append total salary, 4. extend salary values
    results = list(sort_money.index)
    results.append(y)
    results.append(salary)
    results.extend(sort_money.values)

    return results

In [None]:
from multiprocessing import Pool
import time
import random

def _reseed_worker():
    """Give a freshly started pool worker its own random streams."""
    # forked workers inherit the parent's NumPy generator state (and, on older
    # Pythons, the state of the random module too); without reseeding every
    # worker would draw the same noise sequence
    random.seed()
    np.random.seed()


def run_sim_par(iterations):
    """Run ``sim_par`` for ``iterations`` draws on four worker processes.

    Parameters
    ----------
    iterations : int
        Number of lineups to simulate.

    Returns
    -------
    pandas.DataFrame
        One row per accepted lineup with columns p1..p8 (players), ppg,
        total_salary and m1..m8 (prices). The frame is empty, but still has
        these columns, when no lineup is accepted.
    """
    columns = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'ppg', 'total_salary',
               'm1', 'm2', 'm3', 'm4', 'm5', 'm6', 'm7', 'm8']

    p = Pool(processes=4, initializer=_reseed_worker)

    t1 = time.time()

    # always release the workers, even when a simulation raises
    try:
        result = p.map(sim_par, range(iterations))
    finally:
        p.close()
        p.join()

    # rejected lineups come back as None; passing the column names to the
    # constructor also covers the case where nothing was accepted
    df = pd.DataFrame([i for i in result if i is not None], columns=columns)

    print('Time Elapsed: ' + str(round(time.time()-t1,2)))

    return df

In [None]:
def show_results(results, cols, added_cost=0):
    """Print, for each requested slot, the most frequent players and their cost.

    Parameters
    ----------
    results : DataFrame with player columns 'p<k>' and price columns 'm<k>'.
    cols : list of slot numbers given as strings, e.g. ['1', '2'].
    added_cost : amount additionally subtracted from the remaining budget.

    For every slot a table of the 15 most common players is displayed with
    their count, average price, and ``300 - (average price + added_cost +
    30 + 21)`` as the remaining budget.
    """
    # header: how many lineups are in the result set
    print('')
    print('o Number of Results Above 115 PPG:', results.shape[0])
    print('')
    print('')

    for col in cols:
        player_col = 'p'+col
        money_col = 'm'+col

        # 15 most frequent players in this slot and the mean price paid for each player
        top_counts = pd.DataFrame(results.loc[:, player_col].value_counts()[:15])
        avg_money = pd.DataFrame(results.groupby(player_col)[money_col].agg(np.mean).astype('int'))

        # keep only players present in both tables
        combined = pd.merge(top_counts, avg_money, how='inner', left_index=True, right_index=True)
        combined.columns = ['Counts', 'Avg_Cost']
        spent = combined.Avg_Cost + added_cost + 30 + 21
        combined['Remaining $'] = 300 - spent.astype('int')

        print('o Most Common ' + '#' + col + ' Players')
        print('------------------------')
        display(combined)
        print('')
        print('')

# Standard Simulation

In [None]:
# baseline run: no forced pick (name=None), 3,000,000 sampled lineups
tmp_flex, specific_flex = pull_specific_player(None)
df = run_sim_par(3000000)

In [None]:
# most common players in slots 1-4 of the baseline run
show_results(df, ['1', '2', '3', '4'], added_cost=0)

## Le'Veon First Pick

In [None]:
# force Le'Veon Bell into the last flex slot and draw everyone else from the remaining pool
tmp_flex, specific_flex = pull_specific_player("Le'Veon Bell")
lev = run_sim_par(3000000)

In [None]:
# added_cost is subtracted from the remaining budget in each table
# (115 is presumably the assumed price of Le'Veon Bell -- TODO confirm)
show_results(lev, ['2', '3', '4'], added_cost=115)

## Le'Veon First Pick + Require WR

In [None]:
# NOTE(review): these calls are identical to the plain "Le'Veon First Pick" run;
# nothing visible here enforces the WR requirement named in the heading.
tmp_flex, specific_flex = pull_specific_player("Le'Veon Bell")
lev_wr = run_sim_par(3000000)

In [None]:
# show the run produced by the preceding cell (lev_wr); the previous argument,
# combined_lev, is not defined anywhere in the notebook
show_results(lev_wr, ['2', '3', '4', '5'], added_cost = 115)

## AB First Pick

In [None]:
# force Antonio Brown into the last flex slot
tmp_flex, specific_flex = pull_specific_player('Antonio Brown')
combined_ab = run_sim_par(3000000)

In [None]:
# added_cost=108 is presumably the assumed price of Antonio Brown -- TODO confirm
show_results(combined_ab, ['1', '2', '3', '4'], added_cost = 108)

## DeAndre First Pick

In [None]:
# force DeAndre Hopkins into the last flex slot
tmp_flex, specific_flex = pull_specific_player('DeAndre Hopkins')
combined_deandre = run_sim_par(3000000)

In [None]:
# added_cost=100 is presumably the assumed price of DeAndre Hopkins -- TODO confirm
show_results(combined_deandre, ['1', '2', '3', '4'], added_cost = 100)

## Odell First Pick

In [None]:
# force Odell Beckham into the last flex slot
tmp_flex, specific_flex = pull_specific_player('Odell Beckham')
combined_obj = run_sim_par(3000000)

In [None]:
# added_cost=97 is presumably the assumed price of Odell Beckham -- TODO confirm
show_results(combined_obj, ['1', '2', '3', '4'], added_cost=97)

# Late Round Dudes

# Create Visualizations

In [None]:
# labels for graph nodes 0-6 (read as names[i] when the graph is drawn)
# NOTE(review): this rebinds `names`, which the simulation loop also uses for its player list.
names = ["L.Bell", 'M.Evans', 'D.Adams', 'TY.Hilton', 'J.Mixon', 'AJ.Green', 'M.Gordon']

In [None]:
# labels for graph nodes 7-11; the 7 leading blanks let the list be indexed by node id
names_evans = [''] * 7 + ['B.Cooks', 'A.Cooper', 'D.Baldwin', 'K.Drake', 'J.Landry']

In [None]:
# labels for graph nodes 12-16; the 12 leading blanks let the list be indexed by node id
names_adams = [''] * 12 + ['B.Cooks', 'J.Landry', 'A.Cooper', 'K.Drake', 'D.Henry']

In [None]:
# labels for graph nodes 17-21; the 17 leading blanks let the list be indexed by node id
names_hilton = [''] * 17 + ['B.Cooks', 'J.Landry', 'D.Baldwin', 'G.Tate', 'A.Cooper']

In [None]:
# labels for graph nodes 22-26; the 22 leading blanks let the list be indexed by node id
names_mixon = [''] * 22 + ['B.Cooks', 'J.Landry', 'D.Baldwin', 'A.Cooper', 'D.Thomas']

In [None]:
# labels for graph nodes 27-31; the 27 leading blanks let the list be indexed by node id
names_green = [''] * 27 + ['JuJu', 'A.Rodgers', 'M.Crabtree', 'E.Sanders', 'R.Woods']

In [None]:
# labels for graph nodes 32-36; the 32 leading blanks let the list be indexed by node id
names_melvin = [''] * 32 + ['JuJu', 'A.Rodgers', 'C.Hogan', 'M.Crabtree', 'R.Woods']

In [None]:
# labels for graph nodes 37-40; the 37 leading blanks let the list be indexed by node id
names_evans_cooks = [''] * 37 + ['C.Newton', 'R.Anderson', 'W.Fuller', 'D.Brees']

In [None]:
# five most common slot-6 players among lineups with Mike Evans in slot 2 and Kenyan Drake in slot 3
# NOTE(review): combined_3 is not defined anywhere in the visible notebook.
combined_3[(combined_3.p2 == 'Mike Evans') & (combined_3.p3 == 'Kenyan Drake')].p6.value_counts()[:5]

In [None]:
# RGB palettes for the graph nodes; each tuple is formatted into a hex colour when used

# greens: the 7 leading blanks let the list be indexed by node id (7-11)
all_greens = [(56,144,161), (63,161,180), (76,173,192), (95,182,199), (114,191,206), (133,199,212), (152,208,219)]
greens = [''] * 7 + [(50,138,152), (76,173,192), (95,182,199), (133,199,212), (160,220,227)]

# yellows
all_yellows = [(255,188,13), (255,195,38), (255,202,64), (255,209,89), (255,216,115), (255,223,140), (255,230,166)]
yellows = [(0,0,0), (255,188,13), (255,195,38), (255,202,64), (255,209,89), (255,216,115), (255,223,140)]

# reds: indexed directly by node id (0-6)
all_reds = [(189,81,4), (213,92,5), (238,103,5), (250,116,19), (251,131,44), (251,145,69), (252,160,94)]
reds = [(50,50,50), (180,71,3), (213,92,5), (250,116,19), (251,145,69), (252,160,94), (254,180,112)]

In [None]:
from pygraphviz import *

A=AGraph()

# default look shared by every node
A.node_attr['style']='filled'
A.node_attr['shape']='circle'
A.node_attr['fixedsize']='true'
A.node_attr['fontcolor']='#FFFFFF'

# first ring: node 0 is linked to nodes 0-6 (itself included), coloured from the red palette
for i in range(7):
    A.add_edge(0,i)
    n=A.get_node(i)
    n.attr['fillcolor']='#%02x%02x%02x' % reds[i]
    n.attr['width']= 1
    n.attr['label'] = names[i]

# second ring: each hub 1-6 gets five consecutive nodes (7-11, 12-16, ..., 32-36).
# Every label list is padded so it can be indexed by node id, while the green
# palette is only padded for the first hub, hence the per-hub offset.
second_ring = [(1, names_evans),    # mike evans
               (2, names_adams),    # davante adams
               (3, names_hilton),   # ty hilton
               (4, names_mixon),    # joe mixon
               (5, names_green),    # aj green
               (6, names_melvin)]   # melvin gordon

for hub, labels in second_ring:
    palette_shift = 5*(hub - 1)
    first_node = 7 + palette_shift
    for i in range(first_node, first_node + 5):
        A.add_edge(hub,i)
        n=A.get_node(i)
        n.attr['fillcolor']='#%02x%02x%02x' % greens[i - palette_shift]
        n.attr['width']= 1
        n.attr['label'] = labels[i]

# render to a PDF file with the circo layout engine
A.draw('star.pdf',prog="circo")

In [None]:
# third ring: four extra nodes each for hubs 7 and 8, without a fill colour.
# Both groups read the same four labels: nodes 37-40 index the list directly,
# nodes 41-44 are shifted back by 4.
for hub, first_node, label_shift in ((7, 37, 0), (8, 41, 4)):
    for i in range(first_node, first_node + 4):
        A.add_edge(hub,i)
        n=A.get_node(i)
        n.attr['width']= 1
        n.attr['label'] = names_evans_cooks[i - label_shift]

In [None]:
# slot-4 players ranked 3rd to 10th by frequency
# NOTE(review): combined_3 is not defined anywhere in the visible notebook.
combined_3.p4.value_counts()[2:10]