# User Inputs

In [None]:
#==========
# General Setting
#==========

# root folder of the project; other paths in the notebook are built from it
path = '/Users/Mark/Documents/Github/Fantasy_Football/'

# file name of the sqlite database holding the simulation inputs
db_name = 'Simulation_Inputs.sqlite3'

# season used to pick the *_Sim_<year> tables
year = 2018

# number of players required per position, in order: QB, RB, WR, TE, FLEX
pos_require = [1, 0, 0, 1, 5]

# total budget available to the optimizer
salary_cap = 290

# multiplier applied to every salary before simulating
inflation = 1.15

# keeper name -> value (not read anywhere else in the visible notebook)
my_keepers = {
    'Alvin Kamara': 31,
    'Nick Chubb': 18,
}

# Load Packages

In [None]:
# core packages
import pandas as pd
import numpy as np
import os
import sqlite3
import random
import time

# linear optimization
from cvxopt import matrix
from cvxopt.glpk import ilp
from scipy.stats import skewnorm

# jupyter specifications
pd.options.mode.chained_assignment = None
from IPython.core.interactiveshell import InteractiveShell
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# plotting functions
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# change directory temporarily to helper scripts
# NOTE(review): the working directory is not switched back anywhere in the
# visible notebook, so later relative paths resolve against this folder
os.chdir(path + 'Scripts/Analysis/Helper_Scripts')

# load custom plot functions
# PrettyPlot is called with the pyplot module; presumably it applies plot
# styling -- confirm in my_plot
from my_plot import PrettyPlot
PrettyPlot(plt)

# load custom helper functions
# NOTE(review): the star import hides which names come from helper_functions
from helper_functions import *;

# Pull in Data

In [None]:
# open the simulation database (the connection is left open, as before)
conn = sqlite3.connect(path + 'Data/' + db_name)

# read the <POS>_Sim_<year> table for each position, in QB, RB, WR, TE order
qb, rb, wr, te = (
    pd.read_sql_query('SELECT * FROM ' + position + '_Sim_' + str(year), con=conn)
    for position in ('QB', 'RB', 'WR', 'TE')
)

# FLEX candidates are the RB, WR and TE rows stacked on top of each other
flex = pd.concat([rb, wr, te], axis=0)

In [None]:
# read the salary sheet and drop every column that contains a missing value
salary_data = pd.read_csv(
    '/Users/Mark/Desktop/Jupyter Projects/Fantasy Football/Projections/salaries.csv'
).dropna(axis=1)

# Helper Functions

In [None]:
def long_to_wide(df):
    '''
    Pivot long-format simulation output into one row per player.

    df must have a 'player' column and a 'pred' column, with one row per
    (player, simulated draw). The result is indexed by player and has one
    column per draw, labelled 0, 1, 2, ... (column axis named 'idx').

    The draw number is derived per player instead of assuming exactly 10000
    rows per player, and the input frame is left untouched (no 'idx' column
    is added to it).
    '''
    # number each player's rows 0, 1, 2, ... in order of appearance
    draw_number = df.groupby('player').cumcount().values

    # assign() works on a copy, so the caller's dataframe is not modified
    numbered = df.assign(idx=draw_number)

    # pivot dataframe to wide format with player on rows and draws as cols
    return numbered.pivot(index='player', columns='idx', values='pred')

In [None]:
def df_shuffle(df):
    '''
    Return a new dataframe in which every row of df is shuffled independently.

    The row index is preserved; the columns of the result are the default
    0..n-1 labels. The input dataframe is not modified.
    '''
    # store the index before converting to numpy
    idx = df.index

    # copy first: df.values can be a view on the caller's data (or a
    # read-only array), and np.random.shuffle works in place
    shuffled = df.values.copy()

    # shuffle each row separately, in place on the copy
    for row in shuffled:
        np.random.shuffle(row)

    # convert back to a dataframe
    return pd.DataFrame(shuffled, index=idx)

In [None]:
def skews(all_df, n_sims=10000):
    '''
    Build a matrix of skew-normal salary adjustments for every player.

    all_df -- iterable of dataframes, each with a 'salary' column
    n_sims -- number of adjustment draws (columns) per player; defaults to
              the 10000 used by the rest of the notebook

    Returns an array with one row per player (dataframes stacked in the order
    given) and n_sims columns. Each dataframe gets its own set of draws, and
    every row is that player's salary times those draws.
    '''
    per_position = []

    # loop through each positional dataframe
    for df in all_df:

        # pull out the salary column as a p x 1 column vector
        salaries = df.loc[:, 'salary'].values.reshape(-1, 1)

        # skew-normal draws (shape 2) scaled by 5/100, as a 1 x n_sims row
        noise = (skewnorm.rvs(2, size=n_sims)*5/100).reshape(1, -1)

        # p x n_sims matrix: each player's salary times every draw
        per_position.append(np.dot(salaries, noise))

    # no dataframes at all: keep returning an empty (0 x n_sims) array
    if not per_position:
        return np.zeros(shape=(0, n_sims), dtype='float')

    # stack all positions once instead of re-concatenating inside the loop
    return np.concatenate(per_position)

In [None]:
def random_select(all_df, salary_skews):
    '''
    Pick one random simulation draw and return it for every player.

    all_df       -- list of dataframes; the column at position k holds draw k
                    and each frame also has a 'salary' column
    salary_skews -- array with one row per player (same stacking order as
                    all_df) and one column per draw

    Returns (names, points, salaries) as numpy arrays. Points are multiplied
    by -1, and salaries have the chosen draw's skew added.
    '''
    # the number of draws comes from the skew matrix; with 10000 columns this
    # is the same range as the former hard-coded randint(0, 9999)
    n_sims = salary_skews.shape[1]
    ran_num = random.randrange(n_sims)

    names = []
    points = []
    salaries = []

    for df in all_df:
        draw = df.iloc[:, ran_num]
        names.extend(draw.index)
        points.extend(draw.values)
        salaries.extend(df.loc[:, 'salary'].values)

    names = np.array(names)
    points = np.array(points)*-1
    salaries = np.array(salaries) + salary_skews[:, ran_num]

    return names, points, salaries

In [None]:
def A_matrix(all_df, vec):
    '''
    Build the equality-constraint matrix A used by the ILP (A*x = b), so that the
    b vector, e.g. [1, 2, 2, 1], sets how many players are selected per position.

    all_df -- list of dataframes, one per position; only the row counts are used
    vec    -- list pattern such as [1, 0, 0, 0, ...]

    A flat list is assembled and then handed to cvxopt's matrix with size
    (len(vec), total number of rows across all_df). The pattern was identified
    empirically:
    1. For the first position, repeat vec once per player, then append a single 0.
    2. For every inner position, again repeat vec once per player and append a 0.
    3. For the last position, repeat vec one time fewer than the number of players
       and append a 1 at the end.

    The number of list elements only equals the requested matrix size when
    len(vec) is the same as the number of dataframes in all_df.

    NOTE(review): cvxopt presumably fills the matrix column by column, so each
    appended 0 would shift the 1 down by one row for the next position's players
    -- confirm against the cvxopt matrix documentation.
    '''
    # get all the row-lengths of each df to determine number of samples
    shapes = [df.shape[0] for df in all_df]
    
    # initialize A by repeating vec once per player of the first position and appending 0 to start pattern
    A = shapes[0]*vec
    A.append(0)

    # repeat the same pattern for the inner position requirements
    for i in range(1, len(shapes)-1):
        
        A.extend(shapes[i]*vec)
        A.append(0)
        
    # adjust the pattern slightly for the final position requirement
    A.extend((shapes[-1]-1)*vec)
    A.append(1)

    # convert A into a matrix for integer optimization
    A = matrix(A, size=(len(vec), np.sum(shapes)), tc='d')
    
    return A

In [None]:
def run_opt(A, points, salaries, salary_cap, pos_require):
    '''
    Set up and solve the integer linear program for one simulated draw.

    c = n x 1 -- vector of points to be optimized
    G = 1 x n -- salaries of the corresponding players
    h = 1 x 1 -- the salary cap
    A = p x n -- binary matrix tying players to position requirements
    b = p x 1 -- player requirements, e.g. [QB, RB, WR] = [1, 2, 2]

    Solve:
    c'*x -- minimize

    Subject to:
    G*x <= h
    A*x = b

    with every entry of x declared binary. Only the solution vector returned
    by the solver is handed back; the solver status is discarded.
    '''
    n_players = len(points)

    # objective: the point values
    objective = matrix(points, tc='d')

    # inequality constraint: salaries as a single row against the cap
    salary_row = matrix(salaries, tc='d').T
    cap = matrix(salary_cap, size=(1,1), tc='d')

    # equality constraint right-hand side: players required per position
    required = matrix(pos_require, size=(len(pos_require), 1), tc='d')

    # solve the integer LP problem with all variables binary
    solution = ilp(objective, salary_row, cap, A=A, b=required,
                   B=set(range(n_players)))

    return solution[1]

In [None]:
def pull_results(x, names, points, salaries, results, counts):
    '''
    Record the players chosen by one ILP solution.

    x        -- solver output with one row per player; entries equal to 1 in
                the first column mark the chosen players
    names, points, salaries -- numpy arrays aligned with the rows of x
    results  -- dict with 'names', 'points', 'salary' lists; one list per
                solution is appended to each
    counts   -- dict with 'names', 'points', 'salary' dicts keyed by player:
                'names' holds how often a player was chosen, the other two
                hold the list of that player's points / salaries (a value of
                0 is treated as "nothing recorded yet")

    Both dicts are updated in place and also returned. A chosen player that
    is missing from counts is added instead of raising KeyError.
    '''
    # find all LP results chosen and equal to 1
    chosen = np.array(x)[:, 0] == 1

    # apply the mask once instead of once per loop iteration
    picked_names = names[chosen]
    picked_points = points[chosen]
    picked_salaries = salaries[chosen]

    for p, pts, sal in zip(picked_names, picked_points, picked_salaries):

        counts['names'][p] = counts['names'].get(p, 0) + 1

        for key, value in (('points', pts), ('salary', sal)):
            history = counts[key].get(p, 0)
            # 0 is the placeholder for "no list created yet"
            if history == 0:
                history = []
                counts[key][p] = history
            history.append(value)

    # append the chosen names, points, and salaries to the higher level results
    results['names'].append(list(picked_names))
    results['points'].append(list(picked_points))
    results['salary'].append(list(picked_salaries))

    return results, counts

# Prepare Data for Simulation

In [None]:
#==========
# Prepare DataFrames for Simulation
#==========

#--------
# Extract Data to Wide Format
#--------

# one wide dataframe per position, in QB, RB, WR, TE, FLEX order
positional_frames = [qb, rb, wr, te, flex]
all_df = [long_to_wide(frame) for frame in positional_frames]

#--------
# Merge Salary to Player and Add Inflation
#--------

# temporary approach: attach salaries by player name, order by salary
# (highest first) and put the player name back on the index
all_df = [
    pd.merge(frame, salary_data, how='inner', left_index=True, right_on='player')
      .sort_values('salary', ascending=False)
      .set_index('player', drop=True)
    for frame in all_df
]

# scale every salary by the inflation factor
for frame in all_df:
    frame['salary'] = frame.salary*inflation

#--------
# Calculate Salary Skews
#--------

salary_skews = skews(all_df)

# Run Simulation

In [None]:
#==========
# Run the Simulation
#==========

# create A matrix from the pattern [1, 0, 0, ...] (one entry per position)
vec = [1]
vec.extend([0]*(len(pos_require)-1))
A = A_matrix(all_df, vec)

# per-simulation results: one list of names / points / salaries per run
results = {}
results['names'] = []
results['points'] = []
results['salary'] = []

# per-player tallies, keyed by every player label in the positional frames.
# These labels are taken from all_df directly: `names` is only assigned
# inside the loop below, so it cannot be used to initialise the tallies.
player_names = [name for df in all_df for name in df.index]

counts = {}
counts['names'] = dict.fromkeys(player_names, 0)
counts['points'] = dict.fromkeys(player_names, 0)
counts['salary'] = dict.fromkeys(player_names, 0)

start = time.time()
# loop through N times and simulate best results
for i in range(0, 1000):
    
    # pull out names, points, and salaries from random distribution
    names, points, salaries = random_select(all_df, salary_skews)
    
    # run linear integer optimization
    x = run_opt(A, points, salaries, salary_cap, pos_require)
    
    # pull out the actual names, points, and salaries
    results, counts = pull_results(x, names, points, salaries, results, counts)
    
# elapsed seconds, shown as the cell output
time.time()-start

In [None]:
# time a single random draw of names, points, and salaries
%timeit names, points, salaries= random_select(all_df, salary_skews)

In [None]:
# time one ILP solve on the most recent draw
%timeit run_opt(A, points, salaries, salary_cap, pos_require)

In [None]:
# time the bookkeeping step
# NOTE(review): pull_results appends to results and counts, so every timing
# repetition adds another entry to both
%timeit pull_results(x, names, points, salaries, results, counts)

In [None]:
# build one dataframe per field from the lists collected by the simulation
# loop (they live in the results dict, not in separate variables)
name_results = pd.DataFrame(results['names'])

# points were stored multiplied by -1 for the minimizer; flip the sign back
point_results = pd.DataFrame(results['points'])*-1
total_points = point_results.sum(axis=1)

salary_results = pd.DataFrame(results['salary'])
total_salary = salary_results.sum(axis=1)

# one row per simulation: names, total points, total salary, then the
# individual points and salaries. Note this rebinds `results` from the dict
# to a dataframe, so the cell cannot be re-run without re-running the loop.
results = pd.concat([name_results, total_points, total_salary, point_results, salary_results], axis=1)
results.columns = range(0, results.shape[1])

In [None]:
# order the simulations by column 7, largest first
# NOTE(review): with seven names per lineup column 7 would be the total
# points column -- confirm if pos_require changes
results = results.sort_values(by=7, ascending=False)

# renumber the rows after sorting, then display the table
results = results.reset_index(drop=True)
results

# OLD

In [None]:
# stack the rookie running backs under the existing ones and renumber rows
# (pd.concat replaces DataFrame.append, which newer pandas no longer has)
# NOTE(review): rookie_rb is not defined anywhere in the visible notebook
rb = pd.concat([rb, rookie_rb]).reset_index(drop=True)

In [None]:
# flex pool = running backs followed by wide receivers, rows renumbered
# (pd.concat replaces DataFrame.append, which newer pandas no longer has)
flex = pd.concat([rb, wr])
flex = flex.reset_index(drop=True)

In [None]:
# set inflation: scale the salary column of every positional frame by 1.15
for frame in (qb, rb, wr, te, flex):
    frame['salary'] = frame.salary * 1.15

In [None]:
# remove keepers
# names of players to drop from every positional dataframe below
# NOTE(review): 'Josh Gordon' is listed twice
keepers = ['Christian McCaffrey',
           'Michael Thomas',
           'Josh Gordon',
           'Larry Fitzgerald',
           'Deshaun Watson',
           'Travis Kelce',
           'Russell Wilson',
           'Todd Gurley',
           'Devin Funchess',
           'David Johnson',
           'Alex Collins',
           'Ezekiel Elliott',
           'Kareem Hunt',
           'Jerick McKinnon',
           'Josh Gordon',
           'Allen Robinson', 
           'Julian Edelman',
           'Tyreek Hill',
           'Marvin Jones']

def drop_keepers(df, keepers):
    '''
    Return df without the rows whose 'player' value is in keepers.

    df      -- dataframe with a 'player' column
    keepers -- iterable of player names to remove

    Rows are filtered with a boolean mask rather than dropped by index label,
    so a dataframe whose index has repeated labels does not lose unrelated
    rows. The result always has a fresh 0..n-1 index and df itself is left
    unchanged.
    '''
    is_keeper = df.player.isin(list(keepers))

    return df[~is_keeper].reset_index(drop=True)

# strip the keepers out of every positional frame, in the same order as before
rb, wr, flex, te, qb = (
    drop_keepers(frame, keepers) for frame in (rb, wr, flex, te, qb)
)

# Functions

In [None]:
# histogram of 1000 skew-normal draws (shape 2), scaled by 5/100 and
# multiplied by a value of 80
plt.hist(80*(skewnorm.rvs(2, size=1000)*5/100))

In [None]:
from scipy.stats import skewnorm
# one draw: 80 plus 80 times a skew-normal value (shape 1) scaled by 4/100
80+80*(skewnorm.rvs(1, size=1)*4/100)

In [None]:

def grab_player(df):
    '''
    Draw one random player from df together with a simulated score and cost.

    Column positions are fixed: 0 is read as the name, 1 as the salary, 3 is
    added to the randomly chosen column (one of positions 4 through 1003) to
    form the points. The cost is the salary plus the salary times a
    skew-normal draw (shape .5) scaled by 4/100.

    Returns a (name, points, money) tuple.
    '''
    # random row, then random simulation column
    row = random.randint(0, df.shape[0]-1)
    col = random.randint(4, 1003)

    name = df.iloc[row, 0]
    points = df.iloc[row, col] + df.iloc[row, 3]

    # rvs returns a one-element array, so the cost is unwrapped on return
    noise = skewnorm.rvs(.5, size=1)*4/100
    money = df.iloc[row, 1] + df.iloc[row, 1]*noise

    return name, points, money[0]

In [None]:
def pull_specific_player(name):
    '''
    Split the global flex dataframe into a general pool and a forced pick.

    The two team keepers (Alvin Kamara, Dalvin Cook) are always removed from
    the pool first.

    name -- player to force, or None for no forced player

    Returns (tmp_flex, specific_flex):
    - name is None: both are the same keeper-free pool.
    - otherwise: specific_flex is a two-row dataframe holding the named
      player twice, and tmp_flex is the pool without that player.

    Raises IndexError when name is not in the keeper-free pool.
    '''
    # specify keepers for team
    team_keepers = ['Alvin Kamara', 'Dalvin Cook']

    # temporary flex dataframe without the keepers; filtering by mask keeps
    # this correct even if flex has repeated index labels
    tmp_flex = flex[~flex.player.isin(team_keepers)].reset_index(drop=True)

    if name is None:
        # no specific player: the forced pool is simply the general pool
        return tmp_flex, tmp_flex

    # row of the player named in the function input (label == position here,
    # because the index was just reset)
    idx = tmp_flex[tmp_flex.player == name].index[0]
    chosen = tmp_flex.iloc[idx, :]
    specific_flex = pd.DataFrame([chosen, chosen]).reset_index(drop=True)

    # drop specific player from tmp_flex that other players are drawn from
    tmp_flex = tmp_flex.drop(idx, axis=0).reset_index(drop=True)

    return tmp_flex, specific_flex

In [None]:
def sim_par(n):
    '''
    Simulate one random lineup and return it if it passes the filters.

    n -- iteration number; only used for the progress message printed every
         500000 iterations

    Reads the global dataframes rb, qb, te, tmp_flex and specific_flex.
    NOTE(review): rows 1 and 44 of rb are presumably Alvin Kamara and Dalvin
    Cook, and their costs (20 and 31) are hard-coded -- confirm.

    Returns None when the lineup is rejected (salary above 310 or below 250,
    or score below 115). Otherwise returns a flat list: the eight player
    names ordered by cost (highest first), the score, the total salary, and
    the eight costs in the same order.
    '''
    # local import: datetime is needed for the progress message and is not
    # among the imports visible in this notebook
    import datetime

    rcol = random.randint(4, 1002)
    alvin_pts = rb.iloc[1, rcol] + rb.iloc[1, 3]
    dalvin_pts = rb.iloc[44, rcol] + rb.iloc[44, 3]

    if (n+1) % 500000 == 0:
        print(str(datetime.datetime.now())[:-7])
        print('Completed ' + str(n+1) + ' iterations')

    # draw order matters for the random stream: QB, WR1, WR2, TE, FLEX1, FLEX2
    qb_name, qb_pts, qb_money = grab_player(qb)

    rb1_name, rb1_pts, rb1_money = 'Alvin Kamara', alvin_pts, 20
    rb2_name, rb2_pts, rb2_money = 'Dalvin Cook', dalvin_pts, 31

    wr1_name, wr1_pts, wr1_money = grab_player(tmp_flex)
    wr2_name, wr2_pts, wr2_money = grab_player(tmp_flex)

    te_name, te_pts, te_money = grab_player(te)

    flex1_name, flex1_pts, flex1_money = grab_player(tmp_flex)
    flex2_name, flex2_pts, flex2_money = grab_player(specific_flex)

    salary = qb_money + rb1_money + rb2_money + wr1_money + wr2_money + te_money + flex1_money + flex2_money

    # total points minus an exponential penalty that grows with salary above 293
    y = qb_pts + rb1_pts + rb2_pts + wr1_pts + wr2_pts + te_pts + flex1_pts + flex2_pts - np.exp(.1*(salary-293))

    # reject lineups outside the salary window or below the score threshold
    if salary > 310 or salary < 250 or y < 115:
        return None

    sort_money = pd.Series(
        [qb_money, rb1_money, rb2_money, wr1_money, wr2_money, te_money, flex1_money, flex2_money],
        index=[qb_name, rb1_name, rb2_name, wr1_name, wr2_name, te_name, flex1_name, flex2_name],
    ).sort_values(ascending=False)

    # 1. extend list w/ player names, 2. append total points, 3. append total salary, 4. extend salary values
    results = list(sort_money.index)
    results.append(y)
    results.append(salary)
    results.extend(sort_money.values)

    return results

In [None]:
from multiprocessing import Pool
import time
import random

def run_sim_par(iterations, processes=4):
    '''
    Run sim_par for every value in range(iterations) on a process pool.

    iterations -- number of lineups to simulate
    processes  -- number of worker processes (defaults to the previous
                  hard-coded 4)

    Returns a dataframe with one row per accepted lineup (sim_par results
    that are not None) and columns p1..p8, ppg, total_salary, m1..m8. When no
    lineup is accepted the dataframe is empty but still has those columns.
    Prints the elapsed time.
    '''
    columns = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'ppg', 'total_salary',
               'm1', 'm2', 'm3', 'm4', 'm5', 'm6', 'm7', 'm8']

    p = Pool(processes=processes)

    t1 = time.time()

    # always shut the pool down, even if a worker raises
    try:
        result = p.map(sim_par, range(iterations))
    finally:
        p.close()
        p.join()

    # passing columns to the constructor also works when nothing was accepted
    df = pd.DataFrame([row for row in result if row is not None], columns=columns)

    print('Time Elapsed: ' + str(round(time.time()-t1,2)))

    return df

In [None]:
def show_results(results, cols, added_cost=0):
    '''
    Print, for each requested slot, the most frequently chosen players.

    results    -- dataframe with 'p<col>' (player) and 'm<col>' (cost) columns
    cols       -- slot suffixes as strings, e.g. ['1', '2']
    added_cost -- extra amount subtracted when computing the remaining budget

    For every slot a table of the 15 most common players is displayed with
    their count, average cost (as int) and 'Remaining $', computed as
    300 - (average cost + added_cost + 30 + 21).
    '''
    # header: how many simulated lineups are in the table
    print('')
    print('o Number of Results Above 115 PPG:', results.shape[0])
    print('')
    print('')

    for col in cols:

        player_col, money_col = 'p'+col, 'm'+col

        # 15 most frequent players in this slot
        top_counts = pd.DataFrame(results.loc[:, player_col].value_counts()[:15])

        # mean cost per player in this slot, cast to int
        avg_money = results.groupby(player_col)[money_col].agg(np.mean).astype('int')
        avg_money = pd.DataFrame(avg_money)

        combined = pd.merge(top_counts, avg_money, how='inner', left_index=True, right_index=True)
        combined.columns = ['Counts', 'Avg_Cost']

        committed = (combined.Avg_Cost + added_cost + 30 + 21).astype('int')
        combined['Remaining $'] = 300 - committed

        print('o Most Common ' + '#' + col + ' Players')
        print('------------------------')
        display(combined)
        print('')
        print('')

# Standard Simulation

In [None]:
# no forced player: both pools are the keeper-free flex frame
tmp_flex, specific_flex = pull_specific_player(None)
# simulate 3,000,000 random lineups and keep the accepted ones
df = run_sim_par(3000000)

In [None]:
# most common players in the four most expensive slots
show_results(df, ['1', '2', '3', '4'], added_cost=0)

## Le'Veon First Pick

In [None]:
# force Le'Veon Bell into the second flex slot and remove him from the pool
tmp_flex, specific_flex = pull_specific_player("Le'Veon Bell")
lev = run_sim_par(3000000)

In [None]:
# slots 2-4 only; 115 is subtracted from the remaining budget as added_cost
show_results(lev, ['2', '3', '4'], added_cost=115)

## Le'Veon First Pick + Require WR

In [None]:
# NOTE(review): these calls are identical to the previous Le'Veon run; nothing
# visible here enforces a WR requirement
tmp_flex, specific_flex = pull_specific_player("Le'Veon Bell")
lev_wr = run_sim_par(3000000)

In [None]:
# show the run stored in lev_wr by the previous cell (combined_lev is not
# defined anywhere in the visible notebook)
show_results(lev_wr, ['2', '3', '4', '5'], added_cost = 115)

## AB First Pick

In [None]:
# force Antonio Brown into the second flex slot and remove him from the pool
tmp_flex, specific_flex = pull_specific_player('Antonio Brown')
combined_ab = run_sim_par(3000000)

In [None]:
# 108 is subtracted from the remaining budget as added_cost
show_results(combined_ab, ['1', '2', '3', '4'], added_cost = 108)

## DeAndre First Pick

In [None]:
# force DeAndre Hopkins into the second flex slot and remove him from the pool
tmp_flex, specific_flex = pull_specific_player('DeAndre Hopkins')
combined_deandre = run_sim_par(3000000)

In [None]:
# 100 is subtracted from the remaining budget as added_cost
show_results(combined_deandre, ['1', '2', '3', '4'], added_cost = 100)

## Odell First Pick

In [None]:
# force Odell Beckham into the second flex slot and remove him from the pool
tmp_flex, specific_flex = pull_specific_player('Odell Beckham')
combined_obj = run_sim_par(3000000)

In [None]:
# 97 is subtracted from the remaining budget as added_cost
show_results(combined_obj, ['1', '2', '3', '4'], added_cost=97)

# Late Round Dudes

# Create Visualizations

In [None]:
# labels for graph nodes 0-6
# NOTE(review): this rebinds `names`, which the simulation loop earlier in the
# notebook uses for the array of player names
names = ["L.Bell", 'M.Evans', 'D.Adams', 'TY.Hilton', 'J.Mixon', 'AJ.Green', 'M.Gordon']

In [None]:
# labels for graph nodes 7-11; the first 7 slots are blank padding so the
# list can be indexed by node id
names_evans = [''] * 7 + ['B.Cooks', 'A.Cooper', 'D.Baldwin', 'K.Drake', 'J.Landry']

In [None]:
# labels for graph nodes 12-16; the first 12 slots are blank padding so the
# list can be indexed by node id
names_adams = [''] * 12 + ['B.Cooks', 'J.Landry', 'A.Cooper', 'K.Drake', 'D.Henry']

In [None]:
# labels for graph nodes 17-21; the first 17 slots are blank padding so the
# list can be indexed by node id
names_hilton = [''] * 17 + ['B.Cooks', 'J.Landry', 'D.Baldwin', 'G.Tate', 'A.Cooper']

In [None]:
# labels for graph nodes 22-26; the first 22 slots are blank padding so the
# list can be indexed by node id
names_mixon = [''] * 22 + ['B.Cooks', 'J.Landry', 'D.Baldwin', 'A.Cooper', 'D.Thomas']

In [None]:
# labels for graph nodes 27-31; the first 27 slots are blank padding so the
# list can be indexed by node id
names_green = [''] * 27 + ['JuJu', 'A.Rodgers', 'M.Crabtree', 'E.Sanders', 'R.Woods']

In [None]:
# labels for graph nodes 32-36; the first 32 slots are blank padding so the
# list can be indexed by node id
names_melvin = [''] * 32 + ['JuJu', 'A.Rodgers', 'C.Hogan', 'M.Crabtree', 'R.Woods']

In [None]:
# labels for graph nodes 37-40; the first 37 slots are blank padding so the
# list can be indexed by node id
names_evans_cooks = [''] * 37 + ['C.Newton', 'R.Anderson', 'W.Fuller', 'D.Brees']

In [None]:
# five most frequent p6 values among rows where p2 is Mike Evans and p3 is Kenyan Drake
# NOTE(review): combined_3 is not assigned anywhere in the visible notebook
combined_3[(combined_3.p2 == 'Mike Evans') & (combined_3.p3 == 'Kenyan Drake')].p6.value_counts()[:5]

In [None]:
# RGB palettes for the graph nodes. Only `reds` (hub and first ring) and
# `greens` (outer ring) are read by the visible drawing code.
all_greens = [
    (56,144,161), (63,161,180), (76,173,192), (95,182,199),
    (114,191,206), (133,199,212), (152,208,219),
]

# the first 7 slots are blank padding; the colours sit at positions 7-11
greens = [''] * 7 + [
    (50,138,152), (76,173,192), (95,182,199), (133,199,212), (160,220,227),
]

all_yellows = [
    (255,188,13), (255,195,38), (255,202,64), (255,209,89),
    (255,216,115), (255,223,140), (255,230,166),
]
yellows = [
    (0,0,0), (255,188,13), (255,195,38), (255,202,64),
    (255,209,89), (255,216,115), (255,223,140),
]

all_reds = [
    (189,81,4), (213,92,5), (238,103,5), (250,116,19),
    (251,131,44), (251,145,69), (252,160,94),
]
reds = [
    (50,50,50), (180,71,3), (213,92,5), (250,116,19),
    (251,145,69), (252,160,94), (254,180,112),
]

In [None]:
from pygraphviz import *

# NOTE: this rebinds `A`, which earlier in the notebook held the ILP
# constraint matrix
A=AGraph()

# default attributes shared by every node
A.node_attr['style']='filled'
A.node_attr['shape']='circle'
A.node_attr['fixedsize']='true'
A.node_attr['fontcolor']='#FFFFFF'

# one entry per hub: (hub node, spoke node ids, palette, palette index shift,
# label list indexed by node id). Order matches the original cell so edges
# and nodes are created in the same sequence.
branches = [
    (0, range(7),     reds,   0,  names),         # centre and first ring
    (1, range(7,12),  greens, 0,  names_evans),   # mike evans
    (2, range(12,17), greens, 5,  names_adams),   # davante adams
    (3, range(17,22), greens, 10, names_hilton),  # ty hilton
    (4, range(22,27), greens, 15, names_mixon),   # joe mixon
    (5, range(27,32), greens, 20, names_green),   # aj green
    (6, range(32,37), greens, 25, names_melvin),  # melvin gordon
]

for hub, spoke_ids, palette, shift, labels in branches:
    for i in spoke_ids:
        A.add_edge(hub,i)
        n=A.get_node(i)
        # RGB tuple -> '#rrggbb' hex string
        n.attr['fillcolor']='#%02x%02x%02x' % palette[i-shift]
        n.attr['width']= 1
        n.attr['label'] = labels[i]


#print(A.string()) # print to screen
# render to star.pdf using the circo layout
A.draw('star.pdf',prog="circo")

In [None]:
# evans - cooks: hang four more nodes off node 7 (ids 37-40) and four off
# node 8 (ids 41-44). Both groups take their labels from positions 37-40 of
# names_evans_cooks; no fill colour is set for these nodes.
for hub, shift, spoke_ids in ((7, 0, range(37,41)), (8, 4, range(41,45))):
    for i in spoke_ids:
        A.add_edge(hub,i)
        n=A.get_node(i)
        n.attr['width']= 1
        n.attr['label'] = names_evans_cooks[i-shift]

In [None]:
# p4 value counts, entries 2 through 9 (the two most frequent are skipped)
# NOTE(review): combined_3 is not assigned anywhere in the visible notebook
combined_3.p4.value_counts()[2:10]