# User Inputs

In [8]:
#==========
# General Setting
#==========

# imports needed by this cell itself, so it also runs on a fresh kernel
# (the package-loading cell sits further down in the notebook)
import os
from getpass import getpass
from urllib.parse import quote

from sqlalchemy import create_engine

# set core path
path = '/Users/Mark/Documents/Github/Fantasy_Football/'

# postgres login information
# The password is deliberately NOT stored in the notebook: it is read from the
# PGPASSWORD environment variable and, if that is unset, prompted for.
# NOTE(review): a literal password was previously committed here -- rotate it.
pg_log = {
    'USER': 'postgres',
    'PASSWORD': os.environ.get('PGPASSWORD') or getpass('Postgres password: '),
    'HOST': 'localhost',
    'PORT': '5432', 
    'DATABASE_NAME': 'fantasyfootball'
}

# create engine for connecting to database
# - 'postgresql' is the dialect name accepted by every SQLAlchemy release
#   (the 'postgres' alias was removed in 1.4)
# - the password is percent-encoded so characters such as '@', '/' or '#'
#   cannot break URL parsing; pg_log itself keeps the raw password
engine = create_engine('postgresql+psycopg2://{}:{}@{}:{}/{}'.format(pg_log['USER'],
                                                                     quote(pg_log['PASSWORD'], safe=''),
                                                                     pg_log['HOST'],
                                                                     pg_log['PORT'],
                                                                     pg_log['DATABASE_NAME']))

# define dictionary that contains all relevant point values
# (one list of per-category point values for each position)
pts_dict = {
    'QB': [0.04, 5, 0.1, 7, -2, -1],
    'RB': [0.1, 0.1, 0.5, 7],
    'WR': [0.1, 0.5, 7],
    'TE': [0.1, 0.5, 7],
}

# set random user id
user_id=20

# specify schema and table to write out intermediate results
write_info = {
    'schema': 'website',
    'table': 'test',
    'if_exists': 'append'
}

# set year
year = 2018

# Load Packages

In [9]:
# core packages
import pandas as pd
import numpy as np
import os
import sqlite3
import random
import time

# sql packages
import sqlalchemy
import psycopg2
from sqlalchemy import create_engine

# linear optimization
from cvxopt import matrix
from cvxopt.glpk import ilp
from scipy.stats import skewnorm

# jupyter specifications
pd.options.mode.chained_assignment = None
from IPython.core.interactiveshell import InteractiveShell
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# plotting functions
import matplotlib.pyplot as plt
import seaborn as sns

In [10]:
# change the working directory to the helper scripts folder so the local
# modules below can be imported
# NOTE: the directory is not changed back afterwards -- every later cell
# runs with this folder as the current working directory
os.chdir(path + 'Scripts/Analysis/Helper_Scripts')

# load custom plot functions and apply them to matplotlib's pyplot module
from my_plot import PrettyPlot
PrettyPlot(plt)

# load custom helper functions
# NOTE(review): the wildcard import hides where names come from; format_results
# and Clustering (used by custom_data below) are not defined in this notebook,
# so they presumably come from here -- confirm and import them by name
from helper_functions import *;

# Pull in Data

In [11]:
# read the projected salaries and keep only the columns that have no missing values
salary_csv_path = '/Users/Mark/Desktop/Jupyter Projects/Fantasy Football/Projections/salaries.csv'
salary_data = pd.read_csv(salary_csv_path).dropna(axis=1)

# store a copy in the database, overwriting any existing websitedev.salaries table
# NOTE(review): this writes to schema 'websitedev' while write_info uses 'website' -- confirm intended
salary_data.to_sql(name='salaries', con=engine, schema='websitedev',
                   if_exists='replace', index=False)

In [12]:
#%%writefile "/Users/Mark/Documents/Github/Fantasy_Football/Scripts/Analysis/simulation.py"

def create_player_map(p):
    '''
    Build the two lookup tables that translate player names to compact integer
    ids and back.

    Input: An iterable of player names (e.g. the array returned by
           Series.unique()). Ids are assigned in iteration order, starting at 0.
           An empty iterable is allowed and yields two empty dictionaries.
    Return: A tuple (player_to_int, int_to_player) of plain dictionaries.
    '''
    # name -> id, numbered in the order the names are supplied; if a name is
    # repeated, the id of its last occurrence wins
    player_to_int = {player: map_id for map_id, player in enumerate(p)}

    # reverse it to create the id -> name mapping
    int_to_player = {map_id: player for player, map_id in player_to_int.items()}

    return player_to_int, int_to_player

def custom_data(pg_log, write_info, pts_dict, user_id, prior_repeats=5, 
                dist_size=1500, set_year=2018, show_plots=False):
    '''
    Builds fantasy point distributions for every player under one scoring system
    and stores them, integer encoded, in the database.

    For each position (QB, RB, WR, TE) the train / test / predict tables for
    set_year are read from write_info['schema'], scored with that position's
    entry in pts_dict, clustered, and converted into point distributions.
    RB, WR and TE rows are then duplicated under a FLEX position. Player names
    and positions are mapped to integers, rows previously stored for user_id
    are deleted, and the result is written to the schema / table in write_info.

    Input: pg_log -- dictionary with the keys USER, PASSWORD, HOST, PORT and
                     DATABASE_NAME used to build the database connection.
           write_info -- dictionary with the keys 'schema', 'table' and
                         'if_exists' describing where the results are written.
           pts_dict -- dictionary mapping 'QB', 'RB', 'WR', 'TE' to the point
                       values passed to format_results.
           user_id -- id stored in the 'userid' column of every row written.
           prior_repeats, dist_size, show_plots -- forwarded to
                       Clustering.create_distributions.
           set_year -- year suffix of the tables that are read.
    Return: The integer-encoded dataframe (dtype int16) that was written.
    '''
    from urllib.parse import quote

    # create engine for connecting to database; 'postgresql' is the dialect name
    # valid in every SQLAlchemy release and the password is percent-encoded so
    # special characters cannot break URL parsing
    engine = create_engine('postgresql+psycopg2://{}:{}@{}:{}/{}'.format(pg_log['USER'],
                                                                         quote(pg_log['PASSWORD'], safe=''),
                                                                         pg_log['HOST'],
                                                                         pg_log['PORT'],
                                                                         pg_log['DATABASE_NAME']))

    def read_table(pos_name, suffix):
        # pull one of the per-position tables, e.g. website."QB_Train_2018"
        query = 'SELECT * FROM {}."{}_{}_{}"'.format(write_info['schema'], pos_name,
                                                     suffix, str(set_year))
        return pd.read_sql_query(query, engine)

    #==========
    # Loop through each position and pull / analyze the data
    #==========

    # the letter prefix keeps positions in QB, RB, WR, TE, FLEX order when sorted
    position_frames = []
    for pos in ['aQB', 'bRB', 'cWR', 'dTE']:

        pos_name = pos[1:]

        # print current position update
        print('Loading and Preparing ' + pos_name + ' Data')

        #--------
        # Connect to Database and Pull Player Data
        #--------

        df_train_results = read_table(pos_name, 'Train_Results')
        df_test_results = read_table(pos_name, 'Test_Results')
        df_train = read_table(pos_name, 'Train')
        df_predict = read_table(pos_name, 'Predict')

        #--------
        # Calculate Fantasy Points for Given Scoring System and Cluster
        #--------

        # score the results with this position's point values
        df_train_results, df_test_results = format_results(df_train_results, df_test_results,
                                                           df_train, df_predict,
                                                           pts_dict[pos_name])
        df_train_results = df_train_results.drop('year', axis=1)

        # initialize cluster with train and test results
        cluster = Clustering(df_train_results, df_test_results)

        # fit decision tree and apply nodes to players
        cluster.fit_and_predict_tree(print_results=False)

        #--------
        # Use Bayesian Updating to Create Points Distributions
        #--------

        # create distributions of data
        distributions = cluster.create_distributions(prior_repeats=prior_repeats,
                                                     dist_size=dist_size,
                                                     show_plots=show_plots)

        # add position to the distributions
        distributions['pos'] = pos

        position_frames.append(distributions)

    # combine all positions once, instead of growing a dataframe inside the loop
    data = pd.concat(position_frames, axis=0)

    # add flex data: an explicit copy of the RB / WR / TE rows, relabelled
    flex = data[data.pos.isin(['bRB', 'cWR', 'dTE'])].copy()
    flex['pos'] = 'eFLEX'
    data = pd.concat([data, flex])

    # format the data for later use; column 0 holds the player name
    data = data.reset_index(drop=True)
    data = data.rename(columns={0: 'player'})

    # create player to integer mapping and reverse mapping
    player_to_int, int_to_player = create_player_map(data.player.unique())

    # create position to integer mappings
    pos_to_int = {'aQB': 1,
                  'bRB': 2,
                  'cWR': 3,
                  'dTE': 4,
                  'eFLEX': 5}

    # map all values to integers
    data.player = data.player.map(player_to_int)
    data.pos = data.pos.map(pos_to_int)

    # add user id information
    data['userid'] = user_id

    # convert all data to int16
    data = data.astype('int16')

    # remove rows previously stored for this user from the SAME schema / table
    # that is written below; the user id is passed as a bound parameter
    delete_stmt = sqlalchemy.text('DELETE FROM {}.{} WHERE userid = :userid'.format(write_info['schema'],
                                                                                    write_info['table']))
    try:
        with engine.begin() as conn:
            conn.execute(delete_stmt, {'userid': int(user_id)})
    except sqlalchemy.exc.SQLAlchemyError as err:
        # best effort: on the very first run the table does not exist yet
        print('Skipping removal of old rows for user {}: {}'.format(user_id, err))

    # generate a dtypes dictionary: every stored column is a SMALLINT
    # (the distribution columns are assumed to be labelled 1..dist_size)
    dtypes = {col: sqlalchemy.types.SMALLINT()
              for col in ['player', 'pos', 'userid'] + list(range(1, dist_size+1))}

    # write out results
    data.to_sql(write_info['table'], engine, schema=write_info['schema'],
                if_exists=write_info['if_exists'], index=False, dtype=dtypes)

    # store the integer to player mapping the first time only
    # NOTE(review): an existing intplayermap table is never refreshed, so it can
    # go stale if the set or order of players changes between runs
    int_to_player = pd.DataFrame.from_dict(int_to_player, orient='index')
    try:
        int_to_player.to_sql('intplayermap', engine, schema=write_info['schema'],
                             if_exists='fail', index=False)
    except ValueError:
        # pandas raises ValueError when the table already exists (if_exists='fail')
        pass

    return data
    
    
class FF_Simulation():

    #==========
    # Creating Player Distributions for Given Settings
    #==========
    
    def __init__(self, write_info, user_id):
        '''
        Loads the stored point distributions for one user and decodes the integer
        player and position ids back into names.

        Input: write_info dictionary (keys 'schema' and 'table' are read) and the
               user id whose rows should be loaded.
        Return: Nothing; the decoded dataframe is stored in self.data.

        Note: the database connection is the module-level `engine`, which must
        exist before the class is instantiated.
        '''
        schema = write_info['schema']

        # pull in the points data for the given user
        points_query = 'SELECT * FROM {}.{} WHERE userid={}'.format(schema, write_info['table'], user_id)
        stored_points = pd.read_sql_query(points_query, engine)
        self.data = stored_points.drop('userid', axis=1).astype('int16')

        # pull in the integer to player mapping data and convert to dictionary;
        # the names live in the column labelled '0' and are keyed by row position
        # NOTE(review): this relies on the rows coming back in the order they
        # were written (the query has no ORDER BY) -- confirm
        map_query = 'SELECT * FROM {}.intplayermap'.format(schema)
        int_to_player = pd.read_sql_query(map_query, engine).to_dict()['0']

        # integer to position mapping: 1 -> aQB ... 5 -> eFLEX
        int_to_pos = dict(enumerate(['aQB', 'bRB', 'cWR', 'dTE', 'eFLEX'], start=1))

        # map position and player back to actual values
        self.data['pos'] = self.data['pos'].map(int_to_pos)
        self.data['player'] = self.data['player'].map(int_to_player)

    
    def return_data(self):
        '''
        Returns self.data if necessary.
        '''
        return self.data
    
    
    #==========
    # Running the Simulation for Given League Settings and Keepers
    #==========
    
    def run_simulation(self, league_info, to_drop, to_add, iterations=500):        
        '''
        Method that runs the actual simulation and returns the results.
        
        Input: Projected player data and salaries with variance, the league 
               information (e.g. position requirements, and salary caps), and 
               information about players selected by your team and other teams.
        Returns: The top team results (players selected and salaries), as well
                 as counts of players selected, their salary they were selected at,
                 and the points the scored when selected.
        '''
        #--------
        # Pull Out Data, Salaries, and Inflation for Current Simulation
        #--------
        
        # create a copy of self.data for current iteration settings
        data = self.data.copy()
        
        # pull out min salary from data
        min_salary = data.salary.min()
        
        # if the total + the minimum salary 
        if np.sum(to_add['salaries']) + min_salary > league_info['salary_cap']:
            print('''The selected salaries equal {}, the cheapest projected
                     player costs {}, and the max salary cap is {}.  
                     
                     As a result, no player is able to be selected from the optimization.
                     Select lower salaries or increase the salary cap to continue.'''.format(np.sum(to_add['salaries']), 
                                                                                             min_salary,
                                                                                             league_info['salary_cap']))
            return [], []
        
         # give an extra dollar to prevent errors with minimum salaried players
        league_info['salary_cap'] = league_info['salary_cap'] + 1
       
        # drop other selected players + calculate inflation metrics
        data, drop_proj_sal, drop_act_sal = self._drop_players(data, league_info, to_drop)
        
        # drop your selected players + calculate inflation metrics
        data, league_info, to_add, add_proj_sal, add_act_sal = self._add_players(data, league_info, to_add)
        
        pos_require = list(league_info['pos_require'].values())
        
        # calculate inflation based on drafted players and their salaries
        inflation = self._calc_inflation(league_info, drop_proj_sal, drop_act_sal, add_proj_sal, add_act_sal)

        # determine salaries, skew distributions, and number of players for each position
        data, salaries, salary_skews, pos_counts = self._pull_salary_poscounts(data, inflation)
        
        #--------
        # Initialize Matrix and Results Dictionary for Simulation
        #--------
        
        # generate the A matrix for the simulation constraints
        A = self._Amatrix(pos_counts, league_info['pos_require'])

        # pull out the names of all players and set to names
        names = data.index
        dict_names = list(data.index)
        dict_names.extend(to_add['players'])
        
        # create empty matrices
        results = {}
        results['names'] = []
        results['points'] = []
        results['salary'] = []

        # create empty dictionaries
        counts = {}
        counts['names'] = pd.Series(0, index=dict_names).to_dict()
        counts['points'] = pd.Series(0, index=dict_names).to_dict()
        counts['salary'] = pd.Series(0, index=dict_names).to_dict()
        
        # shuffle the random data--both salary skews and the point projections
        _ = [np.random.shuffle(row) for row in salary_skews]
        data = self._df_shuffle(data)
                
        #--------
        # Run the Simulation Loop
        #--------
            
        trial_counts = 0
        for i in range(0, iterations):
    
            # every N trials, randomly shuffle each run in salary skews and data
            if i % 50 == 0:
                _ = [np.random.shuffle(row) for row in salary_skews]
                data = self._df_shuffle(data)

            # pull out a random selection of points and salaries
            points, salaries_tmp = self._random_select(data, salaries, salary_skews)
            
            # run linear integer optimization
            x = self._run_opt(A, points, salaries_tmp, league_info['salary_cap'], pos_require)

            # pull out and store the selected names, points, and salaries
            results, self.counts, trial_counts = self._pull_results(x, names, points, salaries_tmp, 
                                                                    to_add, results, counts, trial_counts)
        
        # format the results after the simulation loop is finished
        self.results = self._format_results(results)
        
        return self.results, self.counts
    
    #==========
    # Helper Functions for the Simulation Loop
    #==========

    #--------
    # Salary (+Inflation) and Keeper Setup
    #--------
    
    def add_salaries(self, salaries):
        '''
        Input: Salaries for all players in the dataset.
        Return: The self.data dataframe that has salaries appended to it.
        '''
        #--------
        # Merge salaries and points on names to ensure matches
        #--------
        
        # merge the salary and prediction data together on player
        self.data = pd.merge(self.data, salaries, how='inner', left_on='player', right_on='player')
        
        # sort values and move player to the index of the dataframe
        self.data = self.data.sort_values(by=['pos', 'salary'], ascending=[True, False]).set_index('player')
    
    
    @staticmethod
    def _drop_players(data, league_info, to_drop):
        '''
        Drops a list of players that are chosen as by other teams and calculates actual 
        salary vs. expected salary for inflation calculation.
        
        Input: Data for a given simulation run, league information (e.g. total salary cap), 
               and a dictionary of players with their salaries to drop. 
        Return: The players that remain available for the simulation, along with metrics
                for salary inflation.
        '''
        
        #--------
        # Dropping Other Team's Players
        #--------
        
        # find players from data that will be dropped and remove them from other data
        drop_data = data[data.index.isin(to_drop['players'])]
        other_data = data.drop(drop_data.index, axis=0)
        
        # pull out the projected and actual salaries for the players that are being kept
        drop_proj_salary = drop_data.salary.drop_duplicates().sum()
        drop_act_salary = np.sum(to_drop['salaries'])
        
        return other_data, drop_proj_salary, drop_act_salary
    
    
    @staticmethod
    def _add_players(data, league_info, to_add):
        '''
        Removes a list of players that are chosen as to_add and calculates inflation based
        on their added salary vs. expected salary.
        
        Input: Data for a given simulation run, league information (e.g. total salary cap), 
               and a dictionary of players with their salaries to keep. 
        Return: The players that remain available for the simulation, the players to be kept,
                and metrics to calculate salary inflation.
        '''
        print('starting add players')
        # pull data for players that have been added to your team and split out other players
        add_data = data[data.index.isin(to_add['players'])]
        other_data = data.drop(add_data.index, axis=0)

        # pull out the salaries of your players and sum
        add_proj_salary = add_data.salary.drop_duplicates().sum()
        add_act_salary = np.sum(to_add['salaries'])
        
        # update the salary for your team to subtract out drafted player's salaries
        league_info['salary_cap'] = float(league_info['salary_cap'] - add_act_salary)
        
        # add the mean points scored by the players who have been added
        to_add['points'] = -1.0*(add_data.drop(['pos', 'salary'],axis=1).mean(axis=1).values)
        
        # create list of letters to append to position for proper indexing
        letters = ['a', 'b', 'c', 'd', 'e']

        # loop through each position in the pos_require dictionary
        for i, pos in enumerate(league_info['pos_require'].keys()):

            # create a unique label based on letter and position
            pos_label = letters[i]+pos

            # loop through each player that has been selected  
            for player in list(add_data[add_data.pos==pos_label].index):

                # if the position is still required to be filled:
                if league_info['pos_require'][pos] > 0:

                    # subtract out the current player from the position count
                    league_info['pos_require'][pos] = league_info['pos_require'][pos] - 1

                    # and remove that player from consideration for filling other positions
                    add_data = add_data[add_data.index != player]
        
        print(league_info['pos_require'])
        return other_data, league_info, to_add, add_proj_salary, add_act_salary
    
    
    @staticmethod
    def _calc_inflation(league_info, drop_proj_sal, drop_act_sal, add_proj_sal, add_act_sal):
        '''
        Method to calculate inflation based on players selected and the expected salaries.
        '''
        # add up the total actual and projected salaries for all keepers
        projected_salary = drop_proj_sal + add_proj_sal
        actual_salary = drop_act_sal + add_act_sal
        
        # calculate the salary inflation due to the keepers
        total_cap = league_info['num_teams'] * league_info['initial_cap']
        inflation = (total_cap-actual_salary) / (total_cap-projected_salary)
        
        return inflation
        
        
    def _pull_salary_poscounts(self, data, inflation):
        '''
        Method to pull salaries from the data dataframe, create salary skews, and determine
        the position counts for the A matrix in the simulation
        
        Input: Data for current simulation and inflation metric
        Return: The data without salary column, the inflated salary numpy array, a dataframe of salaru
                skews for current simulation, and a count of positions in the dataframe 
        '''
        #--------
        # Extract salaries into numpy array and drop salary from points data
        #--------

        # set salaries to numpy array and multiply by inflation
        salaries = data.salary.values*inflation

        # calculate salary skews for each player's salary
        salary_skews = self._skews(salaries)

        # extract the number of counts for each position for later creating A matrix
        pos_counts = list(data.pos.value_counts().sort_index())

        # drop salary from the points dataframe and reset the columns from 0 to N
        data = data.drop(['pos', 'salary'], axis=1)
        data.columns = [i for i in range(0, len(data.columns))]
        
        return data, salaries, salary_skews, pos_counts
        
        
    @staticmethod
    def _skews(salaries):
        '''
        Input: Internal method that accepts the salaries input for each player in the dataset.
        Return: Right skewed salary uncertainties, scaled to the actual salary of the player.
        '''
        # pull out the salary column and convert to numpy array
        _salaries = salaries.reshape(-1,1)

        # create a skews normal distribution of uncertainty for salaries
        skews = (skewnorm.rvs(10, size=1000)*.07).reshape(1, -1)

        # create a p x m matrix with dot product, where p is the number of players
        # and m is the number of skewed uncertainties, e.g. 320 players x 10000 skewed errors
        salary_skews = np.dot(_salaries, skews)

        return salary_skews
        
    #--------
    # Setting up and Running the Simulation
    #--------
    
    @staticmethod
    def _Amatrix(pos_counts, pos_require):
        '''
        This function creates the A matrix that is critical for the ILP solution being equal
        to the positional constraints specified. I identified the given pattern empirically:
        1. Repeat the vector [1, 0, 0, 0, ...] N times for each player for a given position.
           The number of trailing zeros is equal to p-1 positions to draft.
        2. After the above vector is repeated N times for a given player, append a 0 before
           repeating the same pattern for the next player. Repeat for all players up until the 
           last position.
        3. for the last poition, repeat the pattern N-1 times and append a 1 at the end.
        This pattern allows the b vector, e.g. [1, 2, 2, 1] to set the constraints on the positions
        selected by the ILP solution.
        '''
        
        #--------
        # Initialize the Vector Pattern and Matrix
        #--------
        
        # create A matrix
        vec = [1]
        vec.extend([0]*(len(pos_require)-1))
        
        # intialize A matrix by multiplying length one by vec and appending 0 to start pattern
        A = pos_counts[0]*vec
        A.append(0)

        #--------
        # Repeat the Pattern Until Last Position
        #--------
        
        # repeat the same pattern for the inner position requirements
        for i in range(1, len(pos_counts)-1):

            A.extend(pos_counts[i]*vec)
            A.append(0)

        #--------
        # Finish the Pattern for the Last Position
        #--------
        
        # adjust the pattern slightly for the final position requirement
        A.extend((pos_counts[-1]-1)*vec)
        A.append(1)

        # convert A into a matrix for integer optimization
        A = matrix(A, size=(len(vec), np.sum(pos_counts)), tc='d')

        return A
    
    
    @staticmethod
    def _df_shuffle(df):
        '''
        Input: A dataframe to be shuffled, row-by-row indepedently.
        Return: The same dataframe whose columns have been shuffled for each row.
        '''
        # store the index before converting to numpy
        idx = df.index
        df = df.values

        # shuffle each row separately, inplace, and convert o df
        _ = [np.random.shuffle(i) for i in df]

        return pd.DataFrame(df, index=idx)
    
    
    @staticmethod
    def _run_opt(A, points, salaries, salary_cap, pos_require):
        '''
        This function sets up and solves the integer Linear Programming problem 
        c = n x 1 -- c is the vector of points to be optimized
        G = m x n -- G is the salaries of the corresponding players / points (m=1 in this case)
        h = m x 1 -- h is the salary cap (m=1 in this case)
        A = p x n -- A sparse binary matrix that must be developed so b equals player constraints
        b = p x 1 -- b is a vector with player requirements, e.g. [QB, RB, WR] = [1, 2, 2]

        Solve:
        c'*n -- minimize

        Subject to:
        G*x <= h
        A*x = b
        '''
        
        # generate the c matrix with the point values to be optimized
        c = matrix(points, tc='d')

        # generate the G matrix that contains the salary values for constraining
        G = matrix(salaries, tc='d').T

        # generate the h matrix with the salary cap constraint
        h = matrix(salary_cap, size=(1,1), tc='d')

        # generate the b matrix with the number of position constraints
        b = matrix(pos_require, size=(len(pos_require), 1), tc='d')

        # solve the integer LP problem
        (status, x) = ilp(c, G, h, A=A, b=b, B=set(range(0, len(points))))

        return x
    
    @staticmethod
    def _random_select(data, salaries, salary_skews):
        '''
        Random column selection for trial in simulation
        
        Input: Data, salaries, and salary skews
        Return: Randomly selected array of points and salaries + skews for a given trial
        '''
        # select random number between 0-10000
        ran_num = random.randint(0, 999)

        # pull out a random column of points 
        points = data.iloc[:, ran_num].values.astype('double')*-1.0

        # pull out a random skew and add to the original salaries
        salaries_tmp = salaries + salary_skews[:, ran_num]
        salaries_tmp = salaries_tmp.astype('double')
        
        return points, salaries_tmp
    
    
    #==========
    # Formatting and Displaying All Results
    #==========
    
    @staticmethod
    def _pull_results(x, names, points, salaries, to_add, results, counts, trial_counts):
        '''
        This method pulls in each individual result from the simulation loop and stores it in dictionaries.
        
        Input: Names, points, and salaries for the current simulation lineup.
        Return: Dictionaries with full results and overall simulation counts, continuously updated.
        '''
        # find all LP results chosen and equal to 1
        x = np.array(x)[:, 0]==1

        if len(names[x]) != len(np.unique(names[x])):
            return results, counts, trial_counts
        
        trial_counts += 1
        
        names_ = list(names[x])
        names_.extend(to_add['players'])
        
        points_ = list(points[x])
        points_.extend(to_add['points'])
        
        salaries_ = list(salaries[x])
        salaries_.extend(to_add['salaries'])
        
        for i, p in enumerate(names_):

            counts['names'][p] += 1

            if counts['points'][p] == 0:
                counts['points'][p] = []
            counts['points'][p].append(points_[i])

            if counts['salary'][p] == 0:
                counts['salary'][p] = []
            counts['salary'][p].append(salaries_[i])

        # pull out the corresponding names, points, and salaries for chosen players
        # to append to the higher level results dataframes
        results['names'].append(names_)
        results['points'].append(points_)
        results['salary'].append(salaries_)

        return results, counts, trial_counts
    
    
    @staticmethod
    def _format_results(results):
        '''
        After the simulation loop, this method pulls results from the dictionary and formats
        into dataframes.
        
        Input: The results dictionary with all results
        Return: A formatted dataframe with all results
        '''
        
        # create dataframes for the names of selected players, their points scored, and salaries
        name_results = pd.DataFrame(results['names'])
        point_results = pd.DataFrame(results['points'])*-1
        total_points = point_results.sum(axis=1)
        salary_results = pd.DataFrame(results['salary'])
        total_salary = salary_results.sum(axis=1)
        
        # concatenate names, points, and salaries altogether
        results_df = pd.concat([name_results, total_points, total_salary, point_results, salary_results], axis=1)
        
        # rename columns to numbers
        results_df.columns = range(0, results_df.shape[1])
        
        # find the first numeric column that corresponds to points scored and sort by that column
        first_num_col = results_df.dtypes[results_df.dtypes=='float64'].index[0]
        results_df = results_df.sort_values(by=first_num_col, ascending=False)

        return results_df
    
    
    def density_plot(self, player):
        '''
        Plots the distribution of salaries at which a given player was selected
        during the last simulation (taken from self.counts['salary']).
        '''
        # salaries recorded for the player across all trials
        selected_salaries = np.array(self.counts['salary'][player])

        # create and display the distribution plot
        sns.distplot(selected_salaries)
        plt.show()
        
    def show_most_selected(self, to_add, iterations, num_show=20):
        '''
        Bar chart of the most frequently selected players from the last
        simulation, together with the average salary they were selected at.

        Input: The to_add dictionary used for the simulation, the number of
               iterations that were run, and how many players to show.
               The first len(to_add['players']) rows of the ranking are skipped --
               presumably your own players, who are part of every lineup.
        '''
        # share of iterations in which each player was selected
        selection_counts = pd.DataFrame.from_dict(self.counts['names'], orient='index')
        selection_counts = selection_counts.rename(columns={0: 'Percent Drafted'})
        ranked = selection_counts.sort_values(by='Percent Drafted', ascending=False)
        counts = ranked[len(to_add['players']):].head(num_show) / iterations

        # average salary at which each player was selected
        mean_salary = {name: np.mean(paid) for name, paid in self.counts['salary'].items()}
        avg_sal = pd.DataFrame.from_dict(mean_salary, orient='index').rename(columns={0: 'Average Salary'})

        # keep the salaries of the displayed players only, in display order
        avg_sal = counts.merge(avg_sal, how='inner', left_index=True, right_index=True)
        avg_sal = avg_sal.sort_values(by='Percent Drafted', ascending=False)

        # one figure with two y-axes sharing the same x-axis
        fig = plt.figure(figsize=(15,4))
        ax = fig.add_subplot(111)
        ax2 = ax.twinx()

        bar_width = 0.4

        # salaries on the right axis, selection share on the left axis
        avg_sal['Average Salary'].plot(kind='bar', color='blue', ax=ax2, width=bar_width,
                                       position=0, align='center')
        counts.plot(kind='bar', color='red', ax=ax, width=bar_width, position=1, align='center')

        ax.set_ylabel('Percent Drafted')
        ax2.set_ylabel('Average Price')

        plt.show()

In [13]:
# build the point distributions for this user's scoring settings; as a side
# effect the result is written to the schema / table given in write_info
# (the returned dataframe is kept in `d` but not used further in this notebook)
d = custom_data(pg_log, write_info, pts_dict=pts_dict, user_id=user_id)

Loading and Preparing QB Data
Loading and Preparing RB Data
Loading and Preparing WR Data
Loading and Preparing TE Data


In [14]:
# instantiate the simulation class (loads this user's stored distributions from
# the database) and merge the salary information into its data
# NOTE: add_salaries replaces test.data in place, so re-running only this call
# on an existing instance is not safe -- re-create the instance first
test = FF_Simulation(write_info, user_id)
test.add_salaries(salary_data)

In [15]:
# inspect the merged dataset: one row per player and position, the simulated
# point columns, plus pos and salary
test.return_data()

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,1493,1494,1495,1496,1497,1498,1499,1500,pos,salary
player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Aaron Rodgers,288,328,388,250,383,310,271,305,359,267,...,265,274,318,327,338,305,320,261,aQB,18
Tom Brady,288,327,387,249,382,309,270,304,358,266,...,265,274,317,326,337,304,319,260,aQB,17
Cam Newton,239,272,326,204,323,258,223,253,301,218,...,217,225,266,272,282,253,267,212,aQB,8
Carson Wentz,227,258,299,202,295,244,216,240,278,214,...,213,219,247,256,263,240,250,211,aQB,8
Russell Wilson,285,324,385,246,381,307,267,302,356,263,...,261,271,315,324,334,301,317,257,aQB,7
Deshaun Watson,230,259,307,199,305,246,216,243,285,211,...,210,217,255,259,268,242,255,205,aQB,7
Drew Brees,290,328,387,253,383,311,273,306,359,269,...,268,276,319,328,338,306,321,263,aQB,4
Matt Ryan,241,268,310,215,307,256,229,253,290,226,...,225,231,262,268,275,253,263,222,aQB,4
Matthew Stafford,241,266,304,217,301,255,230,252,286,227,...,226,232,261,266,272,252,261,223,aQB,4
Ben Roethlisberger,261,291,339,230,335,278,247,274,316,243,...,242,249,284,291,299,273,286,239,aQB,3


In [None]:
# seed every global random generator this notebook imports (numpy and the
# standard-library `random` module) so that a re-run reproduces the same result
np.random.seed(123)
random.seed(123)

# set league information, included position requirements, number of teams, and salary cap
# (the order of the pos_require keys matters: QB, RB, WR, TE, FLEX)
league_info = {}
league_info['pos_require'] = {'QB': 1, 'RB': 2, 'WR': 2, 'TE': 1, 'FLEX': 2}
league_info['num_teams'] = 12
league_info['initial_cap'] = 295
league_info['salary_cap'] = 295

# input information for players and their associated salaries selected by other teams
to_drop = {}
to_drop['players'] = ["Le'Veon Bell"]
to_drop['salaries'] = [100]

# input information for players and their associated salaries selected by your team
# (salaries are listed in the same order as the players)
to_add = {}
to_add['players'] = ["JuJu Smith-Schuster", "Melvin Gordon", "Drew Brees", "Joe Mixon",
                     "Corey Davis", "George Kittle", "Alvin Kamara", "Sammy Watkins"]
to_add['salaries'] = [25, 80, 5, 60, 15, 5, 90, 12]

# run the simulation
iterations=750
results, counts = test.run_simulation(league_info, to_drop, to_add, iterations=iterations)

In [None]:
# bar chart of the 30 most frequently selected players (your own players
# excluded) and the average salary they were selected at
test.show_most_selected(to_add, iterations, num_show=30)

In [None]:
# distribution of the salaries recorded for one player during the simulation
test.density_plot("JuJu Smith-Schuster")

In [None]:
# show the stored dataset again after the simulation has run
test.return_data()