In [182]:
import csv
import numpy as np
import pandas as pd
from pandas import Series,DataFrame

In [183]:
class Player(object):
    """A single player row: roster position, name, salary and projection.

    Attributes:
        position: position string as given by the caller (e.g. "OF", "2B/3B").
        name: player name.
        salary: cost of the player.
        points: projected points.
        value: points-per-salary figure supplied by the caller.
        team: team abbreviation.
    """

    def __init__(self, position, name, salary, points, value, team):
        self.position = position
        self.name = name
        self.salary = salary
        self.points = points
        self.value = value
        self.team = team

    def __iter__(self):
        # Yield the fields in constructor order. The previous implementation
        # read a non-existent ``self.list`` attribute and always raised
        # AttributeError.
        return iter((self.position, self.name, self.salary,
                     self.points, self.value, self.team))

    def __str__(self):
        # Space-separated summary; ``value`` is intentionally not included.
        return "{} {} {} {} {}".format(self.name, self.position, self.salary, self.points, self.team)

In [184]:
# This csv contains our predictions and salaries for each player.
# We parse each row of the csv and convert it into a Player object.
# Columns read below: 0 = position, 1 = name, 2 = salary, 4 = points, 5 = team.
with open('DKSalaries.csv', 'r') as data:
    reader = csv.reader(data)
    # Skip the header row. The builtin next() works on both Python 2 and 3
    # (reader.next() exists only on Python 2); the None default keeps an
    # empty file from raising StopIteration.
    next(reader, None)
    players = []
    for row in reader:
        name = row[1]
        position = row[0]
        salary = int(row[2])
        points = float(row[4])
        # Points per dollar of salary, used later to rank players by value.
        value = points / salary
        team = row[5]
        player = Player(position, name, salary, points, value, team)
        players.append(player)

**The Greedy Approach**

Greedy Algorithms are quite simple. You prioritize a list of objects, and then you select the objects in that order, as long as they don't violate some constraint. For our scenario, the constraints are the overall budget you can spend on your team, and the required number of players in each position. The tricky part in developing a greedy algorithm is determining the correct way to prioritize the objects.
One way that seems reasonable to prioritize players is by their expected point production. We would select the available player that we predict to have the best game, as long as we can afford them, and have room in their position group. Let's see what type of team this approach creates:

In [185]:
def points_knapsack(players, budget=50000, constraints=None):
    """Greedily build a roster, taking players in order of projected points.

    Players are considered from highest to lowest ``points``. A player is
    added when his roster slot still has an opening and his salary fits in
    what is left of the budget.

    Args:
        players: iterable of objects exposing ``position``, ``salary`` and
            ``points`` attributes. The caller's sequence is not reordered.
        budget: salary cap for the whole roster (default 50000).
        constraints: mapping of roster slot -> number of players allowed in
            that slot. Defaults to 2 P, 1 each of C/1B/2B/3B/SS and 3 OF.

    Returns:
        List of the selected players in the order they were picked. The
        roster can be incomplete when the budget runs out before every slot
        is filled.
    """
    if constraints is None:
        constraints = {'P': 2, 'C': 1, '1B': 1, '2B': 1, '3B': 1, 'SS': 1, 'OF': 3}

    counts = dict.fromkeys(constraints, 0)
    current_team_salary = 0
    team = []

    # sorted() builds a new list, so the caller's ordering is left untouched.
    for player in sorted(players, key=lambda x: x.points, reverse=True):
        pos = player.position
        # Multi-position players (e.g. "2B/3B") are slotted at the first listed position.
        if "/" in pos:
            pos = pos[:pos.index("/")]
        # Every position containing "P" (e.g. "SP") shares the single "P" slot.
        if "P" in pos:
            pos = "P"
        sal = player.salary
        # A slot missing from `constraints` has no openings, so that player is skipped.
        if counts.get(pos, 0) < constraints.get(pos, 0) and current_team_salary + sal <= budget:
            team.append(player)
            counts[pos] += 1
            current_team_salary += sal

    return team

In [186]:
# Build the points-first roster and report each pick plus the totals.
team = points_knapsack(players)
points = 0
salary = 0
for player in team:
    points += player.points
    salary += player.salary
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(player)
print("\nPoints: {}".format(points))
print("Salary: {}".format(salary))

Drew Pomeranz SP 10600 21.771 Bos
Jeremy Hellickson SP 7500 15.182 Phi
Mike Trout OF 5200 10.564 LAA
Mookie Betts OF 5300 10.407 Bos
David Ortiz 1B 5600 10.256 Bos
Matt Carpenter 2B/3B 5400 9.846 StL
Ian Desmond OF 4300 9.766 Tex
Xander Bogaerts SS 4500 9.629 Bos

Points: 97.421
Salary: 48400


Oops.

There's a slight problem here: we didn't end up with enough players on our team. A valid team has 10 players, but ours only has 8. After picking 8 players, we have used 48,400 of our 50,000 budget. We don't have enough money left to afford even the cheapest available player, and we still need two more players. There are ways around this: rather than adding a player as long as he doesn't put us over the budget, we could make sure that after adding him we still have enough budget left to fill out our team with the cheapest possible players at each remaining position.

This approach would leave us with a very "top-heavy" team: a few really good players, and a few "bottom of the barrel" players. This isn't necessarily a bad thing, but let's try a different approach.

Prioritizing by Points per Dollar

Rather than prioritizing by expected point production, let's try prioritizing by expected points per dollar of cost.

In [187]:
def value_knapsack(players, budget=50000, constraints=None):
    """Greedily build a roster, taking players in order of points per dollar.

    Players are considered from highest to lowest ``value``. A player is
    added when his roster slot still has an opening and his salary fits in
    what is left of the budget.

    Args:
        players: iterable of objects exposing ``position``, ``salary`` and
            ``value`` attributes. The caller's sequence is not reordered.
        budget: salary cap for the whole roster (default 50000).
        constraints: mapping of roster slot -> number of players allowed in
            that slot. Defaults to 2 P, 1 each of C/1B/2B/3B/SS and 3 OF.

    Returns:
        List of the selected players in the order they were picked. The
        roster can be incomplete when the budget runs out before every slot
        is filled.
    """
    if constraints is None:
        constraints = {'P': 2, 'C': 1, '1B': 1, '2B': 1, '3B': 1, 'SS': 1, 'OF': 3}

    counts = dict.fromkeys(constraints, 0)
    current_team_salary = 0
    team = []

    # sorted() builds a new list, so the caller's ordering is left untouched.
    for player in sorted(players, key=lambda x: x.value, reverse=True):
        pos = player.position
        # Multi-position players (e.g. "2B/3B") are slotted at the first listed position.
        if "/" in pos:
            pos = pos[:pos.index("/")]
        # Every position containing "P" (e.g. "SP") shares the single "P" slot.
        if "P" in pos:
            pos = "P"
        sal = player.salary
        # A slot missing from `constraints` has no openings, so that player is skipped.
        if counts.get(pos, 0) < constraints.get(pos, 0) and current_team_salary + sal <= budget:
            team.append(player)
            counts[pos] += 1
            current_team_salary += sal

    return team

In [188]:
# Build the value-first roster and report each pick plus the totals.
team = value_knapsack(players)
points = 0
salary = 0
for player in team:
    points += player.points
    salary += player.salary
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(player)
print("\nPoints: {}".format(points))
print("Salary: {}".format(salary))

Brett Nicholas C 2100 7.5 Tex
Josh Bell OF 2700 7.75 Pit
Prince Fielder 1B 2100 5.831 Tex
Marcell Ozuna OF 3200 8.371 Mia
Odubel Herrera OF 3000 7.745 Phi
Johnny Giavotella 2B 2300 5.878 LAA
Freddy Galvis SS 2300 5.839 Phi
Martin Perez SP 4000 9.958 Tex
Matt Cain SP 4000 8.509 SF
Martin Prado 3B 3400 7.034 Mia

Points: 74.415
Salary: 29100


We got a valid team, but it doesn't seem quite "optimal." We ended up using only 29100 of our 50000 budget. We optimized for "value," but that's not exactly what we want; we want to optimize for points.

Improving on Points per Dollar

This approach seems promising though; we have a team full of under-valued players. We can try to replace some of the under-valued players that aren't predicted to get many points with some players with worse points per dollar ratios, but more expected points.

In [189]:
def improved_knapsack(players, budget=50000, constraints=None):
    """Build a value-first roster, then upgrade it with higher-scoring players.

    Phase 1 fills the roster greedily by ``value`` (points per dollar).
    Phase 2 walks the pool from highest to lowest ``points`` and, for each
    player not already on the roster, swaps him in for the lowest-scoring
    roster member in the same slot that he outscores, provided the swap
    keeps the total salary within the budget.

    Both phases use the same slot rule: the part before any "/" is the slot,
    and every position containing "P" maps to "P". Previously phase 2
    compared raw position strings, so e.g. an "SP" could never replace an
    "RP" even though they occupy the same "P" slot.

    Args:
        players: iterable of objects exposing ``position``, ``salary``,
            ``points`` and ``value`` attributes. The caller's sequence is
            not reordered.
        budget: salary cap for the whole roster (default 50000).
        constraints: mapping of roster slot -> number of players allowed in
            that slot. Defaults to 2 P, 1 each of C/1B/2B/3B/SS and 3 OF.

    Returns:
        List of the selected players. A swapped-in player takes the list
        index of the player he replaced.
    """
    if constraints is None:
        constraints = {'P': 2, 'C': 1, '1B': 1, '2B': 1, '3B': 1, 'SS': 1, 'OF': 3}

    def slot_of(candidate):
        # Map a raw position string to the roster slot it occupies.
        pos = candidate.position
        if "/" in pos:
            pos = pos[:pos.index("/")]
        if "P" in pos:
            pos = "P"
        return pos

    counts = dict.fromkeys(constraints, 0)
    current_team_salary = 0
    team = []

    # Phase 1: greedy fill by points per dollar. sorted() leaves the caller's
    # list untouched.
    by_value = sorted(players, key=lambda x: x.value, reverse=True)
    for player in by_value:
        pos = slot_of(player)
        sal = player.salary
        # A slot missing from `constraints` has no openings, so that player is skipped.
        if counts.get(pos, 0) < constraints.get(pos, 0) and current_team_salary + sal <= budget:
            team.append(player)
            counts[pos] += 1
            current_team_salary += sal

    # Phase 2: upgrade pass. Sorting the value-ordered list keeps value order
    # among players with equal points, because the sort is stable.
    by_points = sorted(by_value, key=lambda x: x.points, reverse=True)
    for player in by_points:
        if player in team:
            continue
        pos = slot_of(player)
        sal = player.salary
        pts = player.points
        # Try the weakest roster member in this slot first.
        pos_players = [x for x in team if slot_of(x) == pos]
        pos_players.sort(key=lambda x: x.points)
        for pos_player in pos_players:
            if (current_team_salary + sal - pos_player.salary) <= budget and pts > pos_player.points:
                team[team.index(pos_player)] = player
                current_team_salary = current_team_salary + sal - pos_player.salary
                break
    return team

In [190]:
# Build the improved roster, report each pick plus the totals, and keep the
# space-split text of every pick in `p`.
team = improved_knapsack(players)
points = 0
salary = 0
p = []
for player in team:
    points += player.points
    salary += player.salary
    p.append(str(player).split(" "))
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(player)
print("\nPoints: {}".format(points))
print("Salary: {}".format(salary))

Sandy Leon C 3200 7.909 Bos
Mookie Betts OF 5300 10.407 Bos
David Ortiz 1B 5600 10.256 Bos
Ian Desmond OF 4300 9.766 Tex
Mike Trout OF 5200 10.564 LAA
Johnny Giavotella 2B 2300 5.878 LAA
Freddy Galvis SS 2300 5.839 Phi
Jeremy Hellickson SP 7500 15.182 Phi
Drew Pomeranz SP 10600 21.771 Bos
Anthony Rendon 3B 3700 7.589 Was

Points: 105.161
Salary: 50000


In [191]:
# Read the same salary file into a DataFrame and preview its first rows.
mlbDF = pd.read_csv('DKSalaries.csv')
mlbDF.head()

Unnamed: 0,Position,Name,Salary,GameInfo,AvgPointsPerGame,teamAbbrev
0,C,A.J. Ellis,2800,LAD@Was 07:05PM ET,3.477,LAD
1,OF,Aaron Altherr,3300,Mia@Phi 07:05PM ET,0.0,Phi
2,OF,Aaron Hicks,3100,Bal@NYY 07:05PM ET,3.244,NYY
3,2B/3B,Aaron Hill,3800,SF@Bos 07:10PM ET,6.366,Bos
4,OF,Aaron Judge,3100,Bal@NYY 07:05PM ET,0.0,NYY


In [192]:
# Print a summary of the DataFrame: row count, columns, non-null counts and dtypes.
mlbDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 315 entries, 0 to 314
Data columns (total 6 columns):
Position            315 non-null object
Name                315 non-null object
Salary              315 non-null int64
GameInfo            315 non-null object
AvgPointsPerGame    315 non-null float64
teamAbbrev          315 non-null object
dtypes: float64(1), int64(1), object(4)
memory usage: 14.8+ KB
