In [57]:
import pandas as pd
import numpy as np
import requests
from itertools import combinations

In [58]:
# Endpoint that returns the whole game snapshot (players, teams, events, ...) as JSON.
api_url = 'https://fantasy.premierleague.com/api/bootstrap-static/'

# A timeout keeps this cell from hanging indefinitely when the API is unreachable.
response = requests.get(api_url, timeout=30)

if response.status_code == 200:
    data = response.json()
    print("Succesfully fetched data!")
else:
    print(f"Error while fetching data. Status code: {response.status_code}")
    # Stop here: every later cell reads `data`. Continuing would either raise a
    # NameError or silently reuse a stale `data` left over in the kernel.
    raise RuntimeError(
        f"Could not fetch {api_url} (HTTP status {response.status_code})"
    )

Succesfully fetched data!


In [59]:
# Show the top-level sections contained in the fetched JSON payload.
print(data.keys())

dict_keys(['chips', 'events', 'game_settings', 'game_config', 'phases', 'teams', 'total_players', 'elements', 'element_stats', 'element_types'])


In [60]:
# Build one DataFrame row per entry of data['elements'] (the per-player records)
# and preview the first five rows.
players = pd.DataFrame(data['elements'])
players.head()

Unnamed: 0,can_transact,can_select,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,...,now_cost_rank,now_cost_rank_type,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90
0,True,False,0.0,0.0,438098,0,0,-1,1,0,...,137,82,674,304,682,310,636,278,0.0,0.0
1,True,True,100.0,100.0,205651,0,0,-2,2,0,...,32,15,304,38,459,49,209,37,0.74,0.37
2,True,True,75.0,75.0,226597,0,0,2,-2,2,...,65,3,76,17,31,4,17,6,1.08,0.33
3,True,True,100.0,100.0,219847,0,0,-1,1,1,...,13,5,88,18,37,14,35,9,1.01,0.29
4,True,False,0.0,0.0,463748,0,0,0,0,0,...,637,63,426,52,515,59,559,68,0.0,0.0


In [61]:
# Print a heading followed by every column name of the raw players frame.
print("Players DataFrame Columns:", players.columns.tolist(), sep="\n")


Players DataFrame Columns:
['can_transact', 'can_select', 'chance_of_playing_next_round', 'chance_of_playing_this_round', 'code', 'cost_change_event', 'cost_change_event_fall', 'cost_change_start', 'cost_change_start_fall', 'dreamteam_count', 'element_type', 'ep_next', 'ep_this', 'event_points', 'first_name', 'form', 'id', 'in_dreamteam', 'news', 'news_added', 'now_cost', 'photo', 'points_per_game', 'removed', 'second_name', 'selected_by_percent', 'special', 'squad_number', 'status', 'team', 'team_code', 'total_points', 'transfers_in', 'transfers_in_event', 'transfers_out', 'transfers_out_event', 'value_form', 'value_season', 'web_name', 'region', 'minutes', 'goals_scored', 'assists', 'clean_sheets', 'goals_conceded', 'own_goals', 'penalties_saved', 'penalties_missed', 'yellow_cards', 'red_cards', 'saves', 'bonus', 'bps', 'influence', 'creativity', 'threat', 'ict_index', 'starts', 'expected_goals', 'expected_assists', 'expected_goal_involvements', 'expected_goals_conceded', 'influence_

In [62]:
# Lookup table: numeric element_type code -> position label.
position_map = {1: 'GK', 2: 'Defender', 3: 'Midfielder', 4: 'Forward'}

# Manual price overrides, keyed by "<first_name> <second_name>".
price_updates = {
    'Mohamed Salah': 13.0,
    'Cole Palmer': 10.8,
    'Bukayo Saka': 10.4,
    'Nicolas Jackson': 7.9,
    'Łukasz Fabiański': 4.0
}

kept_columns = [
    'first_name', 'second_name', 'team_code', 'element_type',
    'total_points', 'points_per_game', 'starts', 'now_cost',
]
numeric_columns = ['total_points', 'points_per_game', 'now_cost']

# Work on an independent copy so the raw `players` frame is never modified.
players_selected = players[kept_columns].copy()

# Scale the raw price down by a factor of ten and label the positions.
players_selected['now_cost'] = players_selected['now_cost'].div(10)
players_selected['element_type'] = players_selected['element_type'].map(position_map)

# Force the scoring/price columns to numeric; unparsable values become NaN.
players_selected[numeric_columns] = players_selected[numeric_columns].apply(
    pd.to_numeric, errors='coerce'
)

# Keep only players with a positive points-per-game figure.
has_positive_ppg = players_selected['points_per_game'] > 0.0
players_selected = players_selected.loc[has_positive_ppg]

players_selected['full_name'] = players_selected['first_name'].str.cat(
    players_selected['second_name'], sep=' '
)

# Disabled experiment: remove Bryan Mbeumo's row from the DataFrame
#mbeumo_row = players_selected.loc[players_selected['full_name'] == 'Bryan Mbeumo'].copy()
#players_selected = players_selected.loc[players_selected['full_name'] != 'Bryan Mbeumo']

# Apply the overrides for players that were already selected at a different price.
for override_name, override_price in price_updates.items():
    is_target = players_selected['full_name'] == override_name
    players_selected.loc[is_target, 'now_cost'] = override_price

# Price paid per point scored so far (lower is better), rounded to two decimals.
players_selected['Cost/Points'] = (
    players_selected['now_cost'].div(players_selected['total_points']).round(2)
)

print(players_selected)



         first_name             second_name  team_code element_type  \
1           Gabriel       Fernando de Jesus          3      Forward   
2           Gabriel    dos Santos Magalhães          3     Defender   
3               Kai                 Havertz          3      Forward   
5           Jurriën                  Timber          3     Defender   
6        Jorge Luiz            Frello Filho          3   Midfielder   
..              ...                     ...        ...          ...   
686          Jørgen           Strand Larsen         39      Forward   
687    Toti António                   Gomes         39     Defender   
689           André  Trindade da Costa Neto         39   Midfielder   
690  Carlos Roberto            Forbs Borges         39   Midfielder   
691           Alfie                    Pond         39     Defender   

     total_points  points_per_game  starts  now_cost  \
1               9              0.8       2       6.8   
2              60              4.6 

In [63]:
# For each headline metric, look up the row label of the player holding the maximum,
# then show those rows (a player leading several metrics appears more than once).
headline_metrics = ['total_points', 'points_per_game', 'now_cost']
max_indeces = players_selected.loc[:, headline_metrics].idxmax()
max_rows = players_selected.loc[max_indeces]
print(max_rows)


    first_name second_name  team_code element_type  total_points  \
401    Mohamed       Salah         14   Midfielder           151   
401    Mohamed       Salah         14   Midfielder           151   
428     Erling     Haaland         43      Forward            98   

     points_per_game  starts  now_cost       full_name  Cost/Points  
401             10.8      14      13.0   Mohamed Salah         0.09  
401             10.8      14      13.0   Mohamed Salah         0.09  
428              6.5      15      14.9  Erling Haaland         0.15  


In [64]:
# Cheapest-first copy of the player table, re-labelled 0..n-1 so that the new
# positional labels can be used as player identifiers from here on.
sorted_players = (
    players_selected
    .sort_values(by='now_cost', ascending=True)
    .reset_index(drop=True)
)

# Preview the cheapest players
sorted_players.head()


Unnamed: 0,first_name,second_name,team_code,element_type,total_points,points_per_game,starts,now_cost,full_name,Cost/Points
0,Alfie,Pond,39,Defender,1,1.0,0,3.9,Alfie Pond,3.9
1,Yerson,Mosquera,39,Defender,3,0.6,5,3.9,Yerson Mosquera,1.3
2,James,Hill,91,Defender,1,1.0,0,3.9,James Hill,3.9
3,Aaron,Cresswell,21,Defender,4,0.8,0,3.9,Aaron Cresswell,0.98
4,Charlie,Taylor,20,Defender,6,1.0,2,3.9,Charlie Taylor,0.65


In [65]:
# Greedy swap heuristic (not an exact knapsack solver) with global team constraints

def find_optimal_team(max_cost, seed_team, all_players, starters=None):
    """
    Greedily improve a seed line-up by swapping in better players of the same position.

    Candidates are visited in ascending 'Cost/Points' order. Each candidate may
    replace exactly one player of the same position in the current line-up, as long
    as the line-up stays within ``max_cost`` and no club exceeds three players
    (counted across the line-up, the fixed goalkeeper and ``starters``).

    Parameters:
    - max_cost (float): Maximum allowed cost for the returned line-up.
    - seed_team (list): Index labels of ``all_players`` to start from. When
      ``starters`` is given (bench selection), the first entry is treated as a fixed
      goalkeeper that is never swapped out.
    - all_players (pd.DataFrame): Player table indexed by the labels used in
      ``seed_team``/``starters``; must provide the columns 'team_code',
      'element_type', 'total_points', 'now_cost' and 'Cost/Points'.
    - starters (list, optional): Index labels of players already selected elsewhere
      (e.g. the starting eleven when picking the bench). They are never picked again
      and count towards the per-club limit.

    Returns:
    - optimal_team (list): Index labels of the resulting line-up. In bench mode the
      fixed goalkeeper is the first element. The input lists are not modified.
    """
    if starters:
        starters = starters.copy()
        # Bench mode: the leading seed entry is the fixed goalkeeper.
        goalkeeper = seed_team[:1]
        seed_team = seed_team[1:]
    else:
        starters = []
        goalkeeper = []

    # Initialize the optimal team with the (swappable part of the) seed team
    optimal_team = seed_team.copy()

    # Budget already committed to the fixed goalkeeper. This used to be a hard-coded
    # 4.0, which under-counted the spend for any goalkeeper priced differently.
    reserved_cost = all_players.loc[goalkeeper, 'now_cost'].sum() if goalkeeper else 0.0

    # Players per club across everything already in the squad. The fixed goalkeeper
    # is included; leaving it out allowed a fourth player from the same club.
    team_counts = {}
    for player_id in goalkeeper + optimal_team + starters:
        team = all_players.loc[player_id, 'team_code']
        team_counts[team] = team_counts.get(team, 0) + 1

    def cost_after_swap(outgoing, incoming_cost):
        # Total line-up cost if `outgoing` were replaced by a player costing `incoming_cost`.
        current_cost = all_players.loc[optimal_team, 'now_cost'].sum() + reserved_cost
        return current_cost - all_players.loc[outgoing, 'now_cost'] + incoming_cost

    # Sort players by Cost/Points ratio in ascending order
    sorted_players = all_players.sort_values(by='Cost/Points', ascending=True)

    for idx, row in sorted_players.iterrows():
        # Skip players that are already part of the squad
        if idx in optimal_team or idx in starters or idx in goalkeeper:
            continue

        player_team = row['team_code']
        position = row['element_type']

        if team_counts.get(player_team, 0) >= 3:
            # Club is full: the candidate can only displace a club-mate of the same
            # position, which leaves the club count unchanged.
            replaceable = [
                pid for pid in optimal_team
                if all_players.loc[pid, 'team_code'] == player_team
                and all_players.loc[pid, 'element_type'] == position
            ]
            if not replaceable:
                continue

            # Target the club-mate with the worst (highest) Cost/Points ratio
            outgoing = max(
                replaceable,
                key=lambda pid: all_players.loc[pid, 'Cost/Points']
            )
            is_better = row['Cost/Points'] < all_players.loc[outgoing, 'Cost/Points']
        else:
            # Club has room: the candidate may displace any player of the same position.
            replaceable = [
                pid for pid in optimal_team
                if all_players.loc[pid, 'element_type'] == position
            ]
            if not replaceable:
                continue

            # Target the lowest scorer in that position
            outgoing = min(
                replaceable,
                key=lambda pid: all_players.loc[pid, 'total_points']
            )
            is_better = row['total_points'] > all_players.loc[outgoing, 'total_points']

        if not is_better:
            continue

        if cost_after_swap(outgoing, row['now_cost']) <= max_cost:
            optimal_team.remove(outgoing)
            optimal_team.append(idx)

            # Update club counts (a no-op when both players share a club)
            old_player_team = all_players.loc[outgoing, 'team_code']
            team_counts[old_player_team] -= 1
            team_counts[player_team] = team_counts.get(player_team, 0) + 1

    # `goalkeeper` is empty outside bench mode, so this is just the line-up itself.
    return goalkeeper + optimal_team


In [66]:
# Pick seed starters and bench players

# Formation of the starting eleven: goalkeepers, defenders, midfielders, forwards.
g = 1
d = 3
m = 4
f = 3

# Best value first (lowest Cost/Points); the index labels of `sorted_players` are kept.
sorted_cpp = sorted_players.sort_values(by='Cost/Points', ascending=True).copy()

# Per-position views in value order: the source of the starter seed.
cpp_position = sorted_cpp['element_type']
starter_gk = sorted_cpp[cpp_position == 'GK']
starter_def = sorted_cpp[cpp_position == 'Defender']
starter_mid = sorted_cpp[cpp_position == 'Midfielder']
starter_for = sorted_cpp[cpp_position == 'Forward']

# Per-position views in the order of `sorted_players`: the source of the bench seed.
base_position = sorted_players['element_type']
goalkeepers = sorted_players[base_position == 'GK']
defenders = sorted_players[base_position == 'Defender']
midfielders = sorted_players[base_position == 'Midfielder']
forwards = sorted_players[base_position == 'Forward']

seed_starters = [
    *starter_gk.head(g).index,
    *starter_def.head(d).index,
    *starter_mid.head(m).index,
    *starter_for.head(f).index,
]
# Alternative seed taken from the `sorted_players` order instead:
#seed_starters = list(goalkeepers.head(g).index) + list(defenders.head(d).index) + list(midfielders.head(m).index) + list(forwards.head(f).index)

# The bench goalkeeper is pinned by name.
fabianski = sorted_players[sorted_players['full_name'] == 'Łukasz Fabiański'].index

# The bench tops each outfield position up to 5 defenders / 5 midfielders / 3 forwards.
seed_bench = [
    *fabianski,
    *defenders.head(5 - d).index,
    *midfielders.head(5 - m).index,
    *forwards.head(3 - f).index,
]

print(seed_starters)

print(seed_bench)
starter_gk.head(20)

[78, 243, 161, 174, 460, 384, 346, 257, 393, 419, 379]
[25, 0, 1, 50]


Unnamed: 0,first_name,second_name,team_code,element_type,total_points,points_per_game,starts,now_cost,full_name,Cost/Points
78,Dean,Henderson,31,GK,53,3.5,15,4.4,Dean Henderson,0.08
178,Matz,Sels,17,GK,62,4.1,15,4.7,Matz Sels,0.08
244,Jordan,Pickford,11,GK,58,4.1,14,4.9,Jordan Pickford,0.08
315,André,Onana,1,GK,65,4.3,15,5.2,André Onana,0.08
216,Robert,Sánchez,8,GK,52,3.7,14,4.8,Robert Sánchez,0.09
376,David,Raya Martin,3,GK,58,3.9,15,5.6,David Raya Martin,0.1
133,Mark,Flekken,94,GK,46,3.1,15,4.5,Mark Flekken,0.1
291,Nick,Pope,4,GK,48,3.2,15,5.0,Nick Pope,0.1
81,Arijanet,Muric,40,GK,39,2.8,14,4.4,Arijanet Muric,0.11
300,Bernd,Leno,54,GK,46,3.1,15,5.0,Bernd Leno,0.11


In [67]:
# Try every starter/bench budget split in 0.1 steps, keep the split with the highest
# weighted score (starter points + bench points * bench weight), then report it.

best_points = 0
final_team = None
selected_bench = None
selected_starters = None
bench_weight = 1
games = 15  # NOTE(review): not read anywhere in this cell

total_budget = 100
# Weight applied to bench points; constant, so it is set once outside the loop.
bench_prob = 0.1

all_players = sorted_players.copy()

min_seed_starters = round(sorted_players.loc[seed_starters, 'now_cost'].sum(), 1)
min_bench_cost = round(sorted_players.loc[seed_bench, 'now_cost'].sum(), 1)
max_starter_cost = total_budget - min_bench_cost

# NOTE(review): np.arange with a 0.1 step yields budgets carrying floating-point
# noise; the grid is left unchanged here so results stay comparable.
for starters_budget in np.arange(min_seed_starters, max_starter_cost, 0.1):
    bench_budget = total_budget - starters_budget

    starting_team = find_optimal_team(starters_budget, seed_starters, sorted_cpp)
    bench = find_optimal_team(bench_budget, seed_bench, all_players, starting_team)

    starters_total_points = sorted_players.loc[starting_team, 'total_points'].sum()
    bench_total_points = sorted_players.loc[bench, 'total_points'].sum()

    combined_points = starters_total_points + bench_total_points * bench_prob

    if combined_points > best_points:
        best_points = combined_points
        selected_bench = bench
        selected_starters = starting_team
        bench_weight = bench_prob
        final_team = starting_team + bench

# Fail with a clear message instead of an opaque lookup error further down.
if selected_starters is None:
    raise ValueError(
        "No squad was selected: the seed teams leave no budget split to evaluate "
        f"(starters from {min_seed_starters}, limit {max_starter_cost})."
    )

cost_st = sorted_players.loc[selected_starters, 'now_cost'].sum()
cost_be = sorted_players.loc[selected_bench, 'now_cost'].sum()

print(f"Starters cost: £{cost_st}m")
print(f"Bench cost: £{cost_be}m")

points_st = sorted_players.loc[selected_starters, 'total_points'].sum()
points_be = sorted_players.loc[selected_bench, 'total_points'].sum() * bench_weight

print(f"Starters total points: {points_st}")
print(f"Bench total points: {points_be}")
print(f"Bench weight: {bench_weight}")

# Calculate team statistics
best_total_points = points_st + points_be
best_comb_cost = sorted_players.loc[final_team, 'now_cost'].sum()
best_cpp = best_comb_cost / best_total_points

# Print the team
print(f"Total points: {best_total_points}")
print(f"Total cost: £{best_comb_cost}m")
print(f"Cost/point: {best_cpp:.2f}m\n")

# Define the order of positions
position_order = ['GK', 'Defender', 'Midfielder', 'Forward']


def print_lineup(player_ids, players_df, positions):
    """Print one line per player in ``player_ids``, grouped under each position in ``positions``."""
    for position in positions:
        print(f"\nPosition: {position}")
        for player_idx in player_ids:
            player = players_df.loc[player_idx]
            if player['element_type'] == position:
                print(f"Player: {player['first_name']} {player['second_name']} - {player['element_type']} - Team Code: {player['team_code']} - "
                    f"Total points: {player['total_points']} - Cost: £{player['now_cost']}m - Cost / points: {player['now_cost'] / player['total_points']:.4f}m")


print(f"STARTERS: {d}-{m}-{f}")
print_lineup(selected_starters, sorted_players, position_order)

print("\nBENCH:")
print_lineup(selected_bench, sorted_players, position_order)


Starters cost: £82.7m
Bench cost: £17.1m
Starters total points: 956
Bench total points: 13.3
Bench weight: 0.1
Total points: 969.3
Total cost: £99.8m
Cost/point: 0.10m

STARTERS: 3-4-3

Position: GK
Player: André Onana - GK - Team Code: 1 - Total points: 65 - Cost: £5.2m - Cost / points: 0.0800m

Position: Defender
Player: Ola Aina - Defender - Team Code: 17 - Total points: 58 - Cost: £4.9m - Cost / points: 0.0845m
Player: Ashley Young - Defender - Team Code: 11 - Total points: 57 - Cost: £4.6m - Cost / points: 0.0807m
Player: Murillo Santiago Costa dos Santos - Defender - Team Code: 17 - Total points: 53 - Cost: £4.6m - Cost / points: 0.0868m

Position: Midfielder
Player: Bryan Mbeumo - Midfielder - Team Code: 94 - Total points: 96 - Cost: £7.6m - Cost / points: 0.0792m
Player: Cole Palmer - Midfielder - Team Code: 8 - Total points: 125 - Cost: £10.8m - Cost / points: 0.0864m
Player: Mohamed Salah - Midfielder - Team Code: 14 - Total points: 151 - Cost: £13.0m - Cost / points: 0.0861m