In [190]:
#import relevant libraries

import requests
import json
from pprint import pprint
import operator
import numpy
import pandas 

# from datetime module
from datetime import date
from datetime import timedelta
from pathlib import Path  

# My Team info
team_id = 3402291


# Modelling
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from scipy.stats import randint

# Tree Visualisation
from sklearn.tree import export_graphviz
from IPython.display import Image
#import graphviz


### Overall Player Transfer Data

In [165]:
# Request Information on all players today (now?)

base_url = 'https://fantasy.premierleague.com/api/'
today = date.today() - timedelta(days = 0)
yesterday = today - timedelta(days = 1) 

with open(f"/Users/thomasribaroff/Documents/GitHub/FullPipeline-FPLPricePredictor/feature-engineering-pipeline/Saved_Data/{today}/{today}.json") as f:
    r_all_players_today = json.load(f)
# r_all_players_today = requests.get(base_url+'bootstrap-static/').json()
pprint(r_all_players_today, indent=2, depth=1, compact=True)

print(today)

{ 'element_stats': [...],
  'element_types': [...],
  'elements': [...],
  'events': [...],
  'game_settings': {...},
  'phases': [...],
  'teams': [...],
  'total_players': 10697077}
2024-02-23


In [166]:
# Get today's date
# today = date.today()
# #yesterday = today - timedelta(days = 1) 
# print(today)

# # Define today's filepath
# today_filepath = Path('/Users/thomasribaroff/Documents/GitHub/FullPipeline-FPLPricePredictor/feature-engineering-pipeline/Saved_Data/{}'.format(today))

# Make the directory for the json file
# today_filepath.mkdir(parents=True, exist_ok=True)

# Save today's json file in the directory
# with open("/Users/thomasribaroff/Documents/GitHub/FullPipeline-FPLPricePredictor/feature-engineering-pipeline/Saved_Data/{}/{}.json".format(today,today), "w+") as f:
#     json.dump(r_all_players_today, f)

In [167]:
overall_events_data = pandas.DataFrame(r_all_players_today['events'])
todays_player_data = pandas.DataFrame(r_all_players_today['elements'])
total_players = r_all_players_today['total_players']

In [168]:
# Total Active players in the game at this point in time (estimate)
current_gameweek = numpy.where(overall_events_data.is_current)[0][0] + 1
total_active_players_estimate = int(round(total_players*(0.97)**current_gameweek))

In [169]:
# Net transfers of players at this moment 

all_players_transfers_in = todays_player_data.transfers_in
all_players_transfers_out = todays_player_data.transfers_out
net_transfers_overall_today = all_players_transfers_in - all_players_transfers_out

In [170]:
# Price Change so far this event 

all_players_price_change_for_current_event = todays_player_data.cost_change_event

In [171]:
# Players prices today

all_players_prices_today = todays_player_data.now_cost

In [172]:
# Player currently flagged red

list_of_all_players_status = todays_player_data.status
boolean_list_of_players_injured = [True if x == 'i' else False for x in list_of_all_players_status]

In [173]:
# Create DataFrame from dictionary of all this transfer data

price_change_dict = {
    "price_change_this_night": numpy.nan, #tomorrow, we define this as today_data.player_prices_today - yesterday_data.player_prices_today
    "net_transfers_in_out_since_last_price_change": 0, #tomorrow, we define this as yesterday_data.net_transfers_in_out_since_last_price_change + today_data_net_transfers_in_out_since_yesterday, unless price change occurs, then we reset to ="Net Transfers In/Out since yesterday
    "net_transfers_in_out_this_day": 0, #tomorrow, we define this as today_data.net_transfers_in_out_overall_as_of_today - yesterday_data.net_transfers_in_out_overall_as_of_today
    "price_change_so_far_for_this_event ": all_players_price_change_for_current_event,
    "total_active_players_estimate": total_active_players_estimate,
    "players_injured": boolean_list_of_players_injured,
    "player_prices_today" : all_players_prices_today,
    "net_transfers_in_out_overall_as_of_today" : net_transfers_overall_today, #not to be used in modelling
    "player_ids" : todays_player_data.id,
    "player_name": todays_player_data.web_name
}

# Dictionary into DataFrame
today_data = pandas.DataFrame(price_change_dict)


# Player transfer data is updated at least every 40 minutes, and probably every 30 or even every 15 minutes

#save today's dataframe to today's folder
today_filepath = Path('/Users/thomasribaroff/Documents/GitHub/FullPipeline-FPLPricePredictor/feature-engineering-pipeline/Saved_Data/{}'.format(today))
today_data.to_csv('{}/{}.csv'.format(today_filepath,today), index=False) 

In [174]:
def update_yesterday_data_rows_align(yesterday_df, today_df):
    # compare the ids from both datasets -> where a new player appears today, make sure you add that row to yesterday with nulls filling it
    #                                    -> where a player is removed today, make sure you remove that row yesterday 
    
    # Get the set of row identifiers from yesterday and today
    yesterday_ids = set(yesterday_df.player_ids)
    today_ids = set(today_df.player_ids)

    # Find new rows added today
    new_rows = today_ids - yesterday_ids

    # Add new rows with null values to yesterday's dataframe
    if new_rows:
        new_rows_data = today_df[today_df.player_ids.isin(new_rows)]
        yesterday_df = pandas.concat([yesterday_df, new_rows_data], ignore_index=True)

    # Find rows removed today
    removed_rows = yesterday_ids - today_ids

    # Remove rows from yesterday's dataframe
    if removed_rows:
        yesterday_df = yesterday_df[~yesterday_df.player_ids.isin(removed_rows)]
        
    # rename index
    today_df.index.rename('ids', inplace=True)
    yesterday_df.index.rename('ids', inplace=True)

    # sort values
    yesterday_df = yesterday_df.sort_values(by='player_ids')
    today_df = today_df.sort_values(by='player_ids')

    # reset indexes
    today_df.set_index('player_ids', inplace=True)
    yesterday_df.set_index('player_ids', inplace=True)

    return yesterday_df, today_df


In [175]:
#update yesterday's dataframe with today's results 

#get yesterday's saved results 
yesterday_filepath = Path('/Users/thomasribaroff/Documents/GitHub/FullPipeline-FPLPricePredictor/feature-engineering-pipeline/Saved_Data/{}/{}.csv'.format(yesterday,yesterday))
yesterday_data = pandas.read_csv(yesterday_filepath)

# #ensure rows align, accounting for players being added/removed overnight
yesterday_data, today_data = update_yesterday_data_rows_align(yesterday_data, today_data)

# #updates 
yesterday_data.price_change_this_night = today_data.player_prices_today - yesterday_data.player_prices_today
today_data.net_transfers_in_out_this_day = today_data.net_transfers_in_out_overall_as_of_today - yesterday_data.net_transfers_in_out_overall_as_of_today
today_data.net_transfers_in_out_since_last_price_change = yesterday_data.net_transfers_in_out_since_last_price_change

# Update net transfer since last price change in today's data 
yesterday_data.net_transfers_in_out_since_last_price_change = yesterday_data.net_transfers_in_out_since_last_price_change + yesterday_data.net_transfers_in_out_this_day

#reset rule if price changed overnight 
boolean_player_changes = numpy.squeeze(yesterday_data.price_change_this_night != 0)
today_data.net_transfers_in_out_since_last_price_change = [today_data.net_transfers_in_out_this_day[i] if boolean_player_changes[i] else (today_data.net_transfers_in_out_since_last_price_change[i] + today_data.net_transfers_in_out_this_day[i]) for i in today_data.net_transfers_in_out_since_last_price_change.index]

# #rewrite yesterday's file 
yesterday_data.to_csv(yesterday_filepath, index=False) #index=False?

# #rewrite today's file 
today_filepath = Path('/Users/thomasribaroff/Documents/GitHub/FullPipeline-FPLPricePredictor/feature-engineering-pipeline/Saved_Data/{}/{}.csv'.format(today,today))
today_data.to_csv(today_filepath) #index=False?

### Prelim Modelling

In [177]:
yesterday_data

Unnamed: 0_level_0,price_change_this_night,net_transfers_in_out_since_last_price_change,net_transfers_in_out_this_day,price_change_so_far_for_this_event,total_active_players_estimate,players_injured,player_prices_today,net_transfers_in_out_overall_as_of_today,player_name
player_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
129,1,132315,33965,0,4993946,False,50,-57459,Dunk
532,-1,-2749,-551,0,4993946,False,45,194668,Emerson
557,1,216469,62852,0,4993946,False,55,426401,Hee Chan
567,1,171480,44007,0,4993946,False,57,404632,Neto
590,-1,-119586,-24296,0,4993946,True,57,322437,Cunha


In [178]:
today_data

Unnamed: 0_level_0,price_change_this_night,net_transfers_in_out_since_last_price_change,net_transfers_in_out_this_day,price_change_so_far_for_this_event,total_active_players_estimate,players_injured,player_prices_today,net_transfers_in_out_overall_as_of_today,player_name
player_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,,-84,-13,0,4995264,False,44,-52986,Balogun
2,,-348,-102,0,4995264,False,39,-9315,Cédric
3,,-31,-17,0,4995264,False,44,-6477,M.Elneny
4,,-25,0,0,4995264,False,54,-7328,Fábio Vieira
5,,147882,46047,1,4995264,False,52,-404365,Gabriel
...,...,...,...,...,...,...,...,...,...
820,,18,6,0,4995264,False,45,23,Harrison
821,,106,43,0,4995264,False,45,113,Umeh-Chibueze
822,,108,48,0,4995264,False,45,124,Blacker
823,,22,5,0,4995264,False,45,27,Arblaster


In [198]:
dummy_data = today_data

In [285]:

dummy_filepath = Path('/Users/thomasribaroff/Documents/GitHub/FullPipeline-FPLPricePredictor/feature-engineering-pipeline/Saved_Data/dummy.csv')
dummy_data.to_csv(dummy_filepath) #index=False?

In [282]:
dummy_data = dummy_data.drop(labels=['net_transfers_in_out_this_day',
       'price_change_so_far_for_this_event ', 'total_active_players_estimate',
       'players_injured', 'player_prices_today',
       'net_transfers_in_out_overall_as_of_today'],axis=1)

In [163]:
# Read in yesterday's data, which should be a completed dataset if preprocessing has worked
yesterday_data = pandas.read_csv(yesterday_filepath)

# Split the data into features (X) and target (y)
X = yesterday_data.drop(['price_change_this_night','net_transfers_in_out_this_day','player_prices_today','net_transfers_in_out_overall_as_of_today', 'player_name'], axis=1)
y = yesterday_data['price_change_this_night']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2)

# Fit a rudimentary RF Classifier 
rf = RandomForestClassifier()
rf.fit(X_train, y_train)

# Make Predictions for Test Set
y_pred = rf.predict(X_test)

# Evaluate Predictions
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9939393939393939


In [164]:
y_test

730    0
158    0
26     0
583    0
42     0
      ..
800    0
178    0
67     0
812    0
117    0
Name: price_change_this_night, Length: 165, dtype: int64

In [1]:
y_pred

NameError: name 'y_pred' is not defined

### Personal Team Data

In [16]:
# Request Information on my team

team_id=3402291
current_gameweek=21
base_url = 'https://fantasy.premierleague.com/api/'
r_my_team = requests.get(base_url+'entry/{}/event/{}/picks/'.format(team_id,current_gameweek)).json()
players_this_week = pandas.DataFrame(r_my_team['picks'])
players_this_week


Unnamed: 0,element,position,multiplier,is_captain,is_vice_captain
0,524,1,1,False,False
1,506,2,1,False,False
2,5,3,1,False,False
3,203,4,1,False,False
4,509,5,1,False,False
5,19,6,1,False,True
6,412,7,1,False,False
7,362,8,2,True,False
8,33,9,1,False,False
9,85,10,1,False,False


In [23]:

# Get a list of player IDs from team ID supplied 
players_this_week = pandas.DataFrame(r_my_team['picks'])

# Get list of all player IDs
overall_events_data = pandas.DataFrame(r_all_players_today['events'])
todays_player_data = pandas.DataFrame(r_all_players_today['elements'])
total_players = r_all_players_today['total_players']

# Makes a list of all players that is TRUE if player is in team ID supplied
boolean_list_of_players_in_my_team = [player in players_this_week.element for player in todays_player_data.id]
boolean_list_of_players_in_my_team

[True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 Fals

In [30]:
players_this_week.element.values

array([524, 506,   5, 203, 509,  19, 412, 362,  33,  85,  60, 409, 178,
       308, 290])

In [43]:
player_names = todays_player_data[[value in players_this_week.element.values for value in todays_player_data.id]]['web_name']


In [45]:
type(player_names)

pandas.core.series.Series