In [2]:
#import relevant libraries

import requests
import json
from pprint import pprint
import operator
import numpy
import pandas 

# from datetime module
from datetime import date
from datetime import timedelta
from pathlib import Path  

# My Team info
team_id = 3402291


# Modelling
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from scipy.stats import randint

# Tree Visualisation
from sklearn.tree import export_graphviz
from IPython.display import Image
import graphviz


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas


ModuleNotFoundError: No module named 'graphviz'

### Overall Player Transfer Data

In [143]:
# Request Information on all players today (now?)
base_url = 'https://fantasy.premierleague.com/api/'
r_all_players_today = requests.get(base_url+'bootstrap-static/').json()
pprint(r_all_players_today, indent=2, depth=1, compact=True)

{ 'element_stats': [...],
  'element_types': [...],
  'elements': [...],
  'events': [...],
  'game_settings': {...},
  'phases': [...],
  'teams': [...],
  'total_players': 10609675}


In [144]:
overall_events_data = pandas.DataFrame(r_all_players_today['events'])
todays_player_data = pandas.DataFrame(r_all_players_today['elements'])
total_players = r_all_players_today['total_players']

In [145]:
# Total Active players in the game at this point in time (estimate)
current_gameweek = numpy.where(overall_events_data.is_current)[0][0] + 1
total_active_players_estimate = int(round(total_players*(0.97)**current_gameweek,))

In [146]:
# Net transfers of players at this moment 

all_players_transfers_in = todays_player_data.transfers_in
all_players_transfers_out = todays_player_data.transfers_out
net_transfers_overall_today = all_players_transfers_in - all_players_transfers_out

In [147]:
# Price Change so far this event 

all_players_price_change_for_current_event = todays_player_data.cost_change_event

In [148]:
# Players prices today

all_players_prices_today = todays_player_data.now_cost

In [149]:
# Player currently flagged red

list_of_all_players_status = todays_player_data.status
boolean_list_of_players_injured = [True if x == 'i' else False for x in list_of_all_players_status]

In [150]:
# Create DataFrame from dictionary of all this transfer data

price_change_dict = {
    "price_change_this_night": 0, #tomorrow, we define this as today_data.player_prices_today - yesterday_data.player_prices_today
    "net_transfers_in_out_since_last_price_change": 0, #tomorrow, we define this as yesterday_data.net_transfers_in_out_since_last_price_change + today_data_net_transfers_in_out_since_yesterday, unless price change occurs, then we reset to ="Net Transfers In/Out since yesterday
    "net_transfers_in_out_since_yesterday": 0, #tomorrow, we define this as today_data.net_transfers_in_out_overall_as_of_today - yesterday_data.net_transfers_in_out_overall_as_of_today
    "price_change_so_far_for_this_event ": all_players_price_change_for_current_event,
    "total_active_players_estimate": total_active_players_estimate,
    "players_injured": boolean_list_of_players_injured,
    "player_prices_today" : all_players_prices_today,
    "net_transfers_in_out_overall_as_of_today" : net_transfers_overall_today, #not to be used in modelling
    "player_ids" : numpy.array(range(1,len(boolean_list_of_players_injured)+1))
}

# Dictionary into DataFrame
today_data = pandas.DataFrame(price_change_dict)
today_data.set_index("player_ids", inplace = True)

#remove bottom rows of new players, just for now, to keep consistency 
today_data = today_data[:789]

# Player transfer data is updated at least every 40 minutes, and probably every 30 or even every 15 minutes
# This dataframe is a snapshot at 8pm GMT Jan 29th

In [4]:
# Get today's date
today = date.today() + timedelta(days = 1) #REMEMBER TO ADD A DAY WHEN YOU DO ANALYSIS TO ACCOUNT FOR GMT
 
# Get yesterday date
yesterday = today - timedelta(days = 1) 

print("Today is:", today)
print("Yesterday was:", yesterday)

Today is: 2024-01-30
Yesterday was: 2024-01-29


In [136]:
#update yesterday's dataframe with today's results 

#get yesterday's saved results 
yesterday_filepath = Path('/Users/thomasribaroff/Documents/GitHub/FullPipeline-FPLPricePredictor/Pipeline-Steps/Saved_DataFrames/{}/{}.csv'.format(yesterday,yesterday))
yesterday_data = pandas.read_csv(yesterday_filepath,index_col=['player_ids'])

#updates 
yesterday_data.price_change_this_night = today_data.player_prices_today - yesterday_data.player_prices_today
yesterday_data.net_transfers_in_out_since_yesterday = today_data.net_transfers_in_out_overall_as_of_today - yesterday_data.net_transfers_in_out_overall_as_of_today


# Update net transfer since last price change in today's data (including reset rule if price change occured overnight)
today_data.net_transfers_in_out_since_last_price_change = yesterday_data.net_transfers_in_out_since_last_price_change + yesterday_data.net_transfers_in_out_since_yesterday
boolean_player_changes = [today_data.price_change_this_night != 0]
today_data.net_transfers_in_out_since_last_price_change = [0 if b else a for a, b in zip(today_data.net_transfers_in_out_since_last_price_change, boolean_player_changes[0])]

#rewrite yesterday's file 
yesterday_data.to_csv(yesterday_filepath) 

In [152]:
#save today's dataframe to today's folder

today_filepath = Path('/Users/thomasribaroff/Documents/GitHub/FullPipeline-FPLPricePredictor/Pipeline-Steps/Saved_DataFrames/{}'.format(today))
today_filepath.mkdir(parents=True, exist_ok=True)
today_data.to_csv('{}/{}.csv'.format(today_filepath,today)) 

In [153]:
today_data

Unnamed: 0_level_0,price_change_this_night,net_transfers_in_out_since_last_price_change,net_transfers_in_out_since_yesterday,price_change_so_far_for_this_event,total_active_players_estimate,players_injured,player_prices_today,net_transfers_in_out_overall_as_of_today
player_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,0,0,0,0,5596397,False,44,-52476
2,0,0,0,0,5596397,False,39,-7611
3,0,0,0,0,5596397,False,44,-6304
4,0,0,0,0,5596397,False,54,-7149
5,0,0,0,0,5596397,False,50,-1055602
...,...,...,...,...,...,...,...,...
785,0,0,0,0,5596397,False,44,134
786,0,0,0,0,5596397,False,40,828
787,0,0,0,0,5596397,False,45,29
788,0,0,0,0,5596397,False,45,175


### Prelim Modelling

In [None]:
# Read in yesterday's data, which should be a completed dataset if preprocessing has worked
yesterday_data = pandas.read_csv(yesterday_filepath,index_col=['player_ids'])

# Split the data into features (X) and target (y)
X = yesterday_data.drop('price_change_this_night', axis=1)
y = bank_data['price_change_this_night']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Fit a rudimentary RF Classifier 
rf = RandomForestClassifier()
rf.fit(X_train, y_train)

# Make Predictions for Test Set
y_pred = rf.predict(X_test)

# Evaluate Predictions
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

### Personal Team Data

In [None]:
# Request Information on my team

r_my_team = requests.get(base_url+'entry/{}/event/{}/picks/'.format(team_id,current_gameweek)).json()
my_players_ids = list(map(operator.itemgetter('element'), r_my_team['picks']))
my_players_ids

# Get list of all player IDs
total_list_of_players_ids = list(map(operator.itemgetter('id'), r_all_players_today['elements']))

# Makes a list of all players that is TRUE is player is in my team
boolean_list_of_players_in_my_team = [player in my_players_ids for player in total_list_of_players_ids]

# Use boolean list to exact all info about my players this week
my_players_total = [item for item, condition in zip(r_all_players_today['elements'], boolean_list_of_players_in_my_team) if condition]
my_players_total

# Print all my player's names
for players in my_players_total:
    print(players.get('web_name'))