<a href="https://colab.research.google.com/github/ruus77/FPL-Points-Predictions/blob/main/FPL_points_prediction_data_importing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
from typing import List
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import OneHotEncoder

import os
from google.colab import userdata



In [None]:
# Upstream data repository and the local directory name `git clone` creates.
repo_url = "https://github.com/olbauday/FPL-Core-Insights"
repo_name = "FPL-Core-Insights"

# First run: clone the repository into the current working directory.
# Later runs: step into the existing checkout, pull the latest commits and
# step back out so the following cells see the same working directory.
if not os.path.exists(repo_name):
    !git clone {repo_url}
else:
    %cd {repo_name}
    !git pull
    %cd ..

/content/FPL-Core-Insights
Already up to date.
/content


In [None]:
def data_import(seasons:List[str],
                base_dir:str="/content/FPL-Core-Insights/data",
                gameweeks=None)->pd.DataFrame:
  """Load per-gameweek ``playermatchstats.csv`` files for the given seasons.

  Season "2024-2025" is read from
  ``<base_dir>/<season>/playermatchstats/GW<gw>/playermatchstats.csv``;
  every other season is read from
  ``<base_dir>/<season>/By Gameweek/GW<gw>/playermatchstats.csv``.
  Each frame is tagged with ``season`` and ``gw`` columns.

  Args:
    seasons: Season labels such as ``["2024-2025", "2025-2026"]``.
    base_dir: Root folder containing one sub-folder per season.
    gameweeks: Iterable of gameweek numbers to load; defaults to 1..38.

  Returns:
    One DataFrame with the "2024-2025" rows first, followed by the rows of
    the other seasons restricted to the columns they share with
    "2024-2025". If only one of the two groups produced any data, that
    group is returned unchanged; if nothing was loaded, an empty DataFrame
    is returned.

  Notes:
    Gameweek files that are missing or empty are reported with ``print``
    and skipped, for every season. Any other read error propagates.
  """
  legacy_season = "2024-2025"
  if gameweeks is None:
    gameweeks = np.arange(1, 39)

  legacy_frames = []
  other_frames = []
  for gw in gameweeks:
    for season in seasons:
      # The two seasons use different folder layouts in the data repository.
      if season == legacy_season:
        path = f"{base_dir}/{season}/playermatchstats/GW{gw}/playermatchstats.csv"
        bucket = legacy_frames
      else:
        path = f"{base_dir}/{season}/By Gameweek/GW{gw}/playermatchstats.csv"
        bucket = other_frames
      try:
        frame = pd.read_csv(path)
      except (FileNotFoundError, pd.errors.EmptyDataError) as e:
        # A gameweek without data must not abort the whole import.
        print(f"path: {path} | exception: {e}")
        continue
      bucket.append(frame.assign(season=season, gw=gw))

  # pd.concat raises on an empty list, so handle the "nothing loaded" cases
  # explicitly instead of crashing.
  if not legacy_frames and not other_frames:
    return pd.DataFrame()
  if not other_frames:
    return pd.concat(legacy_frames, axis=0, ignore_index=True)
  if not legacy_frames:
    return pd.concat(other_frames, axis=0, ignore_index=True)

  df24 = pd.concat(legacy_frames, axis=0)
  df25 = pd.concat(other_frames, axis=0)
  # Keep the 2024-2025 schema: extra columns of the other seasons are dropped.
  common_cols = [c for c in df24.columns if c in df25.columns]

  return pd.concat([df24, df25[common_cols]], axis=0, ignore_index=True)




In [None]:
# Seasons to load; data_import reads one playermatchstats.csv per gameweek
# and season and returns them stacked in a single frame.
season_list = ["2024-2025", "2025-2026"]
playermatchstats = data_import(seasons=season_list)

In [None]:
playermatchstats.isna().sum().sum()

np.int64(0)

In [None]:
playermatchstats.shape

(21156, 56)

In [None]:
playermatchstats.columns

Index(['player_id', 'match_id', 'minutes_played', 'goals', 'assists',
       'total_shots', 'xg', 'xa', 'xgot', 'shots_on_target',
       'successful_dribbles', 'big_chances_missed', 'touches_opposition_box',
       'touches', 'accurate_passes', 'chances_created', 'final_third_passes',
       'accurate_crosses', 'accurate_long_balls', 'tackles_won',
       'interceptions', 'recoveries', 'blocks', 'clearances',
       'headed_clearances', 'dribbled_past', 'duels_won', 'duels_lost',
       'ground_duels_won', 'aerial_duels_won', 'was_fouled', 'fouls_committed',
       'saves', 'goals_conceded', 'xgot_faced', 'goals_prevented',
       'sweeper_actions', 'gk_accurate_passes', 'gk_accurate_long_balls',
       'offsides', 'high_claim', 'tackles', 'accurate_passes_percent',
       'accurate_crosses_percent', 'accurate_long_balls_percent',
       'ground_duels_won_percent', 'aerial_duels_won_percent',
       'successful_dribbles_percent', 'tackles_won_percent', 'start_min',
       'finish_min'

In [None]:
playermatchstats.head()

Unnamed: 0,player_id,match_id,minutes_played,goals,assists,total_shots,xg,xa,xgot,shots_on_target,successful_dribbles,big_chances_missed,touches_opposition_box,touches,accurate_passes,chances_created,final_third_passes,accurate_crosses,accurate_long_balls,tackles_won,interceptions,recoveries,blocks,clearances,headed_clearances,dribbled_past,duels_won,duels_lost,ground_duels_won,aerial_duels_won,was_fouled,fouls_committed,saves,goals_conceded,xgot_faced,goals_prevented,sweeper_actions,gk_accurate_passes,gk_accurate_long_balls,offsides,high_claim,tackles,accurate_passes_percent,accurate_crosses_percent,accurate_long_balls_percent,ground_duels_won_percent,aerial_duels_won_percent,successful_dribbles_percent,tackles_won_percent,start_min,finish_min,team_goals_conceded,penalties_scored,penalties_missed,season,gw
0,17,24-25-prem-arsenal-vs-wolverhampton-wanderers,80,1,1,5,0.35,0.37,0.5,3,0,1,15,44,18,5.0,1,2,0,2,0,4,0,1,0,0,4,5,4,0,2,2,0,0,0.0,0.0,0,0,0,0,0,2,90.0,40.0,0.0,44.0,0.0,0.0,100.0,0,80,0,0,0,2024-2025,1
1,4,24-25-prem-arsenal-vs-wolverhampton-wanderers,90,1,1,5,0.45,0.04,0.73,1,1,0,10,44,14,2.0,2,0,1,1,1,1,0,2,0,2,8,11,5,3,3,2,0,0,0.0,0.0,0,0,0,0,0,1,78.0,0.0,100.0,45.0,38.0,33.0,100.0,0,90,0,0,0,2024-2025,1
2,15,24-25-prem-arsenal-vs-wolverhampton-wanderers,90,0,0,0,0.0,0.0,0.0,0,0,0,0,41,23,0.0,6,0,5,0,0,8,0,0,0,0,0,0,0,0,0,0,3,0,0.19,0.19,0,23,5,0,1,0,72.0,0.0,36.0,0.0,0.0,0.0,0.0,0,90,0,0,0,2024-2025,1
3,20,24-25-prem-arsenal-vs-wolverhampton-wanderers,90,0,0,1,0.06,0.06,0.0,0,0,0,0,50,33,4.0,1,0,0,2,0,3,0,3,0,0,6,4,6,0,3,3,0,0,0.0,0.0,0,0,0,0,0,3,89.0,0.0,0.0,60.0,0.0,0.0,67.0,0,90,0,0,0,2024-2025,1
4,9,24-25-prem-arsenal-vs-wolverhampton-wanderers,90,0,0,1,0.08,0.19,0.0,0,2,0,11,36,17,3.0,0,1,0,1,0,3,0,0,0,2,9,6,7,2,3,1,0,0,0.0,0.0,0,0,0,0,0,2,81.0,50.0,0.0,58.0,67.0,50.0,50.0,0,90,0,0,0,2024-2025,1


In [None]:
# Collect the 2025-2026 per-gameweek playerstats.csv files, tagging each
# frame with its season and gameweek.
gws = np.arange(1, 39)
dfs25_26 = []
for gw in gws:
  path = f"/content/FPL-Core-Insights/data/2025-2026/By Gameweek/GW{gw}/playerstats.csv"
  try:
    frame = pd.read_csv(path)
  except (FileNotFoundError, pd.errors.EmptyDataError) as e:
    # Gameweeks without data are reported and skipped, in line with the
    # other 2025-2026 loaders in this notebook, instead of aborting the cell.
    print(f"path: {path} | exception: {e}")
    continue
  frame = frame.assign(season="2025-2026",
                  gw=gw)
  dfs25_26.append(frame)


In [None]:
# 2024-2025 player stats come from one season-level CSV; the 2025-2026 frames
# were collected per gameweek in `dfs25_26` and are stacked here.
playerstats_2425 = pd.read_csv("/content/FPL-Core-Insights/data/2024-2025/playerstats/playerstats.csv").assign(season="2024-2025")
playerstats_2526 = pd.concat(dfs25_26, axis=0, ignore_index=True)
# Show the column counts of both seasons before aligning their schemas.
len(playerstats_2425.columns), len(playerstats_2526.columns)

(59, 88)

In [None]:
# Align both seasons on the 2024-2025 schema, then stack them with a fresh index.
season_frames = [playerstats_2425, playerstats_2526[playerstats_2425.columns]]
playerstats = pd.concat(season_frames, axis=0, ignore_index=True)[playerstats_2425.columns]

In [None]:
playerstats.head()

Unnamed: 0,id,status,chance_of_playing_next_round,chance_of_playing_this_round,now_cost,now_cost_rank,now_cost_rank_type,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,selected_by_percent,selected_rank,selected_rank_type,total_points,event_points,points_per_game,points_per_game_rank,points_per_game_rank_type,bonus,bps,form,form_rank,form_rank_type,value_form,value_season,dreamteam_count,transfers_in,transfers_in_event,transfers_out,transfers_out_event,ep_next,ep_this,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,expected_goals_per_90,expected_assists_per_90,expected_goal_involvements_per_90,expected_goals_conceded_per_90,influence,influence_rank,influence_rank_type,creativity,creativity_rank,creativity_rank_type,threat,threat_rank,threat_rank_type,ict_index,ict_index_rank,ict_index_rank_type,corners_and_indirect_freekicks_order,direct_freekicks_order,penalties_order,gw,set_piece_threat,season
0,5,u,0.0,0.0,4.0,645,55.0,0,0.0,0,0.0,0.0,602,69.0,0,0,0.0,573.0,63.0,0,0,0.0,464.0,53.0,0.0,0.0,0.0,0,0,4399,20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,566.0,63.0,0.0,549.0,45.0,0.0,512.0,31.0,0.0,566.0,63.0,,,,23,,2024-2025
1,6,a,100.0,100.0,5.6,100,10.0,0,0.0,1,-1.0,4.4,83,30.0,72,5,3.6,83.0,18.0,5,274,2.0,178.0,56.0,0.4,12.9,2.0,1220019,9591,879735,49584,2.0,2.5,0.71,1.01,1.72,14.7,0.04,0.05,0.09,0.79,270.2,168.0,62.0,220.1,122.0,25.0,172.0,130.0,14.0,66.0,136.0,35.0,,,,23,,2024-2025
2,7,a,100.0,100.0,4.7,323,209.0,0,0.0,-3,3.0,0.2,342,109.0,13,0,1.3,372.0,173.0,0,82,0.5,328.0,155.0,0.1,2.8,0.0,28956,538,46162,674,0.5,1.0,0.04,0.47,0.51,4.27,0.01,0.09,0.1,0.84,35.8,398.0,179.0,64.7,280.0,171.0,3.0,421.0,221.0,10.4,384.0,187.0,,,,23,,2024-2025
3,8,a,,,4.8,314,40.0,0,0.0,-2,2.0,0.2,383,132.0,13,0,1.6,327.0,106.0,0,39,0.0,685.0,241.0,0.0,2.7,0.0,25166,184,35333,549,0.0,0.5,0.0,0.03,0.03,4.45,0.0,0.01,0.01,1.24,49.2,383.0,133.0,13.7,392.0,135.0,0.0,707.0,249.0,6.3,415.0,139.0,,,,23,,2024-2025
4,9,a,100.0,100.0,6.7,38,19.0,0,0.0,-3,3.0,3.4,97,29.0,84,3,4.0,56.0,29.0,6,268,3.0,119.0,51.0,0.4,12.5,1.0,1099050,10248,1081194,41041,3.0,3.5,3.87,2.9,6.77,13.94,0.26,0.19,0.45,0.92,366.2,104.0,38.0,334.6,65.0,49.0,547.0,26.0,14.0,124.7,37.0,25.0,,,,23,,2024-2025


In [None]:
playerstats.shape

(47695, 59)

In [None]:
# Player master data for each season, tagged with the season it belongs to.
# The two seasons keep their file in different locations in the repository.
players_2425  = pd.read_csv("/content/FPL-Core-Insights/data/2024-2025/players/players.csv").assign(season="2024-2025")
players_2526  = pd.read_csv("/content/FPL-Core-Insights/data/2025-2026/players.csv").assign(season="2025-2026")
# Show the column counts of both seasons before stacking them.
len(players_2425.columns), len(players_2526.columns)

(8, 8)

In [None]:
players = pd.concat([players_2425, players_2526], axis=0, ignore_index=True)


In [None]:
players.head()

Unnamed: 0,player_code,player_id,first_name,second_name,web_name,team_code,position,season
0,438098,1,F치bio,Ferreira Vieira,F치bio Vieira,3,Midfielder,2024-2025
1,100051017,735,Mikel,Arteta,Arteta,3,Unknown,2024-2025
2,616059,756,Jack,Porter,Porter,3,Goalkeeper,2024-2025
3,538182,790,Jimi,Gower,Gower,3,Midfielder,2024-2025
4,514307,793,Jack,Henry-Francis,Jack Henry-Francis,3,Midfielder,2024-2025


In [None]:
players.columns

Index(['player_code', 'player_id', 'first_name', 'second_name', 'web_name',
       'team_code', 'position', 'season'],
      dtype='object')

In [None]:
gws = np.arange(1, 39)
dfs = []

# For every gameweek, try the 2024-2025 file first and the 2025-2026 file
# second; gameweeks whose file does not exist are skipped silently.
# `cols2425` / `cols2526` end up holding the columns of the last frame that
# was successfully read for the respective season.
for gw in gws:
    season_sources = (
        ("2024-2025", f"/content/FPL-Core-Insights/data/2024-2025/matches/GW{gw}/matches.csv"),
        ("2025-2026", f"/content/FPL-Core-Insights/data/2025-2026/By Gameweek/GW{gw}/matches.csv"),
    )
    for season_label, path in season_sources:
        try:
            frame = pd.read_csv(path)
        except FileNotFoundError:
            continue
        frame = frame.assign(season=season_label, gw=gw)
        dfs.append(frame)
        if season_label == "2024-2025":
            cols2425 = frame.columns
        else:
            cols2526 = frame.columns

In [None]:
len(cols2526), len(cols2425)

(117, 104)

In [None]:
# Stack every per-gameweek match frame and keep only the 2024-2025 schema
# (`cols2425`); columns that exist only in the 2025-2026 files are dropped.
matches = pd.concat(dfs, axis=0, ignore_index=True)[cols2425]

In [None]:
matches.head()

Unnamed: 0,gameweek,kickoff_time,home_team,home_team_elo,home_score,away_score,away_team,away_team_elo,finished,match_id,match_url,home_possession,away_possession,home_expected_goals_xg,away_expected_goals_xg,home_total_shots,away_total_shots,home_shots_on_target,away_shots_on_target,home_big_chances,away_big_chances,home_big_chances_missed,away_big_chances_missed,home_accurate_passes,away_accurate_passes,home_accurate_passes_pct,away_accurate_passes_pct,home_fouls_committed,away_fouls_committed,home_corners,away_corners,home_xg_open_play,away_xg_open_play,home_xg_set_play,away_xg_set_play,home_non_penalty_xg,away_non_penalty_xg,home_xg_on_target_xgot,away_xg_on_target_xgot,home_shots_off_target,away_shots_off_target,home_blocked_shots,away_blocked_shots,home_hit_woodwork,away_hit_woodwork,home_shots_inside_box,away_shots_inside_box,home_shots_outside_box,away_shots_outside_box,home_passes,away_passes,home_own_half,away_own_half,home_opposition_half,away_opposition_half,home_accurate_long_balls,away_accurate_long_balls,home_accurate_long_balls_pct,away_accurate_long_balls_pct,home_accurate_crosses,away_accurate_crosses,home_accurate_crosses_pct,away_accurate_crosses_pct,home_throws,away_throws,home_touches_in_opposition_box,away_touches_in_opposition_box,home_offsides,away_offsides,home_yellow_cards,away_yellow_cards,home_red_cards,away_red_cards,home_tackles_won,away_tackles_won,home_tackles_won_pct,away_tackles_won_pct,home_interceptions,away_interceptions,home_blocks,away_blocks,home_clearances,away_clearances,home_keeper_saves,away_keeper_saves,home_duels_won,away_duels_won,home_ground_duels_won,away_ground_duels_won,home_ground_duels_won_pct,away_ground_duels_won_pct,home_aerial_duels_won,away_aerial_duels_won,home_aerial_duels_won_pct,away_aerial_duels_won_pct,home_successful_dribbles,away_successful_dribbles,home_successful_dribbles_pct,away_successful_dribbles_pct,fotmob_id,stats_processed,player_stats_processed,season,gw
0,1.0,2024-08-17 14:00:00,3.0,1946.9,2.0,0.0,39.0,1677.86,True,24-25-prem-arsenal-vs-wolverhampton-wanderers,/matches/wolverhampton-wanderers-vs-arsenal/2t...,53.0,47.0,1.24,0.47,18.0,9.0,6.0,3.0,2.0,1.0,1.0,1.0,357.0,307.0,85.0,82.0,17.0,14.0,8.0,2.0,0.97,0.4,0.26,0.07,1.24,0.47,1.7,0.19,6.0,5.0,6.0,1.0,0.0,0.0,12.0,6.0,6.0,3.0,420.0,375.0,188.0,160.0,169.0,147.0,17.0,20.0,46.0,41.0,3.0,4.0,20.0,29.0,18.0,15.0,50.0,18.0,0.0,1.0,2.0,2.0,0.0,0.0,13.0,12.0,72.0,63.0,8.0,7.0,1.0,6.0,15.0,14.0,3.0,4.0,58.0,57.0,41.0,44.0,48.0,52.0,17.0,13.0,57.0,43.0,9.0,8.0,47.0,50.0,4506265.0,True,False,2024-2025,1
1,1.0,2024-08-18 13:00:00,94.0,1711.08,2.0,1.0,31.0,1759.71,True,24-25-prem-brentford-vs-crystal-palace,/matches/crystal-palace-vs-brentford/38a4jj#45...,46.0,54.0,1.59,1.17,9.0,14.0,5.0,6.0,3.0,1.0,2.0,1.0,311.0,389.0,77.0,81.0,6.0,15.0,4.0,7.0,1.54,1.01,0.05,0.16,1.59,1.17,3.11,1.02,3.0,4.0,1.0,4.0,0.0,0.0,8.0,6.0,1.0,8.0,403.0,481.0,198.0,153.0,113.0,236.0,35.0,16.0,56.0,30.0,4.0,1.0,24.0,7.0,14.0,18.0,13.0,21.0,1.0,3.0,1.0,5.0,0.0,0.0,10.0,13.0,53.0,59.0,11.0,5.0,4.0,1.0,21.0,29.0,6.0,3.0,58.0,45.0,39.0,31.0,56.0,44.0,19.0,14.0,58.0,42.0,5.0,3.0,38.0,18.0,4506270.0,True,False,2024-2025,1
2,1.0,2024-08-18 15:30:00,8.0,1810.12,0.0,2.0,43.0,2050.57,True,24-25-prem-chelsea-vs-manchester-city,/matches/chelsea-vs-manchester-city/2d55kw#450...,48.0,52.0,1.01,0.77,10.0,11.0,3.0,5.0,1.0,1.0,1.0,0.0,436.0,477.0,89.0,89.0,12.0,9.0,4.0,3.0,0.97,0.74,0.04,0.03,1.01,0.77,0.7,0.82,3.0,3.0,4.0,3.0,0.0,0.0,6.0,6.0,4.0,5.0,490.0,535.0,215.0,267.0,221.0,210.0,17.0,17.0,49.0,49.0,6.0,1.0,33.0,9.0,17.0,18.0,26.0,27.0,2.0,1.0,1.0,1.0,0.0,0.0,14.0,9.0,70.0,56.0,10.0,10.0,3.0,4.0,13.0,17.0,3.0,3.0,36.0,48.0,33.0,43.0,43.0,57.0,3.0,5.0,38.0,63.0,4.0,15.0,36.0,65.0,4506271.0,True,False,2024-2025,1
3,1.0,2024-08-17 14:00:00,11.0,1706.85,0.0,3.0,36.0,1713.16,True,24-25-prem-everton-vs-brighton-&-hove-albion,/matches/everton-vs-brighton-hove-albion/2y16f...,38.0,62.0,0.45,1.43,9.0,10.0,1.0,5.0,0.0,2.0,0.0,1.0,266.0,492.0,77.0,86.0,8.0,8.0,1.0,5.0,0.45,1.32,0.01,0.11,0.45,1.43,0.05,2.37,4.0,3.0,4.0,2.0,0.0,1.0,6.0,9.0,3.0,1.0,344.0,575.0,153.0,330.0,113.0,162.0,45.0,20.0,55.0,47.0,4.0,4.0,24.0,29.0,27.0,14.0,18.0,29.0,7.0,1.0,1.0,1.0,1.0,0.0,11.0,13.0,48.0,62.0,13.0,8.0,2.0,4.0,11.0,19.0,2.0,1.0,54.0,48.0,36.0,35.0,51.0,49.0,18.0,13.0,58.0,42.0,5.0,6.0,31.0,33.0,4506266.0,True,False,2024-2025,1
4,1.0,2024-08-17 11:30:00,40.0,1568.33,0.0,2.0,14.0,1900.69,True,24-25-prem-ipswich-town-vs-liverpool,/matches/liverpool-vs-ipswich-town/2ugv0q#4506264,38.0,62.0,0.45,2.65,7.0,18.0,2.0,5.0,0.0,6.0,0.0,4.0,272.0,492.0,78.0,86.0,9.0,18.0,2.0,10.0,0.17,2.59,0.28,0.06,0.45,2.65,0.24,2.35,2.0,6.0,3.0,7.0,0.0,0.0,5.0,12.0,2.0,6.0,347.0,570.0,173.0,281.0,99.0,211.0,19.0,30.0,40.0,54.0,2.0,4.0,25.0,17.0,14.0,24.0,17.0,50.0,5.0,0.0,3.0,1.0,0.0,0.0,10.0,4.0,50.0,44.0,9.0,5.0,6.0,2.0,28.0,16.0,3.0,2.0,51.0,46.0,40.0,28.0,59.0,41.0,11.0,18.0,38.0,62.0,3.0,11.0,38.0,50.0,4506264.0,True,False,2024-2025,1


In [None]:
matches.shape

(872, 104)

In [None]:
# Remove every column whose name mentions a side of the fixture. Besides the
# home_*/away_* match statistics this also removes home_team, away_team,
# home_score and away_score.
side_markers = ("away", "home")
cols_to_drop = [name for name in matches.columns
                if any(marker in name for marker in side_markers)]

matches.drop(columns=cols_to_drop, inplace=True)

In [None]:
players.head()

Unnamed: 0,player_code,player_id,first_name,second_name,web_name,team_code,position,season
0,438098,1,F치bio,Ferreira Vieira,F치bio Vieira,3,Midfielder,2024-2025
1,100051017,735,Mikel,Arteta,Arteta,3,Unknown,2024-2025
2,616059,756,Jack,Porter,Porter,3,Goalkeeper,2024-2025
3,538182,790,Jimi,Gower,Gower,3,Midfielder,2024-2025
4,514307,793,Jack,Henry-Francis,Jack Henry-Francis,3,Midfielder,2024-2025


In [None]:
teams2425 = pd.read_csv("/content/FPL-Core-Insights/data/2024-2025/teams/teams.csv")
teams2425.head()

Unnamed: 0,code,id,name,short_name,strength,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id,elo
0,3,1,Arsenal,ARS,5,1350,1350,1390,1400,1310,1300,1,1991
1,7,2,Aston Villa,AVL,3,1145,1240,1130,1180,1160,1300,2,1870
2,91,3,Bournemouth,BOU,3,1170,1200,1120,1180,1220,1220,127,1806
3,94,4,Brentford,BRE,3,1130,1180,1100,1100,1160,1260,130,1809
4,36,5,Brighton,BHA,3,1140,1165,1090,1140,1190,1190,131,1825


In [None]:
teams2526 = pd.read_csv("/content/FPL-Core-Insights/data/2025-2026/teams.csv")
teams2526.head()

Unnamed: 0,code,id,name,short_name,strength,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id,elo,fotmob_name
0,3,1,Arsenal,ARS,5,1300,1375,1340,1400,1260,1350,1,2065,Arsenal
1,7,2,Aston Villa,AVL,3,1145,1185,1150,1170,1140,1200,2,1929,Aston Villa
2,90,3,Burnley,BUR,3,1055,1095,1010,1090,1100,1100,43,1685,Burnley
3,91,4,Bournemouth,BOU,3,1150,1220,1100,1240,1200,1200,127,1835,AFC Bournemouth
4,94,5,Brentford,BRE,3,1135,1175,1100,1110,1170,1240,130,1853,Brentford


In [None]:
len(teams2425.columns), len(teams2526.columns)

(13, 14)

In [None]:
# Stack both seasons' team tables and keep only the 2024-2025 columns (this
# drops the 2025-2026-only `fotmob_name`). The index is not reset and no
# season column is added, so rows of the two seasons are not distinguishable.
teams = pd.concat([teams2425, teams2526], axis=0)[teams2425.columns]

In [None]:
teams.head()

Unnamed: 0,code,id,name,short_name,strength,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id,elo
0,3,1,Arsenal,ARS,5,1350,1350,1390,1400,1310,1300,1,1991
1,7,2,Aston Villa,AVL,3,1145,1240,1130,1180,1160,1300,2,1870
2,91,3,Bournemouth,BOU,3,1170,1200,1120,1180,1220,1220,127,1806
3,94,4,Brentford,BRE,3,1130,1180,1100,1100,1160,1260,130,1809
4,36,5,Brighton,BHA,3,1140,1165,1090,1140,1190,1190,131,1825


In [None]:
# Map team `id` -> team name from the stacked two-season table.
# NOTE(review): `id` is not stable across seasons (id 3 is Bournemouth in the
# 2024-2025 file and Burnley in the 2025-2026 file) and dict() keeps the last
# value per key, so for ids present in both seasons the 2025-2026 name wins.
# `code` looks like the season-independent key — confirm which one is intended.
teams_map = dict(zip(teams["id"], teams["name"]))
teams_map

{1: 'Arsenal',
 2: 'Aston Villa',
 3: 'Burnley',
 4: 'Bournemouth',
 5: 'Brentford',
 6: 'Brighton',
 7: 'Chelsea',
 8: 'Crystal Palace',
 9: 'Everton',
 10: 'Fulham',
 11: 'Leeds',
 12: 'Liverpool',
 13: 'Man City',
 14: 'Man Utd',
 15: 'Newcastle',
 16: "Nott'm Forest",
 17: 'Sunderland',
 18: 'Spurs',
 19: 'West Ham',
 20: 'Wolves'}

In [None]:
# Step 1: attach match-level information to every player-match row.
match_keys = ["match_id", "season", "gw"]
data_step1 = playermatchstats.merge(matches, on=match_keys, how="left")

# Step 2: attach the FPL player stats of the same season and gameweek. The
# player key is called `player_id` on the left and `id` on the right, so the
# redundant right-hand `id` column is removed afterwards.
data = (
    data_step1
    .merge(
        playerstats,
        left_on=["player_id", "season", "gw"],
        right_on=["id", "season", "gw"],
        how="left",
    )
    .drop(columns=["id"])
)


In [None]:
# Lookup tables from player_id to display name and to position.
# NOTE(review): `players` holds both seasons and the keys are player_id only.
# The same player_id can denote different players in different seasons
# (id 1 is Fábio Vieira in the 2024-2025 file), and dict() keeps the last
# value per key, so the 2025-2026 entry wins for ids present in both seasons.
player_names = dict(zip(players["player_id"], players["web_name"]))
positions_map = dict(zip(players["player_id"], players["position"]))


In [None]:
# Attach each row's display name and position by (season, player_id).
# player_id alone is not unique across seasons in `players` (id 1 is a
# different player in the 2024-2025 and 2025-2026 files), so a lookup keyed
# only on player_id labels 2024-2025 rows with 2025-2026 players.
player_lookup = (
    players[["season", "player_id", "web_name", "position"]]
    # Guard against duplicate keys so the left join cannot multiply rows.
    .drop_duplicates(subset=["season", "player_id"], keep="last")
    .rename(columns={"web_name": "player"})
)
data = (
    data
    # Dropping first keeps the cell re-runnable without _x/_y suffixes.
    .drop(columns=["player", "position"], errors="ignore")
    .merge(player_lookup, on=["season", "player_id"], how="left", validate="many_to_one")
)


In [None]:
data.head()

Unnamed: 0,player_id,match_id,minutes_played,goals,assists,total_shots,xg,xa,xgot,shots_on_target,successful_dribbles,big_chances_missed,touches_opposition_box,touches,accurate_passes,chances_created,final_third_passes,accurate_crosses,accurate_long_balls,tackles_won,interceptions,recoveries,blocks,clearances,headed_clearances,dribbled_past,duels_won,duels_lost,ground_duels_won,aerial_duels_won,was_fouled,fouls_committed,saves,goals_conceded,xgot_faced,goals_prevented,sweeper_actions,gk_accurate_passes,gk_accurate_long_balls,offsides,high_claim,tackles,accurate_passes_percent,accurate_crosses_percent,accurate_long_balls_percent,ground_duels_won_percent,aerial_duels_won_percent,successful_dribbles_percent,tackles_won_percent,start_min,finish_min,team_goals_conceded,penalties_scored,penalties_missed,season,gw,gameweek,kickoff_time,finished,match_url,fotmob_id,stats_processed,player_stats_processed,status,chance_of_playing_next_round,chance_of_playing_this_round,now_cost,now_cost_rank,now_cost_rank_type,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,selected_by_percent,selected_rank,selected_rank_type,total_points,event_points,points_per_game,points_per_game_rank,points_per_game_rank_type,bonus,bps,form,form_rank,form_rank_type,value_form,value_season,dreamteam_count,transfers_in,transfers_in_event,transfers_out,transfers_out_event,ep_next,ep_this,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,expected_goals_per_90,expected_assists_per_90,expected_goal_involvements_per_90,expected_goals_conceded_per_90,influence,influence_rank,influence_rank_type,creativity,creativity_rank,creativity_rank_type,threat,threat_rank,threat_rank_type,ict_index,ict_index_rank,ict_index_rank_type,corners_and_indirect_freekicks_order,direct_freekicks_order,penalties_order,set_piece_threat,player,position
0,17,24-25-prem-arsenal-vs-wolverhampton-wanderers,80,1,1,5,0.35,0.37,0.5,3,0,1,15,44,18,5.0,1,2,0,2,0,4,0,1,0,0,4,5,4,0,2,2,0,0,0.0,0.0,0,0,0,0,0,2,90.0,40.0,0.0,44.0,0.0,0.0,100.0,0,80,0,0,0,2024-2025,1,1.0,2024-08-17 14:00:00,True,/matches/wolverhampton-wanderers-vs-arsenal/2t...,4506265.0,True,False,a,,,10.0,5,4.0,0,0.0,0,0.0,33.0,9,4.0,12,12,12.0,4.0,2.0,2,42,12.0,4.0,2.0,1.2,1.2,1.0,113127,113127,28246,28246,4.3,4.3,0.35,0.37,0.72,0.32,0.4,0.42,0.82,0.36,64.2,1.0,1.0,61.7,2.0,2.0,78.0,2.0,2.0,20.4,1.0,1.0,1.0,,1.0,66.666667,칒degaard,Midfielder
1,4,24-25-prem-arsenal-vs-wolverhampton-wanderers,90,1,1,5,0.45,0.04,0.73,1,1,0,10,44,14,2.0,2,0,1,1,1,1,0,2,0,2,8,11,5,3,3,2,0,0,0.0,0.0,0,0,0,0,0,1,78.0,0.0,100.0,45.0,38.0,33.0,100.0,0,90,0,0,0,2024-2025,1,1.0,2024-08-17 14:00:00,True,/matches/wolverhampton-wanderers-vs-arsenal/2t...,4506265.0,True,False,a,,,8.0,12,4.0,0,0.0,0,0.0,14.8,29,6.0,12,12,12.0,3.0,2.0,3,48,12.0,3.0,2.0,1.5,1.5,1.0,150225,150225,22210,22210,3.5,3.5,0.45,0.04,0.49,0.47,0.45,0.04,0.49,0.47,54.8,5.0,2.0,24.1,37.0,2.0,46.0,9.0,3.0,12.5,5.0,1.0,,,3.0,13.333333,Setford,Goalkeeper
2,15,24-25-prem-arsenal-vs-wolverhampton-wanderers,90,0,0,0,0.0,0.0,0.0,0,0,0,0,41,23,0.0,6,0,5,0,0,8,0,0,0,0,0,0,0,0,0,0,3,0,0.19,0.19,0,23,5,0,1,0,72.0,0.0,36.0,0.0,0.0,0.0,0.0,0,90,0,0,0,2024-2025,1,1.0,2024-08-17 14:00:00,True,/matches/wolverhampton-wanderers-vs-arsenal/2t...,4506265.0,True,False,a,,,5.5,119,4.0,0,0.0,0,0.0,25.1,14,1.0,8,8,8.0,17.0,3.0,1,28,8.0,17.0,3.0,1.5,1.5,0.0,33276,33276,33135,33135,5.0,5.0,0.0,0.0,0.0,0.47,0.0,0.0,0.0,0.47,28.0,38.0,7.0,0.0,574.0,66.0,0.0,559.0,66.0,2.8,131.0,7.0,,,,0.0,Nichols,Defender
3,20,24-25-prem-arsenal-vs-wolverhampton-wanderers,90,0,0,1,0.06,0.06,0.0,0,0,0,0,50,33,4.0,1,0,0,2,0,3,0,3,0,0,6,4,6,0,3,3,0,0,0.0,0.0,0,0,0,0,0,3,89.0,0.0,0.0,60.0,0.0,0.0,67.0,0,90,0,0,0,2024-2025,1,1.0,2024-08-17 14:00:00,True,/matches/wolverhampton-wanderers-vs-arsenal/2t...,4506265.0,True,False,a,,,5.0,206,131.0,0,0.0,0,0.0,0.1,422,151.0,3,3,3.0,71.0,25.0,0,16,3.0,71.0,25.0,0.6,0.6,0.0,2096,2096,901,901,2.6,2.6,0.06,0.06,0.12,0.47,0.06,0.06,0.12,0.47,14.4,111.0,38.0,43.6,8.0,6.0,1.0,191.0,106.0,5.9,42.0,33.0,,,,0.0,Trossard,Midfielder
4,9,24-25-prem-arsenal-vs-wolverhampton-wanderers,90,0,0,1,0.08,0.19,0.0,0,2,0,11,36,17,3.0,0,1,0,1,0,3,0,0,0,2,9,6,7,2,3,1,0,0,0.0,0.0,0,0,0,0,0,2,81.0,50.0,0.0,58.0,67.0,50.0,50.0,0,90,0,0,0,2024-2025,1,1.0,2024-08-17 14:00:00,True,/matches/wolverhampton-wanderers-vs-arsenal/2t...,4506265.0,True,False,a,,,7.0,33,18.0,0,0.0,0,0.0,3.0,104,35.0,3,3,3.0,84.0,38.0,0,16,3.0,84.0,38.0,0.4,0.4,0.0,10245,10245,14140,14140,3.5,3.5,0.08,0.19,0.27,0.47,0.08,0.19,0.27,0.47,13.6,123.0,42.0,36.7,19.0,17.0,25.0,33.0,23.0,7.5,25.0,19.0,,,,0.0,Kiwior,Defender


In [None]:
data.shape

(21156, 121)

In [None]:
def dtypes_conv(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with explicit column dtypes.

    * The identifier/label columns ``match_id``, ``match_url``, ``status``,
      ``player``, ``season`` and ``position`` are cast to pandas ``string``
      dtype; any of them that is absent from ``df`` is ignored.
    * ``kickoff_time``, when present, is parsed with ``pd.to_datetime``.
    * Every remaining column is passed through ``pd.to_numeric``.

    Args:
        df: Input frame; it is not modified.

    Returns:
        A new DataFrame with the same columns and converted dtypes.

    Raises:
        ValueError: Propagated from ``pd.to_numeric`` / ``pd.to_datetime``
            when a value cannot be parsed.
    """
    df = df.copy()
    expected_str_cols = ["match_id", "match_url", "status", "player", "season", "position"]
    # Only convert the label columns that actually exist, so the helper also
    # works after some of them have been dropped.
    str_cols = [c for c in expected_str_cols if c in df.columns]
    if str_cols:
        df[str_cols] = df[str_cols].astype("string")

    if "kickoff_time" in df.columns:
        df["kickoff_time"] = pd.to_datetime(df["kickoff_time"], format='mixed', dayfirst=False)

    numeric_cols = [c for c in df.columns if c not in str_cols and c != "kickoff_time"]
    if numeric_cols:
        df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric)

    return df

In [None]:
def sort_data(df:pd.DataFrame)->pd.DataFrame:
  """Return ``df`` ordered by player_id, kickoff_time, gw and season (ascending).

  The input frame is left untouched; the original index labels are kept.
  """
  sort_keys = ["player_id", "kickoff_time", "gw", "season"]
  ordered = df.sort_values(by=sort_keys)
  return ordered

In [None]:
def get_competition(match_id):
    """Return the competition name encoded in a match identifier.

    The identifier is searched for known substrings in a fixed order; the
    first one found decides the result, and 'Other' is returned when none
    of them occurs.
    """
    # Order matters: the more specific markers are checked before 'prem'.
    markers = (
        ('champions-league', 'Champions League'),
        ('europa-league', 'Europa League'),
        ('conference-league', 'Conference League'),
        ('prem', 'Premier League'),
    )
    for marker, competition in markers:
        if marker in match_id:
            return competition
    return 'Other'

In [None]:
data[["player_id", "kickoff_time", "gw", "season"]].isna().sum()

Unnamed: 0,0
player_id,0
kickoff_time,0
gw,0
season,0


In [None]:
data = dtypes_conv(data)

In [None]:
data.shape

(21156, 121)

In [None]:
# Keep only Premier League rows (decided from match_id) and drop `gameweek`,
# which sits alongside the `gw` column added while loading.
competition = data["match_id"].apply(get_competition)
data = data.loc[competition == "Premier League"].drop(columns=["gameweek"])
data.shape

(19312, 120)

In [None]:
# Reorder the columns by dtype: string columns first, then datetime columns,
# then every remaining column. Rows and index are unchanged because the three
# column slices of the same frame are put back together side by side.
data = pd.concat([
    data.select_dtypes(include="string"),
    data.select_dtypes(include="datetime"),
    data.select_dtypes(exclude=["string", "datetime"])], axis=1)

In [None]:
# Remove match identifiers/URLs and the upstream processing flags from the
# exported table. Note that this reuses the `cols_to_drop` name from the
# earlier matches cell.
cols_to_drop=["match_id", "match_url", "fotmob_id", "stats_processed", "player_stats_processed"]
data.drop(columns=cols_to_drop, inplace=True)


In [None]:
# Move `gw` so that it becomes the sixth column (position 5).
cols = [name for name in data.columns if name != 'gw']
cols.insert(5, "gw")
data = data[cols]

In [None]:
data = sort_data(data)
data.head()

Unnamed: 0,season,status,player,position,kickoff_time,gw,player_id,minutes_played,goals,assists,total_shots,xg,xa,xgot,shots_on_target,successful_dribbles,big_chances_missed,touches_opposition_box,touches,accurate_passes,chances_created,final_third_passes,accurate_crosses,accurate_long_balls,tackles_won,interceptions,recoveries,blocks,clearances,headed_clearances,dribbled_past,duels_won,duels_lost,ground_duels_won,aerial_duels_won,was_fouled,fouls_committed,saves,goals_conceded,xgot_faced,goals_prevented,sweeper_actions,gk_accurate_passes,gk_accurate_long_balls,offsides,high_claim,tackles,accurate_passes_percent,accurate_crosses_percent,accurate_long_balls_percent,ground_duels_won_percent,aerial_duels_won_percent,successful_dribbles_percent,tackles_won_percent,start_min,finish_min,team_goals_conceded,penalties_scored,penalties_missed,finished,chance_of_playing_next_round,chance_of_playing_this_round,now_cost,now_cost_rank,now_cost_rank_type,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,selected_by_percent,selected_rank,selected_rank_type,total_points,event_points,points_per_game,points_per_game_rank,points_per_game_rank_type,bonus,bps,form,form_rank,form_rank_type,value_form,value_season,dreamteam_count,transfers_in,transfers_in_event,transfers_out,transfers_out_event,ep_next,ep_this,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,expected_goals_per_90,expected_assists_per_90,expected_goal_involvements_per_90,expected_goals_conceded_per_90,influence,influence_rank,influence_rank_type,creativity,creativity_rank,creativity_rank_type,threat,threat_rank,threat_rank_type,ict_index,ict_index_rank,ict_index_rank_type,corners_and_indirect_freekicks_order,direct_freekicks_order,penalties_order,set_piece_threat
11614,2025-2026,a,Raya,Goalkeeper,2025-08-17 15:30:00,1,1,90,0,0,0,0.0,0.0,0.0,0,0,0,0,47,13,0.0,7,0,5,0,0,13,0,1,0,0,1,0,0,1,0,0,7,0,0.75,0.75,0,13,5,0,2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,90,0,0,0,True,,,6.0,89,1.0,1,-1.0,5,-5.0,36.9,5,1.0,10,10,4.4,56.0,4.0,3,38,3.2,116.0,8.0,0.5,11.0,0.0,3072169,418466,1125554,56691,4.2,4.2,0.0,0.0,0.0,1.52,0.0,0.0,0.0,0.64,49.2,117.0,16.0,0.0,364.0,3.0,0.0,749.0,88.0,4.9,252.0,16.0,,,,
11875,2025-2026,a,Raya,Goalkeeper,2025-08-23 16:30:00,2,1,90,0,0,0,0.0,0.0,0.0,0,0,0,0,34,28,0.0,4,0,6,0,0,3,0,0,0,0,0,0,0,0,0,0,1,0,0.14,0.14,0,28,6,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,90,0,0,0,True,0.0,0.0,5.5,130,,0,,0,,21.5,18,,16,6,,,,3,66,8.0,,,1.5,2.9,,354674,77335,120510,61830,7.5,9.0,0.0,0.0,0.0,1.69,,,,,62.6,,,0.0,,,0.0,,,6.2,,,,,,
12366,2025-2026,a,Raya,Goalkeeper,2025-08-31 15:30:00,3,1,90,0,0,0,0.0,0.02,0.0,0,0,0,0,55,29,1.0,18,0,15,0,0,12,0,0,0,0,0,0,0,0,0,0,2,1,0.27,-0.73,1,29,15,0,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,90,1,0,0,True,0.0,0.0,5.5,132,,0,,0,,21.8,17,,18,2,,,,3,78,6.0,,,1.1,3.3,,462908,38830,235204,17474,6.5,5.5,0.0,0.02,0.02,2.21,,,,,82.6,,,10.0,,,0.0,,,9.2,,,,,,
12854,2025-2026,a,Raya,Goalkeeper,2025-09-13 11:30:00,4,1,90,0,0,0,0.0,0.0,0.0,0,0,0,0,34,25,0.0,5,0,3,0,0,9,0,0,0,0,0,0,0,0,0,0,1,0,0.06,0.06,0,25,3,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,90,0,0,0,True,0.0,0.0,5.5,137,,0,,0,,23.5,14,,24,6,,,,3,102,6.0,,,1.1,4.4,,746262,33143,363824,28342,6.0,6.5,0.0,0.02,0.02,2.41,,,,,95.4,,,10.0,,,0.0,,,10.5,,,,,,
13454,2025-2026,a,Raya,Goalkeeper,2025-09-21 15:30:00,5,1,90,0,0,0,0.0,0.01,0.0,0,0,0,0,39,25,0.0,10,0,7,0,0,6,0,1,0,0,1,0,1,0,1,0,2,1,1.11,0.11,0,25,7,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,90,1,0,0,True,0.0,0.0,5.5,138,,0,,0,,24.0,15,,26,2,,,,3,115,4.0,,,0.7,4.7,,907108,95889,475777,32409,4.0,4.0,0.0,0.03,0.03,3.3,,,,,116.8,,,10.0,,,0.0,,,12.6,,,,,,


In [None]:
# Read the GitHub token through google.colab's `userdata` helper and name
# the target repository.
TOKEN = userdata.get('GITHUB_TOKEN')
USER = "ruus77"
REPO = "FPL-Points-Predictions"

# NOTE(review): the token is interpolated into the clone URL; git typically
# persists the remote URL in the clone's .git/config — confirm this is
# acceptable, or switch to a credential helper.
# NOTE(review): `%cd {REPO}` leaves the working directory inside the clone for
# the rest of the session; a re-run presumably finds the directory already
# present — verify the cell behaves as intended on a second execution.
!git clone https://{TOKEN}@github.com/{USER}/{REPO}.git
%cd {REPO}

# Write the final table into the clone, without the DataFrame index.
filename = 'FPL_data_24_26.csv'
data.to_csv(filename, index=False)

# Commit the CSV under the configured identity and push it to `main`.
!git config --global user.email "russamuel2004@email.com"
!git config --global user.name "{USER}"
!git add {filename}
!git commit -m "Update CSV"
!git push origin main

Cloning into 'FPL-Points-Predictions'...
remote: Enumerating objects: 518, done.[K
remote: Counting objects: 100% (518/518), done.[K
remote: Compressing objects: 100% (153/153), done.[K
remote: Total 518 (delta 284), reused 518 (delta 284), pack-reused 0 (from 0)[K
Receiving objects: 100% (518/518), 10.36 MiB | 10.45 MiB/s, done.
Resolving deltas: 100% (284/284), done.
/content/FPL-Points-Predictions
On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean
Everything up-to-date
