In [18]:
#importing packages

import os 
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt

In [412]:
# unpack single season data
def unpack_season(year_fp, current, teams=None):
    """Read the CSV files of one season directory into a dictionary.

    Parameters
    ----------
    year_fp : str
        Path to the season directory. Its last path component is taken as the
        season name (the caller in this notebook passes e.g. 'data/2019-20').
    current : str
        Name of the current season. A season whose name equals ``current`` is
        loaded from 'fixtures.csv' and 'teams.csv'; any other season is loaded
        from its 'season-XXYY.csv' summary and the files in its 'gws' folder.
    teams : pandas.DataFrame, optional
        Teams table forwarded to ``get_squads``. When omitted, the 'teams'
        table read for the current season is used instead.

    Returns
    -------
    dict
        For the current season: 'fixtures', 'teams', 'players', 'squads'.
        For any other season: 'current_season', 'gw_data', 'players', 'squads'.
        ('players_raw' is loaded first but removed again by ``get_squads``.)

    Raises
    ------
    KeyError
        If ``teams`` is None for a season other than ``current``, because no
        'teams' table is loaded in that case.
    """
    data = {}

    # Season name = final directory component. basename/normpath is used
    # instead of splitting on '/' so that Windows separators, trailing
    # separators and deeper roots are all handled.
    season_name = os.path.basename(os.path.normpath(year_fp))

    data['players_raw'] = pd.read_csv(os.path.join(year_fp, 'players_raw.csv'))

    # differing data files between the current season and past seasons
    if season_name != current:
        # '2018-19' -> 'season-1819.csv'
        season_file = 'season-' + season_name[2:].replace('-', '') + '.csv'
        data['current_season'] = pd.read_csv(os.path.join(year_fp, season_file))

        gw_data = {}
        gw_data_fp = os.path.join(year_fp, 'gws')
        for filename in os.listdir(gw_data_fp):
            key_val = filename.split('.')[0]
            gw_fp = os.path.join(gw_data_fp, filename)
            try:
                gw_data[key_val] = pd.read_csv(gw_fp, encoding="ISO-8859-1")
            except (OSError, ValueError):
                # Best effort: skip entries that cannot be read or parsed
                # (pandas' EmptyDataError/ParserError derive from ValueError).
                continue
        data['gw_data'] = gw_data
    else:
        # Read from the season directory itself rather than a hard-coded
        # 'data' root; identical for the 'data/<current>' layout.
        data['fixtures'] = pd.read_csv(os.path.join(year_fp, 'fixtures.csv'))
        data['teams'] = pd.read_csv(os.path.join(year_fp, 'teams.csv'))

    # looping through all players and getting their gameweek and past season history
    players = {}
    players_fp = os.path.join(year_fp, 'players')
    optional_files = (('player_gw', 'gw.csv'), ('players_history', 'history.csv'))
    for player_dir in os.listdir(players_fp):
        players[player_dir] = {}
        for key, csv_name in optional_files:
            try:
                # gw.csv is absent at the start of a season and history.csv is
                # absent for players new to the league, so both are optional.
                players[player_dir][key] = pd.read_csv(
                    os.path.join(players_fp, player_dir, csv_name)
                )
            except (OSError, ValueError):
                pass
    data['players'] = players

    # we only want players that are playing the current season
    squad_teams = data['teams'] if teams is None else teams
    get_squads(data, squad_teams)

    print(data.keys())
    print()

    return data

In [413]:
def get_squads(season, teams):
    """Split a season's raw player table into one DataFrame per team.

    Parameters
    ----------
    season : dict
        Season dictionary holding a 'players_raw' DataFrame. The columns read
        here are 'first_name', 'second_name', 'id', 'web_name', 'team_code',
        'status', 'photo' and 'code'. The dictionary is modified:
        'players_raw' is removed and 'squads' is added.
    teams : pandas.DataFrame
        Teams table with 'name' and 'code' columns; 'code' is matched against
        the players' 'team_code'.

    Returns
    -------
    dict
        Maps each team name in ``teams`` to a DataFrame of that team's players
        (empty if none match). Players with status 'n' are excluded, as are
        players whose 'team_code' is not present in ``teams``.
    """
    # Work on a copy so the DataFrame object supplied by the caller is not
    # modified when the helper columns are added.
    players = season['players_raw'].copy()

    # creating a unique id which corresponds to the key of the players dictionary
    players['player_name_code'] = (
        players['first_name'] + '_' + players['second_name'] + '_' + players['id'].map(str)
    )
    # refer to players by their webname
    players['name'] = players['web_name']

    # Left merge keeps players without a matching team (their team is NaN).
    # Both frames have 'name' and 'code', so pandas suffixes them _x / _y.
    players = players.merge(
        teams[['name', 'code']], left_on='team_code', right_on='code', how='left'
    )
    players['team'] = players['name_y']
    players['name'] = players['name_x']

    # dropping unnecessary columns
    players = players.drop(
        ['photo', 'first_name', 'second_name', 'web_name',
         'code_x', 'code_y', 'name_y', 'name_x', 'team_code'],
        axis=1,
    )
    # dropping players that have left the league
    players = players.loc[players['status'] != 'n']

    # one squad per team of the supplied teams table, keyed by team name
    squads = {
        team: players.loc[players['team'] == team].copy()
        for team in teams['name'].values
    }

    # no longer need raw player data since we have squad information
    del season['players_raw']
    season['squads'] = squads
    return squads
    
    
    

In [387]:
#loop through all season data and return a dictionary of all the data
def get_all_seasons(current):
    """Load every season directory under 'data' into one dictionary.

    The current season is loaded first because its 'teams' table is passed to
    ``unpack_season`` for every other season.

    Parameters
    ----------
    current : str
        Directory name of the current season inside 'data' (the notebook
        calls this with '2019-20').

    Returns
    -------
    dict
        Maps each season directory name to the dictionary returned by
        ``unpack_season`` for that season.
    """
    seasons = {}
    print(current)
    seasons[current] = unpack_season(os.path.join('data', current), current)
    teams = seasons[current]['teams']

    # sorted() makes the load order independent of the filesystem's ordering
    for entry in sorted(os.listdir('data')):
        season_fp = os.path.join('data', entry)
        # Skip the already-loaded current season, plus anything that is not a
        # season folder: stray files and hidden directories such as
        # '.ipynb_checkpoints' would otherwise make unpack_season fail.
        if entry == current or entry.startswith('.') or not os.path.isdir(season_fp):
            continue
        print(entry)
        seasons[entry] = unpack_season(season_fp, current, teams)

    return seasons

In [388]:
# Load every season found under the 'data' directory, with '2019-20' as the current season.
seasons_data = get_all_seasons('2019-20')

2019-20
dict_keys(['fixtures', 'teams', 'players', 'squads'])

2017-18
dict_keys(['current_season', 'gw_data', 'players', 'squads'])

2016-17
dict_keys(['current_season', 'gw_data', 'players', 'squads'])

2018-19
dict_keys(['current_season', 'gw_data', 'players', 'squads'])



In [389]:
# NOTE: dict.copy() is shallow -- the per-season dictionaries (and the DataFrames inside them)
# are shared with seasons_data, so in-place changes to them are visible through both names.
seasons = seasons_data.copy()