#### 1. Reading one season of .csv files (one file per league) from the website

In [None]:
import pandas as pd

# Pieces used to build the download addresses
URL = 'https://www.football-data.co.uk/mmz4281/2122/E0.csv'  # a complete example address (folder 2122, league E0)
root = 'https://www.football-data.co.uk/mmz4281/'  # the prefix every address below starts with
# league codes: Premier League + lower divisions + Championship
leagues = ['E0', 'E1', 'E2', 'E3', 'EC']
# the downloaded tables are gathered here, one DataFrame per file
frames = []

# fetch the '2122' file of every league and keep it in the list
for league in leagues:
    csv_url = f'{root}2122/{league}.csv'
    df = pd.read_csv(csv_url, encoding='unicode_escape')
    frames += [df]

# sanity check: 1 season x 5 leagues should give 5 tables
len(frames)



#### 2. Reading .csv files from multiple URLs

In [None]:
# ====================================================================
# This script downloads the .csv files containing match data for every
# league listed in `leagues`, for each of the six seasons from 2015/16
# to 2020/21 (season start years 15..20)
# ====================================================================
# start from an empty list so tables collected by an earlier cell are not
# counted again (otherwise the length check below would not give 30)
frames = []

for league in leagues:
    for season in range(15, 21):
        # folder name = two-digit start year + two-digit end year, e.g. 15 -> '1516'
        season_code = f'{season:02d}{(season + 1) % 100:02d}'
        df = pd.read_csv(f'{root}{season_code}/{league}.csv', encoding='unicode_escape')
        # tag every row with the season's start year, placed as the second column
        df.insert(1, 'season', season)
        frames.append(df)

# check the length of frames list. Expected result: 30 (6 seasons x 5 leagues)
len(frames)
# show the first element
frames[0]

Unnamed: 0,Div,season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,BbAv<2.5,BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,PSCH,PSCD,PSCA
0,E0,15,08/08/2015,Bournemouth,Aston Villa,0,1,A,0,0,...,1.79,26,-0.50,1.98,1.93,1.99,1.92,1.82,3.88,4.70
1,E0,15,08/08/2015,Chelsea,Swansea,2,2,D,2,1,...,1.99,27,-1.50,2.24,2.16,1.80,1.73,1.37,5.04,10.88
2,E0,15,08/08/2015,Everton,Watford,2,2,D,0,1,...,1.96,26,-1.00,2.28,2.18,1.76,1.71,1.75,3.76,5.44
3,E0,15,08/08/2015,Leicester,Sunderland,4,2,H,3,0,...,1.67,26,-0.50,2.00,1.95,1.96,1.90,1.79,3.74,5.10
4,E0,15,08/08/2015,Man United,Tottenham,1,0,H,1,0,...,2.01,26,-1.00,2.20,2.09,1.82,1.78,1.64,4.07,6.04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,E0,15,15/05/2016,Stoke,West Ham,2,1,H,0,1,...,2.27,30,0.25,2.11,2.03,1.87,1.83,3.05,4.00,2.26
376,E0,15,15/05/2016,Swansea,Man City,1,1,D,1,1,...,2.45,31,1.00,2.14,2.05,1.85,1.81,7.05,5.00,1.47
377,E0,15,15/05/2016,Watford,Sunderland,2,2,D,0,1,...,2.10,29,-0.50,2.06,2.01,1.91,1.85,1.64,4.52,5.27
378,E0,15,15/05/2016,West Brom,Liverpool,1,1,D,1,1,...,2.09,30,-0.25,2.10,2.05,1.86,1.81,2.61,3.75,2.70


#### 3. Organise all the data into a dictionary

In [12]:
# lookup table: readable league name -> code used in the .csv file name
dict_countries = {
    'Spanish La Liga': 'SP1',
    'Spanish Segunda Division': 'SP2',
    'German Bundesliga': 'D1',
    'English Premier League': 'E0',
    'English League 1': 'E2',
}

# filled below: readable league name -> one DataFrame holding all its seasons
dict_historical_data = {}

# walk through every (name, code) pair of the lookup table
for league, league_code in dict_countries.items():
    frames = []
    for season in range(15, 21):
        # folder name is the start year followed by the end year, e.g. 15 -> '1516'
        season_folder = f'{season}{season + 1}'
        csv_url = f'{root}{season_folder}/{league_code}.csv'
        df = pd.read_csv(csv_url, encoding='unicode_escape')
        # record the season's start year as the second column
        df.insert(1, 'season', season)
        frames += [df]
    # stack the six season tables and file the result under the league name
    dict_historical_data[league] = df_concat = pd.concat(frames)

dict_historical_data['English Premier League']

Unnamed: 0,Div,season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA
0,E0,15,08/08/2015,Bournemouth,Aston Villa,0,1,A,0,0,...,,,,,,,,,,
1,E0,15,08/08/2015,Chelsea,Swansea,2,2,D,2,1,...,,,,,,,,,,
2,E0,15,08/08/2015,Everton,Watford,2,2,D,0,1,...,,,,,,,,,,
3,E0,15,08/08/2015,Leicester,Sunderland,4,2,H,3,0,...,,,,,,,,,,
4,E0,15,08/08/2015,Man United,Tottenham,1,0,H,1,0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,E0,20,23/05/2021,Liverpool,Crystal Palace,2,0,H,1,0,...,3.49,-2.25,1.86,2.04,1.88,2.03,1.98,2.14,1.88,2.00
376,E0,20,23/05/2021,Man City,Everton,5,0,H,2,0,...,2.77,-1.75,2.01,1.89,1.99,1.89,2.20,2.00,2.03,1.85
377,E0,20,23/05/2021,Sheffield United,Burnley,1,0,H,1,0,...,2.05,0.00,2.04,1.86,2.05,1.86,2.17,1.90,2.03,1.84
378,E0,20,23/05/2021,West Ham,Southampton,3,0,H,2,0,...,2.14,-0.75,2.00,1.90,2.02,1.91,2.06,2.01,1.99,1.89
