# Premier League & Rights - Data Wrangling v 0.1

In [1]:
import os
import pandas as pd
import numpy as np
import glob

from bs4 import BeautifulSoup
import requests

#pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
%matplotlib inline
# import k-means from clustering stage
from sklearn.cluster import KMeans

print('Libraries imported.')

Libraries imported.


# 1. Import

## 1.1 Define data destinations and sources

In [2]:
# enter the start of the first season
start_year = 1992 
# enter the end of the last season
end_year = 2021 
number_of_seasons = end_year - start_year

### 1.1.1 Define the seasons

In [3]:
end_year

2021

In [4]:
number_of_seasons

29

In [5]:
# automatically create table with seasons from start_year to end_year
seasons_df = pd.DataFrame({'seasonStart': range (start_year,end_year),'seasonEnd': range (start_year+1,end_year+1)})

In [6]:
# check the head of the seasons_df dataframe
seasons_df.head()

Unnamed: 0,seasonStart,seasonEnd
0,1992,1993
1,1993,1994
2,1994,1995
3,1995,1996
4,1996,1997


In [7]:
# check the foot of the seasons_df dataframe
seasons_df.tail()

Unnamed: 0,seasonStart,seasonEnd
24,2016,2017
25,2017,2018
26,2018,2019
27,2019,2020
28,2020,2021


In [8]:
seasons_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29 entries, 0 to 28
Data columns (total 2 columns):
seasonStart    29 non-null int64
seasonEnd      29 non-null int64
dtypes: int64(2)
memory usage: 544.0 bytes


In [9]:
seasons_df['seasonStart']=seasons_df['seasonStart'].astype(str)
seasons_df['seasonEnd']=seasons_df['seasonEnd'].astype(str)
seasons_df['seasonEnd']=seasons_df['seasonEnd'].str[2:4]

seasons_df['targetDataframe'] = 'Premier_League_'+ seasons_df ['seasonStart'] +\
                    '-' + seasons_df ['seasonEnd'] + '_df'
seasons_df['url'] = 'https://en.wikipedia.org/wiki/' + seasons_df['seasonStart'] +'-'\
                + seasons_df['seasonEnd']+'_FA_Premier_League'
seasons_df.head()

Unnamed: 0,seasonStart,seasonEnd,targetDataframe,url
0,1992,93,Premier_League_1992-93_df,https://en.wikipedia.org/wiki/1992-93_FA_Premi...
1,1993,94,Premier_League_1993-94_df,https://en.wikipedia.org/wiki/1993-94_FA_Premi...
2,1994,95,Premier_League_1994-95_df,https://en.wikipedia.org/wiki/1994-95_FA_Premi...
3,1995,96,Premier_League_1995-96_df,https://en.wikipedia.org/wiki/1995-96_FA_Premi...
4,1996,97,Premier_League_1996-97_df,https://en.wikipedia.org/wiki/1996-97_FA_Premi...


In [10]:
# manual data wrangling

seasons_df.iloc[22,3] = 'https://en.wikipedia.org/wiki/2014-15_Premier_League'
seasons_df.iloc[23,3] = 'https://en.wikipedia.org/wiki/2015-16_Premier_League'
seasons_df.iloc[24,3] = 'https://en.wikipedia.org/wiki/2016-17_Premier_League'
seasons_df.iloc[25,3] = 'https://en.wikipedia.org/wiki/2017-18_Premier_League'
seasons_df.iloc[26,3] = 'https://en.wikipedia.org/wiki/2018-19_Premier_League'
seasons_df.iloc[27,3] = 'https://en.wikipedia.org/wiki/2019-20_Premier_League'
seasons_df.iloc[28,3] = 'https://en.wikipedia.org/wiki/2020-21_Premier_League'
seasons_df

Unnamed: 0,seasonStart,seasonEnd,targetDataframe,url
0,1992,93,Premier_League_1992-93_df,https://en.wikipedia.org/wiki/1992-93_FA_Premi...
1,1993,94,Premier_League_1993-94_df,https://en.wikipedia.org/wiki/1993-94_FA_Premi...
2,1994,95,Premier_League_1994-95_df,https://en.wikipedia.org/wiki/1994-95_FA_Premi...
3,1995,96,Premier_League_1995-96_df,https://en.wikipedia.org/wiki/1995-96_FA_Premi...
4,1996,97,Premier_League_1996-97_df,https://en.wikipedia.org/wiki/1996-97_FA_Premi...
5,1997,98,Premier_League_1997-98_df,https://en.wikipedia.org/wiki/1997-98_FA_Premi...
6,1998,99,Premier_League_1998-99_df,https://en.wikipedia.org/wiki/1998-99_FA_Premi...
7,1999,0,Premier_League_1999-00_df,https://en.wikipedia.org/wiki/1999-00_FA_Premi...
8,2000,1,Premier_League_2000-01_df,https://en.wikipedia.org/wiki/2000-01_FA_Premi...
9,2001,2,Premier_League_2001-02_df,https://en.wikipedia.org/wiki/2001-02_FA_Premi...


In [11]:
seasons_df.drop(seasons_df.columns [0:2], axis = 1, inplace = True)

In [12]:
seasons_df.head()

Unnamed: 0,targetDataframe,url
0,Premier_League_1992-93_df,https://en.wikipedia.org/wiki/1992-93_FA_Premi...
1,Premier_League_1993-94_df,https://en.wikipedia.org/wiki/1993-94_FA_Premi...
2,Premier_League_1994-95_df,https://en.wikipedia.org/wiki/1994-95_FA_Premi...
3,Premier_League_1995-96_df,https://en.wikipedia.org/wiki/1995-96_FA_Premi...
4,Premier_League_1996-97_df,https://en.wikipedia.org/wiki/1996-97_FA_Premi...


### 1.1.2 Import the tables and data wrangling


In [13]:
# manual data wrangling
# prem_league_df_1 frame 4 seasons 1:6
start_season = 1
end_season =  6
frame = 4

range_start = start_season - 1
range_end = end_season

prem_league_df_1 = []
for i in range (range_start, range_end):
    url = seasons_df.iloc[i,1]
    print (url)
    temp_df = pd.read_html(url, header=0)[frame]
    temp_df['Season Start'] = i + 1992
    prem_league_df_1.append(temp_df)
prem_league_df_1 = pd.concat (prem_league_df_1)

prem_league_df_1.drop(prem_league_df_1.columns [10:11], axis = 1, inplace = True)
prem_league_df_1.df = prem_league_df_1.reset_index(inplace =True, drop = True)
prem_league_df_1.tail(28)

https://en.wikipedia.org/wiki/1992-93_FA_Premier_League
https://en.wikipedia.org/wiki/1993-94_FA_Premier_League
https://en.wikipedia.org/wiki/1994-95_FA_Premier_League
https://en.wikipedia.org/wiki/1995-96_FA_Premier_League
https://en.wikipedia.org/wiki/1996-97_FA_Premier_League
https://en.wikipedia.org/wiki/1997-98_FA_Premier_League


Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Season Start
100,15,Everton,38.0,10.0,12.0,16.0,44.0,57.0,−13,42,1996
101,16,Southampton,38.0,10.0,11.0,17.0,50.0,56.0,−6,41,1996
102,17,Coventry City,38.0,9.0,14.0,15.0,38.0,54.0,−16,41,1996
103,18,Sunderland (R),38.0,10.0,10.0,18.0,35.0,53.0,−18,40,1996
104,19,Middlesbrough (R),38.0,10.0,12.0,16.0,51.0,60.0,−9,39[d],1996
105,20,Nottingham Forest (R),38.0,6.0,16.0,16.0,31.0,59.0,−28,34,1996
106,1,Arsenal (C),38.0,23.0,9.0,6.0,68.0,33.0,+35,78,1997
107,2,Manchester United,38.0,23.0,8.0,7.0,73.0,26.0,+47,77,1997
108,3,Liverpool,38.0,18.0,11.0,9.0,68.0,42.0,+26,65,1997
109,4,Chelsea,38.0,20.0,3.0,15.0,71.0,43.0,+28,63,1997


In [14]:
prem_league_df_1.tail(24)

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Season Start
104,19,Middlesbrough (R),38.0,10.0,12.0,16.0,51.0,60.0,−9,39[d],1996
105,20,Nottingham Forest (R),38.0,6.0,16.0,16.0,31.0,59.0,−28,34,1996
106,1,Arsenal (C),38.0,23.0,9.0,6.0,68.0,33.0,+35,78,1997
107,2,Manchester United,38.0,23.0,8.0,7.0,73.0,26.0,+47,77,1997
108,3,Liverpool,38.0,18.0,11.0,9.0,68.0,42.0,+26,65,1997
109,4,Chelsea,38.0,20.0,3.0,15.0,71.0,43.0,+28,63,1997
110,5,Leeds United,38.0,17.0,8.0,13.0,57.0,46.0,+11,59,1997
111,6,Blackburn Rovers,38.0,16.0,10.0,12.0,57.0,52.0,+5,58,1997
112,7,Aston Villa,38.0,17.0,6.0,15.0,49.0,48.0,+1,57,1997
113,8,West Ham United,38.0,16.0,8.0,14.0,56.0,57.0,−1,56,1997


In [15]:
prem_league_df_1.iloc[104,9]= 39 # Middlesborough 9 point deduction
prem_league_df_1 = prem_league_df_1.drop([127], axis=0)
prem_league_df_1 = prem_league_df_1.drop([126], axis=0)
prem_league_df_1.tail(22)

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Season Start
104,19,Middlesbrough (R),38.0,10.0,12.0,16.0,51.0,60.0,−9,39,1996
105,20,Nottingham Forest (R),38.0,6.0,16.0,16.0,31.0,59.0,−28,34,1996
106,1,Arsenal (C),38.0,23.0,9.0,6.0,68.0,33.0,+35,78,1997
107,2,Manchester United,38.0,23.0,8.0,7.0,73.0,26.0,+47,77,1997
108,3,Liverpool,38.0,18.0,11.0,9.0,68.0,42.0,+26,65,1997
109,4,Chelsea,38.0,20.0,3.0,15.0,71.0,43.0,+28,63,1997
110,5,Leeds United,38.0,17.0,8.0,13.0,57.0,46.0,+11,59,1997
111,6,Blackburn Rovers,38.0,16.0,10.0,12.0,57.0,52.0,+5,58,1997
112,7,Aston Villa,38.0,17.0,6.0,15.0,49.0,48.0,+1,57,1997
113,8,West Ham United,38.0,16.0,8.0,14.0,56.0,57.0,−1,56,1997


In [16]:
# manual data wrangling
# prem_league_df_2 frame 5 seasons 7:10
start_season = 7
end_season =  10
frame = 5

range_start = start_season - 1
range_end = end_season

prem_league_df_2 = []
for i in range (range_start, range_end):
    url = seasons_df.iloc[i,1]
    print (url)
    temp_df = pd.read_html(url, header=0)[frame]
    temp_df['Season Start'] = i + 1992
    prem_league_df_2.append(temp_df)
prem_league_df_2 = pd.concat (prem_league_df_2)

prem_league_df_2.drop(prem_league_df_2.columns [10:11], axis = 1, inplace = True)
prem_league_df_2.df = prem_league_df_2.reset_index(inplace =True, drop = True)
prem_league_df_2.tail()

https://en.wikipedia.org/wiki/1998-99_FA_Premier_League
https://en.wikipedia.org/wiki/1999-00_FA_Premier_League
https://en.wikipedia.org/wiki/2000-01_FA_Premier_League
https://en.wikipedia.org/wiki/2001-02_FA_Premier_League


Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Season Start
77,18,Ipswich Town (R),38.0,9.0,9.0,20.0,41.0,64.0,−23,36.0,2001
78,Qualification for the UEFA Cup qualifying roun...,,,,,,,,,,2001
79,Relegation to the Football League First Division,,,,,,,,,,2001
80,19,Derby County (R),38.0,8.0,6.0,24.0,33.0,63.0,−30,30.0,2001
81,20,Leicester City (R),38.0,5.0,13.0,20.0,30.0,64.0,−34,28.0,2001


In [17]:
prem_league_df_2 = prem_league_df_2.drop([79], axis=0)
prem_league_df_2 = prem_league_df_2.drop([78], axis=0)
prem_league_df_2.tail()

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Season Start
75,16,Bolton Wanderers,38.0,9.0,13.0,16.0,44.0,62.0,−18,40.0,2001
76,17,Sunderland,38.0,10.0,10.0,18.0,29.0,51.0,−22,40.0,2001
77,18,Ipswich Town (R),38.0,9.0,9.0,20.0,41.0,64.0,−23,36.0,2001
80,19,Derby County (R),38.0,8.0,6.0,24.0,33.0,63.0,−30,30.0,2001
81,20,Leicester City (R),38.0,5.0,13.0,20.0,30.0,64.0,−34,28.0,2001


In [18]:
# manual data wrangling
# prem_league_df_3 frame 4 season 11
start_season = 11
end_season =  11
frame = 4

range_start = start_season - 1
range_end = end_season

prem_league_df_3 = []
for i in range (range_start, range_end):
    url = seasons_df.iloc[i,1]
    print (url)
    temp_df = pd.read_html(url, header=0)[frame]
    temp_df['Season Start'] = i + 1992
    prem_league_df_3.append(temp_df)
prem_league_df_3 = pd.concat (prem_league_df_3)

prem_league_df_3.drop(prem_league_df_3.columns [10:11], axis = 1, inplace = True)
prem_league_df_3.head()

https://en.wikipedia.org/wiki/2002-03_FA_Premier_League


Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Season Start
0,1,Manchester United (C),38,25,8,5,74,34,40,83,2002
1,2,Arsenal,38,23,9,6,85,42,43,78,2002
2,3,Newcastle United,38,21,6,11,63,48,15,69,2002
3,4,Chelsea,38,19,10,9,68,38,30,67,2002
4,5,Liverpool,38,18,10,10,61,41,20,64,2002


In [19]:
# manual data wrangling
# prem_league_df_4 frame 7 season 11
start_season = 12
end_season =  12
frame = 7

range_start = start_season - 1
range_end = end_season

prem_league_df_4 = []
for i in range (range_start, range_end):
    url = seasons_df.iloc[i,1]
    print (url)
    temp_df = pd.read_html(url, header=0)[frame]
    temp_df['Season Start'] = i + 1992
    prem_league_df_4.append(temp_df)
prem_league_df_4 = pd.concat (prem_league_df_4)
prem_league_df_4.head()

prem_league_df_4.drop(prem_league_df_4.columns [10:11], axis = 1, inplace = True)
prem_league_df_4.head()

https://en.wikipedia.org/wiki/2003-04_FA_Premier_League


Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Season Start
0,1,Arsenal (C),38,26,12,0,73,26,47,90,2003
1,2,Chelsea,38,24,7,7,67,30,37,79,2003
2,3,Manchester United,38,23,6,9,64,35,29,75,2003
3,4,Liverpool,38,16,12,10,55,37,18,60,2003
4,5,Newcastle United,38,13,17,8,52,40,12,56,2003


In [20]:
# manual data wrangling
# prem_league_df_5 frame 4 season 11
start_season = 13
end_season =  13
frame = 4

range_start = start_season - 1
range_end = end_season

prem_league_df_5 = []
for i in range (range_start, range_end):
    url = seasons_df.iloc[i,1]
    print (url)
    temp_df = pd.read_html(url, header=0)[frame]
    temp_df['Season Start'] = i + 1992
    prem_league_df_5.append(temp_df)
prem_league_df_5 = pd.concat (prem_league_df_5)

prem_league_df_5.drop(prem_league_df_5.columns [10:11], axis = 1, inplace = True)
prem_league_df_5.head()

https://en.wikipedia.org/wiki/2004-05_FA_Premier_League


Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Season Start
0,1,Chelsea (C),38,29,8,1,72,15,+57,95,2004
1,2,Arsenal,38,25,8,5,87,36,+51,83,2004
2,3,Manchester United,38,22,11,5,58,26,+32,77,2004
3,4,Everton,38,18,7,13,45,46,−1,61,2004
4,5,Liverpool,38,17,7,14,52,41,+11,58,2004


In [21]:
# manual data wrangling
# prem_league_df_6 frame 5 season 11
start_season = 14
end_season =  14
frame = 5

range_start = start_season - 1
range_end = end_season

prem_league_df_6 = []
for i in range (range_start, range_end):
    url = seasons_df.iloc[i,1]
    print (url)
    temp_df = pd.read_html(url, header=0)[frame]
    temp_df['Season Start'] = i + 1992
    prem_league_df_6.append(temp_df)
prem_league_df_6 = pd.concat (prem_league_df_6)
prem_league_df_6.head()

prem_league_df_6.drop(prem_league_df_6.columns [10:11], axis = 1, inplace = True)
prem_league_df_6.head()

https://en.wikipedia.org/wiki/2005-06_FA_Premier_League


Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Season Start
0,1,Chelsea (C),38,29,4,5,72,22,50,91,2005
1,2,Manchester United,38,25,8,5,72,34,38,83,2005
2,3,Liverpool,38,25,7,6,57,25,32,82,2005
3,4,Arsenal,38,20,7,11,68,31,37,67,2005
4,5,Tottenham Hotspur,38,18,11,9,53,38,15,65,2005


In [22]:
# manual data wrangling
# prem_league_df_7 frame 4 season 11
start_season = 15
end_season =  27
frame = 4

range_start = start_season - 1
range_end = end_season

prem_league_df_7 = []
for i in range (range_start, range_end):
    url = seasons_df.iloc[i,1]
    print (url)
    temp_df = pd.read_html(url, header=0)[frame]
    temp_df['Season Start'] = i + 1992
    prem_league_df_7.append(temp_df)
prem_league_df_7 = pd.concat (prem_league_df_7)

prem_league_df_7.drop(prem_league_df_7.columns [10:11], axis = 1, inplace = True)
prem_league_df_7.df = prem_league_df_7.reset_index(inplace =True, drop = True)

prem_league_df_7.tail()

https://en.wikipedia.org/wiki/2006-07_FA_Premier_League
https://en.wikipedia.org/wiki/2007-08_FA_Premier_League
https://en.wikipedia.org/wiki/2008-09_FA_Premier_League
https://en.wikipedia.org/wiki/2009-10_FA_Premier_League
https://en.wikipedia.org/wiki/2010-11_FA_Premier_League
https://en.wikipedia.org/wiki/2011-12_FA_Premier_League
https://en.wikipedia.org/wiki/2012-13_FA_Premier_League
https://en.wikipedia.org/wiki/2013-14_FA_Premier_League
https://en.wikipedia.org/wiki/2014-15_Premier_League
https://en.wikipedia.org/wiki/2015-16_Premier_League
https://en.wikipedia.org/wiki/2016-17_Premier_League
https://en.wikipedia.org/wiki/2017-18_Premier_League
https://en.wikipedia.org/wiki/2018-19_Premier_League


Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Season Start
259,16,Southampton,38.0,9.0,12.0,17.0,45.0,65.0,−20,39,2018
260,17,Brighton & Hove Albion,38.0,9.0,9.0,20.0,35.0,60.0,−25,36,2018
261,18,Cardiff City (R),38.0,10.0,4.0,24.0,34.0,69.0,−35,34,2018
262,19,Fulham (R),38.0,7.0,5.0,26.0,34.0,81.0,−47,26,2018
263,20,Huddersfield Town (R),38.0,3.0,7.0,28.0,22.0,76.0,−54,16,2018


In [23]:
prem_league_df_7.head()

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Season Start
0,1,Manchester United (C),38.0,28.0,5.0,5.0,83.0,27.0,56,89,2006
1,2,Chelsea,38.0,24.0,11.0,3.0,64.0,24.0,40,83,2006
2,3,Liverpool,38.0,20.0,8.0,10.0,57.0,27.0,30,68,2006
3,4,Arsenal,38.0,19.0,11.0,8.0,63.0,35.0,28,68,2006
4,5,Tottenham Hotspur,38.0,17.0,9.0,12.0,57.0,54.0,3,60,2006


In [24]:
prem_league_df_7.iloc[79,9]= 19 # Portmouth 9 point deduction

In [25]:
prem_league_df_7 = prem_league_df_7.drop([141], axis=0)
prem_league_df_7 = prem_league_df_7.drop([140], axis=0)
prem_league_df_7 = prem_league_df_7.drop([99], axis=0)
prem_league_df_7 = prem_league_df_7.drop([98], axis=0)
prem_league_df_7.head()

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Season Start
0,1,Manchester United (C),38.0,28.0,5.0,5.0,83.0,27.0,56,89,2006
1,2,Chelsea,38.0,24.0,11.0,3.0,64.0,24.0,40,83,2006
2,3,Liverpool,38.0,20.0,8.0,10.0,57.0,27.0,30,68,2006
3,4,Arsenal,38.0,19.0,11.0,8.0,63.0,35.0,28,68,2006
4,5,Tottenham Hotspur,38.0,17.0,9.0,12.0,57.0,54.0,3,60,2006


In [26]:
# manual data wrangling
# prem_league_df_8 frame 4 season 11, also need to rename column 1 to "Team"
start_season = 28
end_season =  28
frame = 4

range_start = start_season - 1
range_end = end_season

prem_league_df_8 = []
for i in range (range_start, range_end):
    url = seasons_df.iloc[i,1]
    print (url)
    temp_df = pd.read_html(url, header=0)[frame]
    temp_df['Season Start'] = i + 1992
    prem_league_df_8.append(temp_df)
prem_league_df_8 = pd.concat (prem_league_df_8)

prem_league_df_8.drop(prem_league_df_8.columns [10:11], axis = 1, inplace = True)

#rename column 1
prem_league_df_8.rename(columns={ prem_league_df_8.columns[1]: 'Team' }, inplace = True)  
prem_league_df_8.head()

https://en.wikipedia.org/wiki/2019-20_Premier_League


Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Season Start
0,1,Liverpool (C),38,32,3,3,85,33,52,99,2019
1,2,Manchester City,38,26,3,9,102,35,67,81,2019
2,3,Manchester United,38,18,12,8,66,36,30,66,2019
3,4,Chelsea,38,20,6,12,69,54,15,66,2019
4,5,Leicester City,38,18,8,12,67,41,26,62,2019


In [27]:
# manual data wrangling
# prem_league_df_9 frame 4 season 11
start_season = 29
end_season =  29
frame = 4

range_start = start_season - 1
range_end = end_season

prem_league_df_9 = []
for i in range (range_start, range_end):
    url = seasons_df.iloc[i,1]
    print (url)
    temp_df = pd.read_html(url, header=0)[frame]
    temp_df['Season Start'] = i + 1992
    prem_league_df_9.append(temp_df)
prem_league_df_9 = pd.concat (prem_league_df_9)

prem_league_df_9.drop(prem_league_df_9.columns [10:11], axis = 1, inplace = True)

prem_league_df_9.head()

#rename column 1

https://en.wikipedia.org/wiki/2020-21_Premier_League


Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Season Start
0,1,Liverpool,17,9,6,2,37,21,16,33,2020
1,2,Manchester United,16,10,3,3,33,24,9,33,2020
2,3,Leicester City,17,10,2,5,31,21,10,32,2020
3,4,Tottenham Hotspur,16,8,5,3,29,15,14,29,2020
4,5,Manchester City,15,8,5,2,24,13,11,29,2020


### 1.1.3 Create Single Table

In [28]:
prem_league_all_df = pd.concat([prem_league_df_1, prem_league_df_2,prem_league_df_3,prem_league_df_4,\
                               prem_league_df_5,prem_league_df_6,prem_league_df_7,prem_league_df_8,\
                               prem_league_df_9])

prem_league_all_df.reset_index(inplace =True, drop = True)

prem_league_all_df['Team'] = pd.Series(prem_league_all_df['Team']).str.replace(' \(.*\)', '')
prem_league_all_df['Team'] = pd.Series(prem_league_all_df['Team']).str.replace(' \(.*\)', '')

prem_league_all_df['Pld']=prem_league_all_df['Pld'].astype(int)
prem_league_all_df['W']=prem_league_all_df['W'].astype(int)
prem_league_all_df['D']=prem_league_all_df['W'].astype(int)
prem_league_all_df['L']=prem_league_all_df['L'].astype(int)
prem_league_all_df['GF']=prem_league_all_df['GF'].astype(int)
prem_league_all_df['GA']=prem_league_all_df['GA'].astype(int)
#prem_league_all_df['GD']=prem_league_all_df['GD'].astype(int)
prem_league_all_df['Pts']=prem_league_all_df['Pts'].astype(int)

In [29]:
prem_league_all_df.head()

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Season Start
0,1,Manchester United,42,24,24,6,67,31,+36,84,1992
1,2,Aston Villa,42,21,21,10,57,40,+17,74,1992
2,3,Norwich City,42,21,21,12,61,65,−4,72,1992
3,4,Blackburn Rovers,42,20,20,11,68,46,+22,71,1992
4,5,Queens Park Rangers,42,17,17,13,63,55,+8,63,1992


## 1.2 Domestic Rights

In [30]:
# There are eight current tram stops within the central city area defined by the following dataframe
ukDomesticRights_df=pd.DataFrame({'Year':[1992, 1993,1994, 1995, 1996,\
                                          1997, 1998, 1999, 2000,\
                                          2001, 2002, 2003,\
                                          2004, 2005, 2006,\
                                          2007, 2008, 2009,\
                                          2010, 2011, 2012,\
                                          2013, 2014, 2015,\
                                          2016, 2017, 2018,\
                                          2019, 2020],\
                                  'Domestic TV Rights':[60.8, 60.8, 60.8, 60.8, 60.8,\
                                                        167.5, 167.5, 167.5, 167.5,\
                                                        366.7, 366.7, 366.7, \
                                                        341.3, 341.3, 341.3, \
                                                        568.7, 568.7, 568.7, \
                                                        594.0, 594.0, 594.0, \
                                                        1006.0, 1006.0, 1006.0,\
                                                        1707.5, 1707.5, 1707.5,\
                                                        1538.0, 1538.0]})
ukDomesticRights_df

Unnamed: 0,Year,Domestic TV Rights
0,1992,60.8
1,1993,60.8
2,1994,60.8
3,1995,60.8
4,1996,60.8
5,1997,167.5
6,1998,167.5
7,1999,167.5
8,2000,167.5
9,2001,366.7


## 1.3 Premier League plus Domestic Rights

In [31]:
prem_league_rights_df = prem_league_all_df.merge(ukDomesticRights_df, left_on = 'Season Start', right_on = 'Year', how = 'left')

In [32]:
prem_league_rights_df.head()

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Season Start,Year,Domestic TV Rights
0,1,Manchester United,42,24,24,6,67,31,+36,84,1992,1992,60.8
1,2,Aston Villa,42,21,21,10,57,40,+17,74,1992,1992,60.8
2,3,Norwich City,42,21,21,12,61,65,−4,72,1992,1992,60.8
3,4,Blackburn Rovers,42,20,20,11,68,46,+22,71,1992,1992,60.8
4,5,Queens Park Rangers,42,17,17,13,63,55,+8,63,1992,1992,60.8


In [34]:
prem_league_rights_df.to_csv('prem_league_rights_df.csv')

In [None]:
# create list of dataframes
#list_of_dataframes_df =  seasons_df.copy(deep = True)
#list_of_dataframes_df.drop(list_of_dataframes_df.columns [1:], axis = 1, inplace = True)
#list_of_dataframes_df.head()

# create list of data sources (data also at also at https://www.premierleague.com/tables) 
#list_of_data_sources_df = seasons_df.copy(deep = True)
#list_of_data_sources_df.drop(list_of_data_sources_df.columns [0:1], axis = 1, inplace = True)
#list_of_data_sources_df.head()

# create a zip iterator
#list_of_dataframes = list_of_dataframes_df.values.tolist()
#list_of_data_sources = list_of_data_sources_df.values.tolist()
#dataframe_data_sources_zipped = list(zip(list_of_dataframes, list_of_data_sources))

#dataframe_data_sources_indice = [(dataframe, filename) for dataframe, filename in enumerate (dataframe_data_sources_zipped)]

#for dataframe, filename in dataframe_data_sources_indice:
#    dataframe['filename'] = filename