In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Get complete list of teams in premier league for a given year

*   Get complete list of teams in PL for the given year using <https://www.footballsquads.co.uk/eng/YYYY-YYYY/faprem.htm>

* NOTE: year must be 2017 or earlier (bug)




In [None]:
def get_teams(year):

  '''
  Return the squad-page file names of every Premier League team for a season.

  The season index page
  https://www.footballsquads.co.uk/eng/YYYY-YYYY/faprem.htm is downloaded and
  the link found inside each <h5> element is reduced to its last path
  component.

  Args:
    year: starting year of the season as an int (2011 means 2011-2012).

  Returns:
    list of str in page order, e.g. ['arsenal.htm', 'avilla.htm', 'blackbrn.htm'].
    Each entry can be passed to get_roster() as team_name. The list is empty
    when the index page does not answer with HTTP 200.
  '''

  teams = []

  url = "https://www.footballsquads.co.uk/eng/{}-{}/faprem.htm".format(year,year+1)
  # A timeout stops a stalled connection from hanging the notebook forever.
  response = requests.get(url, timeout=30)
  if response.status_code != 200:
    # Same "report and return a sentinel" style as wiki_request(); parsing an
    # error page would only produce an empty or misleading list.
    print("ERROR; Could not retrieve team list from {} (HTTP {})".format(url, response.status_code))
    return teams

  # Parse the HTML with BeautifulSoup
  soup = BeautifulSoup(response.content, 'html.parser')

  # Every team is listed as a link inside an <h5> element
  for h5 in soup.find_all('h5'):
    a_tag = h5.find('a')
    href = a_tag.get('href') if a_tag else None
    if href:  # skip headings without a link and anchors without an href
      teams.append(href.split('/')[-1])

  return teams

# Create a list of all players in the premier league for a given year
*   Then, get full roster for given year for each team in PL using <https://www.footballsquads.co.uk/eng/YYYY-YYYY/faprem/TEAM.htm>

In [None]:
def get_roster(year, team_name):
  '''
  Return the player names listed on one team's squad page for a season.

  The page https://www.footballsquads.co.uk/eng/YYYY-YYYY/faprem/TEAM is
  downloaded and the text of the second <td> of every table row is collected.

  Args:
    year: starting year of the season as an int (2011 means 2011-2012).
    team_name: page file name as returned by get_teams(), e.g. 'arsenal.htm'.

  Returns:
    list of str in page order. Empty cells and the 'Name' header are skipped.
    Repeated names are kept as they appear on the page. The list is empty when
    the page does not answer with HTTP 200.
  '''
  url = "https://www.footballsquads.co.uk/eng/{}-{}/faprem/{}".format(year,year+1,team_name)
  # A timeout stops a stalled connection from hanging the notebook forever.
  response = requests.get(url, timeout=30)
  if response.status_code != 200:
    print("ERROR; Could not retrieve roster from {} (HTTP {})".format(url, response.status_code))
    return []

  # Parse the HTML with BeautifulSoup
  soup = BeautifulSoup(response.content, 'html.parser')

  # Extract player names from the second <td> element in each row
  player_names = []
  for row in soup.find_all('tr'):
    columns = row.find_all('td')
    if len(columns) > 1:
      raw_name = columns[1].get_text(strip=True)  # 2nd column (index 1) holds the name
      # Collapse line breaks, tabs and non-breaking spaces inside the cell into
      # single spaces; otherwise names such as 'Nathaniel \r\n\tChalobah' or
      # 'Pajtim\xa0Kasami' leak through and cannot be matched to a Wikipedia title.
      name = ' '.join(raw_name.split())
      if name and name != 'Name':  # skip empty cells and the header row
        player_names.append(name)

  return player_names

# Get current status of player from Wikipedia

Example 1:
```
# Scrape the infobox from the player's Wikipedia page and
# review the infobox data to determine the player's status
active, fyear, fclub = get_status("Cesc Fabregas")

print(active, fyear, fclub)
```
```
getting states for Cesc Fabregas
Successfully parsed infobox
Successfully retrieved status; Player not playing, last played for Como in 2022–2023
False 2022–2023 Como
```






In [None]:
def wiki_request(player_name):
    """Fetch a player's English Wikipedia article.

    Args:
        player_name: the player's name as plain text, e.g. "Cesc Fabregas".
            Spaces are turned into underscores to form the article URL; no
            disambiguation suffix such as "(footballer)" is added.

    Returns:
        The response object when the page answers with HTTP 200, otherwise
        False. False is also returned when the request itself fails
        (connection error, timeout, ...), so one bad lookup cannot abort a
        long scraping loop.
    """
    # Format the player name to match Wikipedia URLs (spaces replaced with underscores)
    player_name = player_name.replace(' ', '_')

    # Request the player's Wikipedia page
    url = f"https://en.wikipedia.org/wiki/{player_name}"
    try:
        # A timeout stops a stalled connection from hanging the notebook forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        return False

    if response.status_code != 200:
        return False

    return response


def parse_infobox(response):
    """Turn the infobox of a Wikipedia article into a DataFrame.

    Args:
        response: object whose ``content`` attribute holds the page HTML
            (what wiki_request() returns on success).

    Returns:
        (True, df) when an infobox table is found, otherwise (False, None).
        df has one row per infobox row that contains at least one cell. The
        first column, "Section", is the text of the most recent section header
        row (None before the first header). "Attribute" and "Value" are the
        first two cells, and any further cells go into columns named "".
        There are always at least five columns; shorter rows are padded with
        None and wider rows add extra "" columns.
    """
    # Parse the HTML content with BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')

    # Select on the individual CSS classes instead of the exact attribute
    # string "infobox infobox-table vcard", so the table is still found when
    # Wikipedia adds, drops or reorders other classes.
    infobox = soup.select_one('table.infobox.vcard')

    if infobox is None:
        print("No infobox found")
        return False, None

    section_header = None
    rows_data = []

    # Loop through all rows in the infobox
    for row in infobox.find_all('tr'):
        # A section header row only updates the running section name
        header = row.find('th', {'class': 'infobox-header'})
        if header is not None:
            section_header = header.get_text(strip=True)
            continue

        # Extract the cleaned text of each cell (th and td)
        cells = [col.get_text(strip=True) for col in row.find_all(['th', 'td'])]
        if cells:
            rows_data.append([section_header] + cells)

    # Rows differ in width, and pandas raises ValueError when the number of
    # column names does not match the widest row. Pad every row to a common
    # width (never fewer than the five original columns) before building.
    base_columns = ["Section", "Attribute", "Value", "", ""]
    width = max([len(base_columns)] + [len(r) for r in rows_data])
    columns = base_columns + [""] * (width - len(base_columns))
    padded_rows = [r + [None] * (width - len(r)) for r in rows_data]

    df = pd.DataFrame(padded_rows, columns=columns)
    print('Successfully parsed infobox')
    return True, df

def _is_open_ended(years):
  '''Return True when a "Years" cell describes a spell with no end year, e.g. "2019–".'''
  # Drop a trailing footnote marker such as "[1]" before looking at the ending.
  text = str(years).split('[', 1)[0].strip().lower()
  return text.endswith(('–', '—', '-', 'present'))


def _career_rows(senior):
  '''Keep only the rows that are a club spell (start with a 4-digit year) or the "Total" tally.'''
  attrs = senior['Attribute'].fillna('').astype(str).str.strip()
  return senior[(attrs == 'Total') | attrs.str[:4].str.isdigit()]


def get_status(player_name):
  '''
  Work out from Wikipedia whether a player is still playing.

  The article is fetched with wiki_request(), its infobox is parsed with
  parse_infobox(), and only the "Senior career*" section is considered.
  Column-title rows and footnote rows inside that section are ignored. The
  remaining rows are classified as follows:

  * last row is "Total"            -> not playing; last club is the row above
  * no "Total", every spell closed -> not playing; last club is the last row
  * no "Total", some spell open    -> still playing (a spell is open when its
                                      years end in a dash, e.g. "2019–")

  Args:
    player_name: player name as plain text, e.g. "Cesc Fabregas".

  Returns:
    (active, fyear, fclub)
    active: True, False, or "n/a" when the status could not be determined.
    fyear:  years of the final spell for a player who is not playing,
            otherwise "n/a".
    fclub:  club of the final spell for a player who is not playing,
            otherwise "n/a".
  '''

  print('getting states for {}'.format(player_name))

  active = "n/a"
  fclub = "n/a"
  fyear = "n/a"

  response = wiki_request(player_name)
  if not response:
    print("ERROR; Could not retrieve Wikipedia page")
    return active, fyear, fclub

  parse_success, df = parse_infobox(response)
  if not parse_success:
    print("ERROR; Could not parse infobox")
    return active, fyear, fclub

  # Filter the infobox data to real senior-career rows only. Without this a
  # footnote row that follows "Total" becomes the "latest" row and a retired
  # player is reported as still playing for None.
  sc = _career_rows(df[df['Section']=="Senior career*"])
  if sc.empty:
    print("ERROR; 'Senior Career' Data not present in inbox box data")
    return active, fyear, fclub

  has_total = str(sc.iloc[-1]['Attribute']).strip() == 'Total'
  spells = sc.iloc[:-1] if has_total else sc

  # Remember the last spell without an end year. A closed loan row can follow
  # the open row of the parent club, so every spell is checked, not just the
  # final one.
  current = None
  for _, spell in spells.iterrows():
    if _is_open_ended(spell['Attribute']):
      current = spell

  if has_total or current is None:
    # A "Total" tally, or a career made only of finished spells (e.g. a
    # one-club player, who gets no "Total" row), means the player has stopped.
    active = False
    if not spells.empty:
      fclub = spells.iloc[-1].Value
      fyear = spells.iloc[-1].Attribute
    print("Successfully retrieved status; Player not playing, last played for {} in {}".format(fclub, fyear))
  else:
    active = True
    cclub = current.Value
    print("Successfully retrieved status; Player still playing, currently playing for {}".format(cclub))

  return active, fyear, fclub

# Testing

In [None]:
# Season to scrape (starting year, so 2011 means 2011-2012)
year = 2011

# Pool every team's squad into one flat list of player names.
# A player listed more than once on the squad pages appears more than once here.
players = []
teams = get_teams(year)
print(teams)
for team in teams:
  players += get_roster(year, team)

print(players)


['arsenal.htm', 'avilla.htm', 'blackbrn.htm', 'bolton.htm', 'chelsea.htm', 'everton.htm', 'fulham.htm', 'liverpool.htm', 'mancity.htm', 'manutd.htm', 'newcas.htm', 'norwich.htm', 'qpr.htm', 'stoke.htm', 'sunder.htm', 'swansea.htm', 'tottenha.htm', 'wba.htm', 'wigan.htm', 'wolves.htm']
['Manuel Almunia', 'Abou Diaby', 'Bacary Sagna', 'Per Mertesacker', 'Thomas Vermaelen', 'Laurent Koscielny', 'Tomáš Rosický', 'Mikel Arteta', 'Park Chu-Young', 'Robin van Persie', 'André Santos', 'Wojciech Szczęsny', 'Theo Walcott', 'Alex Oxlade-Chamberlain', 'Aaron Ramsey', 'Alexandre Song', 'Sébastien Squillaci', 'Jack Wilshere', 'Johan Djourou', 'Łukasz Fabiański', 'Carl Jenkinson', 'Kouassi Gervais "Gervinho" Yao', 'Kieran Gibbs', 'Marouane Chamakh', 'Yossi Benayoun', 'Benik Afobe', 'Zak Ansah', 'George Brislen-Hall', 'Francis Coquelin', 'Craig Eastmond', 'Sead Hajrovic', 'Conor Henderson', 'Gavin Hoyte', 'Sean McDermott', 'Jernade Meade', 'Ignasi Miquel', 'Nigel Neita', 'Oğuzhan Özyakup', 'Nico Yenna

In [None]:
# Hard-coded list of 2011 player names.
# NOTE(review): presumably a pasted snapshot of the `players` list printed above
# for year = 2011; the later cells read `players`, not this name - confirm.
# Renamed from `2011_players`, which is not a valid Python identifier (names
# cannot start with a digit). Strings that were split across physical lines
# have been rejoined.
players_2011 = [
    'Manuel Almunia', 'Abou Diaby', 'Bacary Sagna', 'Per Mertesacker', 'Thomas Vermaelen', 'Laurent Koscielny',
    'Tomáš Rosický', 'Mikel Arteta', 'Park Chu-Young', 'Robin van Persie', 'André Santos', 'Wojciech Szczęsny',
    'Theo Walcott', 'Alex Oxlade-Chamberlain', 'Aaron Ramsey', 'Alexandre Song', 'Sébastien Squillaci',
    'Jack Wilshere', 'Johan Djourou', 'Łukasz Fabiański', 'Carl Jenkinson', 'Kouassi Gervais "Gervinho" Yao',
    'Kieran Gibbs', 'Marouane Chamakh', 'Yossi Benayoun', 'Benik Afobe', 'Zak Ansah', 'George Brislen-Hall',
    'Francis Coquelin', 'Craig Eastmond', 'Sead Hajrovic', 'Conor Henderson', 'Gavin Hoyte', 'Sean McDermott',
    'Jernade Meade', 'Ignasi Miquel', 'Nigel Neita', 'Oğuzhan Özyakup', 'Nico Yennaris', 'Martin Angha',
    'Kyle Ebecilio', 'Emiliano Martínez', 'Elton Monteiro', 'Josh Rees', 'James Shea', 'Cesc Fàbregas',
    'Carlos Vela', 'Samir Nasri', 'Armand Traoré', 'Henri Lansbury', 'Gilles Sunu', 'Nicklas Bendtner',
    'Emmanuel Frimpong', 'Vito Mannone', 'Luke Freeman', 'Chuks Aneke', 'Daniel Boateng', 'Sanchez Watt',
    'Rhys Murphy', 'Ryo Miyaichi', 'Thierry Henry', 'Andrey Arshavin', 'Shay Given', 'Alan Hutton',
    'Stephen Warnock', 'Richard Dunne', 'James Collins', 'Stephen Ireland', 'Jermaine Jenas', 'Darren Bent',
    "Charles N'Zogbia", 'Gabriel Agbonlahor', 'Marc Albrighton', 'Nathan Delfouneso', 'Fabian Delph',
    'Emile Heskey', 'Stiliyan Petrov', 'Ciaran Clark', 'Brad Guzan', 'Carlos Cuéllar', 'Barry Bannan',
    'Andreas Weimann', 'Enda Stevens', 'Eric Lichaj', 'Chris Herd', 'Nathan Baker', 'Andy Marshall',
    'Daniel Johnson', 'Graham Burke', 'Derrick Williams', 'Gary Gardner', 'Samir Carruthers', 'Jack Grealish',
    'Luke Young', 'Jonathan Hogg', 'Jean II Makoun', 'Elliott Parish', 'Shane Lowry', 'Habib Beye',
    'Robbie Keane', 'Paul Robinson', 'Michel Salgado', 'Martin Olsson', 'Gaël Givet', 'David Dunn',
    'Anthony Modeste', 'Mauro Formica', 'Vince Grella', 'Morten Gamst Pedersen', 'Mark Bunn', 'Radosav Petrović',
    "Steven N'Zonzi", 'Scott Dann', 'Bradley Orr', 'Jordan Slew', 'Rubén Rochina', 'Marcus Olsson',
    'Nick Blackman', 'Junior Hoilett', 'Yakubu Aiyegbeni', 'David Goodwillie', 'Hérold Goulon', 'Simon Vukčević',
    "Anthony O'Connor", 'Grant Hanley', 'Bruno Ribeiro', 'Josh Morris', 'Jake Kean', 'Jason Lowe',
    'Amine Linganzi', 'Zac Aley', 'Myles Anderson', 'Adam Henley', 'Robbie Cotton', 'Raheem Hanley',
    'Sebastian Usai', 'Brett Emerton', 'Jason Roberts', 'Keith Andrews', 'Ryan Nelsen', 'Christopher Samba',
    'Ádám Bogdán', 'Grétar Steinsson', 'Marcos Alonso', 'Paul Robinson', 'Fabrice Muamba', 'Chris Eagles',
    'Stuart Holden', 'Tuncay Şanlı', 'Martin Petrov', 'Ricardo Gardner', 'Zat Knight', 'Kevin Davies',
    'Tyrone Mears', 'Mark Davies', 'Ivan Klasnić', 'Sam Ricketts', 'Nigel Reo-Coker', 'Robbie Blake',
    'Darren Pratley', 'Jussi Jääskeläinen', 'Sean Davis', "David N'Gog", 'Dedryck Boyata', 'Robert Lainton',
    'Lee Chung-Yong', 'Marvin Sordell', 'Ryo Miyaichi', 'David Wheater', 'Tim Ream', 'Mark Connolly',
    'Tom Eaves', 'Temitope Obadeyi', "Michael O'Halloran", 'Josh Vela', 'Adam Blakeman', 'Joe Riley',
    'Jay Lynch', 'Gaël Kakuta', 'Gary Cahill', 'Petr Čech', 'Branislav Ivanović', 'Ashley Cole',
    'David Luiz', 'Michael Essien', 'Oriol Romeu', 'Ramires', 'Frank Lampard', 'Fernando Torres',
    'Juan Mata', 'Didier Drogba', 'Mikel John Obi', 'Florent Malouda', 'Raul Meireles', 'José Bosingwa',
    'Romelu Lukaku', 'Paulo Ferreira', 'Salomon Kalou', 'Ross Turnbull', 'Daniel Sturridge', 'Gary Cahill',
    'John Terry', 'Sam Hutchinson', 'Ryan Bertrand', 'Lucas Piazón', 'Hilário', 'Nathaniel \r\n\tChalobah',
    'Jamal Blackman', 'Billy Clifford', 'Conor Clifford', 'Aziz Deen-Conteh', 'Rohan Ince', 'Marko Mitrović',
    'Todd Kane', 'James Ashton', 'George Saville', 'Archange Nkumu', 'Daniel Pappoe', 'Rhys Taylor',
    'Yossi Benayoun', 'Yossi Benayoun', 'Nicolas Anelka', 'Gaël Kakuta', 'Josh McEachran', 'Patrick van Aanholt',
    'Alex', 'Aliu Djaló', 'Philipp Prosenik', 'Jacob Mellis', 'Ján Mucha', 'Tony Hibbert',
    'Leighton Baines', 'Darron Gibson', 'John Heitinga', 'Phil Jagielka', 'Nikica Jelavić', 'Royston Drenthe',
    'Denis Stracqualursi', 'Marcus Hahnemann', 'James McFadden', 'Sylvain Distin', 'Tim Cahill', 'Phil Neville',
    'Magaye Gueye', 'Ross Barkley', 'Leon Osman', 'Steven Pienaar', 'Séamus Coleman', 'Tim Howard',
    'Marouane Fellaini', 'Jack Rodwell', 'Apostolos Vellios', 'Victor Anichebe', 'João Silva', 'Francisco Júnior',
    'Adam Forshaw', 'Shane Duffy', 'Aristote Nsiala', 'Jose Baxter', 'James Wallace', 'Adam Davies',
    'Jake Bidwell', 'Luke Garbutt', 'Conor McAleny', 'Femi Orenuga', 'Mikel Arteta', 'Jermaine Beckford',
    'Yakubu Aiyegbeni', 'Joseph Yobo', 'Shkodran Mustafi', 'Diniyar Bilyaletdinov', 'Louis Saha', 'Landon Donovan',
    'Mark Schwarzer', 'Stephen Kelly', 'John Arne Riise', 'Steve Sidwell', 'Brede Hangeland', 'Chris Baird',
    'Pavel Pogrebnyak', 'Andy Johnson', 'Orlando Sá', 'Pajtim\xa0Kasami', 'Bryan Ruiz', 'David Stockdale',
    'Danny Murphy', 'Philippe Senderos', 'Marcel Gecov', 'Damien Duff', 'Bjørn Helge Riise', 'Aaron Hughes',
    'Mahamadou Diarra', 'Dickson Etuhu', 'Kerim Frei', 'Csaba Somogyi', 'Clint Dempsey', 'Lauri Dalla Valle',
    'Zdeněk Grygera', 'Matthew Briggs', 'Simon Davies', 'Mousa Dembélé', 'Alex Kačaniklić', 'Rafik Halliche',
    'Tom Donegan', 'Buomesca Tué Na Bangna', 'Courtney Harris', 'Neil Etheridge', 'Stephen Arthurworrey',
    'Alex Smith', 'Marcus Bettinelli', 'Marcello Trotta', 'Josh Pritchard', 'Jesse Joronen', 'Carlos Salcido',
    'Keanu Marsh-Brown', 'Richard Peniket', 'Bobby Zamora', 'Brad Jones', 'Glen Johnson', 'José Enrique',
    'Daniel Agger', 'Fábio Aurélio', 'Luis Suárez', 'Steven Gerrard', 'Andy Carroll', 'Maxi Rodríguez',
    'Jordan Henderson', 'Sebastián Coates', 'Dirk Kuyt', 'Stewart Downing', 'Jay Spearing', 'Lucas Leiva',
    'Jamie Carragher', 'Pepe Reina', 'Charlie Adam', 'Suso', 'Raheem Sterling', 'Doni',
    'Jonjo Shelvey', 'Martin Kelly', 'Conor Coady', 'Nathan Eccleston', 'Martin Škrteľ', 'Jon Flanagan',
    'Craig Bellamy', 'Péter Gulácsi', 'Andre Wisdom', 'Jack Robinson', 'Daniel Ayala', 'Sotirios Kyrgiakos',
    'Daniel Pacheco', 'Alberto Aquilani', 'Joe Cole', 'Christian Poulsen', "David N'Gog", 'Raul Meireles',
    'Danny Wilson', 'Martin Hansen', 'Micah Richards', 'Vincent Kompany', 'Pablo Zabaleta', 'Joleon Lescott',
    'James Milner', 'David Pizarro', 'Edin Džeko', 'Adam Johnson', 'Stuart Taylor', 'Aleksandar Kolarov',
    'Stefan Savić', 'Sergio Agüero', 'Gareth Barry', 'Samir Nasri', 'Owen Hargreaves', 'David Silva',
    'Gaël Clichy', 'Joe Hart', 'Kolo Touré', 'Costel Pantilimon', 'Carlos Tévez', 'Nigel de Jong',
    'Gai Assulin', 'Denis Suárez', 'Gunnar Nielsen', 'Yaya Touré', 'Karim Rekik', 'Mario Balotelli',
    'Harry Bunn', 'Luca Scapuzzi', 'Sean Tse', 'Reece Wabara', 'Ahmad Benali', 'Jérémy Helan',
    'Abdul Razak', 'Eirik Johansen', 'Joan Román', 'Emmanuel Adebayor', 'Dedryck Boyata', 'Roque Santa Cruz',
    'Vladimir Weiss', 'Shaun Wright-Phillips', 'Craig Bellamy', 'Greg Cunningham', 'Kieran Trippier',
    'Chris Chantler', 'Loris Karius', 'David González', 'Alex Nimely', 'Ben Mee', 'Andrea Mancini',
    'Nedum Onuoha', 'Wayne Bridge', 'Frédéric Veseli', 'Tommy Smith', 'Bradley Robinson', 'David De Gea',
    'Patrice Evra', 'Phil Jones', 'Rio Ferdinand', 'Jonny Evans', 'Michael Owen', 'Anderson',
    'Dimitar Berbatov', 'Wayne Rooney', 'Ryan Giggs', 'Chris Smalling', 'Park Ji-Sung', 'Javier Hernández',
    'Nemanja Vidić', 'Michael Carrick', 'Nani', 'Ashley Young', 'Danny Welbeck', 'Fábio',
    'Rafael', 'Paul Scholes', 'Tom Cleverley', 'Darren Fletcher', 'Luis Antonio Valencia', 'Tomasz Kuszczak',
    'Ritchie De Laet', 'Anders Lindegaard', 'Reece Brown', 'Marnick Vermijl', 'Michael Keane', 'Tom Thorpe',
    'Ben Amos', 'Paul Pogba', 'Matthew James', 'Sean McGinty', 'Davide Petrucci', 'Ryan Tunnicliffe',
    'Will Keane', 'Sam Johnstone', 'Ezekiel Fryers', 'Larnell Cole', 'Jesse Lingard', 'Michele Fornasier',
    'Luke Giverin', 'Federico Macheda', 'Darron Gibson', 'Joshua King', 'Mame Biram Diouf', 'Ravel Morrison',
    'Oliver Norwood', 'Steve Harper', 'Fabricio Coloccini', 'Davide Santon', 'Yohan Cabaye', 'Danny Simpson',
    'Michael Williamson', 'Danny Guthrie', 'Papiss Demba Cissé', 'Hatem Ben Arfa', 'Peter Løvenkrands',
    'Mehdi Abeid', 'James Perch', 'Dan Gosling', 'Ryan Taylor', 'Jonás Gutiérrez', 'Demba Ba',
    'Leon Best', 'Sylvain Marveaux', 'Shola Ameobi', 'Cheik Ismael Tioté', 'Gabriel Obertan', 'Tim Krul',
    'Steven Taylor', 'Sammy Ameobi', 'Haris Vučkić', 'Nile Ranger', 'Shane Ferguson', 'Ryan Donaldson',
    'James Tavernier', 'Robert Elliot', 'Greg McDermott', 'Paul Dummett', 'Jeff Henderson', 'Tamás Kádár',
    'Fraser Forster', 'Joey Barton', 'Alan Smith', 'Ole Soderberg', 'John Ruddy', 'Russell Martin',
    'Adam Drury', 'Bradley Johnson', 'Steve Morison', 'Zak Whitbread', 'Andrew Crofts', 'James Vaughan',
    'Grant Holt', 'Simeon Jackson', 'Andrew Surman', 'Anthony Pilkington', 'Declan Rudd', 'Wesley Hoolahan',
    'David Fox', 'Elliott Bennett', 'Korey Smith', 'Simon Lappin', 'Leon Barnett', 'Aaron Wilbraham',
    'Elliott Ward', 'Marc Tierney', 'Jonathan Howson', 'Kyle Naughton', 'Daniel Ayala', 'Josh Dawkin',
    'Matt Ball', 'Jed Steer', 'Ryan Bennett', 'Tom Adeyemi', 'George Francomb', 'Ritchie De Laet',
    'Chris Martin', 'Paddy Kenny', 'Samba Diakité', 'Clint Hill', 'Shaun Derry', 'Fitz Hall',
    'Danny Gabbidon', 'Adel Taarabt', 'Kieron Dyer', 'DJ Campbell', 'Jay Bothroyd', 'Alejandro Faurlín',
    'Jamie Mackie', 'Armand Traoré', 'Ákos Buzsáky', 'Bruno Perone', 'Joey Barton', 'Luke Young',
    'Patrick Agyemang', 'Rob Hulse', 'Tommy Smith', 'Heiðar Helguson', 'Djibril Cissé', 'Radek Černý',
    'Hogan Ephraim', 'Brian Murphy', 'Peter Ramage', 'Danny Shittu', 'Michael Doughty', 'Troy Hewitt',
    'Shaun Wright-Phillips', 'Federico Macheda', 'Taye Taiwo', 'Anton Ferdinand', 'Bruno Andrade', 'Lee Cook',
    'Jordan Gibbons', 'Michael Harriman', 'Nedum Onuoha', 'Elvijs Putniņš', 'Aaron Lennox', 'Angelo Balanta',
    'Bobby Zamora', 'Kaspars Gorkšs', 'Antonio German', 'Jason Puncheon', 'Matthew Connolly', 'Petter Vaagan Moen',
    'Martin Rowlands', 'Bradley Orr', 'Asmir Begović', 'Robert Huth', 'Danny Collins', 'Glenn Whelan',
    'Kenwyne Jones', 'Ricardo Fuller', 'Mamady Sidibé', 'Marc Wilson', 'Salif Diao', 'Jermaine Pennant',
    'Ryan Shawcross', 'Dean Whitehead', 'Jonathan Walters', 'Matthew Upson', 'Rory Delap', 'Peter Crouch',
    'Matthew Etherington', 'Carlo Nash', 'Andy Wilkinson', 'Thomas Sørensen', 'Ryan Shotton', 'Diego Arismendi',
    'Cameron Jerome', 'Matthew Lund', 'Florent Cuvelier', 'Jonathan Woodgate', 'Wilson Palacios', 'Michael Clarkson',
    'Louis Moult', 'Craig Sinclair', 'Lucas Dawson', 'Andrew Davies', 'Danny Pugh', 'Tom Soares',
    'Michael Tonge', 'Ben Marshall', 'Danny Higginbotham', 'Craig Gordon', 'Phillip Bardsley', 'Wayne Bridge',
    'Michael Turner', 'Wes Brown', 'Lee Cattermole', 'Sebastian Larsson', 'Craig Gardner', 'Fraizer Campbell',
    'Connor Wickham', 'Kieran Richardson', 'Matthew Kilgallon', 'Jack Colback', 'David Vaughan', "John O'Shea",
    'Ji Dong-Won', 'David Meyler', 'Titus Bramble', 'Keiren Westwood', 'Oumare Tounkara', 'Simon Mignolet',
    'James McClean', 'Trevor Carson', 'Sotirios Kyrgiakos', 'Jordan Cook', 'Ahmed Elmohamady', 'Stéphane Sessegnon',
    'Ryan Noble', 'Adam Reed', 'Craig Lynch', 'Louis Laing', 'John Egan', 'Nicklas Bendtner',
    'Anton Ferdinand', 'Asamoah Gyan', 'Michel Vorm', 'Ashley Williams', 'Neil Taylor', 'Steven Caulker',
    'Alan Tate', 'Ferrie Bodde', 'Leon Britton', 'Andrea Orlandi', 'Danny Graham', 'Scott Sinclair',
    'Nathan Dyer', 'Stephen Dobbie', 'Wayne Routledge', 'Garry Monk', 'Josh McEachran', 'Leroy Lita',
    'Luke Moore', 'Fede Bessone', 'José Moreira', 'Angel Rangel', 'Darnel Situ', 'Joe Allen',
    'Gerhard Tremmel', 'Kemy Agustien', 'Mark Gower', 'Curtis Obeng', 'Ashley Richards', 'Ben Davies',
    'Joe Walsh', 'Casey Thomas', 'Jordan Smith', 'Gwion Edwards', 'Kurtis March', 'Rory Donnelly',
    'Gylfi Sigurðsson', 'Ryan Harley', 'Shaun MacDonald', 'David Cornell', 'Vangelis Moras', 'David Cotterill',
    'Craig Beattie', 'Thomas Butler', 'Lee Lucas', 'Daniel Alfei', 'Heurelho Gomes', 'Gareth Bale',
    'Younes Kaboul', 'Tom Huddlestone', 'Aaron Lennon', 'Scott Parker', 'Emmanuel Adebayor', 'Rafael van der Vaart',
    'William Gallas', 'Luka Modrić', 'Louis Saha', 'Giovani dos Santos', 'Jermain Defoe', 'Michael Dawson',
    'Niko Kranjčar', 'Carlo Cudicini', 'Brad Friedel', 'Danny Rose', 'Ledley King', 'Kyle Walker',
    'Jake Livermore', 'Sandro', 'Andros Townsend', 'Benoît Assou-Ekotto', 'Ryan Nelsen', 'David Button',
    'Bongani Khumalo', 'Harry Kane', 'David Bentley', 'Ryan Fredericks', 'Cristian Ceballos', 'Massimo Luongo',
    'Jake Nicholson', 'Adam Smith', 'Cameron Lancaster', 'Alex Pritchard', 'Jesse Waller-Lassen', 'Jordan Archer',
    'Dean Parrett', 'Kevin Stewart', 'Jack Barthram', 'Wilson Palacios', 'Alan Hutton', 'Jermaine Jenas',
    'Peter Crouch', 'Ryan Mason', 'Iago Falqué', 'John Bostock', 'Tom Carroll', 'Kudus Oyenuga',
    'Vedran Ćorluka', 'Sébastien Bassong', 'Steven Pienaar', 'Roman Pavlyuchenko', 'Simon Dawkins', 'Ben Foster',
    'Jonas Olsson', 'Somen Tchoyi', 'Liam Ridgewell', 'James Morrison', 'Keith Andrews', 'Shane Long',
    'Chris Brunt', 'Steven Reid', 'Márton Fülöp', 'Jerome Thomas', 'Scott\xa0Allan', 'Graham Dorrans',
    'Luke Daniels', 'Nicky Shorey', 'Youssouf Mulumbu', 'Zoltán Gera', 'Gareth McAuley', 'Peter Odemwingie',
    'Craig Dawson', 'James Hurst', 'Sam Mantom', 'Billy Jones', 'George Thorne', 'Gabriel Tamaş',
    'Simon Cox', 'Marc-Antoine Fortuné', 'Paul Scharner', 'Paul Downing', 'Romaine Sawyers', 'Adil Nabi',
    'Kayelden Brown', 'Saido Berahino', 'Kemar Roofe', 'Lateef Elford-Alliyu', 'Cameron Gayle', 'Ishmael Miller',
    'Marek Čech', 'Pablo', 'Chris Wood', 'Roman Bednář', 'Joe Mattock', 'Gonzalo Jara',
    "Liam O'Neil", 'Chris Kirkland', 'Steve Gohouri', 'Antolín Alcaraz', 'James McCarthy', 'Gary Caldwell',
    'Hendry Thomas', 'Albert Crusat', 'Ben Watson', 'Franco Di Santo', 'Shaun Maloney', 'Victor Moses',
    'Mike Pollitt', 'Lee Nicholls', 'Jordi Gómez', 'Callum McManaman', 'James McArthur', 'Emmerson Boyce',
    'Conor Sammon', 'David Jones', 'Hugo Rodallega', 'Mohamed Diamé', 'Jean Beausejour', 'Ronnie Stam',
    'Piscu', 'Ali Al-Habsi', 'Maynor Figueroa', 'Román Golobart', 'Patrick van Aanholt', 'Daniel Redmond',
    'Nouha Dicko', 'Jordan Mustoe', 'Wayne Hennessey', 'George Elokobi', 'David Edwards', 'Richard Stearman',
    'Jody Craddock', 'Michael Kightly', 'Karl Henry', 'Sylvan Ebanks-Blake', 'Steven Fletcher', 'Stephen Ward',
    'Stephen Hunt', 'Carl Ikeme', 'Roger Johnson', 'Emmanuel Frimpong', 'Christophe Berra', 'Matthew Jarvis',
    'Adam Hammill', 'Nenad Milijaš', 'Sébastien Bassong', 'Eggert Jónsson', 'Ronald Zubar', "Jamie O'Hara",
    'David Davis', 'Kevin Doyle', 'Dorus de Vries', 'Kevin Foley', 'Jake Cassidy', 'Anthony Forde',
    'Louis Harris', 'Michael Ihiekwe', 'Ashley Hemmings', 'Jack Price', 'James Spray', 'Jamie Reckord',
    'Nathaniel Mendez-Laing', 'Aaron McCarey', 'Johnny Gorman', 'Ethan Ebanks-Landell', 'Stefan Maierhofer',
    'Leigh Griffiths', 'Scott Malone', 'Sam Winnall', 'Sam Vokes', 'Adlène Guédioura', 'Andrew Keogh',
    'Matt Doherty', 'Brian McGroary',
]

In [None]:
import time

# Smoke test: look up only the first five players before committing to the full run.
df = pd.DataFrame(columns=['player_name', 'active', 'fyear', 'fclub'])

for i, player in enumerate(players[:5]):
  active, fyear, fclub = get_status(player)
  df.loc[i] = [player, active, fyear, fclub]  # one result row per player, in list order
  time.sleep(1)  # pause between Wikipedia requests

df

Successfully parsed infobox
Successfully retrieved status; Player still playing, currently playing for None
Successfully parsed infobox
Successfully retrieved status; Player not playing, last played for Marseille in 2015–2017
Successfully parsed infobox
Successfully retrieved status; Player not playing, last played for Montreal Impact in 2018–2019
Successfully parsed infobox
Successfully retrieved status; Player not playing, last played for Arsenal in 2011–2018
Successfully parsed infobox
Successfully retrieved status; Player not playing, last played for Vissel Kobe in 2019–2021


Unnamed: 0,player_name,active,fyear,fclub
0,Manuel Almunia,True,,
1,Abou Diaby,False,2015–2017,Marseille
2,Bacary Sagna,False,2018–2019,Montreal Impact
3,Per Mertesacker,False,2011–2018,Arsenal
4,Thomas Vermaelen,False,2019–2021,Vissel Kobe


# **Conclusion**
## this worked OK
* Data scraped for ~80% of players I searched for
* 150 / 877 (~20%) failed to retrieve useful data from Wikipedia

## so many players are still playing!!!
* According to my search ~40% of all players active in 2011 are still playing
and ~42% have retired

## Notable "still playings..."
* Papiss Cisse
* Asmir Begović
* Juan Mata
* Carlos Vela

## Bugs
* If a player played their whole career for a single team, I marked them as active (Ryan Giggs and Tony Hibbert)
* Players with "common names" failed to retrieve records, since it's likely the correct URL needed additional information to disambiguate it, e.g. David Davis (footballer), not David Davis (politician)


In [None]:
import time

# Full run: one Wikipedia lookup per entry in `players`, one result row each.
df = pd.DataFrame(columns=['player_name', 'active', 'fyear', 'fclub'])

for i, player in enumerate(players):
  print(f"progress: {i}/{len(players)}")  # i is zero-based, so the last line shows len - 1
  active, fyear, fclub = get_status(player)
  df.loc[i] = [player, active, fyear, fclub]
  time.sleep(1)  # pause between Wikipedia requests

df

progress: 0/877
getting states for Manuel Almunia
Successfully parsed infobox
Successfully retrieved status; Player still playing, currently playing for None
progress: 1/877
getting states for Abou Diaby
Successfully parsed infobox
Successfully retrieved status; Player not playing, last played for Marseille in 2015–2017
progress: 2/877
getting states for Bacary Sagna
Successfully parsed infobox
Successfully retrieved status; Player not playing, last played for Montreal Impact in 2018–2019
progress: 3/877
getting states for Per Mertesacker
Successfully parsed infobox
Successfully retrieved status; Player not playing, last played for Arsenal in 2011–2018
progress: 4/877
getting states for Thomas Vermaelen
Successfully parsed infobox
Successfully retrieved status; Player not playing, last played for Vissel Kobe in 2019–2021
progress: 5/877
getting states for Laurent Koscielny
Successfully parsed infobox
Successfully retrieved status; Player not playing, last played for Bordeaux in 2019–20

Unnamed: 0,player_name,active,fyear,fclub
0,Manuel Almunia,True,,
1,Abou Diaby,False,2015–2017,Marseille
2,Bacary Sagna,False,2018–2019,Montreal Impact
3,Per Mertesacker,False,2011–2018,Arsenal
4,Thomas Vermaelen,False,2019–2021,Vissel Kobe
...,...,...,...,...
872,Sam Vokes,True,,
873,Adlène Guédioura,True,,
874,Andrew Keogh,,,
875,Matt Doherty,,,
