In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import sys, getopt
import csv

In [None]:
class Summary:
  """Layout constants for the 'summary' match-log page."""

  # scrape() takes its column labels from the <th> cells whose index falls in
  # [START_HEADER, END_HEADER); N_COLUMNS is also the cell count a row must
  # have to be kept.
  START_HEADER = 9
  N_COLUMNS = 38
  END_HEADER = START_HEADER + N_COLUMNS

  # Names given, in order, to the scraped columns by scrape_all().
  # NOTE: 'Prog' and 'Att' each appear twice in this list.
  FIELDS_SUMMARY = [
      # match context (the Global.KEY columns other than 'Player')
      'Date', 'Day', 'Comp', 'Round', 'Venue', 'Result', 'Squad', 'Opponent',
      'Start', 'Pos', 'Min',
      # per-match statistics
      'Gls', 'Ast', 'PK', 'PKatt', 'Sh', 'SoT', 'CrdY', 'CrdR', 'Touches',
      'Press', 'Tkl', 'Int', 'Blocks',
      'xG', 'npxG', 'xA', 'SCA', 'GCA',
      'Cmp', 'Att', 'CmpPerc', 'Prog',
      'Carries', 'Prog', 'Succ', 'Att',
      # trailing link column, then the column scrape() appends last
      'Match Report', 'Player',
  ]

  # Statistic columns of this page that scrape_all() carries into the merge.
  FIELDS_MERGE_SUMMARY = [
      'Gls', 'PK', 'PKatt',
      'Sh', 'SoT',
      'xG', 'npxG',
  ]

class Passing:
  """Layout constants for the 'passing' match-log page."""

  # scrape() takes its column labels from the <th> cells whose index falls in
  # [START_HEADER, END_HEADER); N_COLUMNS is also the cell count a row must
  # have to be kept.
  START_HEADER = 8
  N_COLUMNS = 33
  END_HEADER = START_HEADER + N_COLUMNS

  # Names given, in order, to the scraped columns by scrape_all().
  FIELDS_PASSING = [
      # match context (the Global.KEY columns other than 'Player')
      'Date', 'Day', 'Comp', 'Round', 'Venue', 'Result', 'Squad', 'Opponent',
      'Start', 'Pos', 'Min',
      # per-match statistics
      'TotCmp', 'TotAtt', 'TotCmpPerc', 'TotDist', 'PrgDist',
      'ShCmp', 'ShAtt', 'ShCmpPerc',
      'MedCmp', 'MedAtt', 'MedCmpPerc',
      'LongCmp', 'LongAtt', 'LongCmpPerc',
      'Ast', 'xA', 'KP', '1/3', 'PPA', 'CrsPA', 'Prog',
      # trailing link column, then the column scrape() appends last
      'Match Report', 'Player',
  ]

class Pass_Types:
  """Layout constants for the 'passing_types' match-log page."""

  # scrape() takes its column labels from the <th> cells whose index falls in
  # [START_HEADER, END_HEADER); N_COLUMNS is also the cell count a row must
  # have to be kept.
  START_HEADER = 9
  N_COLUMNS = 37
  END_HEADER = START_HEADER + N_COLUMNS

  # Names given, in order, to the scraped columns by scrape_all().
  FIELDS_PASS_TYPES = [
      # match context (the Global.KEY columns other than 'Player')
      'Date', 'Day', 'Comp', 'Round', 'Venue', 'Result', 'Squad', 'Opponent',
      'Start', 'Pos', 'Min',
      # per-match statistics
      'Att', 'Live', 'Dead', 'FK', 'TB', 'Press', 'Sw', 'Crs',
      'CK', 'InCK', 'OutCK', 'StrCK',
      'GroundPass', 'LowPass', 'HighPass',
      'LeftPass', 'RightPass', 'HeadPass', 'TI', 'Other',
      'CmpPass', 'OffSide', 'Out', 'IntPass', 'BlockPass',
      # trailing link column, then the column scrape() appends last
      'Match Report', 'Player',
  ]

class GCA:
  """Layout constants for the 'gca' match-log page."""

  # scrape() takes its column labels from the <th> cells whose index falls in
  # [START_HEADER, END_HEADER); N_COLUMNS is also the cell count a row must
  # have to be kept.
  START_HEADER = 5
  N_COLUMNS = 26
  END_HEADER = START_HEADER + N_COLUMNS

  # Names given, in order, to the scraped columns by scrape_all().
  FIELDS_GCA = [
      # match context (the Global.KEY columns other than 'Player')
      'Date', 'Day', 'Comp', 'Round', 'Venue', 'Result', 'Squad', 'Opponent',
      'Start', 'Pos', 'Min',
      # per-match statistics: the SCA total and its six breakdown columns
      'SCA',
      'PassLiveSCA', 'PassDeadSCA', 'DribSCA', 'ShSCA', 'FldSCA', 'DefSCA',
      # per-match statistics: the GCA total and its six breakdown columns
      'GCA',
      'PassLiveGCA', 'PassDeadGCA', 'DribGCA', 'ShGCA', 'FldGCA', 'DefGCA',
      # trailing link column, then the column scrape() appends last
      'Match Report', 'Player',
  ]

class Defensive:
  """Layout constants for the 'defense' match-log page."""

  # scrape() takes its column labels from the <th> cells whose index falls in
  # [START_HEADER, END_HEADER); N_COLUMNS is also the cell count a row must
  # have to be kept.
  START_HEADER = 7
  N_COLUMNS = 35
  END_HEADER = START_HEADER + N_COLUMNS

  # Names given, in order, to the scraped columns by scrape_all().
  FIELDS_DEFENSIVE = [
      # match context (the Global.KEY columns other than 'Player')
      'Date', 'Day', 'Comp', 'Round', 'Venue', 'Result', 'Squad', 'Opponent',
      'Start', 'Pos', 'Min',
      # per-match statistics
      'Tkl', 'TklW', 'Def3rdTkl', 'Mid3rdTkl', 'Att3rdTkl',
      'TklDrib', 'TklDribAtt', 'TklDribPerc', 'TklDribPast',
      'PressAtt', 'PressSucc', 'PressPerc',
      'Def3rdPress', 'Mid3rdPress', 'Att3rdPress',
      'Blocks', 'ShBlock', 'ShSvBlock', 'PassBlock',
      'Int', 'Tkl+Int', 'Clr', 'Err',
      # trailing link column, then the column scrape() appends last
      'Match Report', 'Player',
  ]

class Possession:
  """Layout constants for the 'possession' match-log page."""

  # scrape() takes its column labels from the <th> cells whose index falls in
  # [START_HEADER, END_HEADER); N_COLUMNS is also the cell count a row must
  # have to be kept.
  START_HEADER = 7
  N_COLUMNS = 36
  END_HEADER = START_HEADER + N_COLUMNS

  # Names given, in order, to the scraped columns by scrape_all().
  FIELDS_POSSESSION = [
      # match context (the Global.KEY columns other than 'Player')
      'Date', 'Day', 'Comp', 'Round', 'Venue', 'Result', 'Squad', 'Opponent',
      'Start', 'Pos', 'Min',
      # per-match statistics
      'Touches', 'DefPenTouches', 'Def3rdTouches', 'Mid3rdTouches',
      'Att3rdTouches', 'AttPenTouches', 'LiveTouches',
      'SuccDrib', 'AttDrib', 'SuccDribPerc', 'NuPlDrib', 'Nutmegs',
      'Carries', 'TotDistCarries', 'PrgDistCarries', 'ProgCarries',
      '1/3Carries', 'CPA', 'MisCarries', 'DisCarries',
      'TargPass', 'RecPass', 'RecPassPerc', 'ProgPassRec',
      # trailing link column, then the column scrape() appends last
      'Match Report', 'Player',
  ]

class Miscellaneous:
  """Layout constants for the 'misc' match-log page."""

  # scrape() takes its column labels from the <th> cells whose index falls in
  # [START_HEADER, END_HEADER); N_COLUMNS is also the cell count a row must
  # have to be kept.
  START_HEADER = 5
  N_COLUMNS = 28
  END_HEADER = START_HEADER + N_COLUMNS

  # Names given, in order, to the scraped columns by scrape_all().
  FIELDS_MISC = [
      # match context (the Global.KEY columns other than 'Player')
      'Date', 'Day', 'Comp', 'Round', 'Venue', 'Result', 'Squad', 'Opponent',
      'Start', 'Pos', 'Min',
      # per-match statistics
      'CrdY', 'CrdR', '2CrdY',
      'FlsComm', 'FlsDrawn', 'Offsides',
      'Crs', 'Int', 'TklW',
      'PKwon', 'PKcon', 'OG', 'Recov',
      'AerialDuelsWon', 'AerialDuelsLost', 'AerialDuelsWinPerc',
      # trailing link column, then the column scrape() appends last
      'Match Report', 'Player',
  ]

  # Columns of this page that scrape_all() carries into the merge; 'Crs',
  # 'Int' and 'TklW' from FIELDS_MISC are not in this list.
  FIELDS_MERGE_MISC = [
      'CrdY', 'CrdR', '2CrdY',
      'FlsComm', 'FlsDrawn', 'Offsides',
      'PKwon', 'PKcon', 'OG', 'Recov',
      'AerialDuelsWon', 'AerialDuelsLost', 'AerialDuelsWinPerc',
      'Match Report',
  ]


class Global:
  """Scrape-wide configuration: merge key, page types, seasons and roster."""

  # Index of the first <tr> of a table that scrape() treats as a data row.
  START_DATA = 2

  # Columns used as the join key when scrape_all() merges the per-page frames.
  KEY = ['Player', 'Date', 'Day', 'Comp', 'Round', 'Venue', 'Result', 'Squad',
         'Opponent', 'Start', 'Pos', 'Min']

  # Page-type URL segments, in the order scrape_all() expects them.
  TYPES = ['summary', 'passing', 'passing_types', 'gca', 'defense',
           'possession', 'misc']

  # Page-type URL segment -> class holding that page's layout constants.
  DICT_TYPES = {'summary': Summary, 'passing': Passing,
                'passing_types': Pass_Types, 'gca': GCA,
                'defense': Defensive, 'possession': Possession,
                'misc': Miscellaneous}

  # Season URL segments that the driver loop iterates over for every player.
  SEASONS = ['2021-2022', '2020-2021', '2019-2020', '2018-2019', '2017-2018']

  # Player name -> base match-log URL. scrape_all() appends
  # '<season>/<type>/<Name-With-Hyphens>-Match-Logs' to the URL, and scrape()
  # rebuilds the 'Player' column from that last segment, so keys must not
  # carry stray whitespace (the 'Eden Hazard' key used to end in a space,
  # which leaked into both the URL slug and the 'Player' values).
  # Commented-out entries are kept as a roster to re-enable as needed.
  # NOTE(review): the '/es/jugadores/' entries point at the Spanish-language
  # pages; scrape_all() compares the 'Start' cell to 'Y'/'Y*', which
  # presumably only holds for the English pages - confirm before re-enabling.
  DICT_PLAYERS = {
      #'Karim Benzema': 'https://fbref.com/en/players/70d74ece/matchlogs/',
      #'Lionel Messi': 'https://fbref.com/en/players/d70ce98e/matchlogs/',
      #'Kylian Mbappe': 'https://fbref.com/en/players/42fd9c7f/matchlogs/',
      #'Memphis Depay': 'https://fbref.com/en/players/8f696594/matchlogs/',
      #'Duvan Zapata': 'https://fbref.com/en/players/d3de9af0/matchlogs/',
      #'Luis Muriel': 'https://fbref.com/en/players/eb2fe5b6/matchlogs/',
      #'Robert Lewandowski': 'https://fbref.com/en/players/8d78e732/matchlogs/',
      #'Gerard Moreno': 'https://fbref.com/en/players/81f0781e/matchlogs/',
      #'Cristiano Ronaldo': 'https://fbref.com/en/players/dea698d9/matchlogs/',
      #'Harry Kane': 'https://fbref.com/en/players/21a66f6a/matchlogs/',
      #'Paulo Dybala': 'https://fbref.com/en/players/e0921a4f/matchlogs/',
      #'Antoine Griezmann': 'https://fbref.com/en/players/df69b544/matchlogs/',
      #'Roberto Firmino': 'https://fbref.com/en/players/4c370d81/matchlogs/',
      #'Dimitri Payet': 'https://fbref.com/en/players/58ae47b2/matchlogs/',
      #'Iago Aspas': 'https://fbref.com/en/players/7dcf86f6/matchlogs/',
      #'Marco Reus': 'https://fbref.com/en/players/36a3ff67/matchlogs/',
      #'Kingsley Coman': 'https://fbref.com/en/players/042e8a49/matchlogs/',
      #'Angel Di Maria': 'https://fbref.com/en/players/19cda00b/matchlogs/',
      #'Leroy Sane': 'https://fbref.com/en/players/2b114be3/matchlogs/',
      #'Neymar': 'https://fbref.com/en/players/69384e5d/matchlogs/',
      #'Riyad Mahrez': 'https://fbref.com/en/players/892d5bb1/matchlogs/',
      #'Nabil Fekir': 'https://fbref.com/en/players/bece776f/matchlogs/',
      #'Serge Gnabry': 'https://fbref.com/en/players/88e357ef/matchlogs/',
      #'Erik Lamela': 'https://fbref.com/en/players/abe66106/matchlogs/',
      #'Nicolas Pepe': 'https://fbref.com/en/players/57e3f0c7/matchlogs/',
      #'Christopher Nkunku': 'https://fbref.com/en/players/7c56da38/matchlogs/',
      #'Phil Foden': 'https://fbref.com/en/players/ed1e53f3/matchlogs/',
      #'Roberto Firmino': 'https://fbref.com/en/players/4c370d81/matchlogs/',
      #'Dimitri Payet': 'https://fbref.com/en/players/58ae47b2/matchlogs/',
      #'Angel Correa': 'https://fbref.com/en/players/01eb744d/matchlogs/',
      #'Federico Chiesa': 'https://fbref.com/en/players/b0f7e36c/matchlogs/',
      #'Sadio Mane': 'https://fbref.com/en/players/c691bfe2/matchlogs/',
      #'Wissam Ben Yedder': 'https://fbref.com/en/players/942b4f90/matchlogs/',
      #'Breel Embolo': 'https://fbref.com/en/players/0b4f388a/matchlogs/',
      #'Alexandre Lacazette': 'https://fbref.com/en/players/9dbb75ca/matchlogs/',
      #'Alvaro Morata': 'https://fbref.com/en/players/129af0db/matchlogs/',
      #'Gabriel Jesus': 'https://fbref.com/en/players/b66315ae/matchlogs/',
      #'Ciro Immobile': 'https://fbref.com/en/players/4431aed2/matchlogs/',
      #'Romelu Lukaku': 'https://fbref.com/en/players/5eae500a/matchlogs/',
      #'Edin Dzeko': 'https://fbref.com/en/players/3bb7f478/matchlogs/',
      #'Andre Silva': 'https://fbref.com/en/players/3effaa34/matchlogs/',
      #'Timo Werner':'https://fbref.com/en/players/49fe9070/matchlogs/',
      #'Patrik Schick': 'https://fbref.com/en/players/5d4f7d61/matchlogs/',
      #'Andy Delort': 'https://fbref.com/es/jugadores/d33c706e/matchlogs/',
      #'Luis Suarez': 'https://fbref.com/es/jugadores/a6154613/matchlogs/',
      #'Carlos Bacca': 'https://fbref.com/es/jugadores/09a9e921/matchlogs/',
      #'Rodrigo': 'https://fbref.com/es/jugadores/1fb1c435/matchlogs/',
      #'Gareth Bale': 'https://fbref.com/es/jugadores/a58bb1e1/matchlogs/',
      #'Antonio Sanabria': 'https://fbref.com/es/jugadores/0a447501/matchlogs/',
      #'Kike': 'https://fbref.com/es/jugadores/e897d8ba/matchlogs/',
      #'Inaki Williams': 'https://fbref.com/es/jugadores/6a99e0b1/matchlogs/',
      #'Jamie Vardy': 'https://fbref.com/es/jugadores/45963054/matchlogs/',
      #'Paco Alcacer': 'https://fbref.com/es/jugadores/a7a9d95a/matchlogs/',
      #'Edinson Cavani': 'https://fbref.com/es/jugadores/527f063d/matchlogs/',
      #'Willian Jose': 'https://fbref.com/es/jugadores/d87e2cae/matchlogs/',
      #'Joao Pedro': 'https://fbref.com/es/jugadores/81255c03/matchlogs/',
      #'Kevin Lasagna': 'https://fbref.com/es/jugadores/09538fdb/matchlogs/',
      #'Fabio Quagliarella': 'https://fbref.com/es/jugadores/ee4f2f3b/matchlogs/',
      #'Lucas Perez': 'https://fbref.com/es/jugadores/a300ac7e/matchlogs/',
      #'Manolo Gabbiadini': 'https://fbref.com/es/jugadores/8f866fe8/matchlogs/',
      #'Roger Marti': 'https://fbref.com/es/jugadores/0ae4e09a/matchlogs/',
      #'Danny Ings': 'https://fbref.com/es/jugadores/07802f7f/matchlogs/',
      #'Ruben Sobrino': 'https://fbref.com/es/jugadores/19b776e9/matchlogs/',
      #'Raul Garcia': 'https://fbref.com/es/jugadores/b418dbd4/matchlogs/',
      #'Richarlison': 'https://fbref.com/es/jugadores/fa031b34/matchlogs/',
      #'Maxi Gomez': 'https://fbref.com/es/jugadores/4c2e9442/matchlogs/',
      #'Jorge Molina': 'https://fbref.com/es/jugadores/43f71e77/matchlogs/',
      #'Joselu': 'https://fbref.com/es/jugadores/6265208f/matchlogs/',
      #'Mattia Destro': 'https://fbref.com/es/jugadores/d7d32194/matchlogs/',
      #'Youssef En-Nesyri': 'https://fbref.com/es/jugadores/04e17fd5/matchlogs/',
      #'Olivier Giroud': 'https://fbref.com/es/jugadores/16ceb862/matchlogs/',
      #'Santi Mina': 'https://fbref.com/es/jugadores/0b90bb97/matchlogs/',
      #'Chris Wood': 'https://fbref.com/es/jugadores/4e9a0555/matchlogs/',
      #'Enes Unal': 'https://fbref.com/es/jugadores/f8eca1b6/matchlogs/',
      #'Leonardo Pavoletti': 'https://fbref.com/es/jugadores/d37b0350/matchlogs/',
      #'Giovanni Simeone': 'https://fbref.com/es/jugadores/343c0d52/matchlogs/',
      #'Christian Benteke': 'https://fbref.com/es/jugadores/ab070c55/matchlogs/',
      #'Dominic Calvert Lewin': 'https://fbref.com/es/jugadores/59e6e5bf/matchlogs/',
      #'Callum Wilson': 'https://fbref.com/es/jugadores/c596fcb0/matchlogs/',
      #'Keita Balde': 'https://fbref.com/es/jugadores/509a4ccb/matchlogs/',
      #'Edinson Cavani': 'https://fbref.com/es/jugadores/527f063d/matchlogs/',
      #'Mauro Icardi': 'https://fbref.com/es/jugadores/43b78598/matchlogs/',
      #'Arkadiusz Milik': 'https://fbref.com/es/jugadores/85613cf0/matchlogs/',
      #'Sehrou Guirassy': 'https://fbref.com/es/jugadores/923f4dda/matchlogs/',
      #'Angel Rodriguez': 'https://fbref.com/es/jugadores/8cfc2f69/matchlogs/',
      #'Munir-El-Haddadi': 'https://fbref.com/es/jugadores/8696bc90/matchlogs/',
      #'Mikel Oyarzabal': 'https://fbref.com/es/jugadores/8c3c640c/matchlogs/',
      #'Alex Berenguer': 'https://fbref.com/es/jugadores/dc1c2fce/matchlogs/',
      #'Henrikh Mkhitaryan': 'https://fbref.com/es/jugadores/dd0daf32/matchlogs/',
      #'Ivan Perisic': 'https://fbref.com/es/jugadores/6fe90922/matchlogs/',
      #'Pedro': 'https://fbref.com/es/jugadores/3ca7254a/matchlogs/',
      #'Raheem Sterling': 'https://fbref.com/es/jugadores/b400bde0/matchlogs/',
      #'Federico Bernardeschi': 'https://fbref.com/es/jugadores/ee93c1a9/matchlogs/',
      #'Antonio Candreva': 'https://fbref.com/es/jugadores/356c9002/matchlogs/',
      #'Hakan Calhanoglu': 'https://fbref.com/es/jugadores/cd0fa27b/matchlogs/',
      #'Jadon Sancho': 'https://fbref.com/es/jugadores/dbf053da/matchlogs/',
      #'Lorenzo Insigne': 'https://fbref.com/es/jugadores/2f557579/matchlogs/',
      #'Adnan Januzaj': 'https://fbref.com/es/jugadores/4737cebe/matchlogs/',
      #'Nathan Redmond': 'https://fbref.com/es/jugadores/ab651565/matchlogs/',
      #'Vincenzo Grifo': 'https://fbref.com/es/jugadores/54e4866f/matchlogs/',
      #'Iker Muniain': 'https://fbref.com/es/jugadores/c05dfb74/matchlogs/',
      #'Jordan Ayew': 'https://fbref.com/es/jugadores/da052c14/matchlogs/',
      #'Lucas Ocampos': 'https://fbref.com/es/jugadores/a08b974a/matchlogs/',
      #'Ludovic Blas': 'https://fbref.com/es/jugadores/6191093d/matchlogs/',
      #'Wilfried Zaha': 'https://fbref.com/es/jugadores/b2bc3b1f/matchlogs/',
      #'Lucas Moura': 'https://fbref.com/es/jugadores/2b622f01/matchlogs/',
      #'Papu Gomez': 'https://fbref.com/es/jugadores/6e4df551/matchlogs/',
      #'Suso': 'https://fbref.com/es/jugadores/4e219ad2/matchlogs/',
      #'Dele Alli': 'https://fbref.com/es/jugadores/cea4ee8f/matchlogs/',
      #'Xherdan Shaqiri': 'https://fbref.com/es/jugadores/6421ec64/matchlogs/',
      #'Son Heung-min': 'https://fbref.com/es/jugadores/92e7e919/matchlogs/',
      #'Mohamed Salah': 'https://fbref.com/es/jugadores/e342ad68/matchlogs/',
      #'Marcus Rashford': 'https://fbref.com/es/jugadores/a1d5bd30/matchlogs/',
      #'Ferran Torres': 'https://fbref.com/es/jugadores/9e1035f8/matchlogs/',
      #'Juanmi': 'https://fbref.com/es/jugadores/84399660/matchlogs/',
      #'Jose Luis Morales': 'https://fbref.com/es/jugadores/4a478107/matchlogs/',
      #'Kevin Volland': 'https://fbref.com/es/jugadores/64f69877/matchlogs/',
      #'Portu': 'https://fbref.com/es/jugadores/1bda5842/matchlogs/',
      #'Felipe Caicedo': 'https://fbref.com/es/jugadores/93b891d1/matchlogs/',
      #'Kelechi Iheanacho': 'https://fbref.com/es/jugadores/c92e1a31/matchlogs/',
      #'Simone Zaza': 'https://fbref.com/es/jugadores/9592289a/matchlogs/',
      #'Pierre-Emerick Aubameyang': 'https://fbref.com/es/jugadores/d5dd5f1f/matchlogs/',
      #'Borja Mayoral': 'https://fbref.com/es/jugadores/64e8ed6d/matchlogs/',
      #'Joaquin Correa': 'https://fbref.com/es/jugadores/45b9b619/matchlogs/',
      #'Andrej Kramaric': 'https://fbref.com/es/jugadores/603cb947/matchlogs/',
      'Sandro Ramirez': 'https://fbref.com/en/players/833fb62e/matchlogs/',
      'Martin Braithwaite': 'https://fbref.com/en/players/fd771f95/matchlogs/',
      'Mariano': 'https://fbref.com/en/players/5c4dc0ff/matchlogs/',
      'Stevan Jovetic': 'https://fbref.com/en/players/f36c432f/matchlogs/',
      'Eden Hazard': 'https://fbref.com/en/players/a39bb753/matchlogs/',
      'Anthony Martial': 'https://fbref.com/en/players/8b788c01/matchlogs/'
  }
# 'Mason Mount': 'https://fbref.com/en/players/9674002f/matchlogs/',
# 'Mario Pasalic': 'https://fbref.com/en/players/e599253a/matchlogs/',
# 'Vinicius Junior': 'https://fbref.com/en/players/7111d552/matchlogs/',
# 'Hirving Lozano': 'https://fbref.com/en/players/a69e4806/matchlogs/',
# 'Zlatan Ibrahimovic': 'https://fbref.com/en/players/4cde5509/matchlogs/',
# 'Erling Haaland': 'https://fbref.com/en/players/1f44ac21/matchlogs/',
# 'Lautaro Martinez':'https://fbref.com/en/players/f7036e1c/matchlogs/',
# 'Diogo Jota':'https://fbref.com/en/players/178ae8f8/matchlogs/',
# 'Tammy Abraham':'https://fbref.com/en/players/f586779e/matchlogs/',
# 'Amine Gouiri': 'https://fbref.com/en/players/aad56ca3/matchlogs/',
# 'Max Kruse': 'https://fbref.com/en/players/f9446dcd/matchlogs/',
# 'Yannick Carrasco': 'https://fbref.com/es/jugadores/de39485a/matchlogs/',
# 'Willian': 'https://fbref.com/es/jugadores/8b9ebd03/matchlogs/',
# 'Radamel Falcao': 'https://fbref.com/en/players/66116290/matchlogs/',


In [None]:
## Scrape one type
def scrape(url, page_type):
  """Download one match-log page and return its stats table as a DataFrame.

  Args:
    url: Address of the page. Its last path segment
      ('<Name-With-Hyphens>-Match-Logs') is also used to fill the 'Player'
      column; every hyphen becomes a space, so a hyphenated name such as
      'Son Heung-min' comes back as 'Son Heung min'.
    page_type: Layout class (Summary, Passing, ...) providing START_HEADER,
      END_HEADER and N_COLUMNS for this page.

  Returns:
    A DataFrame whose columns are the header texts found at <th> positions
    [START_HEADER, END_HEADER) plus a trailing 'Player' column. It holds one
    row for every table row whose date cell plus <td> cells number exactly
    N_COLUMNS; rows of any other width are skipped.

  Raises:
    requests.HTTPError: If the server answers with an error status.
    ValueError: If the expected stats table is not present in the page.
  """
  # Without a timeout a stalled connection would block the whole run.
  response = requests.get(url, timeout=30)
  response.raise_for_status()
  soup = BeautifulSoup(response.text, 'html.parser')
  table = soup.find('table', class_='min_width sortable stats_table min_width shade_zero')
  if table is None or table.tbody is None:
    raise ValueError('No match-log table found at {}'.format(url))
  n_games = len(table.tbody.find_all('tr'))

  header_cells = table.find_all('th')
  columns = [cell.text for cell in
             header_cells[page_type.START_HEADER:page_type.END_HEADER]]

  first = Global.START_DATA
  rows = []
  for tr in table.find_all('tr')[first:first + n_games]:
    # Take the date from the row's own <th> so that a row with extra or
    # missing <th> cells cannot shift the dates of the rows after it.
    date_cell = tr.find('th')
    row = [date_cell.text if date_cell is not None else '']
    row.extend(cell.text for cell in tr.find_all('td'))
    if len(row) == page_type.N_COLUMNS:
      rows.append(row)

  # Build the frame once; DataFrame.append was removed in pandas 2.0 and
  # copied the whole frame on every call.
  df = pd.DataFrame(rows, columns=columns)
  df['Player'] = url.split('/')[-1].replace('-Match-Logs', '').replace('-',' ')
  return df

In [None]:
def get_result(result):
  """Split a 'Result' cell into a two-item list [outcome, score].

  A cell made of exactly two space-separated parts is returned as those two
  parts. Anything else is cut after its first character, and all spaces are
  removed from the remainder (so 'W2–1' and 'D 1–1 (4–3)' both work).

  Args:
    result: Text of the 'Result' cell.

  Returns:
    [outcome, score] as strings; ['', ''] when the cell is empty.
  """
  if not result:
    # An empty cell has no first character to slice off.
    return ['', '']
  parts = result.split(' ')
  if len(parts) == 2:
    return parts
  return [result[0], result[1:].replace(' ','')]

In [None]:
## Scrape all types
def _apply_field_names(frame, names):
  """Label the columns of `frame` with `names`, matching strictly by position."""
  labels = list(frame.columns)
  count = min(len(labels), len(names))
  labels[:count] = names[:count]
  frame.columns = labels
  return frame


def _strip_country_code(name):
  """Remove a leading all-lower-case token (e.g. 'fr ', 'eng ') from a name."""
  code, _, rest = name.partition(' ')
  if rest and code.islower():
    return rest.lstrip()
  return name


def scrape_all(player, season, global_url, types):
  """Scrape every match-log page of one player-season and merge the results.

  Args:
    player: Player name; spaces are replaced by hyphens to build the URL.
    season: Season URL segment, e.g. '2021-2022'.
    global_url: Base match-log URL of the player, ending in '/'.
    types: Page-type URL segments to scrape. All seven keys of
      Global.DICT_TYPES must be present.

  Returns:
    One DataFrame with a row per match, joined on Global.KEY across the seven
    pages. 'Start' is recoded to 1/0, 'Result' holds only the outcome, and
    'Squad_Goals' / 'Opponent_Goals' hold the two sides of the score as
    strings. No 'Match Report' column is kept.

  Raises:
    ValueError: If `types` lacks one of the required page types.
  """
  field_names = {
      'summary': Summary.FIELDS_SUMMARY,
      'passing': Passing.FIELDS_PASSING,
      'passing_types': Pass_Types.FIELDS_PASS_TYPES,
      'gca': GCA.FIELDS_GCA,
      'defense': Defensive.FIELDS_DEFENSIVE,
      'possession': Possession.FIELDS_POSSESSION,
      'misc': Miscellaneous.FIELDS_MISC,
  }
  missing = [typ for typ in field_names if typ not in types]
  if missing:
    raise ValueError('types is missing required page types: {}'.format(missing))

  slug = player.replace(' ','-')
  frames = {}
  for typ, names in field_names.items():
    url = global_url + season + '/' + typ + '/' + slug + '-Match-Logs'
    # Names are applied by position: the scraped header labels repeat
    # ('Cmp', 'Att', 'Prog', ...), so a label -> name mapping would give
    # every repeated column the same (last) name.
    frames[typ] = _apply_field_names(scrape(url, Global.DICT_TYPES[typ]), names)

  key = Global.KEY
  df = frames['summary'][key + Summary.FIELDS_MERGE_SUMMARY]
  # 'Match Report' is discarded in the end anyway; dropping it before merging
  # avoids the repeated '_x'/'_y' suffix collisions it would otherwise cause.
  for typ in ('passing', 'passing_types', 'gca', 'defense', 'possession'):
    page = frames[typ].drop(columns=['Match Report'], errors='ignore')
    df = df.merge(page, on=key)
  misc_fields = [name for name in Miscellaneous.FIELDS_MERGE_MISC
                 if name != 'Match Report']
  df = df.merge(frames['misc'][key + misc_fields], on=key)
  df = df.fillna(0)

  df['Start'] = df['Start'].isin(['Y', 'Y*']).astype(int)
  # Presumably team cells can look like 'fr France' or 'eng England'; remove
  # the whole prefix token rather than a fixed two characters.
  df['Opponent'] = df['Opponent'].apply(_strip_country_code)
  df['Squad'] = df['Squad'].apply(_strip_country_code)

  parsed = [get_result(cell) for cell in df['Result']]
  scores = [score.split('–') for _, score in parsed]
  df['Result'] = [outcome for outcome, _ in parsed]
  df['Squad_Goals'] = [parts[0] for parts in scores]
  # A score without a dash yields an empty opponent value instead of raising.
  df['Opponent_Goals'] = [parts[1] if len(parts) > 1 else '' for parts in scores]
  return df

In [None]:
# Scrape every configured player for every configured season, stack the
# results into one table and write it to 'Trial_3.csv'.
season_frames = []
for player, base_url in Global.DICT_PLAYERS.items():
  print(player)
  for season in Global.SEASONS:
    season_frames.append(scrape_all(player, season, base_url, Global.TYPES))
# Concatenate once at the end; concatenating inside the loop re-copies all
# previously collected rows on every iteration.
df = pd.concat(season_frames, ignore_index=True) if season_frames else pd.DataFrame()
df = df.fillna(0)
df.to_csv('Trial_3.csv')
df

Sandro Ramirez


  validate=validate,


Martin Braithwaite
Mariano
Stevan Jovetic
Eden Hazard 
Anthony Martial


Unnamed: 0,Player,Date,Day,Comp,Round,Venue,Result,Squad,Opponent,Start,Pos,Min,Gls,PK,PKatt,Sh,SoT,xG,npxG,LongCmp,LongAtt,LongCmpPerc,TotDist,PrgDist,LongCmp.1,LongAtt.1,LongCmpPerc.1,LongCmp.2,LongAtt.2,LongCmpPerc.2,LongCmp.3,LongAtt.3,LongCmpPerc.3,Ast,xA,KP,1/3,PPA,CrsPA,Prog,...,Err,Touches,DefPenTouches,Def3rdTouches,Mid3rdTouches,Att3rdTouches,AttPenTouches,LiveTouches,SuccDrib,AttDrib,SuccDribPerc,NuPlDrib,Nutmegs,Carries,TotDistCarries,PrgDistCarries,ProgPassRec,1/3Carries,CPA,MisCarries,DisCarries,TargPass,RecPass,RecPassPerc,ProgPassRec.1,CrdY,CrdR,2CrdY,FlsComm,FlsDrawn,Offsides,PKwon,PKcon,OG,Recov,AerialDuelsWon,AerialDuelsLost,AerialDuelsWinPerc,Squad_Goals,Opponent_Goals
0,Sandro Ramirez,2021-08-13,Fri,La Liga,Matchweek 1,Away,L,Getafe,Valencia,1,"FW,AM",66,0,0,0,1,0,0.0,0.0,18,23,78.3,250,87,13,15,86.7,4,6,66.7,1,2,50.0,0,0.1,1,2,2,1,1,...,0,31,0,1,13,17,5,29,1,1,100.0,1,0,24,106,57,1,0,1,2,0,35,26,74.3,9,0,0,0,2,4,1,0,0,0,3,0,0,,0,1
1,Sandro Ramirez,2021-08-23,Mon,La Liga,Matchweek 2,Home,L,Getafe,Sevilla,1,FW,73,0,0,0,2,0,0.1,0.1,10,18,55.6,126,16,4,7,57.1,5,8,62.5,0,0,,0,0.0,0,0,0,0,0,...,0,29,0,0,9,20,5,29,0,1,0.0,0,0,23,69,20,1,0,0,5,2,32,25,78.1,12,0,0,0,1,0,0,0,0,0,3,2,2,50.0,0,1
2,Sandro Ramirez,2021-08-29,Sun,La Liga,Matchweek 3,Away,L,Getafe,Barcelona,1,FW,60,1,0,0,1,1,0.1,0.1,16,20,80.0,270,42,9,10,90.0,4,6,66.7,2,3,66.7,0,0.0,0,1,0,0,1,...,0,26,0,1,16,10,2,23,1,2,50.0,1,0,15,72,27,1,0,0,1,0,29,21,72.4,4,0,0,0,2,1,0,0,0,0,0,2,2,50.0,1,2
3,Sandro Ramirez,2021-09-13,Mon,La Liga,Matchweek 4,Home,L,Getafe,Elche,0,FW,31,0,0,0,1,0,0.0,0.0,12,15,80.0,204,62,8,8,100.0,1,2,50.0,3,4,75.0,0,0.1,1,2,0,0,1,...,0,23,0,1,7,17,2,19,0,0,,0,0,17,102,37,4,1,1,3,0,22,16,72.7,5,1,0,0,1,2,1,0,0,0,0,0,0,,0,1
4,Sandro Ramirez,2021-09-26,Sun,La Liga,Matchweek 7,Away,L,Getafe,Betis,0,FW,45,0,0,0,3,2,0.2,0.2,12,19,63.2,204,16,6,7,85.7,4,7,57.1,2,4,50.0,0,0.0,0,0,0,0,0,...,0,28,0,0,14,15,2,23,4,6,66.7,4,0,22,157,82,3,1,1,1,0,25,20,80.0,6,1,0,0,2,1,0,0,0,0,1,0,3,0.0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
920,Anthony Martial,2018-04-15,Sun,Premier League,Matchweek 34,Home,L,Manchester Utd,West Brom,0,"LB,LW",33,0,0,0,1,0,0.1,0.1,19,29,65.5,358,114,7,10,70.0,11,12,91.7,1,4,25.0,0,0.0,0,3,0,0,4,...,0,34,0,0,19,21,2,31,2,2,100.0,2,0,28,145,97,7,3,0,0,2,30,26,86.7,2,0,0,0,0,2,0,0,0,0,5,0,1,0.0,0,1
921,Anthony Martial,2018-04-18,Wed,Premier League,Matchweek 35,Away,W,Manchester Utd,Bournemouth,1,LW,90,0,0,0,2,0,0.1,0.1,26,38,68.4,450,93,12,18,66.7,11,14,78.6,3,4,75.0,0,0.1,2,5,0,0,2,...,0,47,1,7,23,22,13,47,3,4,75.0,3,1,42,309,215,13,4,6,4,1,57,38,66.7,12,0,0,0,2,0,1,0,0,0,7,1,2,33.3,2,0
922,Anthony Martial,2018-04-29,Sun,Premier League,Matchweek 36,Home,W,Manchester Utd,Arsenal,0,"LW,CM",27,0,0,0,1,0,0.0,0.0,13,20,65.0,233,47,6,7,85.7,6,9,66.7,1,3,33.3,0,0.3,1,0,1,1,1,...,0,25,0,1,8,18,4,25,2,5,40.0,2,0,25,145,106,7,1,2,1,0,24,21,87.5,6,0,0,0,0,1,0,0,0,0,3,0,0,,2,1
923,Anthony Martial,2018-05-04,Fri,Premier League,Matchweek 37,Away,L,Manchester Utd,Brighton,1,LW,90,0,0,0,3,0,0.1,0.1,48,55,87.3,643,192,34,37,91.9,11,12,91.7,2,4,50.0,0,0.1,2,3,0,0,1,...,0,69,1,7,24,42,8,67,1,5,20.0,2,0,56,231,141,11,3,1,3,3,73,59,80.8,6,0,0,0,0,0,0,0,0,0,5,1,1,50.0,0,1


In [None]:
# Load 'Version0.csv' into `df` (rebinding the name used by the scraping cell)
# and display it as the cell output.
df = pd.read_csv('Version0.csv')
df

Unnamed: 0,Player,Date,Day,Comp,Round,Venue,Result,Squad,Opponent,Start,Pos,Min,Gls,PK,PKatt,Sh,SoT,xG,npxG,LongCmp,LongAtt,LongCmpPerc,TotDist,PrgDist,LongCmp.1,LongAtt.1,LongCmpPerc.1,LongCmp.2,LongAtt.2,LongCmpPerc.2,LongCmp.3,LongAtt.3,LongCmpPerc.3,Ast,xA,KP,1/3,PPA,CrsPA,Prog,...,Err,Touches,DefPenTouches,Def3rdTouches,Mid3rdTouches,Att3rdTouches,AttPenTouches,LiveTouches,SuccDrib,AttDrib,SuccDribPerc,NuPlDrib,Nutmegs,Carries,TotDistCarries,PrgDistCarries,ProgPassRec,1/3Carries,CPA,MisCarries,DisCarries,TargPass,RecPass,RecPassPerc,ProgPassRec.1,CrdY,CrdR,2CrdY,FlsComm,FlsDrawn,Offsides,PKwon,PKcon,OG,Recov,AerialDuelsWon,AerialDuelsLost,AerialDuelsWinPerc,Squad_Goals,Opponent_Goals
0,Karim Benzema,2021-08-14,Sat,La Liga,Matchweek 1,Away,W,Real Madrid,Alavés,1,FW,88.0,2,0,0,6.0,3.0,0.7,0.7,39.0,45.0,86.7,720.0,130.0,23.0,25.0,92.0,7.0,8.0,87.5,9.0,9.0,100.0,0.0,0.0,0.0,2.0,1.0,0.0,4.0,...,0.0,55.0,0.0,3.0,25.0,30.0,10.0,53.0,2.0,2.0,100.0,2.0,0.0,36.0,194.0,78.0,5.0,2.0,2.0,0.0,1.0,58.0,48.0,82.8,10.0,0,0,0,0.0,1.0,1.0,0.0,0.0,0.0,3.0,1.0,0.0,100.0,4,1
1,Karim Benzema,2021-08-22,Sun,La Liga,Matchweek 2,Away,D,Real Madrid,Levante,1,FW,90.0,0,0,0,1.0,0.0,0.1,0.1,27.0,32.0,84.4,365.0,67.0,19.0,23.0,82.6,8.0,8.0,100.0,0.0,0.0,,2.0,0.2,2.0,0.0,3.0,0.0,3.0,...,0.0,39.0,0.0,0.0,17.0,23.0,8.0,35.0,0.0,0.0,,0.0,0.0,25.0,126.0,44.0,3.0,1.0,0.0,1.0,2.0,44.0,29.0,65.9,5.0,0,0,0,1.0,1.0,4.0,0.0,0.0,0.0,5.0,0.0,0.0,,3,3
2,Karim Benzema,2021-08-28,Sat,La Liga,Matchweek 3,Away,W,Real Madrid,Betis,1,FW,90.0,0,0,0,2.0,0.0,0.1,0.1,31.0,37.0,83.8,389.0,92.0,18.0,20.0,90.0,9.0,11.0,81.8,1.0,2.0,50.0,1.0,0.3,3.0,0.0,4.0,1.0,5.0,...,0.0,48.0,0.0,4.0,21.0,27.0,4.0,47.0,2.0,2.0,100.0,2.0,1.0,30.0,112.0,74.0,5.0,2.0,2.0,5.0,0.0,48.0,37.0,77.1,3.0,0,0,0,0.0,1.0,3.0,0.0,0.0,0.0,6.0,1.0,2.0,33.3,1,0
3,Karim Benzema,2021-09-01,Wed,WCQ,First round,Home,D,France,Bosnia and Herzegovina,1,FW,75.0,0,0,0,2.0,1.0,,,,,,,,,,,,,,,,,0.0,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0.0,1.0,1.0,,,0.0,,,,,1,1
4,Karim Benzema,2021-09-04,Sat,WCQ,First round,Away,D,France,Ukraine,0,,27.0,0,0,0,0.0,0.0,,,,,,,,,,,,,,,,,0.0,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0.0,0.0,1.0,,,0.0,,,,,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24364,Anthony Martial,2018-04-15,Sun,Premier League,Matchweek 34,Home,L,Manchester Utd,West Brom,0,"LB,LW",33.0,0,0,0,1.0,0.0,0.1,0.1,19.0,29.0,65.5,358.0,114.0,7.0,10.0,70.0,11.0,12.0,91.7,1.0,4.0,25.0,0.0,0.0,0.0,3.0,0.0,0.0,4.0,...,0.0,34.0,0.0,0.0,19.0,21.0,2.0,31.0,2.0,2.0,100.0,2.0,0.0,28.0,145.0,97.0,7.0,3.0,0.0,0.0,2.0,30.0,26.0,86.7,2.0,0,0,0,0.0,2.0,0.0,0.0,0.0,0.0,5.0,0.0,1.0,0.0,0,1
24365,Anthony Martial,2018-04-18,Wed,Premier League,Matchweek 35,Away,W,Manchester Utd,Bournemouth,1,LW,90.0,0,0,0,2.0,0.0,0.1,0.1,26.0,38.0,68.4,450.0,93.0,12.0,18.0,66.7,11.0,14.0,78.6,3.0,4.0,75.0,0.0,0.1,2.0,5.0,0.0,0.0,2.0,...,0.0,47.0,1.0,7.0,23.0,22.0,13.0,47.0,3.0,4.0,75.0,3.0,1.0,42.0,309.0,215.0,13.0,4.0,6.0,4.0,1.0,57.0,38.0,66.7,12.0,0,0,0,2.0,0.0,1.0,0.0,0.0,0.0,7.0,1.0,2.0,33.3,2,0
24366,Anthony Martial,2018-04-29,Sun,Premier League,Matchweek 36,Home,W,Manchester Utd,Arsenal,0,"LW,CM",27.0,0,0,0,1.0,0.0,0.0,0.0,13.0,20.0,65.0,233.0,47.0,6.0,7.0,85.7,6.0,9.0,66.7,1.0,3.0,33.3,0.0,0.3,1.0,0.0,1.0,1.0,1.0,...,0.0,25.0,0.0,1.0,8.0,18.0,4.0,25.0,2.0,5.0,40.0,2.0,0.0,25.0,145.0,106.0,7.0,1.0,2.0,1.0,0.0,24.0,21.0,87.5,6.0,0,0,0,0.0,1.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,,2,1
24367,Anthony Martial,2018-05-04,Fri,Premier League,Matchweek 37,Away,L,Manchester Utd,Brighton,1,LW,90.0,0,0,0,3.0,0.0,0.1,0.1,48.0,55.0,87.3,643.0,192.0,34.0,37.0,91.9,11.0,12.0,91.7,2.0,4.0,50.0,0.0,0.1,2.0,3.0,0.0,0.0,1.0,...,0.0,69.0,1.0,7.0,24.0,42.0,8.0,67.0,1.0,5.0,20.0,2.0,0.0,56.0,231.0,141.0,11.0,3.0,1.0,3.0,3.0,73.0,59.0,80.8,6.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,1.0,1.0,50.0,0,1
