In [30]:
''' Understat Download
    
    Downloads data from https://understat.com/ and calculates basic pandas tables.
    
    Sources:    Understat (no API)
                'in/LTable_FPL.csv'
                'in/Fixtures.csv'
                'in/Teams.csv'
                'in/Players.csv'
                
    Writes:     'in/Table_Understat.csv'
                'in/Name_Dictionary.csv'
                
'''


#Downloadting Data from understat.com(Understat)
print('Start inputUnderstat:')
from time import time
start_module = time()
start = start_module

from bs4 import BeautifulSoup
import requests
import json
import pandas as pd
import numpy as np
import codecs
from pathlib import Path
import constti

# 1.Check if there are equal names in the column
def  check_equal(Table, col_name): #Check doubles in column col_name of the table Table
    '''
    Check if there are equal names in the column
    '''
    repetitions = pd.DataFrame()
    for i in range(len(Table)):
        if Table[col_name][i] in set(Table[col_name][:i]):
            if (len(repetitions)>0):
                if not (Table[col_name][i] in set(repetitions[col_name])):
                    #repetitions = repetitions.append(Table[Table[col_name]==Table[col_name][i]])
                    repetitions = pd.concat([repetitions,Table[Table[col_name]==Table[col_name][i]]], 
                                            ignore_index=True)
            else:
                #repetitions = repetitions.append(Table[Table[col_name]==Table[col_name][i]])
                repetitions = pd.concat([repetitions,Table[Table[col_name]==Table[col_name][i]]], 
                                            ignore_index=True)
            #print('Check Equal', Table.iloc[i])
    print(repetitions)

# 2. Check if name_un is the same player as (name_fpl, web_name_fpl). Five comparison types: the lower the better
def name2standart(name):
    '''
    Changes name to the same standarts:
        All characters are low.
        Correct apostrof
        Odegaard = Ødegaard
    '''
    return name.lower().replace('ø', 'o').replace("&#039;", "'")

def same_player(name_un, name_fpl, web_name_fpl, type_comp, repeat=1):
    '''
    Check if name_un is the same player as (name_fpl, web_name_fpl). Five comparison types: the lower the better
    '''
    name_un = name2standart(name_un)#name_un.lower()
    name_fpl = name2standart(name_fpl)#name_fpl.lower()
    web_name_fpl = name2standart(web_name_fpl)#web_name_fpl.lower()
    same = 0
    
    # Unerstat name = Full FPL name. Best equality
    if (type_comp == 0)&(name_un == name_fpl):
        same = 1
    
    # Understat name = Short FPL name.
    if (type_comp == 1)&(web_name_fpl == name_un):
        same = 1
    
    # Understat name is included in Full FPL name.
    if (type_comp == 2)&(name_un in name_fpl):
        same = 1
    
    # All words of Understat name are in Full FPL name
    if (type_comp == 3)&(set(name_un.split()) <= set(name_fpl.split())):
        same = 1
    
    # FPL Short name is included in Understat name
    if (type_comp == 4)&(web_name_fpl in name_un):
        same = 1
    
    # Some ' ' space has been missed and after del of one space all words of Understat name are in Full FPL
    if type_comp == 5:
        fpl_name = name_fpl.lower().split()
        for m in range(1,len(fpl_name)):
            if set(name_un.lower().split()) <=\
            set([fpl_name[k-1]+fpl_name[k] if k==m else ('' if k==m-1 else fpl_name[k])\
                 for k in range(0,len(fpl_name))]):
                    same = 1
                    
    # If '-' in Understat is ' ' in FPL    DO IT ALL AGAIN!!   but one time
    if (same == 0)&(repeat==1):
        return same_player(name_un.replace('-', ' '), name_fpl, web_name_fpl, type_comp, 0)
    return same

# 3. Downloads match data, adds match number game_number to Table and updates Dictionary of names
def add_match_to_dict(game_number, Dictionary):
    '''
    Downloads match data, adds match number game_number to Table and updates Dictionary of names
    '''
    url = 'https://understat.com/match/'+ str(game_number)
    #print(url)
    p = constti.long_request(url)
    pdecoded = codecs.decode(p.text,'unicode_escape')
    page = BeautifulSoup(p.text, 'html.parser')
    tempList = []
    for tags in page('script'):
        if '= JSON.parse' in str(tags):
            for els in str(tags).split():
                if 'JSON' in els:
                    els = els[12:-3]
                    els = codecs.decode(els,'unicode_escape')
                    tempList.append(json.loads(els))    
    away_players = pd.DataFrame(tempList[2]['a']).transpose()
    home_players = pd.DataFrame(tempList[2]['h']).transpose()
    #match_players = away_players.append(home_players)
    match_players = pd.concat([away_players, home_players], ignore_index=True)
    
    
    if tempList[1]['team_h'] == 'Tottenham': tempList[1]['team_h'] = 'Spurs' 
    if tempList[1]['team_a'] == 'Tottenham': tempList[1]['team_a'] = 'Spurs'
    
    h = list([teams_dict[tempList[1]['team_h']] for i in range(len(match_players))])
    a = list([teams_dict[tempList[1]['team_a']] for i in range(len(match_players))])
    ha = list(match_players['h_a'])
    di = dict(zip(Teams['Teams'], Teams['id']))
    
    match_players['team_h_name'] = h
    match_players['team_a_name'] = a   
    match_players['team_name'] = [a[i] if ha[i] == 'a' else h[i] for i in range(len(a))]
    match_players['opponent_team_name'] = [a[i] if ha[i] == 'h' else h[i] for i in range(len(a))]
    match_players['team_h'] = [di[h[i]] for i in range(len(a))]
    match_players['team_a'] = [di[a[i]] for i in range(len(a))]
    match_players['team'] = [di[match_players.at[i,'team_name']] for i in match_players.index]
    match_players['opponent_team']=[di[match_players.at[i,'opponent_team_name']] for i in match_players.index]
    
    A = Fixtures['team_a']==match_players['team_a'].mean()
    B = Fixtures['team_h']==match_players['team_h'].mean()
    f = Fixtures[A&B]['id'].sum()
    
    match_players['fixture'] = [f for _ in range(len(a))]
    
    r = int(Fixtures[Fixtures['id']==match_players['fixture'].mean()]['event'].sum())
    
    match_players['round'] = [ r for _ in range(len(a))]
    match_players['player'] = constti.strip_accents_pdlist(pd.DataFrame(match_players['player']))
    
    sT = Table_FPL[Table_FPL['fixture']==match_players['fixture'].mean()]
    di = dict(zip(Players['id'], Players['Name']))
    A = [di[sT.at[i,'element']] for i in sT.index]
    
    
    FPL_names = constti.strip_accents_pdlist(pd.DataFrame(A, columns = ['player']))
    
    di = dict(zip(Players['id'], Players['web_name']))
    A = [di[sT.at[i,'element']] for i in sT.index]
    
    FPL_names['web_name'] = constti.strip_accents_pdlist(pd.DataFrame(A, columns = ['player']))
    FPL_names['id'] = [sT.at[i,'element'] for i in sT.index]
    match_players['in_FPL'] = [0 for i in match_players.index]
    
    Dictionary_strong = Dictionary[Dictionary['id_fpl']!='']

    for i in match_players.index:
        #match_players.at[i,'player'] = match_players.at[i,'player']
        if not(match_players.at[i,'player'] in set(Dictionary_strong['name_un'])):
            
            for t in range(6):
                #There are 6 types of name comparisons. The lower, the better is precision.
                for j in FPL_names.index:
                    if same_player(match_players.at[i,'player'], FPL_names.at[j,'player'], FPL_names.at[j,'web_name'], t):
                        match_players.at[i,'in_FPL'] = 1
                        name_un = match_players.at[i, 'player']
                        name_fpl = FPL_names.at[j,'player']
                        id_fpl = FPL_names.at[j,'id']
                        web_name_fpl = FPL_names.at[j,'web_name']
                        match_players.at[i, 'player'] = FPL_names.at[j,'player']
                        break
                if match_players.at[i,'in_FPL'] == 1:
                    break
            if match_players.at[i,'in_FPL'] == 0:
                name_un = match_players.at[i, 'player']
                name_fpl = ''
                id_fpl = ''
                web_name_fpl = ''
                
                
                
            if name_un in set(Dictionary['name_un']):
                print(f'SMTH went wrong. Probably the player "{name_un}" was not found in FPL')
                #print(name_un)#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                if name_fpl != '':
                    for u in Dictionary.index:
                        if name_un == Dictionary.at[u,'name_un']:
                            Dictionary.at[u,'name_fpl'] = name_fpl
                            Dictionary.at[u,'id_fpl'] = id_fpl
                            Dictionary.at[u,'web_name_fpl'] = web_name_fpl
            else:

#                 Dictionary = Dictionary.append(pd.DataFrame(\
#                     [[name_un, name_fpl, id_fpl, web_name_fpl]],\
#                     columns=["name_un", 'name_fpl', 'id_fpl', 'web_name_fpl']), ignore_index=True)
                Dictionary = pd.concat([Dictionary, pd.DataFrame(\
                    [[name_un, name_fpl, id_fpl, web_name_fpl]],\
                    columns=["name_un", 'name_fpl', 'id_fpl', 'web_name_fpl'])],
                                       ignore_index=True)
        else:
            match_players.at[i, 'player'] = dict(zip(Dictionary['name_un'], Dictionary['name_fpl']))\
                [match_players.at[i, 'player']]
            match_players.at[i,'in_FPL'] = 1

    match_players = match_players.sort_index()
    Dictionary = Dictionary.sort_values('name_un')
    Dictionary.index = np.arange(1, len(Dictionary) + 1)
    return match_players, Dictionary

# 4. Adding exceptions to Name_Dictionary
def Exc_dict(Name_Dictionary, name_understat, name_fpl):
    '''
    Adding exceptions to Name_Dictionary
    name_understat - "player" column in Table_Understat
    name_fpl - "first_name" + " " + "second_name" in bootstrap table
    '''
#     Name_Dictionary = Name_Dictionary.append(pd.DataFrame([[name_understat,name_fpl,'','']], 
#     columns=["name_un", 'name_fpl', 'id_fpl', 'web_name_fpl']), ignore_index=True)
    Name_Dictionary = pd.concat([Name_Dictionary, pd.DataFrame([[name_understat,name_fpl,'','']], 
    columns=["name_un", 'name_fpl', 'id_fpl', 'web_name_fpl'])], ignore_index=True)
#     display(Name_Dictionary)
#     print("asdasd" + Name_Dictionary.at[len(Name_Dictionary)-1, 'id_fpl'])    
    Name_Dictionary.at[len(Name_Dictionary)-1, 'id_fpl'] = Players[Players['Name']==name_fpl]['id'].mean()
#     display(Players[Players['Name']==name_fpl]['web_name'])
    Name_Dictionary.at[len(Name_Dictionary)-1, 'web_name_fpl'] = Players[Players['Name']==name_fpl]['web_name'].iat[0]
    #print(Name_Dictionary)
    #print(name_understat)
    return Name_Dictionary



# class StopExecution(Exception):
#     def _render_traceback_(self):
#         pass



#Read data from fantasy.premierleague.com(FPL) to compare with
'''
    Large_Table is needed only for inputUndersat. To get FPL names when Understat data is already calculated
    but FPL data is not. So players played are not excluded from the table but have zero data.
'''
try:
    Table_FPL = pd.read_csv('in/LTable_FPL.csv') #Main table of FPL
    1/len(Table_FPL)
    table_len = len(Table_FPL)
except:
    print("Table_FPL is empty or can't be read")
    table_len = 0
    pd.DataFrame().to_csv(Path('in/Table_Understat.csv'), index=False)
    pd.DataFrame().to_csv(Path('in/Name_Dictionary.csv'), index=False)
#     raise StopExecution
    
if table_len > 0:
    Fixtures = pd.read_csv('in/Fixtures.csv') #All fixtures with postponed
    Teams = pd.read_csv('in/Teams.csv') #Team Tables Template
    Players = pd.read_csv('in/Players.csv') #Player Table Template 

    url = 'https://understat.com/match/11919' #match data by id
    url1 = 'https://understat.com/league/EPL' #url to get list of matches and their id

    #Getting matches id and 
    p = constti.long_request(url1)
    pdecoded = codecs.decode(p.text,'unicode_escape')
    page = BeautifulSoup(p.text, 'html.parser')
    a = []
    for tags in page('script'):
        if '= JSON.parse' in str(tags):
            for els in str(tags).split():
                if 'JSON' in els:
                    els = els[12:-3]
                    els = codecs.decode(els,'unicode_escape')
                    a.append(json.loads(els))
    TT = pd.DataFrame(a[1]).transpose()
    UnderstatTeams = dict(zip(TT['id'], TT['title']))
    Schedule = pd.DataFrame(a[0])

    #Словарь для перевода understat команд к FPL именам Name_Dictionary
    teams_dtable = pd.DataFrame()
    teams_dtable['understat'] = ['Spurs' if i == 'Tottenham' else i for i in TT.sort_values(by=['title'])['title']]
    teams_dtable.index = np.arange(0, len(teams_dtable))
    teams_dtable['fpl'] = list(Teams.sort_values(by=['Teams'])['Teams'])
    teams_dict = dict(zip(teams_dtable['understat'], teams_dtable['fpl']))

    #Downloads all match data
    Table_Understat = pd.DataFrame()
    Name_Dictionary = pd.DataFrame(columns=["name_un", 'name_fpl', 'id_fpl', 'web_name_fpl'])

    #Adding exceptions to Dictionary
    #Name_Dictionary = Exc_dict(Name_Dictionary, 'Franck Zambo','Andre-Frank Zambo Anguissa')
    Name_Dictionary = Exc_dict(Name_Dictionary, 'Bobby Reid','Bobby De Cordova-Reid')
    #Name_Dictionary = Exc_dict(Name_Dictionary, 'Emerson','Emerson Aparecido Leite de Souza Junior')
    #Name_Dictionary = Exc_dict(Name_Dictionary, 'Nicolas N&#039;Koulou','Nicolas Nkoulou')
    Name_Dictionary = Exc_dict(Name_Dictionary, 'Rayan Ait Nouri','Rayan Ait-Nouri')
    Name_Dictionary = Exc_dict(Name_Dictionary, 'Cheick Oumar Doucoure','Cheick Doucoure')
    Name_Dictionary = Exc_dict(Name_Dictionary, 'Armel Bella Kotchap','Armel Bella-Kotchap')
    #display(Name_Dictionary)

    if not Table_FPL.empty:
        for i in range(len(Schedule)):
            if Schedule.at[i,'isResult']:
                #print(Schedule.at[i,'id'])
                MP, Name_Dictionary = add_match_to_dict(Schedule.at[i,'id'], Name_Dictionary)
                #Table_Understat = Table_Understat.append(MP, ignore_index=True)
                Table_Understat = pd.concat([Table_Understat, MP], ignore_index=True)

    print(f'\t All Data Downloaded.\t It takes {time() - start} sec')
    start = time()

    # Add fpl_id and name_fpl for players not in FPL
    j=0
    for i in Name_Dictionary.index:
        j+=1
        if Name_Dictionary.at[i,'name_fpl']=='':
            Name_Dictionary.at[i,'name_fpl'] = Name_Dictionary.at[i,'name_un']
            Name_Dictionary.at[i,'id_fpl'] = 1000000 + j

    name2id = dict(zip(Name_Dictionary['name_fpl'], Name_Dictionary['id_fpl']))
    Table_Understat['element'] = [name2id[Table_Understat.at[i, 'player']] for i in Table_Understat.index]
    Table_Understat = constti.change_column_name(Table_Understat, 'xG', 'threat')
    Table_Understat['threat']  = [100*float(Table_Understat['threat'][i]) for i in range(len(Table_Understat))]
    Table_Understat = constti.change_column_name(Table_Understat, 'xA', 'creativity')
    Table_Understat['creativity']  = [100*float(Table_Understat['creativity'][i]) for i in range(len(Table_Understat))]
    Table_Understat = constti.change_column_name(Table_Understat, 'player_id', 'Understat_id')
    Table_Understat = constti.change_column_name(Table_Understat, 'time', 'minutes')

    Table_Understat.to_csv(Path('in/Table_Understat.csv'), index=False)
    Name_Dictionary.to_csv(Path('in/Name_Dictionary.csv'), index=False)

    print(f'\t All Columns Added.\t It takes {time() - start} sec')
    print(f'inputUnderstat is over.\t It takes {time() - start_module} sec\n')

    if __name__ == '__main__':
        display(Table_Understat)

Start inputUnderstat:
	 All Data Downloaded.	 It takes 14.384668588638306 sec
	 All Columns Added.	 It takes 0.04096078872680664 sec
inputUnderstat is over.	 It takes 14.426697969436646 sec



Unnamed: 0,id,goals,own_goals,shots,threat,minutes,Understat_id,team_id,position,player,...,team_name,opponent_team_name,team_h,team_a,team,opponent_team,fixture,round,in_FPL,element
0,539137,0,0,0,0.000000,90,5603,83,GK,Aaron Ramsdale,...,Arsenal,Crystal Palace,7,1,1,7,1,1,1,15.0
1,539138,0,0,0,0.000000,90,7298,83,DR,Benjamin White,...,Arsenal,Crystal Palace,7,1,1,7,1,1,1,10.0
2,539139,0,0,0,0.000000,90,5613,83,DC,Gabriel dos Santos Magalhaes,...,Arsenal,Crystal Palace,7,1,1,7,1,1,1,16.0
3,539140,0,0,0,0.000000,90,6888,83,DC,William Saliba,...,Arsenal,Crystal Palace,7,1,1,7,1,1,1,26.0
4,539141,0,0,1,4.480058,84,2958,83,DL,Oleksandr Zinchenko,...,Arsenal,Crystal Palace,7,1,1,7,1,1,1,313.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
852,543271,0,0,3,21.881004,90,101,86,FWL,Allan Saint-Maximin,...,Newcastle,Man City,15,13,15,13,28,3,1,368.0
853,543272,0,0,0,0.000000,16,4456,86,Sub,Chris Wood,...,Newcastle,Man City,15,13,15,13,28,3,1,354.0
854,543275,0,0,0,0.000000,16,7078,86,Sub,Sean Longstaff,...,Newcastle,Man City,15,13,15,13,28,3,1,370.0
855,543273,0,0,0,0.000000,1,1545,86,Sub,Emil Krafth,...,Newcastle,Man City,15,13,15,13,28,3,1,364.0


In [26]:
#repetitions = repetitions.append(Table[Table[col_name]==Table[col_name][i]])
pd.concat([Table_FPL,Table_FPL], ignore_index=True)#.drop('index', axis=1) #reset_index()
Table_FPL
Name_Dictionary[Name_Dictionary['name_un']=='Bobby Reid']
Name_Dictionary[Name_Dictionary['web_name_fpl']=='']
Players[Players['Team']=='Wolves']


Unnamed: 0,id,Name,web_name,Team number,Team,Position,Team games,Played
563,474,Willy Boly,Boly,20,Wolves,Defender,3,0
564,476,Raul Jimenez,Jimenez,20,Wolves,Forward,3,1
565,477,Jonathan Castro Otto,Jonny,20,Wolves,Defender,3,3
566,478,Jose Malheiro de Sa,Sa,20,Wolves,Goalkeeper,3,3
567,479,Leander Dendoncker,Dendoncker,20,Wolves,Midfielder,3,3
568,480,Ruben da Silva Neves,Neves,20,Wolves,Midfielder,3,3
569,481,Hwang Hee-chan,Hwang,20,Wolves,Midfielder,3,3
570,482,Nelson Cabral Semedo,Semedo,20,Wolves,Defender,3,2
571,483,Daniel Castelo Podence,Podence,20,Wolves,Midfielder,3,3
572,484,Max Kilman,Kilman,20,Wolves,Defender,3,3


In [7]:
Table_FPL.append(Table_FPL)

  Table_FPL.append(Table_FPL)


Unnamed: 0,index,assists,bonus,bps,clean_sheets,creativity,element,fixture,goals_conceded,goals_scored,...,team_a_score,team_h_score,threat,total_points,transfers_balance,transfers_in,transfers_out,value,was_home,yellow_cards
0,0,0,0,0,0,0.0,1,1,0,0,...,2.0,0.0,0.0,0,0,0,0,45,False,0
1,1,0,0,0,0,0.0,1,11,0,0,...,2.0,4.0,0.0,0,-5169,1361,6530,44,True,0
2,2,0,0,0,0,0.0,1,21,0,0,...,3.0,0.0,0.0,0,-4337,879,5216,44,False,0
3,3,0,0,12,1,15.0,3,1,0,0,...,2.0,0.0,2.0,2,0,0,0,50,False,1
4,4,1,2,35,0,25.5,3,11,2,1,...,2.0,4.0,28.0,12,-629,9001,9630,50,True,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1740,1740,0,0,0,0,0.0,568,20,0,0,...,0.0,0.0,0.0,0,5868,7416,1548,45,True,0
1741,1741,0,0,0,0,0.0,568,29,0,0,...,0.0,1.0,0.0,0,4216,10584,6368,45,False,0
1742,1742,0,0,2,0,1.2,579,20,0,0,...,0.0,0.0,0.0,1,4692,5577,885,60,True,0
1743,1743,0,0,6,0,36.9,579,29,1,0,...,0.0,1.0,41.0,2,3524,6853,3329,60,False,0


In [32]:
a = []
pd.Series(a)

  pd.Series(a)


Series([], dtype: float64)