In [1]:
import csv
import json

from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager



In [2]:
# Start Chrome with the "performance" and "browser" logs switched on; the
# performance log is read back later through driver.get_log("performance").
logging_prefs = {"performance":"ALL", "browser":"ALL"}

options = webdriver.ChromeOptions()
options.set_capability('goog:loggingPrefs', logging_prefs)

# webdriver_manager resolves the chromedriver binary handed to Selenium.
chromedriver_path = ChromeDriverManager().install()
driver = webdriver.Chrome(
    service=ChromeService(chromedriver_path),
    options=options,
)

In [3]:
# (URL slug, team id) for every national team to scrape. Keeping each pair on
# one line makes it obvious which id belongs to which slug; the two parallel
# lists used by the rest of the notebook are derived from it.
_teamSlugsAndIds = [
    ('argentina', 4819),
    ('peru', 4790),
    ('chile', 4754),
    ('canada', 4752),
    ('venezuela', 4722),
    ('mexico', 4781),
    ('ecuador', 4757),
    ('jamaica', 4769),
    ('uruguay', 4725),
    ('usa', 4724),
    ('panama', 5164),
    ('bolivia', 4746),
    ('colombia', 4820),
    ('brazil', 4748),
    ('costa-rica', 4756),
    ('paraguay', 4789),
]
listCountryNames = [slug for slug, _ in _teamSlugsAndIds]
listOfCountryIds = [teamId for _, teamId in _teamSlugsAndIds]

In [5]:
# Scrape general info, overall statistics and top players for every team.
#
# For each (country slug, team id) pair the team page is opened, the browser's
# performance log is scanned for the three API requests of interest (standings,
# top players, overall statistics) and their response bodies are read back with
# the 'Network.getResponseBody' DevTools command.
#
# Results, consumed by the CSV-writing cells further down:
#   infoOverall   - one dict per team: general info plus every overall statistic
#   infoPlayers   - one dict per (team, category, player) top-player entry
#   fieldsOverall - CSV header for infoOverall, extended with the statistic names
#   fieldsPlayers - CSV header for infoPlayers

PAGE_LOAD_TIMEOUT_SECONDS = 10
driver.set_page_load_timeout(PAGE_LOAD_TIMEOUT_SECONDS)

# Label -> fragment of the request path that identifies each API call.
API_PATH_FRAGMENTS = {
    "generalTeam": '/standings/total',
    "topPlayers": '/top-players/overall',
    "stats": '/statistics/overall',
}


def _find_api_calls(log_messages):
    """Return {label: log entry} for the API requests present in log_messages.

    An entry matches a label when the ':path' header in its params contains the
    label's fragment from API_PATH_FRAGMENTS. A later match for the same label
    replaces an earlier one; scanning stops once every label has been matched.
    Labels with no matching entry are simply absent from the result.
    """
    found = {}
    for entry in log_messages:
        path = entry.get('params', {}).get('headers', {}).get(':path', '')
        for label, fragment in API_PATH_FRAGMENTS.items():
            if fragment in path:
                found[label] = entry
        if len(found) == len(API_PATH_FRAGMENTS):
            break
    return found


def _fetch_response_json(log_entry):
    """Return the decoded JSON body of the request recorded in log_entry."""
    response = driver.execute_cdp_cmd(
        'Network.getResponseBody',
        {'requestId': log_entry['params']['requestId']},
    )
    return json.loads(response['body'])


# Create Overall Stats Table (CSV)
infoOverall = []
infoPlayers = []
# Kept so the name stays defined as before; it becomes True once statistic
# columns have been registered in fieldsOverall.
writeFields = False
fieldsOverall = ["idTeam","nameCode", "countryName","primaryColor","secondaryColor","textColor","group","points" ]
fieldsPlayers = ["idTeam","idPlayer", "category","value","playerName","playerPosition","playerImage"]

# Iterate through teams
for countryName, countryId in zip(listCountryNames, listOfCountryIds):
    # Get Website and scroll down to load full page
    try:
        driver.get(f'https://www.sofascore.com/team/football/{countryName}/{countryId}')
        print(f"Retrieved page for {countryName}")
    except Exception as exc:
        # Best effort: navigation may fail (typically by exceeding the page-load
        # timeout) after the API requests have already been issued, so carry on
        # and let the check below decide. Unlike a bare `except`, this no longer
        # swallows KeyboardInterrupt, and the failure is reported.
        print(f"Page load for {countryName} did not complete ({type(exc).__name__}); continuing")

    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    logs_raw = driver.get_log("performance")
    logs = [json.loads(lr['message'])['message'] for lr in logs_raw]

    # Getting API Calls
    apiCalls = _find_api_calls(logs)
    missing = [
        fragment
        for label, fragment in API_PATH_FRAGMENTS.items()
        if label not in apiCalls
    ]
    if missing:
        # Fail with a message naming the team and the missing request instead
        # of an opaque TypeError further down.
        raise RuntimeError(
            f"No request matching {missing} found in the performance log for "
            f"{countryName} (id {countryId})"
        )
    print("API Calls retrieved with success")

    # Getting information
    stats_info = _fetch_response_json(apiCalls["stats"])['statistics']
    topPlayers_info = _fetch_response_json(apiCalls["topPlayers"])['topPlayers']
    generalTeam_info = _fetch_response_json(apiCalls["generalTeam"])['standings']

    # Find team in group and add general information. Every returned standings
    # group is searched (first match wins), not only the first group.
    tempTeam = {}
    for standing in generalTeam_info:
        for team in standing['rows']:
            if team['team']['id'] == countryId:
                t = team['team']
                tempTeam = {
                    "idTeam": countryId,
                    "nameCode": t['nameCode'],
                    "countryName": t['name'],
                    "primaryColor": t['teamColors']['primary'],
                    "secondaryColor": t['teamColors']['secondary'],
                    "textColor": t['teamColors']['text'],
                    "group": standing['name'],
                    "points": team['points'],
                }
                break
        if tempTeam:
            break
    if not tempTeam:
        # Keep the row identifiable even without standings information.
        print(f"Warning: {countryName} (id {countryId}) not found in the standings; general columns left empty")
        tempTeam = {"idTeam": countryId}

    # Register every statistic name as a CSV column the first time it is seen,
    # for any team: csv.DictWriter rejects rows containing keys that are not in
    # its fieldnames, so taking the names from the first team only is not enough.
    for key, value in stats_info.items():
        if key not in fieldsOverall:
            fieldsOverall.append(key)
        tempTeam[key] = value
    writeFields = True

    for category, entries in topPlayers_info.items():
        for entry in entries:
            p = entry['player']
            playerStats = entry['statistics']
            infoPlayers.append({
                "idTeam": countryId,
                'idPlayer': p['id'],
                'category': category,
                "value": playerStats[category],
                "playerName": p['name'],
                "playerPosition": p['position'],
                "playerImage": f"https://api.sofascore.app/api/v1/player/{p['id']}/image",
            })

    infoOverall.append(tempTeam)
    print(f'{countryName} appended to array')


Retrieved page for argentina
API Calls retrieved with success
argentina appended to array
Retrieved page for peru
API Calls retrieved with success
peru appended to array
Retrieved page for chile
API Calls retrieved with success
chile appended to array
Retrieved page for canada
API Calls retrieved with success
canada appended to array
Retrieved page for venezuela
API Calls retrieved with success
venezuela appended to array
Retrieved page for mexico
API Calls retrieved with success
mexico appended to array
Retrieved page for ecuador
API Calls retrieved with success
ecuador appended to array
Retrieved page for jamaica
API Calls retrieved with success
jamaica appended to array
Retrieved page for uruguay
API Calls retrieved with success
uruguay appended to array
Retrieved page for usa
API Calls retrieved with success
usa appended to array
Retrieved page for panama
API Calls retrieved with success
panama appended to array
Retrieved page for bolivia
API Calls retrieved with success
bolivia ap

In [6]:
# Write overall country stats: one row per dict in infoOverall, with the
# columns listed in fieldsOverall.
filename = "country_overall_stats.csv"

# newline='' is what the csv module requires of files it writes to; without it
# extra blank rows appear on platforms that translate '\n' to '\r\n'.
# An explicit encoding keeps the output identical regardless of the OS locale.
with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
    # creating a csv dict writer object
    writer = csv.DictWriter(csvfile, fieldnames=fieldsOverall)

    # writing headers (field names)
    writer.writeheader()

    # writing data rows
    writer.writerows(infoOverall)

In [7]:
# Write the top-player table: one row per dict in infoPlayers, with the
# columns listed in fieldsPlayers.
filename = "top_players.csv"

# newline='' is what the csv module requires of files it writes to; without it
# extra blank rows appear on platforms that translate '\n' to '\r\n'.
# An explicit UTF-8 encoding keeps player names containing non-ASCII characters
# from depending on (or failing under) the OS default encoding.
with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
    # creating a csv dict writer object
    writer = csv.DictWriter(csvfile, fieldnames=fieldsPlayers)

    # writing headers (field names)
    writer.writeheader()

    # writing data rows
    writer.writerows(infoPlayers)