# Import libraries

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Site root; the site-relative player links scraped from listing pages are appended to it.
original_url = "https://www.futwiz.com"
# The player listing has 789 pages, numbered 0 to 788.
# Listing page URL format: original_url + "/en/fc24/players?page=" + page_number

# Define functions

In [3]:
def get_all_players_urls(page_url, timeout=30):
    '''Return the links of all player cards found on one listing page.

    Parameters
    ----------
    page_url : str
        Absolute URL of a listing page, e.g.
        "https://www.futwiz.com/en/fc24/players?page=0".
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout a stalled connection would block the crawl forever.

    Returns
    -------
    list of str
        The ``href`` of every ``<a class="latest-player-card">`` element, in
        document order, exactly as written in the page.

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails, times out, or the server answers with a
        4xx/5xx status.
    '''
    resp = requests.get(page_url, timeout=timeout)
    # Fail loudly on an error response; parsing an error page would otherwise
    # return an empty list and the page would be skipped without notice.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, 'html.parser')
    # href=True restricts the match to anchors that actually carry a link,
    # so the lookup below cannot raise KeyError.
    return [
        card['href']
        for card in soup.find_all('a', {'class': 'latest-player-card'}, href=True)
    ]

# Smoke test: list the player links found on the first listing page.
print(
    get_all_players_urls("https://www.futwiz.com/en/fc24/players?page=0")
)

['/en/fc24/player/lauren-james/22026', '/en/fc24/player/jeremie-frimpong/22124', '/en/fc24/player/aitana-bonmati/19564', '/en/fc24/player/erling-haaland/19560', '/en/fc24/player/erling-haaland/22024', '/en/fc24/player/pele/21591', '/en/fc24/player/sakina-karchaoui/22142', '/en/fc24/player/kylian-mbappe/22141', '/en/fc24/player/kylian-mbappe/19559', '/en/fc24/player/caroline-graham-hansen/19548', '/en/fc24/player/alexandra-popp/22089', '/en/fc24/player/virgil-van-dijk/22078', '/en/fc24/player/harry-kane/22088', '/en/fc24/player/johan-cruyff/21673', '/en/fc24/player/lionel-messi/22180', '/en/fc24/player/lionel-messi/19558', '/en/fc24/player/tabitha-chawinga/22143', '/en/fc24/player/florian-wirtz/22091', '/en/fc24/player/vitinha/22144', '/en/fc24/player/ferenc-puskas/21749', '/en/fc24/player/jude-bellingham/19569', '/en/fc24/player/lena-oberdorf/22090', '/en/fc24/player/khadija-shaw/22027', '/en/fc24/player/grace-geyoro/22145', '/en/fc24/player/pele/19313', '/en/fc24/player/declan-rice/22

In [4]:
def get_player_info(player_url, timeout=30):
    '''Scrape one player's card data from their detail page.

    Parameters
    ----------
    player_url : str
        Site-relative path of the player page, e.g.
        '/en/fc24/player/lionel-messi/19558'. It is appended to
        ``original_url`` to build the request URL.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    dict
        Keys, in insertion order: 'version', 'name', 'rating', 'position',
        'PAC', 'SHO', 'PAS', 'DRI', 'DEF', 'PHY'. 'version', 'name' and
        'position' are strings taken verbatim from the page (no whitespace
        is stripped); 'rating' and the six stats are ints.

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails, times out, or returns a 4xx/5xx status.
    AttributeError
        If an expected element is missing from the page.
    ValueError
        If a numeric field does not contain an integer.
    '''
    full_url = original_url + player_url
    resp = requests.get(full_url, timeout=timeout)
    # Stop on error responses instead of trying to parse an error page.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, 'html.parser')

    def element_text(tag, css_class=None):
        # Text of the first matching element. A missing element previously
        # failed with "'NoneType' object has no attribute 'text'"; the same
        # exception type is kept, but the message now names the element and page.
        attrs = {} if css_class is None else {'class': css_class}
        node = soup.find(tag, attrs)
        if node is None:
            raise AttributeError(
                "no <{0}> element with class {1!r} found on {2}".format(
                    tag, css_class, full_url
                )
            )
        return node.text

    player_info = {}
    h1_content = element_text('h1')
    player_info['version'] = element_text('div', 'player-version')
    # The name is whatever precedes the version text inside the <h1>.
    player_info['name'] = h1_content.split(player_info['version'])[0]
    player_info['rating'] = int(element_text('div', 'card-24-rating'))
    player_info['position'] = element_text('div', 'card-24-position')

    # The six face stats live in <div class="attN-num card-24-attnumN">, N = 1..6.
    stat_names = ('PAC', 'SHO', 'PAS', 'DRI', 'DEF', 'PHY')
    for slot, stat in enumerate(stat_names, start=1):
        css_class = 'att{0}-num card-24-attnum{0}'.format(slot)
        player_info[stat] = int(element_text('div', css_class))
    return player_info

# Smoke test: scrape one known player page and show the parsed record.
print(
    get_player_info('/en/fc24/player/lionel-messi/19558')
)

{'version': 'FC 24 TOTY ', 'name': 'Lionel Messi ', 'rating': 97, 'position': 'CF', 'PAC': 91, 'SHO': 96, 'PAS': 97, 'DRI': 98, 'DEF': 40, 'PHY': 77}


# Crawl and save the results

In [5]:
# Column-oriented accumulator: one empty list per output column, in column order.
player_dict = {
    column: []
    for column in (
        'name', 'version', 'rating', 'position',
        'PAC', 'SHO', 'PAS', 'DRI', 'DEF', 'PHY',
    )
}

In [6]:
# Empty every column first so that re-running this cell does not append a
# second copy of each player to player_dict (and to the CSV).
for column_values in player_dict.values():
    column_values.clear()

# Crawl the first 50 listing pages (page indices 0-49).
for page_idx in range(0,50):
    page_url = original_url + "/en/fc24/players?page=" + str(page_idx)
    players_urls = get_all_players_urls(page_url)
    for player_url in players_urls:
        player_info = get_player_info(player_url)
        print(player_info)
        # Append each field to its column; the keys of player_dict are the
        # same keys get_player_info returns.
        for column, column_values in player_dict.items():
            column_values.append(player_info[column])

# One row per scraped player card; columns follow player_dict's key order.
df = pd.DataFrame(player_dict)
df.to_csv('futwiz_players.csv',index=False)

{'version': 'FC 24 TOTS ', 'name': 'Lauren James ', 'rating': 97, 'position': 'ST', 'PAC': 97, 'SHO': 91, 'PAS': 93, 'DRI': 97, 'DEF': 50, 'PHY': 92}
{'version': 'FC 24 TOTS ', 'name': 'Jeremie Frimpong ', 'rating': 97, 'position': 'RWB', 'PAC': 99, 'SHO': 84, 'PAS': 93, 'DRI': 97, 'DEF': 91, 'PHY': 88}
{'version': 'FC 24 TOTY ', 'name': 'Aitana Bonmati ', 'rating': 97, 'position': 'CM', 'PAC': 90, 'SHO': 91, 'PAS': 91, 'DRI': 98, 'DEF': 83, 'PHY': 81}
{'version': 'FC 24 TOTY ', 'name': 'Erling Haaland ', 'rating': 97, 'position': 'ST', 'PAC': 96, 'SHO': 98, 'PAS': 80, 'DRI': 91, 'DEF': 56, 'PHY': 97}
{'version': 'FC 24 TOTS ', 'name': 'Erling Haaland ', 'rating': 97, 'position': 'ST', 'PAC': 97, 'SHO': 99, 'PAS': 80, 'DRI': 92, 'DEF': 57, 'PHY': 97}
{'version': 'FC 24 FUT Birthday Icon ', 'name': 'Pele ', 'rating': 97, 'position': 'CAM', 'PAC': 95, 'SHO': 96, 'PAS': 92, 'DRI': 96, 'DEF': 60, 'PHY': 81}
{'version': 'FC 24 TOTS ', 'name': 'Sakina Karchaoui ', 'rating': 97, 'position': '