<a href="https://colab.research.google.com/github/sunnypaajee/FIFA_Rating_Model/blob/main/FC24_Ratings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
import numpy as np
import pandas as pd

# Web Scraping
from urllib.request import Request, urlopen
from urllib.error import HTTPError
from bs4 import BeautifulSoup

# Time
from datetime import date
from time import sleep

In [30]:
url = "https://www.futbin.com/players?page=1&version=gold_rare&pos_type=all"

headers = {'User-Agent': 'Mozilla/5.0'}
req = Request(url, headers=headers)
response = urlopen(req)
html = response.read()


soup = BeautifulSoup(html, 'html.parser')

In [31]:
example = soup.find_all("tr")[2] # Mbappé

In [36]:
attributes = [td.get_text() for td in example.find_all("td")]


In [37]:
attributes

['\n\n\n\nKylian Mbappé\n\nnormal\n\n',
 '\n\n91\n\n',
 '\nST\nCF, LW\n',
 '\n205K\n5.67%\n',
 '\n185K\n15.62%\n',
 '',
 '4',
 '5',
 'H / L',
 '\n97\n',
 '\n90\n',
 '\n80\n',
 '\n92\n',
 '\n36\n',
 '\n78\n',
 '3,451',
 '2250',
 '\n182cm | 6\'0"\nMostly Explosive',
 '']

In [34]:
def flatten_list(list_):
    output = []
    for sublist in list_:
        output.extend(sublist)

    return output


In [38]:
attributes = [i.strip() for i in attributes]


In [39]:
attributes

['Kylian Mbappé\n\nnormal',
 '91',
 'ST\nCF, LW',
 '205K\n5.67%',
 '185K\n15.62%',
 '',
 '4',
 '5',
 'H / L',
 '97',
 '90',
 '80',
 '92',
 '36',
 '78',
 '3,451',
 '2250',
 '182cm | 6\'0"\nMostly Explosive',
 '']

In [40]:
attributes = flatten_list([i.split("\n") for i in attributes])


In [41]:
attributes

['Kylian Mbappé',
 '',
 'normal',
 '91',
 'ST',
 'CF, LW',
 '205K',
 '5.67%',
 '185K',
 '15.62%',
 '',
 '4',
 '5',
 'H / L',
 '97',
 '90',
 '80',
 '92',
 '36',
 '78',
 '3,451',
 '2250',
 '182cm | 6\'0"',
 'Mostly Explosive',
 '']

In [42]:
attributes = flatten_list([i.split("\\") for i in attributes])

In [43]:
attributes

['Kylian Mbappé',
 '',
 'normal',
 '91',
 'ST',
 'CF, LW',
 '205K',
 '5.67%',
 '185K',
 '15.62%',
 '',
 '4',
 '5',
 'H / L',
 '97',
 '90',
 '80',
 '92',
 '36',
 '78',
 '3,451',
 '2250',
 '182cm | 6\'0"',
 'Mostly Explosive',
 '']

In [44]:
attributes = [item.strip() for item in attributes if item.strip() != ""]


In [45]:
attributes

['Kylian Mbappé',
 'normal',
 '91',
 'ST',
 'CF, LW',
 '205K',
 '5.67%',
 '185K',
 '15.62%',
 '4',
 '5',
 'H / L',
 '97',
 '90',
 '80',
 '92',
 '36',
 '78',
 '3,451',
 '2250',
 '182cm | 6\'0"',
 'Mostly Explosive']

In [55]:
player_information = example.find_all("div")[0].find_all("a")


In [57]:
player_information = example.find_all("span")[0].find_all("a")
player_information = [item['data-original-title'] for item in player_information]
player_information

[]

In [58]:
attributes_names = ["Name", "Club", "Nation", "League", "Rating", "Main_Position", "Alternate_Positions",
                    "Card_Version", "Run_Style", "Price", "Price_Variation", "Skills_Star", "Weak_Foot_Star",
                    "Attack_Workrate", "Defense_Workrate",
                    "Pace / Diving", "Shooting / Handling",
                    "Passing / Kicking", "Dribbling / Reflexes",
                    "Defense / Speed", "Physical / Positioning",
                    "Height", "BodyType", "Popularity", "Base_Stats", "Ingame_Stats"]

In [59]:
for e, info in enumerate(player_information):
    attributes.insert(e + 1, info)

In [60]:
player_dict = dict(zip(attributes_names, attributes))
player_dict

{'Name': 'Kylian Mbappé',
 'Club': 'normal',
 'Nation': '91',
 'League': 'ST',
 'Rating': 'CF, LW',
 'Main_Position': '205K',
 'Alternate_Positions': '5.67%',
 'Card_Version': '185K',
 'Run_Style': '15.62%',
 'Price': '4',
 'Price_Variation': '5',
 'Skills_Star': 'H / L',
 'Weak_Foot_Star': '97',
 'Attack_Workrate': '90',
 'Defense_Workrate': '80',
 'Pace / Diving': '92',
 'Shooting / Handling': '36',
 'Passing / Kicking': '78',
 'Dribbling / Reflexes': '3,451',
 'Defense / Speed': '2250',
 'Physical / Positioning': '182cm | 6\'0"',
 'Height': 'Mostly Explosive'}

In [66]:
def get_player_attributes(row):
    attributes = [td.get_text() for td in row.find_all("td")]
    attributes = [i.strip() for i in attributes]

    attributes = flatten_list([i.split("\n") for i in attributes])
    attributes = flatten_list([i.split("\\") for i in attributes])

    attributes = [item.strip() for item in attributes if item.strip() != ""]

    # Club, League and Nation
    player_information = row.find_all("span")[0].find_all("a")
    player_information = [item['data-original-title'] for item in player_information]

    # Adding to the attributes list
    for e, info in enumerate(player_information):
        attributes.insert(e + 1, info)

    # If a player doesn't have an alternate position
    if len(attributes) == 25:
        attributes.insert(6, None)

    player_dict = dict(zip(attributes_names, attributes))
    return player_dict

       ## Height
    if not find_missing_att(attributes, "|"):
        attributes.insert(-4, "0")
    ## Price Variation:
    if not find_missing_att(attributes, "%"):
        attributes.insert(9, "0")

    # Player doesn't have card version labeled
#     runstyle = ["Controlled", "Explosive", "Lengthy"]
#     if len(attributes) < 25:
#         for style in runstyle:
#             if style in attributes:
#                 index = attributes.index(style)
#                 attributes.insert(index - 1, "Normal")

    # If a player doesn't have an alternate position
    if len(attributes) == 25:
        attributes.insert(6, "0")

    # If the player attributes are complete
    if len(attributes) > 25:
        player_dict = dict(zip(attributes_names, attributes))
        return player_dict

    return None

In [67]:
gold_players_page = soup.find_all("tr")


In [68]:
top30_gold_players = list()

for player_row in gold_players_page:
    try:
        player_attributes = get_player_attributes(player_row)
        top30_gold_players.append(player_attributes)
    except IndexError:
        continue

In [69]:
top30_gold_players = pd.DataFrame(top30_gold_players)
top30_gold_players.head()

Unnamed: 0,Name,Club,Nation,League,Rating,Main_Position,Alternate_Positions,Card_Version,Run_Style,Price,...,Attack_Workrate,Defense_Workrate,Pace / Diving,Shooting / Handling,Passing / Kicking,Dribbling / Reflexes,Defense / Speed,Physical / Positioning,Height,BodyType
0,,,,,,,,,,,...,,,,,,,,,,
1,Erling Haaland,normal,91.0,ST,CF,25.5K,1.92%,23.25K,2.11%,3.0,...,93.0,66.0,80.0,45.0,88.0,2723.0,2191,"195cm | 6'5""",Mostly Lengthy,
2,Kylian Mbappé,normal,91.0,ST,"CF, LW",205K,5.67%,185K,15.62%,4.0,...,90.0,80.0,92.0,36.0,78.0,3451.0,2250,"182cm | 6'0""",Mostly Explosive,
3,Alexia Putellas Segura,normal,91.0,CM,LW,29.75K,1.65%,29.5K,5,5.0,...,91.0,92.0,72.0,78.0,851.0,2466.0,"173cm | 5'8""",Avg & Normal(67kg),Mostly Explosive,
4,Kevin De Bruyne,normal,91.0,CM,CAM,25K,1.96%,22K,8.33%,5.0,...,88.0,94.0,87.0,65.0,78.0,2175.0,2349,"181cm | 5'11""",Controlled,


In [65]:
def find_missing_att(list_att, sep):
    '''Iterates for the attributes list and  returns False if doesn't contain.
    If the function find the separator in an item it will break the loop and
    the value will keep his True value, meaning that it found and the player has
    the attribute.'''
    for att in list_att:
        value = sep in att
        if value:
            break

    return value

In [70]:
for player_row in gold_players_page:
    try:
        player_attributes = get_player_attributes(player_row)
        top30_gold_players.append(player_attributes)
    except IndexError:
        continue

In [78]:
def authentication_page(page):
    headers = {'User-Agent': 'Mozilla/5.0'}
    req = Request(page, headers=headers)
    response = urlopen(req)
    html = response.read()
    soup_a = BeautifulSoup(html, 'html.parser')
    return soup_a

In [79]:
def iterate_for_page(index):

    # Authentication
    soup_p = authentication_page("https://www.futbin.com/players?page=")

    # Getting the players from the page
    page_players = soup_p.find_all("tr")
    page_dataframe = list()

    # Iteration
    for player_row in page_players:
        try:
            player_attributes = get_player_attributes(player_row)
            if player_attributes:
                page_dataframe.append(player_attributes)
        except:
            continue

    page_dataframe = pd.DataFrame(page_dataframe)
    return page_dataframe

In [82]:
def iterate_for_page(index, gender):
    page_url = "https://www.futbin.com/players?page=" + str(index) + "&gender=" + gender

    # Authentication
    soup_p = authentication_page(page_url)

    # Getting the players from the page
    page_players = soup_p.find_all("tr")
    page_dataframe = list()

    # Iteration
    for player_row in page_players:
        try:
            player_attributes = get_player_attributes(player_row)
            if player_attributes:
                page_dataframe.append(player_attributes)
        except:
            continue

    page_dataframe = pd.DataFrame(page_dataframe)
    return page_dataframe
sleep(20)
players_till_10 = pd.DataFrame([])
for i in range(10):
    page_df = iterate_for_page(i + 1, "men")
    players_till_10 = pd.concat([players_till_10, page_df])

In [83]:
(players_till_10.head())


Unnamed: 0,Name,Club,Nation,League,Rating,Main_Position,Alternate_Positions,Card_Version,Run_Style,Price,...,Attack_Workrate,Defense_Workrate,Pace / Diving,Shooting / Handling,Passing / Kicking,Dribbling / Reflexes,Defense / Speed,Physical / Positioning,Height,BodyType
0,Ferenc Puskás,Greats of Game Icon,98,CF,ST,0,0,5,4,H / M,...,98,53,84,2352,2558,"172cm | 5'8""",Avg & Stocky(72kg),Mostly Explosive,,
1,Kenny Dalglish,Greats of Game Icon,98,ST,CF,3.2M,1.54%,3.9M,22.33%,5,...,98,87,96,52,83,250,2513,"173cm | 5'8""",Avg & Normal(73kg),Explosive
2,Rivaldo,Greats of Game Icon,98,LW,"LM, CAM, CF",5.7M,0,5,5,H / H,...,99,51,85,189,2546,"186cm | 6'1""",Tall & Lean(75kg),Controlled,,
3,Ronaldo,Greats of Game Icon,98,ST,CF,0,0,5,5,H / M,...,97,49,84,378,2482,"183cm | 6'0""",R9(78kg),Controlled,,
4,Ronaldinho,Greats of Game Icon,98,LW,"LM, CAM",12.53M,7.19%,10.15M,1.46%,4,...,95,99,99,43,91,516,2509,"180cm | 5'11""",Ronaldinho(78kg),Controlled Explosive


In [88]:
sleep(20)
men_soup_page = authentication_page("https://www.futbin.com/players?page=1&gender=men")
women_soup_page = authentication_page("https://www.futbin.com/players?page=1&gender=women")

We will only consider men players.


In [86]:
men_players = pd.DataFrame([])

In [90]:
sleep(20)
for i in range(1,  100):
    try:
        page_df = iterate_for_page(i, "men")
        men_players = pd.concat([men_players, page_df])

        if (i) % 6 == 0:
            sleep(15)
        if (i) % 50 == 0:
            print(f"Page {i} Successful")

    except HTTPError as err:
        print("--Erro--", err, sep = "\n")
        sleep(30)

print("\n# -- All Pages Done -- #")

Page 50 Successful

# -- All Pages Done -- #


In [91]:
men_players.head()


Unnamed: 0,Name,Club,Nation,League,Rating,Main_Position,Alternate_Positions,Card_Version,Run_Style,Price,...,Attack_Workrate,Defense_Workrate,Pace / Diving,Shooting / Handling,Passing / Kicking,Dribbling / Reflexes,Defense / Speed,Physical / Positioning,Height,BodyType
0,Ferenc Puskás,Greats of Game Icon,98,CF,ST,0,0,5,4,H / M,...,98,53,84,2352,2558,"172cm | 5'8""",Avg & Stocky(72kg),Mostly Explosive,,
1,Kenny Dalglish,Greats of Game Icon,98,ST,CF,3.2M,1.54%,3.9M,22.33%,5,...,98,87,96,52,83,250,2513,"173cm | 5'8""",Avg & Normal(73kg),Explosive
2,Rivaldo,Greats of Game Icon,98,LW,"LM, CAM, CF",5.7M,0,5,5,H / H,...,99,51,85,189,2546,"186cm | 6'1""",Tall & Lean(75kg),Controlled,,
3,Ronaldo,Greats of Game Icon,98,ST,CF,0,0,5,5,H / M,...,97,49,84,378,2482,"183cm | 6'0""",R9(78kg),Controlled,,
4,Ronaldinho,Greats of Game Icon,98,LW,"LM, CAM",12.53M,7.19%,10.15M,1.46%,4,...,95,99,99,43,91,516,2509,"180cm | 5'11""",Ronaldinho(78kg),Controlled Explosive
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25,Kevin Volland,normal,79,ST,"CAM, CF",500,11.11%,1.8K,4,3,...,78,78,46,84,-2,2140,"174cm | 5'9""",Avg & Stocky(85kg),Controlled,
26,Nico Williams,normal,79,RM,"LM, RW",700,10K,566.67%,5,4,...,72,83,33,60,352,1956,"181cm | 5'11""",Avg & Lean(67kg),Mostly Explosive,
27,Denis Zakaria,normal,79,CDM,CM,700,700,3,3,H / H,...,76,78,84,60,2068,"191cm | 6'3""",Tall & Lean(81kg),Lengthy,,
28,Davide Zappacosta,normal,79,LWB,"LB, RM, LM",900,80%,700,3,3,...,73,74,75,72,49,2101,"182cm | 6'0""",Avg & Lean(75kg),Controlled Explosive,


In [93]:
men_players['Rating'] = pd.to_numeric(men_players['Rating'])

ValueError: Unable to parse string "ST" at position 0