In [1]:
# !pip install lxml
# !pip install selenium
# !pip install cssselect
#!apt-get update
#!apt install chromium-chromedriver
#!cp /usr/lib/chromium-browser/chromedriver /usr/bin

In [2]:
import pandas as pd
import numpy as np

from lxml import html
from lxml.cssselect import CSSSelector

# Create the web driver
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

import sys
#sys.path.insert(0,'d:\d_workspace\axieinfinity\axie-infinity-leaderboard-axie-scraper')
from selenium import webdriver
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome('d:/d_workspace/axieinfinity/axie-infinity-leaderboard-axie-scraper/chromedriver',options=chrome_options)

In [3]:
# Load the site on the web driver
url = "https://axie.zone/leaderboard"
driver.get(url)
driver.implicitly_wait(10)

# List to store links pointing to the player pages
players = []

try:
  # Find the anchor tags in the leaderboard table
  table_body_selector = ".leaderboard_table tbody"
  table_body = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, table_body_selector)))
  
  table_html = table_body[0].get_attribute('innerHTML')
  table_tree = html.fromstring(table_html)

  table_rows = table_tree.cssselect("tr")

  # Extract the href from the anchors found in the table
  for row in table_rows:
    players.append({
        "url": "https://axie.zone/" + row.cssselect("a")[0].get("href"),
        "winrate": int(row.cssselect("td:nth-child(6)")[0].text.replace("%", ""))
    })

  print("Retrieved", len(players), "URLs")
except TimeoutException:
  print("Element not properly loaded")

print(players)

Retrieved 101 URLs
[{'url': 'https://axie.zone//profile?ron_addr=0xc882d12c11e2bf22a9a9b220fe6e9254af716c26', 'winrate': 0}, {'url': 'https://axie.zone//profile?ron_addr=0x0051463893d80bd2de2dd31f6a74edac96441de5', 'winrate': 0}, {'url': 'https://axie.zone//profile?ron_addr=0xf772d45bd17eb5aaa4ea1fe4e4290ff9dce61779', 'winrate': 0}, {'url': 'https://axie.zone//profile?ron_addr=0x21b76375cd457907c238794bc3db3228dc87507c', 'winrate': 0}, {'url': 'https://axie.zone//profile?ron_addr=0x05e5e2a7a15c62892b7d886f9e8c6a5e2d5e776a', 'winrate': 0}, {'url': 'https://axie.zone//profile?ron_addr=0x702009d8ca2f03b815a4732d12411abbcbb14a2a', 'winrate': 0}, {'url': 'https://axie.zone//profile?ron_addr=0x70adec6b529e9ad38c20b097eb229dc09ebffeed', 'winrate': 0}, {'url': 'https://axie.zone//profile?ron_addr=0xccaaaf8c4bdcfd36a9943e9206fbb0f62b8164d8', 'winrate': 0}, {'url': 'https://axie.zone//profile?ron_addr=0xf3b143e9a4ac5dec8d51de70d9417ef793130df2', 'winrate': 0}, {'url': 'https://axie.zone//profile

In [5]:
import re

# Used to extract the Axie parts from the user page
def get_parts(players):
  # List to store the scraped data
  axie_data = []

  # List for failed links
  failed = []

  # Counter that will be used to track the progress of the function
  progress = 0

  # Regex pattern to parse the parts
  p = re.compile('(.*): (.*) (\[.*\])')

  # Iterate through each of the player in the leaderboards
  for player in players:
    progress += 1
    try:
      # Load the player page on the driver
      driver.get(player["url"])
      driver.implicitly_wait(10)

      # Wait till the Axie info container loads
      _ = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "#last_used_team_container > div")))

      # Get the element that wraps the last used team container
      # This can be refactored to include the most used team of the player as well
      axie_container = WebDriverWait(driver, 0).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "#last_used_team_container")))

      # Convert the selenium webelement into a LXML tree
      axie_html = axie_container[0].get_attribute('innerHTML')
      axie_tree = html.fromstring(axie_html)

      # Get the Axie containers
      info_containers = axie_tree.cssselect(".search_result_wrapper")

      for info_container in info_containers:
        axie_parts = {}

        # Get the address of the owner
        axie_parts["User_Addr"] = player["url"].replace("https://axie.zone/profile?ron_addr=", "")

        # Get the Axie class
        axie_parts["Class"] = info_container.cssselect(".search_result")[0].get('class').replace("search_result ", "").capitalize()

        # Scrapes the parts from the Axie container
        parts = info_container.cssselect(".purity_part > div")

        for part in parts:
          m = p.match(part.get('title'))
          axie_parts[m.group(1)] = m.group(2)

        # Add winrate
        axie_parts["Winrate"] = player["winrate"]

        # Add the scraped parts to the main list of data
        axie_data.append(axie_parts)

      # To keep track and now that our script is working, we would print a message
      # containing a summary of the scraping's progress
      print("(", progress, "/", len(players) ,") Retrieved")
    except Exception as e:
      # We might also encounter an error where our collection script fails,
      # in the rare case where this would happen we would need to manually
      # revisit what went wrong and fix it.
      failed.append(player)
      print("(", progress, "/", len(players) ,") Error occured while retrieving", player["url"])
      print("Error received:", e)

  if len(failed) != 0:
    print("Retrying failed links")
    axie_data += get_parts(failed)

  return axie_data
  
# Start the scrapping process
res = get_parts(players)

( 1 / 101 ) Error occured while retrieving https://axie.zone//profile?ron_addr=0xc882d12c11e2bf22a9a9b220fe6e9254af716c26
Error received: Message: 

( 2 / 101 ) Retrieved
( 3 / 101 ) Retrieved
( 4 / 101 ) Retrieved
( 5 / 101 ) Retrieved
( 6 / 101 ) Retrieved
( 7 / 101 ) Error occured while retrieving https://axie.zone//profile?ron_addr=0x70adec6b529e9ad38c20b097eb229dc09ebffeed
Error received: Message: 

( 8 / 101 ) Retrieved
( 9 / 101 ) Error occured while retrieving https://axie.zone//profile?ron_addr=0xf3b143e9a4ac5dec8d51de70d9417ef793130df2
Error received: Message: 

( 10 / 101 ) Retrieved
( 11 / 101 ) Retrieved
( 12 / 101 ) Retrieved
( 13 / 101 ) Retrieved
( 14 / 101 ) Retrieved
( 15 / 101 ) Error occured while retrieving https://axie.zone//profile?ron_addr=0xb5ba1caf86164e07b5eeb09c0686442afc276e63
Error received: Message: 

( 16 / 101 ) Error occured while retrieving https://axie.zone//profile?ron_addr=0x5b705c8bd41cbccd20da4aaa24577d9e9846e7ef
Error received: Message: 

( 17 /

In [None]:
# Convert the python list into a dataframe
df_res = pd.DataFrame(res)
df_res.head()

In [None]:
# Export the dataframe into a csv file
df_res.to_csv("leaderboard.csv", index=False)