In [1]:
import os
import time

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

Game Data

In [None]:
# Scrape the per-year "top games" table from escharts.com, following the
# pagination links, and save every collected row to a single Excel workbook.
all_games_data = []

years = range(2017, 2025)

# Machine-specific location of the ChromeDriver binary.
chrome_driver_path = "C:\\Users\\trung kien\\Downloads\\chromedriver-win64\\chromedriver.exe"

# The row parser reads cells 0-4, so a usable row needs at least five <td> cells.
MIN_GAME_COLUMNS = 5
# Upper bound on re-locating rows after a stale reference, so the retry cannot spin forever.
MAX_STALE_RETRIES = 5

for year in years:
    # Reset per iteration: `finally` must never touch an undefined driver or
    # the (already quit) driver left over from the previous year.
    driver = None
    try:
        service = Service(chrome_driver_path)
        driver = webdriver.Chrome(service=service)

        url = f'https://escharts.com/top-games?year={year}'
        driver.get(url)

        games_data = []
        wait = WebDriverWait(driver, 10)

        # One pass of this loop handles one page of the paginated table.
        while True:
            try:
                # Wait for the table rows to be present, retrying a bounded
                # number of times if the lookup reports a stale reference.
                for _attempt in range(MAX_STALE_RETRIES):
                    try:
                        rows = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'tbody.relative tr')))
                        break
                    except StaleElementReferenceException:
                        print("StaleElementReferenceException encountered. Re-locating rows...")
                else:
                    raise TimeoutException(f"Rows stayed stale after {MAX_STALE_RETRIES} attempts")

                for row in rows:
                    try:
                        columns = row.find_elements(By.TAG_NAME, 'td')
                        # Skip rows that do not carry all five data cells
                        # instead of failing on an out-of-range index.
                        if len(columns) < MIN_GAME_COLUMNS:
                            continue

                        games_data.append({
                            'No': columns[0].text.strip(),
                            'Name': columns[1].text.strip(),
                            'Type': columns[2].text.strip(),
                            'Prize Tool': columns[3].text.strip(),
                            'Peak Viewers': columns[4].text.strip(),
                            'Year': year,
                        })
                    except Exception as e:
                        # Best effort: one unreadable row must not abort the page.
                        print(f"Error parsing row: {e}")

                try:
                    # Locate the pagination navigation and its "next" link.
                    pagination_div = driver.find_element(By.CSS_SELECTOR, 'div.flex.md\\:justify-between.sm-max\\:flex-col')
                    nav = pagination_div.find_element(By.CSS_SELECTOR, 'nav.pagination-buttons')
                    next_buttons = nav.find_elements(By.CSS_SELECTOR, 'a[rel="next"]')

                    # No "next" link means this was the last page.
                    if not next_buttons:
                        break

                    next_button = next_buttons[0]
                    if not (next_button.is_displayed() and next_button.is_enabled()):
                        break

                    # Click through JavaScript rather than a native click.
                    driver.execute_script("arguments[0].click();", next_button)
                    time.sleep(10)  # Fixed pause to give the next page time to load
                except Exception as e:
                    print(f"Error while navigating pages for {year}: {e}")
                    break
            except Exception as e:
                print(f"Failed to load data for {year}: {e}")
                break

        # Keep whatever was collected for this year, even if paging stopped early.
        all_games_data.extend(games_data)

    except Exception as e:
        print(f"Failed to initialize scraping for {year}: {e}")

    finally:
        if driver is not None:
            driver.quit()

# Once scraping for all years is complete, build one DataFrame from all rows.
df = pd.DataFrame(all_games_data)

# The column is absent when nothing was scraped; otherwise flatten the
# multi-line cell text into a comma-separated string.
if 'Peak Viewers' in df.columns:
    df['Peak Viewers'] = df['Peak Viewers'].str.split('\n').str.join(', ')

filename = 'games_data.xlsx'

# Save the combined DataFrame to a single Excel file.
df.to_excel(filename, index=False, engine='openpyxl')

print(f"Data successfully saved to {filename}")


Organizations Data

In [None]:
# Scrape the escharts.com organizations table once per sort order and write
# each result to its own sheet of a single Excel workbook.
orders = ['players', 'prizes', 'tournaments_count']
data_org_dict = {}

# Machine-specific location of the ChromeDriver binary.
chrome_driver_path = 'C:\\Users\\trung kien\\Downloads\\chromedriver-win64\\chromedriver.exe'

for order in orders:
    all_organizations = []
    # Reset per iteration: `finally` must never touch an undefined driver or
    # the (already quit) driver left over from the previous sort order.
    driver = None
    try:
        service = Service(chrome_driver_path)
        driver = webdriver.Chrome(service=service)
        url = f'https://escharts.com/organizations?order={order}'
        driver.get(url)

        # Scroll down to make sure all rows are loaded (if lazy loading is in place)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

        wait = WebDriverWait(driver, 10)
        # One pass of this loop handles one page of the paginated table.
        while True:
            try:
                rows = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'tbody tr')))

                for row in rows:
                    try:
                        columns = row.find_elements(By.TAG_NAME, 'td')
                        # The parser reads cells 0 and 3-6, so it needs seven cells.
                        if len(columns) < 7:
                            continue

                        # The organization name is the link text inside the
                        # first `div.relative` of the first cell.
                        divs = columns[0].find_elements(By.CSS_SELECTOR, 'div.relative')
                        a_tag = divs[0].find_element(By.TAG_NAME, 'a')

                        all_organizations.append({
                            'Name': a_tag.text.strip(),
                            'Prize': columns[3].text.strip(),
                            'Streamer': columns[4].text.strip(),
                            'Player': columns[5].text.strip(),
                            'Tournament': columns[6].text.strip(),
                        })
                    except Exception as e:
                        # Best effort: one unreadable row must not abort the page.
                        print(f"Error parsing row: {e}")

                # Stop paging once at least 40 organizations are collected
                # (checked per page, so the final count may exceed 40).
                if len(all_organizations) >= 40:
                    break

                try:
                    pagination_div = driver.find_element(By.CSS_SELECTOR, 'div.flex.md\\:justify-between.sm-max\\:flex-col')
                    nav = pagination_div.find_element(By.CSS_SELECTOR, 'nav.pagination-buttons')
                    next_buttons = nav.find_elements(By.CSS_SELECTOR, 'a[rel="next"]')

                    # No "next" link means this was the last page.
                    if not next_buttons:
                        break

                    next_button = next_buttons[0]
                    if not (next_button.is_displayed() and next_button.is_enabled()):
                        break

                    # Click through JavaScript rather than a native click.
                    driver.execute_script("arguments[0].click();", next_button)
                    time.sleep(10)  # Fixed pause to give the next page time to load
                except Exception as e:
                    print(f"Error while navigating pages: {e}")
                    break

            except TimeoutException:
                print("Timed out waiting for page elements.")
                break

    except Exception as e:
        print(f"Failed: {e}")
    finally:
        if driver is not None:
            driver.quit()

    # Convert list to DataFrame
    df = pd.DataFrame(all_organizations)

    # Keep one DataFrame per sort order; each becomes its own Excel sheet.
    data_org_dict[order] = df

# Write all DataFrames to different sheets in one Excel file
excel_path = 'organizations_data.xlsx'
with pd.ExcelWriter(excel_path, engine='xlsxwriter') as writer:
    for sheet_name, df in data_org_dict.items():
        df.to_excel(writer, sheet_name=sheet_name, index=False)

print(f"Data saved to {excel_path}")


Game Players Per Month

In [None]:
# Scrape the element with id 'table_3' from each game's activeplayer.io page
# and write one sheet per game to 'players_per_month.xlsx'.
driver_path = "C:\\Users\\trung kien\\Downloads\\chromedriver-win64\\chromedriver.exe"
service = Service(driver_path)
driver = webdriver.Chrome(service=service)

games = ['league-of-legends', 'dota-2', 'valorant']

dataframes = {}

try:
    for game in games:
        url = f'https://activeplayer.io/{game}/'
        driver.get(url)

        try:
            # Wait for the table instead of looking it up immediately, so a
            # table that appears shortly after page load is still found.
            table = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.ID, 'table_3'))
            )
            rows = table.find_elements(By.TAG_NAME, 'tr')

            # Each row becomes a list of its header cells followed by its data cells.
            data = []
            for row in rows:
                cells = row.find_elements(By.TAG_NAME, 'th') + row.find_elements(By.TAG_NAME, 'td')
                data.append([cell.text for cell in cells])

            df = pd.DataFrame(data)
            dataframes[game] = df

        except Exception as e:
            print(f"Table with id 'table_3' not found for {game}. Error: {e}")
finally:
    # Always release the browser, even if scraping raised part-way through.
    driver.quit()

# Only non-empty tables are written; with nothing to write, skip the workbook
# entirely because the openpyxl writer cannot save a workbook with no sheets.
sheets_to_write = {game: frame for game, frame in dataframes.items() if not frame.empty}

if sheets_to_write:
    with pd.ExcelWriter('players_per_month.xlsx', engine='openpyxl') as writer:
        for game, df in sheets_to_write.items():
            df.to_excel(writer, sheet_name=game, index=False, header=False)

    print("Data saved to 'players_per_month.xlsx'.")
else:
    print("No tables were scraped; 'players_per_month.xlsx' was not written.")


Data saved to 'games_data.xlsx'.


In [4]:
# Scrape the element with id 'table_2' from each game's activeplayer.io page
# and add one sheet per game to 'players_per_month.xlsx'.
driver_path = "C:\\Users\\trung kien\\Downloads\\chromedriver-win64\\chromedriver.exe"
service = Service(driver_path)
driver = webdriver.Chrome(service=service)

games = ['counter-strike-2', 'mobile-legends-bang-bang']

dataframes = {}

try:
    for game in games:
        url = f'https://activeplayer.io/{game}/'
        driver.get(url)

        try:
            # Wait for the table instead of looking it up immediately, so a
            # table that appears shortly after page load is still found.
            table = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.ID, 'table_2'))
            )
            rows = table.find_elements(By.TAG_NAME, 'tr')

            # Each row becomes a list of its header cells followed by its data cells.
            data = []
            for row in rows:
                cells = row.find_elements(By.TAG_NAME, 'th') + row.find_elements(By.TAG_NAME, 'td')
                data.append([cell.text for cell in cells])

            df = pd.DataFrame(data)
            dataframes[game] = df

        except Exception as e:
            # The message names the id this cell actually looks up.
            print(f"Table with id 'table_2' not found for {game}. Error: {e}")
finally:
    # Always release the browser, even if scraping raised part-way through.
    driver.quit()

output_path = 'players_per_month.xlsx'

# Only non-empty tables are written.
sheets_to_write = {game: frame for game, frame in dataframes.items() if not frame.empty}

if sheets_to_write:
    if os.path.exists(output_path):
        # Append to the existing workbook; replace a sheet left by an earlier
        # run so re-executing this cell does not fail on a duplicate name.
        writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        # Append mode needs an existing file, so create the workbook instead.
        writer_kwargs = {'mode': 'w'}

    with pd.ExcelWriter(output_path, engine='openpyxl', **writer_kwargs) as writer:
        for game, df in sheets_to_write.items():
            df.to_excel(writer, sheet_name=game, index=False, header=False)

    print("Data saved to 'players_per_month.xlsx'.")
else:
    print("No tables were scraped; 'players_per_month.xlsx' was not modified.")


Data saved to 'players_per_month.xlsx'.
