In [None]:
import os
import time
import json
import csv
import logging
from getpass import getpass

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

from bs4 import BeautifulSoup

In [None]:
def make_session(xsrf_token: str, wcl_session_cookie: str, timeout: int = 12):
    """Build a ``requests.Session`` preconfigured for www.esologs.com.

    The session carries browser-like AJAX headers, the two authentication
    cookies, and an HTTPS adapter that retries on a fixed set of status codes.

    Args:
        xsrf_token: Value stored in the ``XSRF-TOKEN`` cookie.
        wcl_session_cookie: Value stored in the ``wcl_session`` cookie.
        timeout: Stored on the session as ``request_timeout``. This is not a
            ``requests`` feature; it is an ad-hoc attribute that
            ``fetch_soup`` reads as its default timeout.

    Returns:
        The configured ``requests.Session``.
    """
    cookie_domain = "www.esologs.com"
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "Accept": "text/html, */*; q=0.01",
        "Accept-Language": "en-US,en;q=0.5",
        "X-Requested-With": "XMLHttpRequest",
        "Referer": "https://www.esologs.com/",
    }
    auth_cookies = (
        ("XSRF-TOKEN", xsrf_token),
        ("wcl_session", wcl_session_cookie),
    )

    session = requests.Session()
    session.headers.update(browser_headers)
    for cookie_name, cookie_value in auth_cookies:
        session.cookies.set(cookie_name, cookie_value, domain=cookie_domain, path="/")

    # Only https:// URLs get the retrying adapter; http:// keeps the default.
    retry_policy = Retry(
        total=3,
        backoff_factor=0.8,
        status_forcelist=(429, 500, 502, 503, 504),
    )
    session.mount("https://", HTTPAdapter(max_retries=retry_policy))

    session.request_timeout = timeout
    return session

def fetch_soup(url: str, session: "requests.Session", timeout=None):
    """GET ``url`` with ``session`` and parse the body as HTML.

    Args:
        url: Address to fetch.
        session: Session used for the request. If it has a ``request_timeout``
            attribute (as set by ``make_session``), that is the default timeout.
        timeout: Per-call timeout override. A falsy value (``None`` or ``0``)
            falls back to the session's ``request_timeout``, or 12 seconds if
            the session has none.

    Returns:
        A ``BeautifulSoup`` document, or ``None`` if the request, the HTTP
        status check, or parsing raised. Failures are logged at WARNING level.
    """
    # Resolve the timeout before the try block: a session without
    # `request_timeout` used to raise AttributeError here, which the broad
    # except swallowed, so the request was silently never sent.
    effective_timeout = timeout or getattr(session, "request_timeout", 12)

    try:
        resp = session.get(url, timeout=effective_timeout)
        resp.raise_for_status()
        return BeautifulSoup(resp.content, "html.parser")
    except Exception as exc:
        # Best-effort by design: callers treat None as "skip this page".
        # WARNING (not DEBUG) so the reason is visible with default logging.
        logging.warning("fetch error %s: %s", url, exc)
        return None


In [None]:
def generate_esologs_urls():
    """Enumerate every ranking-table URL to scrape.

    Boss ids are numbered consecutively across zones starting at 1, following
    the per-zone boss counts below. Excluded zones still consume their boss
    ids so later zones keep the same numbering.

    Returns:
        A list of ``(zone, boss, partition, url)`` tuples, ordered by zone,
        then boss, then partition.
    """
    url_template = (
        "https://www.esologs.com/zone/rankings/table/{zone}/bossdps/{boss}/"
        "{diff_code}/12/{partition}/Any/Any/0/0/0/0/0/?search=&page=1"
        "&affixes=0&faction=0&dpstype=rdps&restricted=0&covenant=undefined"
        "&soulbind=undefined&hardmode=null&includeProgressMap=true"
        "&heroTree=undefined&externalBuffs=0"
    )

    # Number of bosses per zone id; zones missing from the map count as 0.
    bosses_per_zone = {
        1: 4, 2: 4, 3: 4, 4: 0, 5: 3, 6: 5, 7: 3, 8: 4,
        9: 4, 10: 4, 11: 7,
        12: 3,
        13: 0,
        14: 3, 15: 3, 16: 3, 17: 3, 18: 3, 19: 3,
    }

    # Global boss ids that are never requested.
    # NOTE(review): presumably the non-final encounters of each zone — confirm.
    excluded_bosses = frozenset(
        {1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 19, 21, 22, 24, 25, 26}
    )
    excluded_zones = frozenset({9, 10, 11, 13})

    # Value substituted for {diff_code}; zones not listed use the fallback.
    zone_diff_codes = {
        1: 122,
        2: 122,
        3: 122,
        5: 122,
        6: 122,
        7: 124,
        8: 125,
        12: 122,
    }
    fallback_diff_code = 122

    collected = []
    next_boss_id = 1

    for zone in range(1, 20):
        zone_boss_count = bosses_per_zone.get(zone, 0)
        zone_boss_ids = range(next_boss_id, next_boss_id + zone_boss_count)
        # Advance the counter up front so excluded zones still use up their ids.
        next_boss_id += zone_boss_count

        if zone in excluded_zones:
            continue

        # Zones below 14 start at partition 1; from zone 14 on the first
        # partition moves up by 4 per zone (5, 9, 13, ...).
        first_partition = 5 + 4 * (zone - 14) if zone >= 14 else 1
        diff_code = zone_diff_codes.get(zone, fallback_diff_code)

        for boss in zone_boss_ids:
            if boss in excluded_bosses:
                continue
            collected.extend(
                (
                    zone,
                    boss,
                    partition,
                    url_template.format(
                        zone=zone, boss=boss, diff_code=diff_code, partition=partition
                    ),
                )
                for partition in range(first_partition, 28)
            )

    return collected


In [4]:
import re
import json
import html
from bs4 import BeautifulSoup

def extract_all_talents_and_gear(loaded_data):
    """Pull per-row talent icons and gear from the page's inline scripts.

    The text of every ``<script>`` element is joined and scanned for
    ``talentsAndGear["<row>"] = {... talentsCell: '...'}`` assignments and
    ``talentsAndGear["<row>"].gear.push({...})`` calls.

    Args:
        loaded_data: Parsed document exposing ``find_all("script")``.

    Returns:
        Dict keyed by the row key found in the script. Each value may hold
        ``"talents"`` (a list of ``.png`` URLs from the talents cell) and/or
        ``"gear"`` (a list of ``{"name", "icon", "id"}`` dicts).
    """
    talents_pattern = re.compile(
        r'talentsAndGear\["(?P<row>[^"]+)"\]\s*=\s*\{.*?talentsCell:\s*\'(?P<html>.*?)\'',
        re.S
    )
    gear_pattern = re.compile(
        r'talentsAndGear\["(?P<row>[^"]+)"\]\.gear\.push\(\{\s*'
        r'name:\s*"(?P<name>[^"]*)",\s*'
        r'icon:\s*"(?P<icon>[^"]*)",\s*'
        r'.*?id:\s*(?P<id>\d+)',
        re.S
    )

    script_chunks = [tag.get_text() for tag in loaded_data.find_all("script")]
    combined_script = "\n".join(script_chunks)

    per_row = {}

    # Pass 1: talent icons. A later assignment for the same row replaces
    # the earlier list.
    for match in talents_pattern.finditer(combined_script):
        icon_urls = re.findall(r'https://[^"\']+?\.png', match.group("html"))
        per_row.setdefault(match.group("row"), {})["talents"] = icon_urls

    # Pass 2: gear items, appended in the order the push() calls appear.
    for match in gear_pattern.finditer(combined_script):
        item = {
            "name": html.unescape(match.group("name")),
            "icon": match.group("icon"),
            "id": int(match.group("id")),
        }
        per_row.setdefault(match.group("row"), {}).setdefault("gear", []).append(item)

    return per_row

def _first_child_text(element):
    """Return the stripped text of ``element``'s first child node, or None."""
    if element is None or not element.contents:
        return None
    first = element.contents[0]
    if not first:
        # An empty text node counts as "no value".
        return None
    if hasattr(first, "get_text"):
        return first.get_text(strip=True)
    # Plain text nodes that do not offer get_text().
    return str(first).strip()


def _parse_dps(cell):
    """Return the first number in ``cell``'s text as a float, or None."""
    if cell is None:
        return None
    match = re.search(r"[\d,.]+", cell.get_text(" ", strip=True))
    if match is None:
        return None
    try:
        return float(match.group(0).replace(",", ""))
    except ValueError:
        # The pattern also matches punctuation-only runs such as "..." or ",".
        return None


def _parse_timestamp(row, row_id):
    """Return the leading integer of the row's hidden date span, or None."""
    date_cell = row.select_one(f"#date-{row_id.split('-')[-1]}") or \
        row.select_one(".players-table-date")
    if not date_cell:
        return None
    hidden = date_cell.find("span", style=lambda v: v and "display:none" in v)
    if not hidden:
        return None
    match = re.match(r"(\d+)", hidden.get_text(strip=True))
    return int(match.group(1)) if match else None


def extract_table_rows(loaded_data):
    """Extract one record per ranking row (``tr`` whose id starts with ``row-``).

    Missing cells yield ``None`` for the corresponding field instead of
    raising, so a single odd row does not abort parsing of the page.

    Args:
        loaded_data: Parsed document exposing ``select``.

    Returns:
        List of dicts with keys ``row_id``, ``name`` (text of
        ``.main-table-player``), ``@name`` (text of the first child of
        ``.display-name-col``), ``dps`` (float) and ``timestamp`` (int taken
        from the hidden span of the date cell; units not established here).
    """
    records = []
    for row in loaded_data.select("tr[id^=row-]"):
        row_id = row.get("id")

        name_el = row.select_one(".main-table-player")
        name = name_el.get_text(strip=True) if name_el else None

        records.append({
            "row_id": row_id,
            "name": name,
            "@name": _first_child_text(row.select_one(".display-name-col")),
            "dps": _parse_dps(row.select_one(".players-table-dps")),
            "timestamp": _parse_timestamp(row, row_id),
        })

    return records

def parse_full(loaded_data):
    """Combine table rows with their talents and gear.

    Each record from ``extract_table_rows`` is extended in place with the
    ``"talents"`` and ``"gear"`` lists that ``extract_all_talents_and_gear``
    found under the same ``row_id``. Rows without a match get empty lists.

    Args:
        loaded_data: Parsed document passed through to both extractors.

    Returns:
        The list of enriched row dicts, in table order.
    """
    table_rows = extract_table_rows(loaded_data)
    extras_by_row = extract_all_talents_and_gear(loaded_data)

    merged = []
    for record in table_rows:
        extras = extras_by_row.get(record["row_id"], {})
        record["talents"] = extras.get("talents", [])
        record["gear"] = extras.get("gear", [])
        merged.append(record)

    return merged

In [None]:
def save_results_to_csv(zone, boss, partition, data_rows, base_dir="data"):
    """Write one page of parsed rows to a per-partition CSV file.

    The file is created (or overwritten) at
    ``<base_dir>/zone_<zone>/boss_<boss>/partition_<partition>.csv``;
    missing directories are created.

    Args:
        zone: Zone id used in the directory name.
        boss: Boss id used in the directory name.
        partition: Partition number used in the file name.
        data_rows: Iterable of dicts with keys ``row_id``, ``name``, ``@name``,
            ``dps``, ``timestamp``, ``talents`` (list of str) and ``gear``
            (list of dicts with ``id`` and ``name``).
        base_dir: Root output directory. Defaults to ``"data"``, the location
            that used to be hard-coded.
    """
    directory = os.path.join(base_dir, f"zone_{zone}", f"boss_{boss}")
    os.makedirs(directory, exist_ok=True)

    file_path = os.path.join(directory, f"partition_{partition}.csv")

    fieldnames = ["row_id", "name", "@name", "dps", "timestamp", "talents", "gear"]

    with open(file_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()

        for row in data_rows:
            writer.writerow({
                "row_id": row["row_id"],
                "name": row["name"],
                "@name": row["@name"],
                "dps": row["dps"],
                "timestamp": row["timestamp"],
                # Lists are flattened to ';'-separated strings; gear keeps
                # only "<id>:<name>" (the icon is dropped).
                "talents": ";".join(row["talents"]),
                "gear": ";".join(f"{g['id']}:{g['name']}" for g in row["gear"]),
            })

def rows_to_dataframe(rows, zone=None, boss=None, partition=None):
    """Convert parsed row dicts to a pandas DataFrame.

    Args:
        rows: Sequence of record dicts (e.g. the output of ``parse_full``).
        zone: If given, added as a constant ``zone`` column.
        boss: If given, added as a constant ``boss`` column.
        partition: If given, added as a constant ``partition`` column.

    Returns:
        A ``pandas.DataFrame`` with one row per record.
    """
    # Local import: the notebook's import cells do not bring in pandas, so
    # `pd` was undefined here. Importing lazily keeps the rest of the
    # notebook runnable without pandas when this helper is not used.
    import pandas as pd

    df = pd.DataFrame.from_records(rows)
    for column, value in (("zone", zone), ("boss", boss), ("partition", partition)):
        if value is not None:
            df[column] = value
    return df


In [None]:
# Smoke test: fetch and parse the first generated URL to confirm that the
# pasted cookies work before starting the full scrape.
# getpass() keeps the cookie values out of the notebook output.
xsrf = getpass("XSRF_TOKEN (paste): ")
wcl = getpass("wcl_session cookie (paste): ")

sess = make_session(xsrf_token=xsrf, wcl_session_cookie=wcl)

# Each generated entry is (zone, boss, partition, url); index 3 is the URL.
test_url = generate_esologs_urls()[0][3]
print("Testing:", test_url)

soup = fetch_soup(test_url, sess)
if soup is not None:
    parsed = parse_full(soup)
    print("rows parsed:", len(parsed))
else:
    # fetch_soup returns None on any request or parse failure.
    print("Fetch failed — check cookies or network.")


Testing: https://www.esologs.com/zone/rankings/table/1/dps/1/122/12/1/Any/Any/0/0/0/0/0/?search=&page=1&affixes=0&faction=0&dpstype=rdps&restricted=0&covenant=undefined&soulbind=undefined&hardmode=null&includeProgressMap=true&heroTree=undefined&externalBuffs=0
rows parsed: 100


In [9]:
def run_all(base_output_dir="data",
            sleep_between=0.35,
            xsrf_token=None,
            wcl_session_cookie=None,
            start_index=0,
            limit=None):
    """Scrape the generated URLs, save each page to CSV and collect all rows.

    Pages whose fetch fails are reported and skipped. Progress is printed as
    ``[i/N] Z<zone> B<boss> P<partition>``.

    Args:
        base_output_dir: Accepted for interface compatibility but not used by
            this function; the per-page CSV location is decided by
            ``save_results_to_csv``.
        sleep_between: Seconds to sleep after every URL, fetched or not.
        xsrf_token: Value for the ``XSRF-TOKEN`` cookie (required).
        wcl_session_cookie: Value for the ``wcl_session`` cookie (required).
        start_index: Index into the generated URL list to start from.
        limit: Maximum number of URLs to process; ``None`` means all
            remaining URLs.

    Returns:
        List of row dicts, each prefixed with ``zone``, ``boss`` and
        ``partition`` keys.

    Raises:
        AssertionError: If either cookie value is missing or empty.
    """
    assert xsrf_token and wcl_session_cookie, "Provide cookies first"

    sess = make_session(xsrf_token, wcl_session_cookie)
    master_rows = []

    # try/finally so the session's connection pool is released even when a
    # parse or save step raises part-way through a long run.
    try:
        stop_index = None if limit is None else start_index + limit
        all_urls = generate_esologs_urls()[start_index:stop_index]
        total = len(all_urls)

        for counter, (zone, boss, partition, url) in enumerate(all_urls, start=1):
            print(f"[{counter}/{total}] Z{zone} B{boss} P{partition}")

            soup = fetch_soup(url, sess)
            if soup is None:
                print("  -> fetch failed, skipping")
            else:
                parsed = parse_full(soup)
                save_results_to_csv(zone, boss, partition, parsed)
                master_rows.extend(
                    {"zone": zone, "boss": boss, "partition": partition, **row}
                    for row in parsed
                )

            # Throttle after every URL, including skipped ones.
            time.sleep(sleep_between)
    finally:
        sess.close()

    return master_rows


def save_master_csv(master_rows, filename="master_esologs.csv"):
    """Write all collected rows to a single CSV file.

    The header is the union of keys across all rows in first-seen order, so a
    row carrying an extra key no longer aborts the write; rows missing a key
    get an empty cell. For rows that all share the same keys the output is
    unchanged.

    Non-string values are stringified by the ``csv`` module, so list-valued
    fields are written as their Python ``str()`` form.

    Args:
        master_rows: List of row dicts. If empty, nothing is written.
        filename: Destination path; an existing file is overwritten.
    """
    if not master_rows:
        print("No data to save, nothing written.")
        return

    # dict.fromkeys de-duplicates while preserving first-seen order.
    fieldnames = list(dict.fromkeys(key for row in master_rows for key in row))

    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(master_rows)

    print(f"Saved master CSV → {filename}")

In [7]:
# Prompt for the cookies used by the full run below.
# This uses the `getpass` function imported in the notebook's first cell
# (`from getpass import getpass`). The previous `import getpass` here rebound
# the name to the module, so re-running an earlier cell that calls
# `getpass(...)` failed with "'module' object is not callable".
print("Enter your ESOlogs session cookies:")
xsrf_token = getpass("XSRF-TOKEN: ")
wcl_session_cookie = getpass("wcl_session: ")

print("Tokens captured.")

Enter your ESOlogs session cookies:
Tokens captured.


In [10]:
# Full scrape over every generated URL, throttled to one request per 0.5 s,
# followed by a single combined CSV of all collected rows.
master = run_all(
    xsrf_token=xsrf_token,
    wcl_session_cookie=wcl_session_cookie,
    base_output_dir="data",
    start_index=0,
    limit=None,
    sleep_between=0.5,
)

save_master_csv(master, filename="master_esologs.csv")

[1/504] Z1 B4 P1
[2/504] Z1 B4 P2
[3/504] Z1 B4 P3
[4/504] Z1 B4 P4
[5/504] Z1 B4 P5
[6/504] Z1 B4 P6
[7/504] Z1 B4 P7
[8/504] Z1 B4 P8
[9/504] Z1 B4 P9
[10/504] Z1 B4 P10
[11/504] Z1 B4 P11
[12/504] Z1 B4 P12
[13/504] Z1 B4 P13
[14/504] Z1 B4 P14
[15/504] Z1 B4 P15
[16/504] Z1 B4 P16
[17/504] Z1 B4 P17
[18/504] Z1 B4 P18
[19/504] Z1 B4 P19
[20/504] Z1 B4 P20
[21/504] Z1 B4 P21
[22/504] Z1 B4 P22
[23/504] Z1 B4 P23
[24/504] Z1 B4 P24
[25/504] Z1 B4 P25
[26/504] Z1 B4 P26
[27/504] Z1 B4 P27
[28/504] Z2 B8 P1
[29/504] Z2 B8 P2
[30/504] Z2 B8 P3
[31/504] Z2 B8 P4
[32/504] Z2 B8 P5
[33/504] Z2 B8 P6
[34/504] Z2 B8 P7
[35/504] Z2 B8 P8
[36/504] Z2 B8 P9
[37/504] Z2 B8 P10
[38/504] Z2 B8 P11
[39/504] Z2 B8 P12
[40/504] Z2 B8 P13
[41/504] Z2 B8 P14
[42/504] Z2 B8 P15
[43/504] Z2 B8 P16
[44/504] Z2 B8 P17
[45/504] Z2 B8 P18
[46/504] Z2 B8 P19
[47/504] Z2 B8 P20
[48/504] Z2 B8 P21
[49/504] Z2 B8 P22
[50/504] Z2 B8 P23
[51/504] Z2 B8 P24
[52/504] Z2 B8 P25
[53/504] Z2 B8 P26
[54/504] Z2 B8 P27
[