In [2]:
import requests
from bs4 import BeautifulSoup
import csv
import time
from tqdm import tqdm

BASE = "https://www.hltv.org"
HEADERS = {"User-Agent": "Mozilla/5.0"}

def get_match_ids(pages=5):
    ids = set()
    for offset in range(0, pages * 100, 100):
        url = f"{BASE}/results?offset={offset}"
        r = requests.get(url, headers=HEADERS)
        soup = BeautifulSoup(r.text, "html.parser")
        for match in soup.select("a.result-con"):
            href = match.get("href")
            if href and "/match/" in href:
                match_id = href.split("/")[2]
                ids.add(match_id)
        time.sleep(1)
    return list(ids)

def fetch_match_data(match_id):
    url = f"{BASE}/match/{match_id}"
    r = requests.get(url, headers=HEADERS)
    soup = BeautifulSoup(r.text, "html.parser")

    # Example selectors — adjust as needed
    teams = soup.select(".teamName")
    team1 = teams[0].text.strip() if teams else None
    team2 = teams[1].text.strip() if len(teams) > 1 else None

    score = soup.select_one(".match-info-box-score .bold")
    score = score.text.strip() if score else None

    date = soup.select_one(".match-info-box .time")
    date = date.text.strip() if date else None

    # Collect player stats: KD, ADR, rating
    players = []
    for row in soup.select(".stats-table tr")[1:]:
        cols = row.find_all("td")
        if len(cols) >= 7:
            name = cols[0].text.strip()
            kd = cols[1].text.strip()
            adr = cols[4].text.strip()
            rating = cols[6].text.strip()
            players.append({"name": name, "kd": kd, "adr": adr, "rating": rating})

    demo = soup.find("a", string="Download demo")
    demo_link = BASE + demo['href'] if demo else None

    return {
        "match_id": match_id,
        "date": date,
        "team1": team1,
        "team2": team2,
        "score": score,
        "demo_url": demo_link,
        "players": players
    }

def scrape_and_save(pages=3, outfile="hltv_matches.csv"):
    ids = get_match_ids(pages)
    all_data = []
    for mid in tqdm(ids, desc="Scraping matches"):
        data = fetch_match_data(mid)
        all_data.append(data)
        time.sleep(1)

    # Flatten for CSV: one row per player
    with open(outfile, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["match_id", "date", "team1", "team2", "score", "player", "kd", "adr", "rating", "demo_url"])
        for m in all_data:
            for p in m["players"]:
                writer.writerow([
                    m["match_id"], m["date"], m["team1"], m["team2"],
                    m["score"], p["name"], p["kd"], p["adr"], p["rating"],
                    m["demo_url"]
                ])

if __name__ == "__main__":
    scrape_and_save(pages=3)

Scraping matches: 0it [00:00, ?it/s]


In [1]:
from awpy import Demo
import pandas as pd

demo_file = "hltv_data/demos/123456.dem"
demo = Demo(demo_file)
match = demo.parse()

# Example: extract rounds info
rounds = match.rounds
df = pd.DataFrame([{
    "round_num": r.number,
    "winning_team": r.winner,
    "bomb_planted": r.bomb_planted
} for r in rounds])

df.to_csv("hltv_data/match_123456_rounds.csv", index=False)

FileNotFoundError: File not found: hltv_data/demos/123456.dem