# DRG Web Scraping

In [81]:
from typing import List
import datetime
from pathlib import Path

import requests
from bs4 import BeautifulSoup
from wasabi import msg

In [82]:
def get_loadout_links(
    output_dir: Path,
    page_num: int = 950,
    verbose: bool = False,
    timeout: float = 30.0,
) -> List[str]:
    """Collect individual loadout links from the karl.gg browse listing.

    Listing pages 1..``page_num`` (sorted by ``updated_at``, descending) are
    fetched one at a time. For every ``<tr>`` of the first ``<table>`` on a
    page, the ``href`` of the first anchor carrying the ``cursor-pointer``
    class is recorded.

    Args:
        output_dir: Directory in which the links are saved, one per line. It
            is created if missing. Pass a falsy value (e.g. ``None``) to skip
            saving; nothing is written when no links were found either.
        page_num: Last listing page to fetch (inclusive).
        verbose: Whether to print per-page progress and skip warnings.
        timeout: Seconds to wait for each HTTP response. Without it a stalled
            connection would block the loop indefinitely.

    Returns:
        The collected ``href`` values, in page order.
    """
    links = []
    timestamp = datetime.datetime.now()
    for p in range(1, page_num + 1):
        msg.text(f"Checking page {p}...", show=verbose)
        url = f"https://karl.gg/browse?sort=updated_at&direction=desc&page={p}"
        response = requests.get(url, timeout=timeout)
        if not response.ok:
            msg.warn(f"Skipping page {p}: HTTP {response.status_code}", show=verbose)
            continue

        soup = BeautifulSoup(response.content, "html.parser")
        table = soup.find("table")
        if table is None:
            # A page without a listing table has nothing to scrape; skip it
            # instead of failing on attribute access on None.
            msg.warn(f"Skipping page {p}: no table found", show=verbose)
            continue

        for row in table.find_all("tr"):
            # The individual loadout links have the cursor-pointer class.
            anchor = row.find("a", attrs={"class": "cursor-pointer"})
            href = anchor.get("href") if anchor is not None else None
            if href:
                links.append(href)
    msg.info(f"Found {len(links)} builds (until page {page_num}) as of {timestamp}")

    if output_dir and links:
        output_dir.mkdir(parents=True, exist_ok=True)
        # NOTE: isoformat() contains ':' characters, which are not valid in
        # file names on Windows.
        output_path = output_dir / f"loadout_links-page-{page_num}_{timestamp.isoformat()}.txt"
        with open(output_path, "w", encoding="utf-8") as f:
            f.writelines(f"{link}\n" for link in links)
        msg.good(f"Individual loadout links saved to {output_path}")

    return links
    

In [79]:
# Scratch probe: fetch listing page 1000 directly — presumably to see how the
# site responds to a page number beyond the default range used above (TODO confirm).
url = "https://karl.gg/browse?sort=updated_at&direction=desc&page=1000"
# A timeout keeps a stalled connection from hanging the kernel.
response = requests.get(url, timeout=30)

In [77]:
# Small smoke run: scrape only the first 3 listing pages and save the links under ./dump.
x = get_loadout_links(Path.cwd() / "dump", page_num=3, verbose=True)

Checking page 1...
Checking page 2...
Checking page 3...
[38;5;4mℹ Found 45 builds (until page 3) as of 2022-11-19 13:51:52.715802[0m
[38;5;2m✔ Individual loadout links saved to
/home/lj/Dev/scratch/2022-11-19-drg/dump/loadout_links-page-3_2022-11-19T13:51:52.715802.txt[0m
