# DRG Web Scraping

In [215]:
import datetime
from pathlib import Path
from typing import Dict, List, Optional

import requests
from bs4 import BeautifulSoup
from wasabi import msg

In [84]:
def get_loadout_links(output_dir: Path, page_num: int = 950, verbose: bool = False) -> List[str]:
    """Collect the individual loadout links from the karl.gg browse listing.

    Visits listing pages ``1..page_num`` (sorted by most recently updated) and
    gathers the ``href`` of the first ``cursor-pointer`` anchor found in each
    table row. Pages that fail to load or contain no table are reported and
    skipped instead of aborting the whole crawl.

    Args:
        output_dir: Directory where the links are saved, one per line. Pass
            ``None`` to skip saving. The directory is created when missing.
        page_num: Last listing page to visit (inclusive).
        verbose: Whether to print a progress line for every page.

    Returns:
        The collected links, in the order they were encountered.

    Raises:
        requests.RequestException: If a request fails at the network level or
            exceeds the timeout.
    """
    request_timeout = 30  # seconds; without it a stalled connection blocks the crawl forever
    links = []
    timestamp = datetime.datetime.now()
    for p in range(1, page_num + 1):
        msg.text(f"Checking page {p}...", show=verbose)
        url = f"https://karl.gg/browse?sort=updated_at&direction=desc&page={p}"
        response = requests.get(url, timeout=request_timeout)
        if not response.ok:
            msg.warn(f"Skipping page {p}: HTTP {response.status_code}")
            continue

        soup = BeautifulSoup(response.content, "html.parser")
        table = soup.find("table")
        if table is None:
            # find() returns None when the page has no table at all.
            msg.warn(f"Skipping page {p}: no table found")
            continue

        for row in table.find_all("tr"):
            # The individual loadout links have the cursor-pointer class.
            anchor = row.find("a", attrs={"class": "cursor-pointer"})
            href = anchor.get("href") if anchor is not None else None
            if href:
                links.append(href)
    msg.info(f"Found {len(links)} builds (until page {page_num}) as of {timestamp}")

    if output_dir is not None and links:
        output_dir.mkdir(parents=True, exist_ok=True)
        # ISO timestamps contain ':' which is not allowed in Windows filenames.
        safe_stamp = timestamp.isoformat().replace(":", "-")
        output_path = output_dir / f"loadout_links-page-{page_num}_{safe_stamp}.txt"
        with open(output_path, "w", encoding="utf-8") as f:
            f.writelines(f"{link}\n" for link in links)
        msg.good(f"Individual loadout links saved to {output_path}")

    return links
    

## Scraper for individual builds

In [214]:
import ast
import json

In [87]:
# Smoke test: crawl only the first listing page and do not write anything to disk.
builds = get_loadout_links(output_dir=None, page_num=1)

[38;5;4mℹ Found 15 builds (until page 1) as of 2022-11-19 14:39:51.837123[0m


In [392]:
def get_loadout(url: str) -> Optional[Dict]:
    """Scrape a single karl.gg loadout preview page into a flat record.

    Args:
        url: Address of the loadout preview page.

    Returns:
        A dict with the keys ``name``, ``class``, ``patch``, ``created_at``,
        ``updated_at``, ``description``, ``username``, ``primary``,
        ``primary_mods``, ``primary_overclock``, ``secondary``,
        ``secondary_mods``, ``secondary_overclock``, ``throwable``,
        ``traversal``, ``traversal_mods``, ``support``, ``support_mods`` and
        ``salutes``. Values that cannot be found on the page are ``None``
        (mod strings are ``""`` when the item exists but has no mods).
        Returns ``None`` when the page cannot be fetched or carries no
        loadout data.

    Raises:
        requests.RequestException: If the request fails at the network level
            or exceeds the timeout.
    """
    EMPTY_DATA = "{}"
    request_timeout = 30  # seconds; avoids hanging forever on a stalled connection

    def _load(raw):
        """Decode a JSON attribute; missing or ``null`` payloads become an empty dict."""
        return json.loads(raw or EMPTY_DATA) or {}

    response = requests.get(url, timeout=request_timeout)
    if not response.ok:
        msg.warn(f"Could not fetch {url}: HTTP {response.status_code}")
        return None

    soup = BeautifulSoup(response.content, "html.parser")

    # The response body is not the rendered page: the loadout is handed over as
    # JSON strings stored in the attributes of the <loadout-preview-page> element,
    # and those attributes are all we need.
    body = soup.body
    preview = body.find("loadout-preview-page") if body is not None else None
    if preview is None or ":loadout-data" not in preview.attrs:
        msg.warn(f"No loadout data found at {url}")
        return None
    data = preview.attrs
    user_loadout = _load(data[":loadout-data"])

    salute_tag = soup.find("span", attrs={"class": "salute-count"})
    try:
        salutes = int(salute_tag.text) if salute_tag is not None else None
    except ValueError:
        salutes = None

    primary = _load(data.get(":primary"))
    secondary = _load(data.get(":secondary"))
    equipment = _load(data.get(":available-equipment"))
    overclocks = _load(data.get(":overclocks"))
    throwable = _load(data.get(":throwable"))
    # Either mod list may be absent from the payload; treat that as "no mods".
    weapon_mods = user_loadout.get("mods") or []
    equipment_mods = user_loadout.get("equipment_mods") or []
    class_name = (user_loadout.get("character") or {}).get("name")

    creator = user_loadout.get("creator")
    username = creator.get("name") if creator else "Anonymous"

    def _get_mods(item_id, key: str, mods: List[Dict]) -> str:
        """Join the mod letters of the item ``item_id``, ordered by mod tier."""
        if item_id is None:
            return ""
        chosen = [mod for mod in mods if mod.get(key) == item_id]
        chosen.sort(key=lambda mod: mod.get("mod_tier") or 0)
        return "".join(mod.get("mod_index") or "" for mod in chosen)

    def _get_overclock(gun_id):
        """Return the overclock name attached to ``gun_id``, or None if there is none."""
        for overclock in overclocks:
            if overclock.get("gun_id") == gun_id:
                return overclock.get("overclock_name")
        return None

    def _get_equipment(equip_map: Dict) -> Dict:
        """Look up the current class' equipment from ``equip_map`` and its mods."""
        wanted = equip_map.get(class_name)
        if wanted is not None:
            for equip in equipment:
                if equip.get("name") == wanted:
                    mods = _get_mods(equip.get("id"), "equipment_id", equipment_mods)
                    return {"name": equip.get("name"), "mods": mods}
        # Unknown class or equipment not listed: keep the record shape, leave it empty.
        return {"name": None, "mods": None}

    # The problem here is that both traversal and support tools are called Support Tools,
    # so we need to disambiguate a bit and hard code a few things
    traversal_map = {"Gunner": "Zipline Launcher", "Scout": "Grappling Hook", "Driller": "Reinforced Power Drills", "Engineer": "Platform Gun"}
    support_map = {"Gunner": "Shield Generator", "Scout": "Flare Gun", "Driller": "Satchel Charge", "Engineer": "LMG Gun Platform"}
    traversal = _get_equipment(traversal_map)
    support = _get_equipment(support_map)

    # Prepare output
    loadout = {
        "name": user_loadout.get("name"),
        "class": class_name,
        "patch": user_loadout.get("patch_id"),
        "created_at": user_loadout.get("created_at"),
        "updated_at": user_loadout.get("updated_at"),
        "description": user_loadout.get("description"),
        "username": username,
        "primary": primary.get("name"),
        "primary_mods": _get_mods(primary.get("id"), "gun_id", weapon_mods),
        "primary_overclock": _get_overclock(primary.get("id")),
        "secondary": secondary.get("name"),
        "secondary_mods": _get_mods(secondary.get("id"), "gun_id", weapon_mods),
        "secondary_overclock": _get_overclock(secondary.get("id")),
        "throwable": throwable.get("name"),
        "traversal": traversal.get("name"),
        "traversal_mods": traversal.get("mods"),
        "support": support.get("name"),
        "support_mods": support.get("mods"),
        "salutes": salutes,
    }

    return loadout

In [366]:
import pandas as pd

In [364]:
# Two sample preview pages used to try the single-build scraper.
links = ["https://karl.gg/preview/2", "https://karl.gg/preview/16937"]
loadouts = [get_loadout(link) for link in links]

In [367]:
# Tabulate the scraped builds: one row per loadout, one column per key returned by get_loadout.
pd.DataFrame(loadouts)

Unnamed: 0,name,class,patch,created_at,updated_at,description,username,primary,primary_mods,primary_overclock,secondary,secondary_mods,secondary_overclock,throwable,traversal,traversal_mods,support,support_mods,salutes
0,test,Engineer,3,2020-05-07T01:03:31.000000Z,2020-05-07T01:03:31.000000Z,test 1,MARIOPRO55,"""Warthog"" Auto 210",AAABA,,Breach Cutter,AABBC,,,Platform Gun,,LMG Gun Platform,,4
1,Driller's amazing loadout 2,Driller,10,2022-11-02T09:15:29.000000Z,2022-11-19T06:31:50.000000Z,||Pickaxe (Support Tool)|Description|\n|-|-|-|...,nitekib216,Cryo Cannon,CABCB,Ice Spear,Experimental Plasma Charger,BBABB,Energy Rerouting,Impact Axe,Reinforced Power Drills,,Satchel Charge,,1


In [393]:
# Spot check a single build and display the resulting record.
get_loadout(url="https://karl.gg/preview/18284#/")

{'name': 'Gorfnite scout',
 'class': 'Scout',
 'patch': 10,
 'created_at': '2022-11-19T03:36:07.000000Z',
 'updated_at': '2022-11-19T03:36:07.000000Z',
 'description': '',
 'username': 'Anonymous',
 'primary': 'M1000 Classic',
 'primary_mods': 'BBBBC',
 'primary_overclock': 'Active Stability System',
 'secondary': 'Zhukov NUK17',
 'secondary_mods': 'ABBCB',
 'secondary_overclock': 'Embedded Detonators',
 'throwable': 'Cryo Grenade',
 'traversal': 'Grappling Hook',
 'traversal_mods': 'AAAB',
 'support': 'Flare Gun',
 'support_mods': 'ABC',
 'salutes': 0}

In [383]:
# NOTE(review): in the cells shown, `user_loadout` is only assigned inside get_loadout();
# this scratch cell presumably relied on a kernel variable left over from a debugging
# run and will likely fail after Restart & Run All - confirm or remove.
user_loadout.get("creator")

In [391]:
# NOTE(review): `user_loadout` is not defined at notebook level in the cells shown
# (it is a local of get_loadout); presumably leftover debugging state - confirm or remove.
user_loadout

{'id': 18284,
 'name': 'Gorfnite scout',
 'description': '',
 'user_id': None,
 'character_id': 2,
 'created_at': '2022-11-19T03:36:07.000000Z',
 'updated_at': '2022-11-19T03:36:07.000000Z',
 'patch_id': 10,
 'throwable_id': 11,
 'mods': [{'id': 169,
   'character_id': 2,
   'gun_id': 6,
   'mod_tier': 1,
   'mod_index': 'B',
   'mod_name': 'Increased Caliber Rounds',
   'credits_cost': 1200,
   'magnite_cost': 0,
   'bismor_cost': 0,
   'umanite_cost': 0,
   'croppa_cost': 0,
   'enor_pearl_cost': 25,
   'jadiz_cost': 0,
   'text_description': '+10 Direct Damage',
   'json_stats': '{ "dmg": { "name": "Damage", "value": 10 } }',
   'icon': 'Icon_Upgrade_DamageGeneral',
   'mod_type': 'Damage',
   'created_at': '2021-11-29T05:13:15.000000Z',
   'updated_at': '2021-11-29T05:13:15.000000Z',
   'pivot': {'loadout_id': 18284, 'mod_id': 169}},
  {'id': 171,
   'character_id': 2,
   'gun_id': 6,
   'mod_tier': 2,
   'mod_index': 'B',
   'mod_name': 'Better Weight Balance',
   'credits_cost': 

In [379]:
# Scratch lookup: dict.get returns the given default (None when omitted) for a missing key.
help(dict.get)

Help on method_descriptor:

get(self, key, default=None, /)
    Return the value for key if key is in the dictionary, else default.



In [162]:
# NOTE(review): `soup` is not defined at notebook level in the cells shown (it is a
# local of the scraper functions); presumably leftover debugging state. Displaying it
# dumps the whole HTML document into the output - confirm or remove.
soup

<!DOCTYPE html>
<html lang="en">
<head>
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async="" src="https://www.googletagmanager.com/gtag/js?id=UA-165794980-1"></script>
<script>
        window.dataLayer = window.dataLayer || [];

        function gtag() {
            dataLayer.push(arguments);
        }

        gtag('js', new Date());

        gtag('config', 'UA-165794980-1');
    </script>
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<!-- CSRF Token -->
<meta content="V8zMp1vQm4szo592FUzx56mAMVRF47CxkzaQMNkW" name="csrf-token"/>
<title>Karl: Born Ready - Driller - Driller's amazing loadout 2</title>
<meta content="Driller build with Cold Radiance mod, Larger Reserve Tank mod, Increased Flow Volume mod, Larger Reserve Tank mod, Overcharged Plasma Accelerator mod,..." name="description"/>
<meta content="Driller build, Deep Rock Galactic builds, drg builds, Stronger Cooling Unit build, Thin Containment Field build, Improv