In [3]:
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd

BASE = "https://strengthlevel.com/strength-standards"
GENDERS = ["male", "female"]
UNITS = ["kg", "lb"]
TYPES = {
    "bodyweight": ["By Bodyweight"],
    "age": ["By Age"],
}
TARGET_EX = {
    "bench press",
    "squat",
    "deadlift",
    "shoulder press",
    "barbell curl",
    "bent over row",
    "power clean",
}

def fetch_tables(url):
    r = requests.get(url)
    r.raise_for_status()
    html = r.text
    tables = pd.read_html(html)  # grab all tables efficiently :contentReference[oaicite:1]{index=1}

    soup = BeautifulSoup(html, "lxml")
    elems = soup.find_all(["h2","h3","h4","table"])
    
    idx = 0
    out = []  # list of tuples: (exercise, standard_type, DataFrame)
    current_ex = None
    current_type = None

    for el in elems:
        text = el.get_text().strip().lower()
        if el.name in ["h2","h3","h4"]:
            for ex in TARGET_EX:
                if ex in text:
                    current_ex = ex.replace(" ", "-")
            for ttype, keywords in TYPES.items():
                for kw in keywords:
                    if kw.lower() in text:
                        current_type = ttype
        elif el.name == "table" and current_ex and current_type:
            df = tables[idx]
            out.append((current_ex, current_type, df))
            idx += 1
            # ready for next table under same exercise
    return out

def main():
    for gender in GENDERS:
        for unit in UNITS:
            url = f"{BASE}/{gender}/{unit}"
            print(f"➡ Fetching: {url}")
            tables = fetch_tables(url)
            print(f"  → Got {len(tables)} tables")

            for ex, ttype, df in tables:
                path = os.path.join(gender, unit, ttype)
                os.makedirs(path, exist_ok=True)
                filename = os.path.join(path, f"{ex}.csv")
                df.to_csv(filename, index=False)
                print(f"   ✓ Saved {filename}")

if __name__ == "__main__":
    main()


➡ Fetching: https://strengthlevel.com/strength-standards/male/kg


  tables = pd.read_html(html)  # grab all tables efficiently :contentReference[oaicite:1]{index=1}


  → Got 15 tables
   ✓ Saved male\kg\bodyweight\bench-press.csv
   ✓ Saved male\kg\age\bench-press.csv
   ✓ Saved male\kg\bodyweight\squat.csv
   ✓ Saved male\kg\age\squat.csv
   ✓ Saved male\kg\bodyweight\deadlift.csv
   ✓ Saved male\kg\age\deadlift.csv
   ✓ Saved male\kg\bodyweight\shoulder-press.csv
   ✓ Saved male\kg\age\shoulder-press.csv
   ✓ Saved male\kg\bodyweight\barbell-curl.csv
   ✓ Saved male\kg\age\barbell-curl.csv
   ✓ Saved male\kg\bodyweight\bent-over-row.csv
   ✓ Saved male\kg\age\bent-over-row.csv
   ✓ Saved male\kg\bodyweight\power-clean.csv
   ✓ Saved male\kg\age\power-clean.csv
   ✓ Saved male\kg\age\power-clean.csv
➡ Fetching: https://strengthlevel.com/strength-standards/male/lb


  tables = pd.read_html(html)  # grab all tables efficiently :contentReference[oaicite:1]{index=1}


  → Got 15 tables
   ✓ Saved male\lb\bodyweight\bench-press.csv
   ✓ Saved male\lb\age\bench-press.csv
   ✓ Saved male\lb\bodyweight\squat.csv
   ✓ Saved male\lb\age\squat.csv
   ✓ Saved male\lb\bodyweight\deadlift.csv
   ✓ Saved male\lb\age\deadlift.csv
   ✓ Saved male\lb\bodyweight\shoulder-press.csv
   ✓ Saved male\lb\age\shoulder-press.csv
   ✓ Saved male\lb\bodyweight\barbell-curl.csv
   ✓ Saved male\lb\age\barbell-curl.csv
   ✓ Saved male\lb\bodyweight\bent-over-row.csv
   ✓ Saved male\lb\age\bent-over-row.csv
   ✓ Saved male\lb\bodyweight\power-clean.csv
   ✓ Saved male\lb\age\power-clean.csv
   ✓ Saved male\lb\age\power-clean.csv
➡ Fetching: https://strengthlevel.com/strength-standards/female/kg


  tables = pd.read_html(html)  # grab all tables efficiently :contentReference[oaicite:1]{index=1}


  → Got 15 tables
   ✓ Saved female\kg\bodyweight\bench-press.csv
   ✓ Saved female\kg\age\bench-press.csv
   ✓ Saved female\kg\bodyweight\squat.csv
   ✓ Saved female\kg\age\squat.csv
   ✓ Saved female\kg\bodyweight\deadlift.csv
   ✓ Saved female\kg\age\deadlift.csv
   ✓ Saved female\kg\bodyweight\shoulder-press.csv
   ✓ Saved female\kg\age\shoulder-press.csv
   ✓ Saved female\kg\bodyweight\barbell-curl.csv
   ✓ Saved female\kg\age\barbell-curl.csv
   ✓ Saved female\kg\bodyweight\bent-over-row.csv
   ✓ Saved female\kg\age\bent-over-row.csv
   ✓ Saved female\kg\bodyweight\power-clean.csv
   ✓ Saved female\kg\age\power-clean.csv
   ✓ Saved female\kg\age\power-clean.csv
➡ Fetching: https://strengthlevel.com/strength-standards/female/lb


  tables = pd.read_html(html)  # grab all tables efficiently :contentReference[oaicite:1]{index=1}


  → Got 15 tables
   ✓ Saved female\lb\bodyweight\bench-press.csv
   ✓ Saved female\lb\age\bench-press.csv
   ✓ Saved female\lb\bodyweight\squat.csv
   ✓ Saved female\lb\age\squat.csv
   ✓ Saved female\lb\bodyweight\deadlift.csv
   ✓ Saved female\lb\age\deadlift.csv
   ✓ Saved female\lb\bodyweight\shoulder-press.csv
   ✓ Saved female\lb\age\shoulder-press.csv
   ✓ Saved female\lb\bodyweight\barbell-curl.csv
   ✓ Saved female\lb\age\barbell-curl.csv
   ✓ Saved female\lb\bodyweight\bent-over-row.csv
   ✓ Saved female\lb\age\bent-over-row.csv
   ✓ Saved female\lb\bodyweight\power-clean.csv
   ✓ Saved female\lb\age\power-clean.csv
   ✓ Saved female\lb\age\power-clean.csv
