In [1]:
import requests
import pandas as pd

from bs4 import BeautifulSoup

In [2]:
def download_xometry_steel_reference():

    url = "https://xometry.pro/en-eu/articles/steel-reference-standards/"
    response = requests.get(url=url)
    if response.status_code != 200:
        print(f"Failed to fetch {url}, status code: {response.status_code}")
        return []
    return response.text


    
body = download_xometry_steel_reference()

In [3]:
def parse_table_contents(table_selector):

    rows = table_selector.select("div.custom-table-block tr")
    cross_reference = []

    for idx, row in enumerate(rows):
        cells = row.select("td")
        row = [cell.get_text(strip=True) for cell in cells]
        if idx == 0:
            table_columns = row
            continue

        row_dict = {}
        for id_column, column in enumerate(table_columns):
            row_dict[column] = row[id_column]
        cross_reference.append(row_dict)
    return cross_reference


def parse_tables(body):
    soup = BeautifulSoup(body, "html.parser")
    tables = soup.find_all("div", class_="custom-table-block")
    
    tables = [pd.DataFrame(parse_table_contents(table)) for table in tables]
    return tables


tables = parse_tables(body)


tables[0]

Unnamed: 0,DIN (Germany),ISO,AISI (USA),SAE,UNS,AFNOR (France),BS (Great Britain),UNE (Spain),SIS (Sweden),UNI (Italy),JIS (Japan)
0,1.2083,X40Cr14; X42Cr13,420,420,S42000,Z40C14,420S37,,2303,,SUS420J2
1,1.2085,X33CrS16,422 + S,423 + S,S42000,Z35CD17.S,420S37,,2303,,SUS420J2
2,1.2316,X38CrMo16,422,422,S42200,Z35CD17,,,,X38CrMo16KU,
3,1.4021,X20Cr13,420,420,S42000,Z20C13,420S37,,2303,X20Cr13,SUS420J1
4,1.4034,X46Cr13,420,420,S42000,Z40CM,420S45,F.3405,2304,X40Cr14,
5,1.4057,X17CrNi16-2,431,431,S43100,Z6CNi6.02,431S29,,2321,X16CrNi16,SUS431
6,1.4104,X14CrMoS17,430F,430F,S43020,Z10CF17,441S29,F.3117,2383,X10CrS17,SUS430F
7,1.4112,X90CrMoV18,440B,440B,S44003,Z90CDV18,,,,,SUS440B
8,1.4122,X39CrMo17-1,,,,Z38CD 16-01,,,,,
9,1.4301,X5CrNi18.10(V2A),304,304,S30400,Z6CN18.09,304S15,F.3551,2332,X5CrNi1810,SUS304


In [4]:
def concat_tables(tables):
    for table in tables:
        table.rename(columns={"SS (Sweden)": "SIS (Sweden)"}, inplace=True)
    return pd.concat(tables).drop_duplicates()


def normalize(s):
    return str(s).strip().upper().replace("-", "").replace(" ", "")

def export_tables(table):
    table.to_csv("../resources/export/cross_reference_table.csv", header=True, index=False)



concatenated_table = concat_tables(tables)

In [5]:
cross_long = concatenated_table.melt(
    id_vars=[],  # keep no fixed ID, or you can keep row index
    value_name="Grade",
    var_name="Standard"
)

cross_long = cross_long.dropna()
cross_long["Grade_norm"] = cross_long["Grade"].apply(normalize)
cross_long

Unnamed: 0,Standard,Grade,Grade_norm
0,DIN (Germany),1.2083,1.2083
1,DIN (Germany),1.2085,1.2085
2,DIN (Germany),1.2316,1.2316
3,DIN (Germany),1.4021,1.4021
4,DIN (Germany),1.4034,1.4034
...,...,...,...
699,JIS (Japan),,
700,JIS (Japan),,
701,JIS (Japan),,
702,JIS (Japan),,


In [6]:
export_tables(concatenated_table)