In [2]:
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
import pandas as pd

# --- Configuration -----------------------------------------------------------
base_url = "https://www.shl.com"
CATALOG_URL = base_url + "/solutions/products/product-catalog/"
PAGE_SIZE = 12        # value the `start` query parameter advances by per page
NUM_PAGES = 32        # 32 pages total
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests can block forever

# Parallel column lists; one entry is appended to each per scraped product.
p_name = []
p_link = []
remote_testing = []
adaptive_irt = []
test_type = []
description_list = []
job_levels_list = []
language_list = []
assessment_length_list = []

headers = {
    "User-Agent": "Mozilla/5.0"
}

# Keywords looked for (in this order) inside the <h4> heading of each section
# of a product detail page. The first keyword contained in a heading wins.
DETAIL_FIELDS = ("description", "job levels", "languages", "assessment length")


def fetch_soup(session, url):
    """Download ``url`` with ``session`` and return the parsed HTML.

    Raises ``requests.RequestException`` on connection errors, timeouts and
    non-2xx responses, so an error page is never parsed as if it were data.
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')


def yes_no(cells, index):
    """Return "Yes" if ``cells[index]`` exists and holds a '-yes' circle span, else "No"."""
    if len(cells) > index and cells[index].find('span', class_='catalogue__circle -yes'):
        return "Yes"
    return "No"


def parse_detail(detail_soup):
    """Extract the ``DETAIL_FIELDS`` texts from a parsed product detail page.

    Returns a dict keyed by the entries of ``DETAIL_FIELDS``; a field whose
    section is not found stays an empty string. If several sections match the
    same field, the last one on the page is kept.
    """
    details = dict.fromkeys(DETAIL_FIELDS, "")
    sections = detail_soup.find_all("div", class_="product-catalogue-training-calendar__row typ")
    for section in sections:
        heading = section.find("h4")
        content = section.find("p")
        if not (heading and content):
            continue
        heading_text = heading.text.strip().lower()
        for field in DETAIL_FIELDS:
            if field in heading_text:
                details[field] = content.text.strip()
                break
    return details


# --- Scrape ------------------------------------------------------------------
# A single Session reuses the underlying connection across all requests
# instead of opening a new one for every page.
with requests.Session() as session:
    session.headers.update(headers)

    for page_num in range(NUM_PAGES):
        url = f"{CATALOG_URL}?start={page_num * PAGE_SIZE}&type=1"
        soup = fetch_soup(session, url)

        for row in soup.find_all('tr'):
            name_td = row.find('td', class_='custom__table-heading__title')
            if not (name_td and name_td.a):
                continue  # header row or a row without a product link

            href = name_td.a.get('href', '').strip()
            if not href:
                continue  # anchor without a target: there is no detail page to visit

            name = name_td.a.text.strip()
            # urljoin copes with both site-relative and absolute hrefs.
            link = urljoin(base_url, href)

            td_tags = row.find_all('td', class_='custom__table-heading__general')
            remote = yes_no(td_tags, 0)
            adaptive = yes_no(td_tags, 1)

            test_td = row.find('td', class_='custom__table-heading__general product-catalogue__keys')
            if test_td:
                key_spans = test_td.find_all('span', class_='product-catalogue__key')
                test_string = ', '.join(span.text.strip() for span in key_spans)
            else:
                test_string = ""

            # Go to detail page. A failure here should not discard the rows
            # collected so far: report it and keep the detail fields blank.
            try:
                details = parse_detail(fetch_soup(session, link))
            except requests.RequestException as exc:
                print(f"⚠️ Could not fetch details for {name!r}: {exc}")
                details = dict.fromkeys(DETAIL_FIELDS, "")

            # Append everything
            p_name.append(name)
            p_link.append(link)
            remote_testing.append(remote)
            adaptive_irt.append(adaptive)
            test_type.append(test_string)
            description_list.append(details["description"])
            job_levels_list.append(details["job levels"])
            language_list.append(details["languages"])
            assessment_length_list.append(details["assessment length"])

# --- Assemble and save -------------------------------------------------------
df = pd.DataFrame({
    "Assessment Name": p_name,
    "Link": p_link,
    "Remote Testing": remote_testing,
    "Adaptive/IRT": adaptive_irt,
    "Test Type": test_type,
    "Description": description_list,
    "Job Levels": job_levels_list,
    "Languages": language_list,
    "Assessment Length": assessment_length_list
})


df.to_csv("shl_Data.csv", index=False)
print("✅ Done! File saved as 'shl_Data.csv'")

In [3]:
# Preview the first few rows of `df` as the cell's rich output.
df.head()

Unnamed: 0,Assessment Name,Link,Remote Testing,Adaptive/IRT,Test Type,Description,Job Levels,Languages,Assessment Length
0,Account Manager Solution,https://www.shl.com/solutions/products/product...,Yes,Yes,"C, P, A, B",The Account Manager solution is an assessment ...,"Mid-Professional,","English (USA),",Approximate Completion Time in minutes = 49
1,Administrative Professional - Short Form,https://www.shl.com/solutions/products/product...,Yes,Yes,"A, K, P",The Administrative Professional solution is fo...,"Entry-Level,","English (USA),",Approximate Completion Time in minutes = 36
2,Agency Manager Solution,https://www.shl.com/solutions/products/product...,Yes,Yes,"A, B, P, S",The Agency Manager solution is for mid-level s...,"Front Line Manager, Manager, Supervisor,","English (USA),",Approximate Completion Time in minutes = 51
3,Apprentice + 8.0 Job Focused Assessment,https://www.shl.com/solutions/products/product...,Yes,No,"B, P",The Apprentice + 8.0 Job-Focused Assessment is...,"General Population, Graduate, Entry-Level,","English International, German,",Approximate Completion Time in minutes = 30
4,Apprentice 8.0 Job Focused Assessment,https://www.shl.com/solutions/products/product...,Yes,No,"B, P",The Apprentice 8.0 Job-Focused Assessment is a...,"Entry-Level, General Population, Graduate,","English International, German, French,",Approximate Completion Time in minutes = 20
