# Scrape a single page of the talent listing

In [15]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Step 1: Load the page
url = "https://taqat-gaza.com/public/en/talents"
headers = {
    "User-Agent": "Mozilla/5.0"
}
# Without a timeout, requests waits indefinitely on a stalled connection.
response = requests.get(url, headers=headers, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page and writing an
# empty CSV that is then reported as a success.
response.raise_for_status()

# Step 2: Parse HTML
soup = BeautifulSoup(response.content, "html.parser")

# Step 3: Find all talent cards
talents = soup.find_all("div", class_="card")  # You may adjust if needed

# Step 4: Extract info
data = []
for card in talents:
    name = card.find("h5", class_="mb-0")
    # NOTE(review): a multi-word class_ string is presumably matched against
    # the exact class attribute value (order-sensitive) — confirm against the
    # live markup if the Specialty column comes back empty.
    specialty = card.find("span", class_="text-muted d-block mb-2")

    # Experience and project count default to "" when the card has no such stat
    experience = ""
    projects = ""

    # Each stat container pairs a <p> label with a <span> value
    for stat in card.find_all("div", class_="stats m-1"):
        label = stat.find("p")
        value = stat.find("span")
        if not (label and value):
            continue
        label_text = label.get_text(strip=True).lower()
        if "projects" in label_text:
            projects = value.get_text(strip=True)
        elif "experience" in label_text:
            experience = value.get_text(strip=True)

    data.append({
        "Name": name.text.strip() if name else "",
        "Specialty": specialty.text.strip() if specialty else "",
        "Experience (years)": experience,
        "Projects": projects
    })

# Step 5: Export to Excel or CSV
df = pd.DataFrame(data)
df.to_csv("taqat_talents.csv", index=False)

print("✅ Data scraped and saved to taqat_talents.csv")


✅ Data scraped and saved to taqat_talents.csv


# Scrape every page of the talent listing

In [16]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def _parse_card(card):
    """Convert one talent card element into a flat record (dict of strings)."""
    name = card.find("h5", class_="mb-0")
    specialty = card.find("span", class_="text-muted d-block mb-2")

    # Both stats default to "" when the card does not list them
    experience = ""
    projects = ""

    # Each stat container pairs a <p> label with a <span> value
    for stat in card.find_all("div", class_="stats m-1"):
        label = stat.find("p")
        value = stat.find("span")
        if not (label and value):
            continue
        label_text = label.get_text(strip=True).lower()
        if "projects" in label_text:
            projects = value.get_text(strip=True)
        elif "experience" in label_text:
            experience = value.get_text(strip=True)

    return {
        "Name": name.text.strip() if name else "",
        "Specialty": specialty.text.strip() if specialty else "",
        "Experience (years)": experience,
        "Projects": projects
    }


def scrape_page(page_num, timeout=30):
    """Scrape one page of the talent listing.

    Args:
        page_num: Page number placed in the ``?page=`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(records, has_more)`` tuple. ``records`` is a list of dicts with
        the keys "Name", "Specialty", "Experience (years)" and "Projects".
        ``has_more`` is False (with an empty list) when the request fails,
        the status is not 200, or the page contains no cards.
    """
    url = f"https://taqat-gaza.com/public/en/talents?page={page_num}"
    headers = {"User-Agent": "Mozilla/5.0"}

    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Treat a network failure like an inaccessible page so the caller
        # stops cleanly and can still export what it already collected.
        print(f"Request for page {page_num} failed: {exc}")
        return [], False

    if response.status_code != 200:
        return [], False  # Stop if the page is not accessible

    soup = BeautifulSoup(response.content, "html.parser")
    cards = soup.find_all("div", class_="card")
    if not cards:
        return [], False  # Stop if there are no more talents/cards

    return [_parse_card(card) for card in cards], True

# Walk the listing one page at a time; scrape_page's flag tells us when to stop
all_data = []
page = 0
has_more = True
while has_more:
    page += 1
    print(f"Scraping page {page}...")
    data, has_more = scrape_page(page)
    if has_more:
        # Only pages that reported more data contribute rows
        all_data += data

# Write the combined records to a single CSV file
df = pd.DataFrame(all_data)
df.to_csv("taqat_all_talents.csv", index=False)
print("✅ All pages scraped and saved to taqat_all_talents.csv")


Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
✅ All pages scraped and saved to taqat_all_talents.csv
