# In [None]:
from playwright.sync_api import sync_playwright

def format_exam_link(title: str):
    """Build the engineering exam detail-page URL derived from a listing title.

    Only the text before the first " - " separator is used. That text is
    lower-cased and its whitespace-separated words are joined with single
    hyphens, so leading, trailing, or repeated whitespace never leaves stray
    or doubled hyphens in the slug.

    Args:
        title: Exam title as shown in the listing.

    Returns:
        The string "https://engineering.careers360.com/exams/<slug>".
    """
    title_part = title.split(" - ")[0]  # Keep only the text before " - "
    # str.split() without arguments drops empty pieces, which trims both ends
    # and collapses runs of whitespace in a single step.
    formatted_title = "-".join(title_part.lower().split())
    return f"https://engineering.careers360.com/exams/{formatted_title}"

def _element_text(element):
    """Return the stripped inner text of ``element``, or "N/A" when it is missing."""
    return element.inner_text().strip() if element else "N/A"


def search_and_scrape_exams(exam_name: str):
    """Search the careers360 exams page and scrape every result card.

    Opens the exams page in headless Chromium, types ``exam_name`` into the
    search box (when one is found), submits it, waits for the result cards,
    and extracts the fields of each card.

    NOTE(review): the search runs on www.careers360.com while the generated
    ``exam_link`` always points at engineering.careers360.com — confirm this
    is intended.

    Args:
        exam_name: Text typed into the page's search box.

    Returns:
        A list of dicts, one per result card, with the keys ``title``,
        ``application_date``, ``exam_link``, ``exam_type``, ``exam_level``,
        ``frequency``, ``conducting_body``, ``accepting_colleges`` and
        ``total_seats``. Any field whose element is missing is "N/A".

    Raises:
        Any error Playwright raises while navigating or waiting, e.g. when no
        result card appears within the 10 second wait. The browser is closed
        before the error propagates.
    """
    url = "https://www.careers360.com/exams"

    # Path to the result cards; used both to wait for them and to collect them.
    results_selector = (
        "#root > div.school_container.college_listing > div:nth-child(2) > "
        "div.px-0.row > div.px-0.px-md-3.col-lg-9.col-md-8.col-sm-12 > "
        "div:nth-child(2) > div"
    )
    info_prefix = "div.exam_listing_info > div.exam_detail.d-flex > div.school_infooo"
    date_selector = "div.exam_listing_info > div.admission_correction > div.online_offline"
    # Output key for the 1st, 2nd, ... <li> of the card's detail list.
    detail_fields = (
        "exam_type",
        "exam_level",
        "frequency",
        "conducting_body",
        "accepting_colleges",
        "total_seats",
    )

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.goto(url, timeout=60000)

            # Search for the exam
            search_box = page.query_selector("#top_content_area input")
            if search_box:
                search_box.fill(exam_name)
                search_box.press("Enter")
                page.wait_for_load_state("networkidle")

            # Wait for search results to load
            page.wait_for_selector(results_selector, timeout=10000)

            exams = []
            for card in page.query_selector_all(results_selector):
                title = _element_text(
                    card.query_selector(f"{info_prefix} > div.title > h2 > a")
                )
                application_date = _element_text(card.query_selector(date_selector))

                record = {
                    "title": title,
                    "application_date": application_date,
                    # A link can only be derived when the card had a title.
                    "exam_link": format_exam_link(title) if title != "N/A" else "N/A",
                }
                for position, field in enumerate(detail_fields, start=1):
                    item = card.query_selector(
                        f"{info_prefix} > div.offline ul li:nth-child({position})"
                    )
                    record[field] = _element_text(item)

                exams.append(record)
        finally:
            # Close the browser even when navigation or a wait raises.
            browser.close()

    return exams


# In [None]:
from fastapi import FastAPI, Query
from playwright.sync_api import sync_playwright
import asyncio
import sys

# Fix for Windows event loop: on Windows, switch asyncio to the Proactor event
# loop policy before the application object is created.
# NOTE(review): presumably required so Playwright can start its browser
# subprocess from within the server — confirm.
if sys.platform == "win32":
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

# FastAPI application instance on which the route decorator below registers.
app = FastAPI()

def format_exam_link(title: str):
    """Build the engineering exam detail-page URL derived from a listing title.

    Only the text before the first " - " separator is used. That text is
    lower-cased and its whitespace-separated words are joined with single
    hyphens, so leading, trailing, or repeated whitespace never leaves stray
    or doubled hyphens in the slug.

    Args:
        title: Exam title as shown in the listing.

    Returns:
        The string "https://engineering.careers360.com/exams/<slug>".
    """
    title_part = title.split(" - ")[0]  # Keep only the text before " - "
    # str.split() without arguments trims both ends and collapses runs of
    # whitespace, so the join below yields exactly one hyphen between words.
    formatted_title = "-".join(title_part.lower().split())
    return f"https://engineering.careers360.com/exams/{formatted_title}"

def search_and_scrape_exams(exam_name: str):
    """Search the engineering exams page and scrape every exam listing card.

    Opens the page in headless Chromium, types ``exam_name`` into the search
    box (when one is found), submits it, waits for at least one listing card,
    and extracts the fields of each card.

    Args:
        exam_name: Text typed into the page's search box.

    Returns:
        A list of dicts, one per card, with the keys ``title``,
        ``application_date``, ``exam_link`` and ``details``. ``details`` is
        the list of stripped texts of the card's ``div.offline > ul > li``
        items; the other fields are "N/A" when their element is missing.

    Raises:
        Any error Playwright raises while navigating or waiting, e.g. when no
        card appears within the 10 second wait. The browser is closed before
        the error propagates.
    """
    url = "https://engineering.careers360.com/exams"
    # One selector both to wait for the cards and to collect them.
    card_selector = "div.examListing_card.position-relative"

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.goto(url, timeout=60000)

            # Search for the exam
            search_box = page.query_selector("#top_content_area input")
            if search_box:
                search_box.fill(exam_name)
                search_box.press("Enter")
                page.wait_for_load_state("networkidle")

            # Wait for search results to load
            page.wait_for_selector(card_selector, timeout=10000)

            exams = []
            for exam in page.query_selector_all(card_selector):
                title_el = exam.query_selector("div.title > h2 > a")
                title = title_el.inner_text().strip() if title_el else "N/A"

                date_el = exam.query_selector("div.online_offline")
                application_date = date_el.inner_text().strip() if date_el else "N/A"

                details = [
                    li.inner_text().strip()
                    for li in exam.query_selector_all("div.offline > ul > li")
                ]

                # A link can only be derived when the card had a title.
                exam_link = format_exam_link(title) if title != "N/A" else "N/A"

                exams.append({
                    "title": title,
                    "application_date": application_date,
                    "exam_link": exam_link,
                    "details": details
                })
        finally:
            # Close the browser even when navigation or a wait raises.
            browser.close()

    return exams

@app.get("/search_exams")
def get_exams(exam_name: str = Query(..., title="Exam Name", description="Enter the exam name to search")):
    """Run the exam search for ``exam_name`` and return the results under "exams"."""
    scraped = search_and_scrape_exams(exam_name)
    return {"exams": scraped}


# In [None]:
from fastapi import FastAPI, Query
from playwright.sync_api import sync_playwright
import asyncio
import sys
from typing import List

# Fix for Windows event loop: on Windows, switch asyncio to the Proactor event
# loop policy before the application object is created.
# NOTE(review): presumably required so Playwright can start its browser
# subprocess from within the server — confirm.
if sys.platform == "win32":
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

# FastAPI application instance on which the route decorator below registers.
app = FastAPI()

# Maps each supported category name to the exam-listing URL that is scraped
# for it. The same URL is also used as the base for generated exam links.
CATEGORY_URLS = {
    "engineering": "https://engineering.careers360.com/exams",
    "management": "https://bschool.careers360.com/exams",
    "law": "https://law.careers360.com/exams",
    "design": "https://design.careers360.com/exams"
}

def format_exam_link(title: str, base_url: str):
    """Build an exam detail-page URL under ``base_url`` from a listing title.

    Only the text before the first " - " separator is used. That text is
    lower-cased and its whitespace-separated words are joined with single
    hyphens, so leading, trailing, or repeated whitespace never leaves stray
    or doubled hyphens in the slug.

    Args:
        title: Exam title as shown in the listing.
        base_url: URL the slug is appended to; a trailing "/" is tolerated.

    Returns:
        The string "<base_url>/<slug>" with exactly one "/" between the parts.
    """
    title_part = title.split(" - ")[0]  # Keep only the text before " - "
    # str.split() without arguments trims both ends and collapses runs of
    # whitespace, so the join below yields exactly one hyphen between words.
    formatted_title = "-".join(title_part.lower().split())
    # Drop trailing slashes so the result never contains "//" before the slug.
    root = base_url.rstrip("/")
    return f"{root}/{formatted_title}"

def scrape_exams_from_page(page, base_url):
    """Build one record per ``div.exam_listing_info`` card on the current page.

    Each record holds the card's title, application date, a generated exam
    link (via ``format_exam_link`` with ``base_url``), and the texts of the
    first six ``<li>`` positions under ``div.offline``. Any piece whose
    element cannot be found is reported as "N/A".

    Args:
        page: Object exposing ``query_selector_all`` for the loaded page.
        base_url: Passed through to ``format_exam_link`` for link building.

    Returns:
        A list of dicts, in the order the cards were returned by the page.
    """
    def read_text(node, selector):
        # Stripped text of the first match under ``node``; "N/A" if no match.
        found = node.query_selector(selector)
        if found:
            return found.inner_text().strip()
        return "N/A"

    # Output key for list position 1, 2, ... of the card's detail list.
    list_fields = (
        "exam_type",
        "exam_level",
        "frequency",
        "conducting_body",
        "accepting_colleges",
        "total_seats",
    )

    records = []
    for card in page.query_selector_all("div.exam_listing_info"):
        heading = read_text(
            card, "div.exam_detail.d-flex > div.school_infooo > div.title > h2 > a"
        )
        dates = read_text(card, "div.admission_correction > div.online_offline")
        list_values = [
            read_text(card, f"div.offline ul li:nth-child({nth})")
            for nth in range(1, len(list_fields) + 1)
        ]

        if heading == "N/A":
            link = "N/A"
        else:
            link = format_exam_link(heading, base_url)

        record = {
            "title": heading,
            "application_date": dates,
            "exam_link": link,
        }
        record.update(zip(list_fields, list_values))
        records.append(record)

    return records

def scrape_all_exams(category: str):
    """Scrape every exam of a category, following its pagination links.

    Starting at the category's listing URL, each page is loaded, its cards
    are scraped with ``scrape_exams_from_page``, and the ``href`` of the
    ``a.pagination_list_last`` link (if any) becomes the next page to load.

    Args:
        category: A key of ``CATEGORY_URLS``.

    Returns:
        A list of exam dicts collected across all visited pages, or a dict
        with a single ``error`` key when ``category`` is not recognised.

    Raises:
        Any error Playwright raises while navigating or waiting, e.g. when no
        exam card appears within the 10 second wait. The browser is closed
        before the error propagates.
    """
    from urllib.parse import urljoin

    if category not in CATEGORY_URLS:
        return {"error": "Invalid category. Choose from: engineering, management, law, design."}

    base_url = CATEGORY_URLS[category]
    url = base_url
    exams = []
    # URLs already loaded; stops the loop if a pagination link points back to
    # a page that was scraped before (e.g. a link to the current page).
    visited = set()

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()

            while url and url not in visited:
                visited.add(url)
                page.goto(url, timeout=60000)
                page.wait_for_selector("div.exam_listing_info", timeout=10000)
                exams.extend(scrape_exams_from_page(page, base_url))

                next_page_el = page.query_selector("a.pagination_list_last")
                href = next_page_el.get_attribute("href") if next_page_el else None
                # The href may be relative; resolve it against the page that
                # was just loaded so page.goto always gets an absolute URL.
                url = urljoin(page.url, href) if href else None
                # Ignore non-navigable targets such as "javascript:" links.
                if url and not url.startswith(("http://", "https://")):
                    url = None
        finally:
            # Close the browser even when navigation or a wait raises.
            browser.close()

    return exams

@app.get("/get_exams")
def get_exams(category: str = Query(..., title="Exam Category", description="Choose from: engineering, management, law, design")):
    """Scrape all exams of ``category`` and return the outcome under "exams"."""
    outcome = scrape_all_exams(category)
    return {"exams": outcome}
