# In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import UnexpectedAlertPresentException, NoAlertPresentException, StaleElementReferenceException
from bs4 import BeautifulSoup
import csv
import time

# Accumulator for every scraped table row; get_data_from_page() appends to it.
data = []

# Address of the page the browser opens first.
base_url = "https://greenbook.nafdac.gov.ng"

# Launch Chrome with the --headless flag and load the start page.
options = webdriver.ChromeOptions()
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)
driver.get(base_url)

def get_data_from_page(soup):
    """Collect the product rows of the results table into ``data``.

    Looks up the table with id ``DataTables_Table_0`` in ``soup``. For every
    ``<tr>`` that holds at least nine ``<td>`` cells, a dict built from the
    stripped text of the first nine cells is appended to the module-level
    ``data`` list and printed. Rows with fewer cells (for example header rows
    that contain no ``<td>``) are ignored.

    Args:
        soup: Parsed page exposing ``find``; the caller passes a
            ``BeautifulSoup`` object.

    Returns:
        None. Results are accumulated in ``data``.
    """
    # Output keys, in the same order as the table cells they are read from.
    columns = (
        "Product Name",
        "Active Ingredient",
        "NRN",
        "form",
        "ROA",
        "Strengths",
        "Applicant Name",
        "approval_date",
        "status",
    )

    table = soup.find("table", {"id": "DataTables_Table_0"})
    if table is None:
        # find() returns None when the table is absent from the page source;
        # bail out instead of failing with AttributeError on None.find_all.
        print("Results table not found on this page; skipping.")
        return

    for row in table.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) < len(columns):
            continue
        # zip() stops after the last column name, so extra cells are ignored.
        row_data = {name: cell.text.strip() for name, cell in zip(columns, cells)}
        data.append(row_data)
        print(row_data)

def navigate_to_next_page():
    """Click the pagination "next" control (``li.next``).

    The control is located, checked for the ``disabled`` class, scrolled into
    view and clicked. If the element goes stale in the middle of that
    sequence, the whole sequence (including the ``disabled`` check) is run one
    more time against a freshly located element.

    Returns:
        bool: True once the control has been clicked; False when the control
        is marked ``disabled`` or when an error prevented the click.
    """
    locator = (By.CSS_SELECTOR, "li.next")
    max_attempts = 2  # the first try plus one retry after a stale reference

    for attempt in range(1, max_attempts + 1):
        try:
            # Wait until the control can be clicked.
            next_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable(locator)
            )
            if "disabled" in (next_button.get_attribute("class") or ""):
                return False
            # Scroll the control into view before clicking it.
            driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
            time.sleep(1)
            next_button.click()
            time.sleep(5)  # give the next page time to load
            return True
        except StaleElementReferenceException as e:
            # The element was replaced under us; locate it again and retry,
            # unless the retry budget is already used up.
            if attempt == max_attempts:
                print(f"Error navigating to the next page: {e}")
                return False
        except Exception as e:
            print(f"Error navigating to the next page: {e}")
            return False
    return False

def set_results_per_page():
    """Select the ``100`` option of the ``DataTables_Table_0_length`` control.

    Waits up to 10 seconds for the control to become clickable, opens it,
    clicks its option whose value is ``100`` and then pauses 5 seconds so the
    table can re-render. Any failure is printed and otherwise ignored, so the
    caller continues with whatever page size is currently active.

    Returns:
        None.
    """
    try:
        length_control = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.NAME, "DataTables_Table_0_length"))
        )
        length_control.click()
        # Search below the located control only: a document-wide
        # //option[@value='100'] could match an option of an unrelated <select>.
        option_100 = length_control.find_element(By.XPATH, ".//option[@value='100']")
        option_100.click()
        time.sleep(5)  # give the table time to reload with the new page size
    except Exception as e:
        print(f"Error setting results per page: {e}")

def handle_alert():
    """Accept the browser alert that is currently open, if there is one.

    Prints "Alert accepted." after a successful accept. When Selenium reports
    that no alert is present, the function returns silently.
    """
    try:
        pending_alert = driver.switch_to.alert
        pending_alert.accept()
    except NoAlertPresentException:
        # Nothing to dismiss.
        return
    print("Alert accepted.")

def save_data_to_csv():
    """Write every row collected in ``data`` to ``data.csv``.

    The file is created in the current working directory and overwritten on
    each call, encoded as UTF-8, with a header row followed by one line per
    collected row. Nothing is written when ``data`` is empty.

    Returns:
        None.
    """
    if not data:
        return

    # These names must match the keys of the row dicts exactly; DictWriter
    # raises ValueError for any row key that is missing from fieldnames
    # (the earlier "Product Name " with a trailing space triggered that).
    fieldnames = [
        "Product Name",
        "Active Ingredient",
        "NRN",
        "form",
        "ROA",
        "Strengths",
        "Applicant Name",
        "approval_date",
        "status",
    ]
    with open("data.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)
    print("Data saved to data.csv")

# Crawl driver: scrape the current page, move to the next one, and repeat
# until navigate_to_next_page() reports that there is nothing left to click.

# Upper bound on back-to-back failed iterations before the crawl gives up,
# so a persistent error cannot keep the loop spinning forever.
MAX_CONSECUTIVE_ERRORS = 5

set_results_per_page()

consecutive_errors = 0
try:
    while True:
        try:
            time.sleep(5)  # give the page a moment to load
            soup = BeautifulSoup(driver.page_source, "html.parser")
            get_data_from_page(soup)

            if not navigate_to_next_page():
                break
        except UnexpectedAlertPresentException as e:
            print(f"Unexpected alert: {e}")
        except Exception as e:
            print(f"An error occurred: {e}")
        else:
            # Clean iteration: reset the failure streak and go straight on.
            consecutive_errors = 0
            continue

        # Only reached after a failed iteration: dismiss any open alert and
        # save what has been collected so far before trying again.
        handle_alert()
        save_data_to_csv()
        consecutive_errors += 1
        if consecutive_errors >= MAX_CONSECUTIVE_ERRORS:
            print(f"Stopping after {consecutive_errors} consecutive errors.")
            break
finally:
    # Runs on normal completion, on errors and on interrupts alike, so the
    # browser is always shut down and the final data is written to the CSV file.
    try:
        save_data_to_csv()
    finally:
        driver.quit()


{'name': '4 Oral Powder', 'ingredient': 'Oral Rehydration Salts', 'product_id': 'B4-5544', 'form': 'Powder', 'route': 'Oral', 'strength': '20.5 g', 'applicant': 'Geneith Pharmaceuticals Limited', 'approval_date': '2021-03-01', 'status': 'Active'}
{'name': '4.3% Dextrose & Normal Saline Infusion', 'ingredient': 'Glucose; Sodium Chloride', 'product_id': 'A11-0238', 'form': 'Solution for infusion', 'route': 'Intravenous', 'strength': '4.3%; 0.9%', 'applicant': 'Fidson Healthcare PLC', 'approval_date': '2019-02-09', 'status': 'Inactive'}
{'name': '5% Dextrose & Normal Saline Infusion', 'ingredient': 'Glucose; Sodium Chloride', 'product_id': 'A11-0236', 'form': 'Solution for infusion', 'route': 'Intravenous', 'strength': '5%; 0.9%', 'applicant': 'Fidson Healthcare PLC', 'approval_date': '2019-02-09', 'status': 'Inactive'}
{'name': '5% DEXTROSE AND NORMAL SALINE INFUSION', 'ingredient': 'Dextrose anhydrous; Sodium Chloride', 'product_id': 'A11-0236', 'form': 'Intravenous infusion', 'route': 