## Insatlling and Loading Libraries

In [3]:
# ! pip install requests beautifulsoup4 pandas
# ! pip install selenium
# ! pip install webdriver-manager

Collecting webdriver-manager
  Using cached webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Using cached webdriver_manager-4.0.2-py2.py3-none-any.whl (27 kB)
Installing collected packages: webdriver-manager
Successfully installed webdriver-manager-4.0.2




In [1]:
import requests
from bs4 import BeautifulSoup


import csv
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
from selenium.webdriver.common.action_chains import ActionChains
from webdriver_manager.chrome import ChromeDriverManager

## Data Extraction 

Note: The MAUDE Website only loads 500 instances at one time, so multiple queries are being used to extract a sufficient volume of data 

Running to Extract data for 8/23/2024 - 8/30/2024

In [68]:

# Set up the Chrome driver using Service
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)

# Open a webpage
driver.get("https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfmaude/results.cfm")

# Wait for the Manufacturer input field to be present
wait = WebDriverWait(driver, 10)

# Locate the input field for Manufacturer using its ID
manufacturer_input = wait.until(EC.presence_of_element_located((By.ID, "Manufacturer")))

# Enter "Abbott Vascular" into the Manufacturer input field
manufacturer_input.send_keys("Abbott Vascular")

# Locate the Date Report Received by FDA input field and change the date to "08/01/2023"
report_date_from_input = wait.until(EC.presence_of_element_located((By.ID, "ReportDateFrom")))
report_date_from_input.clear()  # Clear the current date
report_date_from_input.send_keys("08/01/2023")  # Enter the desired date

# Wait for the Records per Report Page dropdown and select "100"
records_dropdown = wait.until(EC.presence_of_element_located((By.ID, "pagenum")))
Select(records_dropdown).select_by_visible_text("100")

# Wait for the Search button and click it
search_button = wait.until(EC.element_to_be_clickable((By.NAME, "Search")))
search_button.click()

# Helper function to safely extract text with multiple field options
def safe_extract(field_options):
    for xpath in field_options:
        try:
            element = driver.find_element(By.XPATH, xpath)
            return element.get_attribute("innerText").strip()
        except NoSuchElementException:
            continue
    return "N/A"

# Function to extract data from a page
def extract_data():
    all_data = []
    brand_links = driver.find_elements(By.XPATH, "//a[contains(@style, 'text-decoration:underline')]")

    for i in range(len(brand_links)):
        try:
            brand_name = brand_links[i].text
            brand_links[i].click()
            time.sleep(2)  # Adjust sleep time if necessary

            mdrfoi_id = driver.current_url.split('mdrfoi__id=')[1].split('&')[0]

            # Define possible field options for each data point
            model_number_options = ["//strong[text()='Model Number']/..", "//strong[text()='Catalog Number']/.."]
            device_problem_options = ["//strong[text()='Device Problem']/..", "//strong[text()='Device Problems']/.."]
            patient_problem_options = ["//strong[text()='Patient Problem']/.."]
            event_date_options = ["//strong[text()='Event Date']/.."]
            event_type_options = ["//strong[text()='Event Type']/.."]
            manufacturer_narrative_options = ["//div[contains(@style, 'word-wrap: break-word')]"]
            event_description_options = ["/html/body/div[3]/maxamineignore/div[2]/div[2]/span[2]/table[2]/tbody/tr/td/table/tbody/tr[2]/td/table/tbody/tr[10]/td/div"]

            # Extract the required data
            model_number = safe_extract(model_number_options)
            device_problem = safe_extract(device_problem_options)
            patient_problem = safe_extract(patient_problem_options)
            event_date = safe_extract(event_date_options)
            event_type = safe_extract(event_type_options)
            manufacturer_narrative = safe_extract(manufacturer_narrative_options)
            event_description = safe_extract(event_description_options)

            all_data.append([mdrfoi_id, brand_name, model_number, device_problem, patient_problem, event_date, event_type, manufacturer_narrative, event_description])

            driver.back()
            brand_links = driver.find_elements(By.XPATH, "//a[contains(@style, 'text-decoration:underline')]")

        except Exception as e:
            print(f"An error occurred: {e}")
            continue

    return all_data

# Function to handle pagination and extract data from multiple pages
def extract_data_from_all_pages():
    all_data = []
    page_number = 1

    while True:
        print(f"Extracting data from page {page_number}")
        all_data.extend(extract_data())

        try:
            # Locate the "Next" page link (or pagination link) and click it
            next_page_link = driver.find_element(By.XPATH, "//a[@title='Records {} to {}']".format(page_number * 100 + 1, (page_number + 1) * 100))
            next_page_link.click()

            # Wait for the new page to load
            time.sleep(2)  # Adjust as needed
            page_number += 1

        except NoSuchElementException:
            print("No more pages to navigate.")
            break

    return all_data

# Extract data from all pages and save to a CSV file
extracted_data = extract_data_from_all_pages()

with open('maude_data.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['MDRFOI ID', 'Brand Name', 'Model Number', 'Device Problem', 'Patient Problem', 'Event Date', 'Event Type', 'Manufacturer Narrative', 'Event Description'])
    writer.writerows(extracted_data)

print("Data extraction complete. The results have been saved to 'maude_data.csv'.")

Extracting data from page 1
Extracting data from page 2
Extracting data from page 3
Extracting data from page 4
Extracting data from page 5
An error occurred: list index out of range
No more pages to navigate.
Data extraction complete. The results have been saved to 'maude_data.csv'.


In [5]:
# Read the CSV file
df = pd.read_csv('maude_data.csv')

# Display the contents
print(df.tail())

     MDRFOI ID                      Brand Name                 Model Number  \
494   20049888             PERCLOSE¿ PROSTYLE¿        Model Number 12773-02   
495   20049398                   XIENCE ALPINE      Model Number 1120300-28   
496   20049388  TRICLIP DELIVERY SYSTEM (TCDS)  Catalog Number UNKNOWN TCDS   
497   20049311             PERCLOSE¿ PROSTYLE¿        Model Number 12773-03   
498   20048862             PERCLOSE¿ PROSTYLE¿        Model Number 12773-03   

                                        Device Problem  \
494             Device Problem Failure to Cycle (1142)   
495  Device Problem Adverse Event Without Identifie...   
496  Device Problem Adverse Event Without Identifie...   
497             Device Problem Failure to Cycle (1142)   
498             Device Problem Failure to Cycle (1142)   

                                       Patient Problem             Event Date  \
494  Patient Problem No Clinical Signs, Symptoms or...  Event Date 08/07/2024   
495         Pa

In [71]:
print(len(df))

499


Rerunning to get data for 8/12/2024 8/23/2024 

In [74]:

# Set up the Chrome driver using Service
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)

# Open a webpage
driver.get("https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfmaude/results.cfm")

# Wait for the Manufacturer input field to be present
wait = WebDriverWait(driver, 10)

# Locate the input field for Manufacturer using its ID
manufacturer_input = wait.until(EC.presence_of_element_located((By.ID, "Manufacturer")))

# Enter "Abbott Vascular" into the Manufacturer input field
manufacturer_input.send_keys("Abbott Vascular")

# Locate the Date Report Received by FDA input field and change the date to "08/01/2023"
report_date_from_input = wait.until(EC.presence_of_element_located((By.ID, "ReportDateFrom")))
report_date_from_input.clear()  # Clear the current date
report_date_from_input.send_keys("08/01/2023")  # Enter the desired date

# Locate the "To Date" input field and set the value
report_date_to_input = wait.until(EC.presence_of_element_located((By.ID, "ReportDateTo")))
report_date_to_input.clear()  # Clear the current value
report_date_to_input.send_keys("08/22/2024")  # Set the new end date

# Wait for the Records per Report Page dropdown and select "100"
records_dropdown = wait.until(EC.presence_of_element_located((By.ID, "pagenum")))
Select(records_dropdown).select_by_visible_text("100")

# Wait for the Search button and click it
search_button = wait.until(EC.element_to_be_clickable((By.NAME, "Search")))
search_button.click()

# Helper function to safely extract text with multiple field options
def safe_extract(field_options):
    for xpath in field_options:
        try:
            element = driver.find_element(By.XPATH, xpath)
            return element.get_attribute("innerText").strip()
        except NoSuchElementException:
            continue
    return "N/A"

# Function to extract data from a page
def extract_data():
    all_data = []
    brand_links = driver.find_elements(By.XPATH, "//a[contains(@style, 'text-decoration:underline')]")

    for i in range(len(brand_links)):
        try:
            brand_name = brand_links[i].text
            brand_links[i].click()
            time.sleep(2)  # Adjust sleep time if necessary

            mdrfoi_id = driver.current_url.split('mdrfoi__id=')[1].split('&')[0]

            # Define possible field options for each data point
            model_number_options = ["//strong[text()='Model Number']/..", "//strong[text()='Catalog Number']/.."]
            device_problem_options = ["//strong[text()='Device Problem']/..", "//strong[text()='Device Problems']/.."]
            patient_problem_options = ["//strong[text()='Patient Problem']/.."]
            event_date_options = ["//strong[text()='Event Date']/.."]
            event_type_options = ["//strong[text()='Event Type']/.."]
            manufacturer_narrative_options = ["//div[contains(@style, 'word-wrap: break-word')]"]
            event_description_options = ["/html/body/div[3]/maxamineignore/div[2]/div[2]/span[2]/table[2]/tbody/tr/td/table/tbody/tr[2]/td/table/tbody/tr[10]/td/div"]

            # Extract the required data
            model_number = safe_extract(model_number_options)
            device_problem = safe_extract(device_problem_options)
            patient_problem = safe_extract(patient_problem_options)
            event_date = safe_extract(event_date_options)
            event_type = safe_extract(event_type_options)
            manufacturer_narrative = safe_extract(manufacturer_narrative_options)
            event_description = safe_extract(event_description_options)

            all_data.append([mdrfoi_id, brand_name, model_number, device_problem, patient_problem, event_date, event_type, manufacturer_narrative, event_description])

            driver.back()
            brand_links = driver.find_elements(By.XPATH, "//a[contains(@style, 'text-decoration:underline')]")

        except Exception as e:
            print(f"An error occurred: {e}")
            continue

    return all_data

# Function to handle pagination and extract data from multiple pages
def extract_data_from_all_pages():
    all_data = []
    page_number = 1

    while True:
        print(f"Extracting data from page {page_number}")
        all_data.extend(extract_data())

        try:
            # Locate the "Next" page link (or pagination link) and click it
            next_page_link = driver.find_element(By.XPATH, "//a[@title='Records {} to {}']".format(page_number * 100 + 1, (page_number + 1) * 100))
            next_page_link.click()

            # Wait for the new page to load
            time.sleep(2)  # Adjust as needed
            page_number += 1

        except NoSuchElementException:
            print("No more pages to navigate.")
            break

    return all_data

# Extract data from all pages and save to a CSV file
extracted_data = extract_data_from_all_pages()

with open('maude_data2.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['MDRFOI ID', 'Brand Name', 'Model Number', 'Device Problem', 'Patient Problem', 'Event Date', 'Event Type', 'Manufacturer Narrative', 'Event Description'])
    writer.writerows(extracted_data)

print("Data extraction complete. The results have been saved to 'maude_data2.csv'.")

TimeoutException: Message: 
Stacktrace:
	GetHandleVerifier [0x00277143+25587]
	(No symbol) [0x0020A2E4]
	(No symbol) [0x00102113]
	(No symbol) [0x00146F62]
	(No symbol) [0x001471AB]
	(No symbol) [0x00187852]
	(No symbol) [0x0016ABE4]
	(No symbol) [0x00185370]
	(No symbol) [0x0016A936]
	(No symbol) [0x0013BA73]
	(No symbol) [0x0013C4CD]
	GetHandleVerifier [0x00554C63+3030803]
	GetHandleVerifier [0x005A6B99+3366473]
	GetHandleVerifier [0x003095F2+624802]
	GetHandleVerifier [0x00310E6C+655644]
	(No symbol) [0x00212C9D]
	(No symbol) [0x0020FD68]
	(No symbol) [0x0020FF05]
	(No symbol) [0x00202336]
	BaseThreadInitThunk [0x75D17BA9+25]
	RtlInitializeExceptionChain [0x76F5C11B+107]
	RtlClearBits [0x76F5C09F+191]
	(No symbol) [0x00000000]


In [4]:
# Read the CSV file
df2 = pd.read_csv('maude_data2.csv')

# Display the contents
print(df2.head())

   MDRFOI ID                                Brand Name  \
0   20050146  PERCLOSE VASCULAR SUTURE DELIVERY DEVICE   
1   20043827                       PERCLOSE¿ PROSTYLE¿   
2   20043377                       PERCLOSE¿ PROSTYLE¿   
3   20043235                       PERCLOSE¿ PROSTYLE¿   
4   20043173                                 MITRACLIP   

                 Model Number                             Device Problem  \
0                         NaN                                        NaN   
1       Model Number 12773-02     Device Problem Failure to Cycle (1142)   
2       Model Number 12773-03     Device Problem Failure to Cycle (1142)   
3       Model Number 12773-03  Device Problem Material Separation (1562)   
4  Catalog Number CDS0705-XTW  Device Problem Difficult to Remove (1528)   

                                     Patient Problem             Event Date  \
0         Patient Problem Hemorrhage/Bleeding (1888)  Event Date 08/13/2024   
1  Patient Problem No Clinical Signs

Rerunning to get data for 7/31/2024 - 8/11/2024 

In [49]:
# Set up the Chrome driver using Service
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)

# Open a webpage
driver.get("https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfmaude/results.cfm")

# Wait for the Manufacturer input field to be present
wait = WebDriverWait(driver, 10)

# Locate the input field for Manufacturer using its ID
manufacturer_input = wait.until(EC.presence_of_element_located((By.ID, "Manufacturer")))

# Enter "Abbott Vascular" into the Manufacturer input field
manufacturer_input.send_keys("Abbott Vascular")

# Locate the Date Report Received by FDA input field and change the date to "08/01/2023"
report_date_from_input = wait.until(EC.presence_of_element_located((By.ID, "ReportDateFrom")))
report_date_from_input.clear()  # Clear the current date
report_date_from_input.send_keys("08/01/2023")  # Enter the desired date

# Locate the "To Date" input field and set the value
report_date_to_input = wait.until(EC.presence_of_element_located((By.ID, "ReportDateTo")))
report_date_to_input.clear()  # Clear the current value
report_date_to_input.send_keys("08/11/2024")  # Set the new end date

# Wait for the Records per Report Page dropdown and select "100"
records_dropdown = wait.until(EC.presence_of_element_located((By.ID, "pagenum")))
Select(records_dropdown).select_by_visible_text("100")

# Wait for the Search button and click it
search_button = wait.until(EC.element_to_be_clickable((By.NAME, "Search")))
search_button.click()

# Helper function to safely extract text with multiple field options
def safe_extract(field_options):
    for xpath in field_options:
        try:
            element = driver.find_element(By.XPATH, xpath)
            return element.get_attribute("innerText").strip()
        except NoSuchElementException:
            continue
    return "N/A"

# Function to extract data from a page
def extract_data():
    all_data = []
    brand_links = driver.find_elements(By.XPATH, "//a[contains(@style, 'text-decoration:underline')]")

    for i in range(len(brand_links)):
        try:
            brand_name = brand_links[i].text
            brand_links[i].click()
            time.sleep(2)  # Adjust sleep time if necessary

            mdrfoi_id = driver.current_url.split('mdrfoi__id=')[1].split('&')[0]

            # Define possible field options for each data point
            model_number_options = ["//strong[text()='Model Number']/..", "//strong[text()='Catalog Number']/.."]
            device_problem_options = ["//strong[text()='Device Problem']/..", "//strong[text()='Device Problems']/.."]
            patient_problem_options = ["//strong[text()='Patient Problem']/.."]
            event_date_options = ["//strong[text()='Event Date']/.."]
            event_type_options = ["//strong[text()='Event Type']/.."]
            manufacturer_narrative_options = ["//div[contains(@style, 'word-wrap: break-word')]"]
            event_description_options = ["/html/body/div[3]/maxamineignore/div[2]/div[2]/span[2]/table[2]/tbody/tr/td/table/tbody/tr[2]/td/table/tbody/tr[10]/td/div"]

            # Extract the required data
            model_number = safe_extract(model_number_options)
            device_problems = safe_extract(device_problems_options)
            patient_problem = safe_extract(patient_problem_options)
            event_date = safe_extract(event_date_options)
            event_type = safe_extract(event_type_options)
            manufacturer_narrative = safe_extract(manufacturer_narrative_options)
            event_description = safe_extract(event_description_options)

            all_data.append([mdrfoi_id, brand_name, model_number, device_problems, patient_problem, event_date, event_type, manufacturer_narrative, event_description])

            driver.back()
            brand_links = driver.find_elements(By.XPATH, "//a[contains(@style, 'text-decoration:underline')]")

        except Exception as e:
            print(f"An error occurred: {e}")
            continue

    return all_data

# Function to handle pagination and extract data from multiple pages
def extract_data_from_all_pages():
    all_data = []
    page_number = 1

    while True:
        print(f"Extracting data from page {page_number}")
        all_data.extend(extract_data())

        try:
            # Locate the "Next" page link (or pagination link) and click it
            next_page_link = driver.find_element(By.XPATH, "//a[@title='Records {} to {}']".format(page_number * 100 + 1, (page_number + 1) * 100))
            next_page_link.click()

            # Wait for the new page to load
            time.sleep(2)  # Adjust as needed
            page_number += 1

        except NoSuchElementException:
            print("No more pages to navigate.")
            break

    return all_data

# Extract data from all pages and save to a CSV file
extracted_data = extract_data_from_all_pages()

with open('maude_data3.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['MDRFOI ID', 'Brand Name', 'Model Number', 'Device Problems', 'Patient Problem', 'Event Date', 'Event Type', 'Manufacturer Narrative', 'Event Description'])
    writer.writerows(extracted_data)

print("Data extraction complete. The results have been saved to 'maude_data3.csv'.")

Extracting data from page 1
Extracting data from page 2
Extracting data from page 3
Extracting data from page 4
Extracting data from page 5
No more pages to navigate.
Data extraction complete. The results have been saved to 'maude_data3.csv'.


In [6]:
# Read the CSV file
df3 = pd.read_csv('maude_data3.csv')

# Display the contents
print(df3.tail(5))

     MDRFOI ID           Brand Name           Model Number Device Problems  \
495   19878030  PERCLOSE¿ PROSTYLE¿  Model Number 12773-02             NaN   
496   19878023  PERCLOSE¿ PROSTYLE¿  Model Number 12773-02             NaN   
497   19878020  PERCLOSE¿ PROSTYLE¿  Model Number 12773-02             NaN   
498   19878019  PERCLOSE¿ PROSTYLE¿  Model Number 12773-02             NaN   
499   19878018  PERCLOSE¿ PROSTYLE¿  Model Number 12773-02             NaN   

                                       Patient Problem             Event Date  \
495  Patient Problem No Clinical Signs, Symptoms or...  Event Date 07/09/2024   
496  Patient Problem No Clinical Signs, Symptoms or...  Event Date 07/09/2024   
497  Patient Problem No Clinical Signs, Symptoms or...  Event Date 07/11/2024   
498  Patient Problem No Clinical Signs, Symptoms or...  Event Date 07/11/2024   
499  Patient Problem No Clinical Signs, Symptoms or...  Event Date 07/11/2024   

             Event Type                     

Rerunning to get data for 7/30/2024 

In [14]:
# Rerunning to get data for 7/30/2024 
# Set up the Chrome driver using Service
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)

# Open the webpage
driver.get("https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfmaude/results.cfm")

# Wait for the Manufacturer input field to be present
wait = WebDriverWait(driver, 10)
manufacturer_input = wait.until(EC.presence_of_element_located((By.ID, "Manufacturer")))

# Enter "Abbott Vascular" into the Manufacturer input field
manufacturer_input.send_keys("Abbott Vascular")

# Set the date range
report_date_from_input = wait.until(EC.presence_of_element_located((By.ID, "ReportDateFrom")))
report_date_from_input.clear()
report_date_from_input.send_keys("08/01/2023")

report_date_to_input = wait.until(EC.presence_of_element_located((By.ID, "ReportDateTo")))
report_date_to_input.clear()
report_date_to_input.send_keys("07/30/2024")

# Select "100" records per page
records_dropdown = wait.until(EC.presence_of_element_located((By.ID, "pagenum")))
Select(records_dropdown).select_by_visible_text("100")

# Click the search button
search_button = wait.until(EC.element_to_be_clickable((By.NAME, "Search")))
search_button.click()

# Helper function to safely extract text with multiple field options
def safe_extract(field_options):
    for xpath in field_options:
        try:
            element = driver.find_element(By.XPATH, xpath)
            return element.get_attribute("innerText").strip()
        except NoSuchElementException:
            continue
    return "N/A"

# Function to extract data from a page
def extract_data():
    all_data = []
    brand_links = driver.find_elements(By.XPATH, "//a[contains(@style, 'text-decoration:underline')]")

    for i in range(len(brand_links)):
        try:
            brand_name = brand_links[i].text
            brand_links[i].click()
            time.sleep(2)  # Adjust sleep time if necessary

            mdrfoi_id = driver.current_url.split('mdrfoi__id=')[1].split('&')[0]

            # Define possible field options for each data point
            model_number_options = ["//strong[text()='Model Number']/..", "//strong[text()='Catalog Number']/.."]
            device_problem_options = ["//strong[text()='Device Problem']/..", "//strong[text()='Device Problems']/.."]
            patient_problem_options = ["//strong[text()='Patient Problem']/.."]
            event_date_options = ["//strong[text()='Event Date']/.."]
            event_type_options = ["//strong[text()='Event Type']/.."]
            manufacturer_narrative_options = ["//div[contains(@style, 'word-wrap: break-word')]"]
            event_description_options = ["/html/body/div[3]/maxamineignore/div[2]/div[2]/span[2]/table[2]/tbody/tr/td/table/tbody/tr[2]/td/table/tbody/tr[10]/td/div"]

            # Extract the required data
            model_number = safe_extract(model_number_options)
            device_problems = safe_extract(device_problem_options)
            patient_problem = safe_extract(patient_problem_options)
            event_date = safe_extract(event_date_options)
            event_type = safe_extract(event_type_options)
            manufacturer_narrative = safe_extract(manufacturer_narrative_options)
            event_description = safe_extract(event_description_options)

            all_data.append([mdrfoi_id, brand_name, model_number, device_problems, patient_problem, event_date, event_type, manufacturer_narrative, event_description])

            driver.back()
            brand_links = driver.find_elements(By.XPATH, "//a[contains(@style, 'text-decoration:underline')]")

        except Exception as e:
            print(f"An error occurred: {e}")
            continue

    return all_data

# Function to handle pagination and extract data from multiple pages
def extract_data_from_all_pages():
    all_data = []
    page_number = 1

    while True:
        print(f"Extracting data from page {page_number}")
        all_data.extend(extract_data())

        try:
            # Locate the "Next" page link (or pagination link) and click it
            next_page_link = driver.find_element(By.XPATH, "//a[@title='Records {} to {}']".format(page_number * 100 + 1, (page_number + 1) * 100))
            next_page_link.click()

            # Wait for the new page to load
            time.sleep(2)  # Adjust as needed
            page_number += 1

        except NoSuchElementException:
            print("No more pages to navigate.")
            break

    return all_data

# Extract data from all pages and save to a CSV file
extracted_data = extract_data_from_all_pages()

with open('maude_data4.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['MDRFOI ID', 'Brand Name', 'Model Number', 'Device Problems', 'Patient Problem', 'Event Date', 'Event Type', 'Manufacturer Narrative', 'Event Description'])
    writer.writerows(extracted_data)

print("Data extraction complete. The results have been saved to 'maude_data4.csv'.")

Extracting data from page 1
Extracting data from page 2
Extracting data from page 3
An error occurred: list index out of range
An error occurred: list index out of range
An error occurred: list index out of range
An error occurred: list index out of range
Extracting data from page 4
Extracting data from page 5
No more pages to navigate.
Data extraction complete. The results have been saved to 'maude_data4.csv'.


In [15]:
# Read the CSV file
df4 = pd.read_csv('maude_data4.csv')

# Display the contents
print(df4.tail(5))

     MDRFOI ID                                Brand Name  \
491   19794447  HI-TORQUE BALANCE MIDDLEWEIGHT UNIVERSAL   
492   19794415  HI-TORQUE BALANCE MIDDLEWEIGHT UNIVERSAL   
493   19794233  HI-TORQUE BALANCE MIDDLEWEIGHT UNIVERSAL   
494   19794226  HI-TORQUE BALANCE MIDDLEWEIGHT UNIVERSAL   
495   19794117  HI-TORQUE BALANCE MIDDLEWEIGHT UNIVERSAL   

                          Model Number  \
491  Model Number UNK UNIVERSAL BMW II   
492  Model Number UNK UNIVERSAL BMW II   
493  Model Number UNK UNIVERSAL BMW II   
494  Model Number UNK UNIVERSAL BMW II   
495  Model Number UNK UNIVERSAL BMW II   

                                       Device Problems  \
491  Device Problems Break (1069); Difficult to Rem...   
492  Device Problems Break (1069); Difficult to Rem...   
493  Device Problems Break (1069); Difficult to Rem...   
494  Device Problems Break (1069); Difficult to Rem...   
495  Device Problems Break (1069); Difficult to Rem...   

                                     

# Combining all 4 CSVs 

In [16]:
# Combining df, df2, df3 DataFrames
combined_df = pd.concat([df, df2, df3, df4], ignore_index=True)
combined_df.head()

Unnamed: 0,MDRFOI ID,Brand Name,Model Number,Device Problem,Patient Problem,Event Date,Event Type,Manufacturer Narrative,Event Description,Device Problems
0,20112651,HI-TORQUE BALANCE MIDDLEWEIGHT UNIVERSAL,Model Number 1009664J,Device Problems Difficult to Remove (1528); Ma...,Patient Problem Device Embedded In Tissue or P...,Event Date 08/12/2024,Event Type Injury,The device was not returned for analysis.A rev...,It was reported the procedure was to treat a l...,
1,20112634,XIENCE SKYPOINT¿,Model Number 1804225-18,Device Problems Material Rupture (1546); Diffi...,"Patient Problem No Clinical Signs, Symptoms or...",Event Date 08/08/2024,Event Type malfunction,The device was not returned for evaluation.Pro...,It was reported that the procedure was to trea...,
2,20112157,TRICLIP G4 SYSTEM,Catalog Number TCDS0302-XTW,Device Problem Incomplete Coaptation (2507),Patient Problem Tricuspid Valve Insufficiency/...,Event Date 08/14/2024,Event Type malfunction,The device was not returned for analysis.A rev...,It was reported this was a triclip procedure t...,
3,20112156,TRICLIP G4 SYSTEM,Catalog Number TCDS0302-XTW,Device Problems Positioning Failure (1158); En...,"Patient Problem No Clinical Signs, Symptoms or...",Event Date 08/12/2024,Event Type Injury,Investigation is not yet complete.A follow-up ...,It is reported this was a triclip procedure to...,
4,20111290,PERCLOSE¿ PROSTYLE¿,Model Number 12773-02,Device Problem Failure to Cycle (1142),"Patient Problem No Clinical Signs, Symptoms or...",Event Date 08/09/2024,Event Type Injury,It was reported that this was an arteriotomy c...,Manufacturer's investigation is still pending ...,


In [17]:
print(len(combined_df))

1995


In [18]:
combined_df.to_csv('combined_df.csv', index=False)