In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import re

def save_and_cleanup(driver, df, modified_excel_path):
    # Save the modified DataFrame back to Excel
    df.to_excel(modified_excel_path, index=False)
    
    # Close the browser
    driver.quit()
    
    print("Task completed successfully. Modified Excel file saved.")

def scrape_and_extract(df, count, chrome_driver_path, modified_excel_path):
    # Create a Service object for the Chrome driver
    service = Service(chrome_driver_path)

    # Configure Chrome options
    options = Options()
    options.add_argument("--start-maximized")  # Start browser maximized

    # Initialize Chrome WebDriver with service and options
    driver = webdriver.Chrome(service=service, options=options)
    wait = WebDriverWait(driver, 20)  # Adjust wait time as needed

    try:
        loop_counter = 0

        for index, row in df.iterrows():
            if loop_counter >= count:
                break

            try:
                # Open the initial website
                driver.get('https://www.cms.gov/medicare-coverage-database/search.aspx')

                # Input search criteria (adjust according to your website)
                cpt_code = row['ID']
                word = 'denied'

                cpt_input = wait.until(EC.element_to_be_clickable((By.ID, 'tbxSearchBox')))
                cpt_input.clear()
                cpt_input.send_keys(cpt_code)

                # Perform the search
                search_button = wait.until(EC.element_to_be_clickable((By.ID, 'btnSubmitSearch')))
                search_button.click()
                
                # Handle initial pop-ups, if any
                try:
                    accept_button = wait.until(EC.element_to_be_clickable((By.ID, 'btnAcceptLicense')))
                    accept_button.click()
                except Exception as e:
                    print("No initial pop-up to accept or error accepting pop-up:", e)


                # Wait for results to load (adjust according to your website)
                wait.until(EC.presence_of_element_located((By.ID, 'h3ArticleGuidanceHeader')))

                # Extract paragraphs containing the target word
                soup = BeautifulSoup(driver.page_source, 'html.parser')
                paragraphs = soup.find_all('p')  # Adjust tag based on actual structure

                similar_paragraphs = []
                for paragraph in paragraphs:
                    if re.search(r'\b{}\b'.format(re.escape(word)), paragraph.get_text(), re.IGNORECASE):
                        similar_paragraphs.append(paragraph.get_text())

                # Store results in DataFrame
                similar_paragraphs_str = '\n\n'.join(similar_paragraphs)
                df.at[index, 'Similar_Paragraphs'] = similar_paragraphs_str

                loop_counter += 1

            except Exception as e:
                print(f"Error processing row {index}: {e}")

    finally:
        save_and_cleanup(driver, df, modified_excel_path)

if __name__ == "__main__":
    excel_path = 'sample.xlsx'  # Replace with your input Excel file path
    chrome_driver_path = 'chromedriver.exe'  # Replace with your Chrome driver path
    modified_excel_path = 'modified_excel_file.xlsx'  # Replace with your desired output Excel file path
    
    # Load the Excel file
    df = pd.read_excel(excel_path)
    
    # Count the number of rows in the Excel file
    row_count = df.shape[0]
    
    # Scrape and extract data
    scrape_and_extract(df, row_count, chrome_driver_path, modified_excel_path)


In [None]:
# new code 

In [None]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

def process_cpt_code(driver, cpt_code):
    results = []
    wait = WebDriverWait(driver, 10)  # Increased timeout

    # Enter the CPT code into the search box
    search_box = wait.until(EC.presence_of_element_located((By.ID, 'tbxSearchBox')))
    search_box.clear()
    search_box.send_keys(cpt_code)
    time.sleep(0.1)  # Sleep for 0.1 seconds
    
    # Click the search button
    submit_button = driver.find_element(By.ID, 'btnSubmitSearch')
    submit_button.click()
    print(f"Submitted CPT code: {cpt_code}")
    
    time.sleep(20)  # Sleep for 0.1 seconds

    # Wait for the search results to load
    try:
        search_results_div = wait.until(EC.visibility_of_element_located((By.ID, 'searchResultsDiv')))
        print("Search results loaded successfully.")
    except TimeoutException:
        print(f"Search results not loaded for CPT code: {cpt_code}")
        return results

    # Get the total number of results
    try:
        total_results_element = driver.find_element(By.ID, 'lblTotalResults')
        total_results = int(total_results_element.text)
        print(f"Total results for CPT code {cpt_code}: {total_results}")
    except ValueError:
        print(f"Invalid total results value for CPT code: {cpt_code}")
        return results

    # Iterate through each result
    for i in range(total_results):
        article_elements = driver.find_elements(By.CLASS_NAME, 'table-title-col')
        if i < len(article_elements):
            article_elements[i].click()
            time.sleep(0.5)  # Allow time for the article to load

            # Handle initial pop-ups (e.g., accept cookies)
            try:
                accept_button = wait.until(EC.element_to_be_clickable((By.ID, 'btnAcceptLicense')))
                accept_button.click()
            except TimeoutException:
                pass

            # Search for denied/non-covered words in the article
            article_content = driver.find_element(By.ID, 'h3ArticleGuidanceHeader').text.lower()
            if any(keyword in article_content for keyword in ['denied', 'non-covered', 'not covered']):
                article_id = driver.find_element(By.ID, 'lblTitleId').text
                results.append({'cpt_code': cpt_code, 'article_id': article_id})

            # Return to search results page
            driver.back()

    return results

def main():
    # Load the Excel file
    input_file = 'sample.xlsx'
    output_file = 'output.xlsx'
    df = pd.read_excel(input_file)

    # Ensure the CPT_CODE column is treated as a string
    df['CPT_CODE'] = df['CPT_CODE'].astype(str)

    # Setup Selenium WebDriver
    driver = webdriver.Chrome()  # Adjust the driver if needed (e.g., Edge, Firefox)
    driver.get('https://www.cms.gov/medicare-coverage-database/search.aspx')

    # Accept cookies if prompted
    try:
        wait = WebDriverWait(driver, 5)
        accept_cookies = wait.until(EC.element_to_be_clickable((By.ID, 'acceptCookiesButton')))
        accept_cookies.click()
    except TimeoutException:
        print("No cookies prompt displayed.")

    # Process each CPT code
    all_results = []
    for _, row in df.iterrows():
        cpt_code = row['CPT_CODE']
        results = process_cpt_code(driver, cpt_code)
        all_results.extend(results)

    # Save results to a new Excel file
    results_df = pd.DataFrame(all_results)
    results_df.to_excel(output_file, index=False)

    print("Process completed successfully!")
    driver.quit()

if __name__ == '__main__':
    main()


In [7]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

def process_cpt_code(driver, cpt_code):
    wait = WebDriverWait(driver, 20)  # Increased timeout
    results = []  # Initialize the results list

    # Enter the CPT code into the search box
    time.sleep(5)  
    try:
        search_box = wait.until(EC.presence_of_element_located((By.ID, 'tbxSearchBox')))
        search_box.clear()
        search_box.send_keys(cpt_code)
    except TimeoutException:
        print(f"Search box not found for CPT code: {cpt_code}")
        return results
    
    # Click the search button
    submit_button = driver.find_element(By.ID, 'btnSubmitSearch')
    submit_button.click()
    print(f"Submitted CPT code: {cpt_code}")
    
    time.sleep(20)

    # Wait for the search results to load
    try:
        wait.until(EC.visibility_of_element_located((By.ID, 'searchResultsDiv')))
        print("Search results loaded successfully.")
    except TimeoutException:
        print(f"Search results not loaded for CPT code: {cpt_code}")
        return results

    # Get the total number of results
    try:
        total_results_element = driver.find_element(By.ID, 'lblTotalResults')
        total_results = int(total_results_element.text)
        print(f"Total results for CPT code {cpt_code}: {total_results}")
    except ValueError:
        print(f"Invalid total results value for CPT code: {cpt_code}")
        return results

    # Iterate through each result
    for i in range(total_results):
        print(f"Processing article {i + 1} of {total_results} for CPT code: {cpt_code}")
        time.sleep(3)

        # Re-fetch article elements after each iteration
        article_elements = driver.find_elements(By.CLASS_NAME, 'table-title-col')  # Replace with the correct class name
        if i < len(article_elements):
            try:
                # Click on the article
                article_elements[i].click()
                time.sleep(4)  # Allow time for the article to load

                # Handle initial pop-ups (e.g., accept cookies)
                try:
                    accept_button = wait.until(EC.element_to_be_clickable((By.ID, 'btnAcceptLicense')))
                    accept_button.click()
                except TimeoutException:
                    pass

                try:
                    article_content = wait.until(EC.presence_of_element_located((By.ID, 'frmMainForm'))).text
                    if any(keyword in article_content.lower() for keyword in ['denied', 'non-covered', 'not covered']):
                        article_id = driver.find_element(By.ID, 'lblTitleId').text
                        print(f"Found denied/non-covered article: {article_id}")
                        results.append({'cpt_code': cpt_code, 'article_id': article_id})
                except TimeoutException:
                    print(f"Article content not found for CPT code: {cpt_code}")

            finally:
                # Return to search results page
                driver.back()
                print(f"Returned to search results for CPT code: {cpt_code}")
                time.sleep(2)

                # Explicit wait to ensure the page reloads
                wait.until(EC.visibility_of_element_located((By.ID, 'searchResultsDiv')))
                time.sleep(5)
        else:
            print(f"No article element found for index {i}. Skipping.")

    
    return results


def main():
    input_file = 'input.xlsx'
    output_file = 'output.xlsx'

    # Load the input Excel file
    df = pd.read_excel(input_file)

    # Ensure the CPT_CODE column is treated as a string
    df['CPT_CODE'] = df['CPT_CODE'].astype(str)

    # Setup Selenium WebDriver
    driver = webdriver.Chrome()  # Adjust the driver if needed (e.g., Edge, Firefox)
    driver.get('https://www.cms.gov/medicare-coverage-database/search.aspx')

    # Accept cookies if prompted
    try:
        wait = WebDriverWait(driver, 2)
        accept_cookies = wait.until(EC.element_to_be_clickable((By.ID, 'acceptCookiesButton')))
        accept_cookies.click()
    except TimeoutException:
        print("No cookies prompt displayed.")

    # Process each CPT code
    all_results = []
    for _, row in df.iterrows():
        cpt_code = row['CPT_CODE']
        results = process_cpt_code(driver, cpt_code)
        if results:  # Check if results are not empty
            all_results.extend(results)

    # Save results to a new Excel file
    if all_results:
        results_df = pd.DataFrame(all_results)
        results_df.to_excel(output_file, index=False)
        print("Results written to Excel successfully!")
    else:
        print("No results to write to Excel.")

    # Close the browser
    driver.quit()
    print("Process completed successfully!")


if __name__ == '__main__':
    main()


No cookies prompt displayed.
Submitted CPT code: L4631
Search results loaded successfully.
Total results for CPT code L4631: 2
Processing article 1 of 2 for CPT code: L4631
Returned to search results for CPT code: L4631
Processing article 2 of 2 for CPT code: L4631
Found denied/non-covered article: L33686
Returned to search results for CPT code: L4631


NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=131.0.6778.205)
Stacktrace:
	GetHandleVerifier [0x00007FF69B90FB05+28789]
	(No symbol) [0x00007FF69B8786E0]
	(No symbol) [0x00007FF69B71592A]
	(No symbol) [0x00007FF69B6EF505]
	(No symbol) [0x00007FF69B796477]
	(No symbol) [0x00007FF69B7AEF42]
	(No symbol) [0x00007FF69B78F1E3]
	(No symbol) [0x00007FF69B75A938]
	(No symbol) [0x00007FF69B75BAA1]
	GetHandleVerifier [0x00007FF69BC4933D+3410093]
	GetHandleVerifier [0x00007FF69BC5E7DD+3497293]
	GetHandleVerifier [0x00007FF69BC52A73+3448803]
	GetHandleVerifier [0x00007FF69B9D7BBB+848171]
	(No symbol) [0x00007FF69B883C3F]
	(No symbol) [0x00007FF69B87F6E4]
	(No symbol) [0x00007FF69B87F87D]
	(No symbol) [0x00007FF69B86ED49]
	BaseThreadInitThunk [0x00007FFAF6A97374+20]
	RtlUserThreadStart [0x00007FFAF741CC91+33]


In [None]:
# ARTICLE ID 

import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

def process_cpt_code(driver, cpt_code):
    wait = WebDriverWait(driver, 20)  # Increased timeout
    results = []  # Initialize the results list

    # Enter the CPT code into the search box
    time.sleep(5)  
    try:
        search_box = wait.until(EC.presence_of_element_located((By.ID, 'tbxSearchBox')))
        search_box.clear()
        search_box.send_keys(cpt_code)
    except TimeoutException:
        print(f"Search box not found for CPT code: {cpt_code}")
        return results
    
    # Click the search button
    submit_button = driver.find_element(By.ID, 'btnSubmitSearch')
    submit_button.click()
    print(f"Submitted CPT code: {cpt_code}")
    
    time.sleep(10)

    # Wait for the search results to load
    try:
        wait.until(EC.visibility_of_element_located((By.ID, 'searchResultsDiv')))
        print("Search results loaded successfully.")
    except TimeoutException:
        print(f"Search results not loaded for CPT code: {cpt_code}")
        return results

    # Get the total number of results
    try:
        total_results_element = driver.find_element(By.ID, 'lblTotalResults')
        total_results = int(total_results_element.text)
        print(f"Total results for CPT code {cpt_code}: {total_results}")
    except ValueError:
        print(f"Invalid total results value for CPT code: {cpt_code}")
        return results

    # Iterate through each result
    for i in range(total_results):
        print(f"Processing article {i + 1} of {total_results} for CPT code: {cpt_code}")
        time.sleep(1)

        # Re-fetch article elements after each iteration
        article_elements = driver.find_elements(By.CLASS_NAME, 'table-title-col')  # Replace with the correct class name
        if i < len(article_elements):
            try:
                # Click on the article
                article_elements[i].click()
                time.sleep(2)  # Allow time for the article to load

                # Handle initial pop-ups (e.g., accept cookies)
                try:
                    accept_button = wait.until(EC.element_to_be_clickable((By.ID, 'btnAcceptLicense')))
                    accept_button.click()
                except TimeoutException:
                    pass

                try:
                    article_content = wait.until(EC.presence_of_element_located((By.ID, 'pnlArticleInformation'))).text
                    if any(keyword in article_content.lower() for keyword in ['denied', 'non-covered', 'not covered']):
                        article_id = driver.find_element(By.ID, 'lblTitleId').text
                        print(f"Found denied/non-covered article: {article_id}")
                        results.append({'cpt_code': cpt_code, 'article_id': article_id})
                except TimeoutException:
                    print(f"Article content not found for CPT code: {cpt_code}")

            finally:
                # After processing the article, go back to the search page (tbxSearchBox)
                print(f"Returned to search page for CPT code: {cpt_code}")
                time.sleep(2)

                # Explicit wait to ensure the page reloads
                search_box = wait.until(EC.presence_of_element_located((By.ID, 'tbxSearchBox')))
                time.sleep(5)
        else:
            print(f"No article element found for index {i}. Skipping.")
    
    return results



def main():
    input_file = 'input.xlsx'
    output_file = 'output.xlsx'

    # Load the input Excel file
    df = pd.read_excel(input_file)

    # Ensure the CPT_CODE column is treated as a string
    df['CPT_CODE'] = df['CPT_CODE'].astype(str)

    # Setup Selenium WebDriver
    driver = webdriver.Chrome()  # Adjust the driver if needed (e.g., Edge, Firefox)
    driver.get('https://www.cms.gov/medicare-coverage-database/search.aspx')

    # Process each CPT code
    all_results = []
    for _, row in df.iterrows():
        cpt_code = row['CPT_CODE']
        results = process_cpt_code(driver, cpt_code)
        if results:  # Check if results are not empty
            all_results.extend(results)

    # Save results to a new Excel file
    if all_results:
        results_df = pd.DataFrame(all_results)
        results_df.to_excel(output_file, index=False)
        print("Results written to Excel successfully!")
    else:
        print("No results to write to Excel.")

    # Close the browser
    driver.quit()
    print("Process completed successfully!")


if __name__ == '__main__':
    main()


In [15]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

def process_cpt_code(driver, cpt_code):
    wait = WebDriverWait(driver, 20)  # Increased timeout
    results = []  # Initialize the results list

    # Enter the CPT code into the search box
    time.sleep(5)  
    try:
        search_box = wait.until(EC.presence_of_element_located((By.ID, 'tbxSearchBox')))
        search_box.clear()
        search_box.send_keys(cpt_code)
    except TimeoutException:
        print(f"Search box not found for CPT code: {cpt_code}")
        return results
    
    # Click the search button
    submit_button = driver.find_element(By.ID, 'btnSubmitSearch')
    submit_button.click()
    print(f"Submitted CPT code: {cpt_code}")
    
    time.sleep(20)

    # Wait for the search results to load
    try:
        wait.until(EC.visibility_of_element_located((By.ID, 'searchResultsDiv')))
        print("Search results loaded successfully.")
    except TimeoutException:
        print(f"Search results not loaded for CPT code: {cpt_code}")
        return results

    # Get the total number of results
    try:
        total_results_element = driver.find_element(By.ID, 'lblTotalResults')
        total_results = int(total_results_element.text)
        print(f"Total results for CPT code {cpt_code}: {total_results}")
    except ValueError:
        print(f"Invalid total results value for CPT code: {cpt_code}")
        return results

    # Iterate through each result
    for i in range(total_results):
        print(f"Processing article {i + 1} of {total_results} for CPT code: {cpt_code}")
        time.sleep(3)

        # Re-fetch article elements after each iteration
        article_elements = driver.find_elements(By.CLASS_NAME, 'table-title-col')  # Replace with the correct class name
        if i <=len(article_elements):
            try:
                # Click on the article
                article_elements[i].click()
                time.sleep(4)  # Allow time for the article to load

                # Handle initial pop-ups (e.g., accept cookies)
                try:
                    accept_button = wait.until(EC.element_to_be_clickable((By.ID, 'btnAcceptLicense')))
                    accept_button.click()
                except TimeoutException:
                    pass

                try:
                    article_content = wait.until(EC.presence_of_element_located((By.ID, 'lblArticleText'))).text
                    if any(keyword in article_content.lower() for keyword in ['denied' 'non-covered', 'not covered','noncovered']):
                        article_id = driver.find_element(By.ID, 'lblTitleId').text
                        print(f"Found denied/non-covered article: {article_id}")
                        results.append({'cpt_code': cpt_code, 'article_id': article_id})
                except TimeoutException:
                    print(f"Article content not found for CPT code: {cpt_code}")

            finally:
                # Return to search results page
                driver.back()
                print(f"Returned to search results for CPT code: {cpt_code}")
                time.sleep(2)

                # Explicit wait to ensure the page reloads
                wait.until(EC.visibility_of_element_located((By.ID, 'searchResultsDiv')))
                time.sleep(5)
        else:
            print(f"No article element found for index {i}. Skipping.")

    
    return results


def main():
    input_file = 'input.xlsx'
    output_file = 'output.xlsx'

    # Load the input Excel file
    df = pd.read_excel(input_file)

    # Ensure the CPT_CODE column is treated as a string
    df['CPT_CODE'] = df['CPT_CODE'].astype(str)

    # Setup Selenium WebDriver
    driver = webdriver.Chrome()  # Adjust the driver if needed (e.g., Edge, Firefox)
    

    # Process each CPT code
    all_results = []
    for _, row in df.iterrows():
        cpt_code = row['CPT_CODE']
        driver.get('https://www.cms.gov/medicare-coverage-database/search.aspx')
        results = process_cpt_code(driver, cpt_code)
        if results:  # Check if results are not empty
            all_results.extend(results)

    results_df = pd.DataFrame(all_results)
    results_df.to_excel(output_file, index=False)
    print("Results written to Excel successfully!")
    
    driver.quit()
    print("Process completed successfully!")


if __name__ == '__main__':
    main()


Submitted CPT code: L4631
Search results loaded successfully.
Total results for CPT code L4631: 2
Processing article 1 of 2 for CPT code: L4631
Returned to search results for CPT code: L4631
Processing article 2 of 2 for CPT code: L4631
Article content not found for CPT code: L4631
Returned to search results for CPT code: L4631
Results written to Excel successfully!
Process completed successfully!


In [17]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

def process_cpt_code(driver, cpt_code):
    wait = WebDriverWait(driver, 20)  # Increased timeout
    results = []  # Initialize the results list

    # Enter the CPT code into the search box
    time.sleep(5)
    try:
        search_box = wait.until(EC.presence_of_element_located((By.ID, 'tbxSearchBox')))
        search_box.clear()
        search_box.send_keys(cpt_code)
    except TimeoutException:
        print(f"Search box not found for CPT code: {cpt_code}")
        results.append({'cpt_code': cpt_code, 'article_id': ''})  # Add CPT code with no article ID
        return results
    
    # Click the search button
    submit_button = driver.find_element(By.ID, 'btnSubmitSearch')
    submit_button.click()
    print(f"Submitted CPT code: {cpt_code}")
    
    time.sleep(10)

    # Wait for the search results to load
    try:
        wait.until(EC.visibility_of_element_located((By.ID, 'searchResultsDiv')))
        print("Search results loaded successfully.")
    except TimeoutException:
        print(f"Search results not loaded for CPT code: {cpt_code}")
        results.append({'cpt_code': cpt_code, 'article_id': ''})  # Add CPT code with no article ID
        return results

    # Get the total number of results
    try:
        total_results_element = driver.find_element(By.ID, 'lblTotalResults')
        total_results = int(total_results_element.text)
        print(f"Total results for CPT code {cpt_code}: {total_results}")
    except ValueError:
        print(f"Invalid total results value for CPT code: {cpt_code}")
        results.append({'cpt_code': cpt_code, 'article_id': ''})  # Add CPT code with no article ID
        return results

    # Iterate through each result
    for i in range(total_results):
        print(f"Processing article {i + 1} of {total_results} for CPT code: {cpt_code}")
        time.sleep(3)

        # Re-fetch article elements after each iteration
        article_elements = driver.find_elements(By.CLASS_NAME, 'table-title-col')
        if i < len(article_elements):
            try:
                # Click on the article
                article_elements[i].click()
                time.sleep(1)  # Allow time for the article to load

                # Handle initial pop-ups (e.g., accept cookies)
                try:
                    accept_button = wait.until(EC.element_to_be_clickable((By.ID, 'btnAcceptLicense')))
                    accept_button.click()
                except TimeoutException:
                    pass

                try:
                    article_content = wait.until(EC.presence_of_element_located((By.ID, 'lblArticleText'))).text
                    if any(keyword in article_content.lower() for keyword in ['denied', 'non-covered', 'not covered', 'noncovered']):
                        article_id = driver.find_element(By.ID, 'lblTitleId').text
                        print(f"Found denied/non-covered article: {article_id}")
                        results.append({'cpt_code': cpt_code, 'article_id': article_id})
                except TimeoutException:
                    print(f"Article content not found for CPT code: {cpt_code}")

            finally:
                # Return to search results page
                driver.back()
                print(f"Returned to search results for CPT code: {cpt_code}")
                time.sleep(0.1)

                # Explicit wait to ensure the page reloads
                wait.until(EC.visibility_of_element_located((By.ID, 'searchResultsDiv')))
                time.sleep(1)
        else:
            print(f"No article element found for index {i}. Skipping.")

    # If no articles are found for this CPT code, ensure it gets added with an empty article ID
    if not results:
        results.append({'cpt_code': cpt_code, 'article_id': ''})

    return results


def main():
    input_file = 'input.xlsx'
    output_file = 'output.xlsx'

    # Load the input Excel file
    df = pd.read_excel(input_file)

    # Ensure the CPT_CODE column is treated as a string
    df['CPT_CODE'] = df['CPT_CODE'].astype(str)

    # Setup Selenium WebDriver
    driver = webdriver.Chrome()  # Adjust the driver if needed (e.g., Edge, Firefox)

    # Process each CPT code
    all_results = []
    for _, row in df.iterrows():
        cpt_code = row['CPT_CODE']
        driver.get('https://www.cms.gov/medicare-coverage-database/search.aspx')
        results = process_cpt_code(driver, cpt_code)
        if results:  # Check if results are not empty
            all_results.extend(results)

    # Save results to a new Excel file
    results_df = pd.DataFrame(all_results)
    results_df.to_excel(output_file, index=False)
    print("Results written to Excel successfully!")

    # Close the driver
    driver.quit()
    print("Process completed successfully!")


if __name__ == '__main__':
    main()


Submitted CPT code: L4631
Search results loaded successfully.
Total results for CPT code L4631: 2
Processing article 1 of 2 for CPT code: L4631
Returned to search results for CPT code: L4631
Processing article 2 of 2 for CPT code: L4631
Article content not found for CPT code: L4631
Returned to search results for CPT code: L4631
Results written to Excel successfully!
Process completed successfully!


In [19]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

def process_cpt_code(driver, cpt_code):
    wait = WebDriverWait(driver, 20)  # Increased timeout
    results = []  # Initialize the results list

    # Enter the CPT code into the search box
    time.sleep(5)
    try:
        search_box = wait.until(EC.presence_of_element_located((By.ID, 'tbxSearchBox')))
        search_box.clear()
        search_box.send_keys(cpt_code)
    except TimeoutException:
        print(f"Search box not found for CPT code: {cpt_code}")
        results.append({'cpt_code': cpt_code, 'article_id': ''})  # Add CPT code with no article ID
        return results
    
    # Click the search button
    submit_button = driver.find_element(By.ID, 'btnSubmitSearch')
    submit_button.click()
    print(f"Submitted CPT code: {cpt_code}")
    
    time.sleep(10)

    # Wait for the search results to load
    try:
        wait.until(EC.visibility_of_element_located((By.ID, 'searchResultsDiv')))
        print("Search results loaded successfully.")
    except TimeoutException:
        print(f"Search results not loaded for CPT code: {cpt_code}")
        results.append({'cpt_code': cpt_code, 'article_id': ''})  # Add CPT code with no article ID
        return results

    # Get the total number of results
    try:
        total_results_element = driver.find_element(By.ID, 'lblTotalResults')
        total_results = int(total_results_element.text)
        print(f"Total results for CPT code {cpt_code}: {total_results}")
    except ValueError:
        print(f"Invalid total results value for CPT code: {cpt_code}")
        results.append({'cpt_code': cpt_code, 'article_id': ''})  # Add CPT code with no article ID
        return results

    # Flag to track if we have processed all articles
    processed_all_articles = False

    # Iterate through each result
    for i in range(total_results):
        if processed_all_articles:
            break  # If all articles are processed, exit the loop
        
        print(f"Processing article {i + 1} of {total_results} for CPT code: {cpt_code}")
        time.sleep(3)

        # Re-fetch article elements after each iteration
        article_elements = driver.find_elements(By.CLASS_NAME, 'table-title-col')
        if i < len(article_elements):
            try:
                # Click on the article
                article_elements[i].click()
                time.sleep(1)  # Allow time for the article to load

                # Handle initial pop-ups (e.g., accept cookies)
                try:
                    accept_button = wait.until(EC.element_to_be_clickable((By.ID, 'btnAcceptLicense')))
                    accept_button.click()
                except TimeoutException:
                    pass

                try:
                    article_content = wait.until(EC.presence_of_element_located((By.ID, 'lblArticleText'))).text
                    if any(keyword in article_content.lower() for keyword in ['denied', 'non-covered', 'not covered', 'noncovered']):
                        article_id = driver.find_element(By.ID, 'lblTitleId').text
                        print(f"Found denied/non-covered article: {article_id}")
                        results.append({'cpt_code': cpt_code, 'article_id': article_id})
                except TimeoutException:
                    print(f"Article content not found for CPT code: {cpt_code}")

            finally:
                # After processing each article, check if all results are processed
                if i + 1 == total_results:
                    processed_all_articles = True  # We have processed all articles
                else:
                    # Return to search results page if not the last article
                    driver.back()
                    print(f"Returned to search results for CPT code: {cpt_code}")
                    time.sleep(0.1)

                    # Explicit wait to ensure the page reloads
                    wait.until(EC.visibility_of_element_located((By.ID, 'searchResultsDiv')))
                    time.sleep(1)
        else:
            print(f"No article element found for index {i}. Skipping.")

    # If no articles are found for this CPT code, ensure it gets added with an empty article ID
    if not results:
        results.append({'cpt_code': cpt_code, 'article_id': ''})

    return results


def main():
    input_file = 'input.xlsx'
    output_file = 'output.xlsx'

    # Load the input Excel file
    df = pd.read_excel(input_file)

    # Ensure the CPT_CODE column is treated as a string
    df['CPT_CODE'] = df['CPT_CODE'].astype(str)

    # Setup Selenium WebDriver
    driver = webdriver.Chrome()  # Adjust the driver if needed (e.g., Edge, Firefox)

    # Process each CPT code
    all_results = []
    for _, row in df.iterrows():
        cpt_code = row['CPT_CODE']
        driver.get('https://www.cms.gov/medicare-coverage-database/search.aspx')
        results = process_cpt_code(driver, cpt_code)
        if results:  # Check if results are not empty
            all_results.extend(results)

    # Save results to a new Excel file
    results_df = pd.DataFrame(all_results)
    results_df.to_excel(output_file, index=False)
    print("Results written to Excel successfully!")

    # Close the driver
    driver.quit()
    print("Process completed successfully!")


if __name__ == '__main__':
    main()


Submitted CPT code: L4631
Search results loaded successfully.
Total results for CPT code L4631: 2
Processing article 1 of 2 for CPT code: L4631
Returned to search results for CPT code: L4631
Processing article 2 of 2 for CPT code: L4631
Article content not found for CPT code: L4631
Submitted CPT code: L3421
Search results loaded successfully.
Total results for CPT code L3421: 0
Results written to Excel successfully!
Process completed successfully!


In [21]:
import time
import pandas as pd
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup

def process_cpt_code(driver, cpt_code):
    wait = WebDriverWait(driver, 20)  # Increased timeout
    results = []  # Initialize the results list

    # Enter the CPT code into the search box
    time.sleep(5)
    try:
        search_box = wait.until(EC.presence_of_element_located((By.ID, 'tbxSearchBox')))
        search_box.clear()
        search_box.send_keys(cpt_code)
    except TimeoutException:
        print(f"Search box not found for CPT code: {cpt_code}")
        results.append({'cpt_code': cpt_code, 'article_id': ''})  # Add CPT code with no article ID
        return results
    
    # Click the search button
    submit_button = driver.find_element(By.ID, 'btnSubmitSearch')
    submit_button.click()
    print(f"Submitted CPT code: {cpt_code}")
    
    time.sleep(10)

    # Wait for the search results to load
    try:
        wait.until(EC.visibility_of_element_located((By.ID, 'searchResultsDiv')))
        print("Search results loaded successfully.")
    except TimeoutException:
        print(f"Search results not loaded for CPT code: {cpt_code}")
        results.append({'cpt_code': cpt_code, 'article_id': ''})  # Add CPT code with no article ID
        return results

    # Get the total number of results
    try:
        total_results_element = driver.find_element(By.ID, 'lblTotalResults')
        total_results = int(total_results_element.text)
        print(f"Total results for CPT code {cpt_code}: {total_results}")
    except ValueError:
        print(f"Invalid total results value for CPT code: {cpt_code}")
        results.append({'cpt_code': cpt_code, 'article_id': ''})  # Add CPT code with no article ID
        return results

    # Flag to track if we have processed all articles
    processed_all_articles = False

    # Keywords to search within the article
    keywords = ['denied', 'non-covered', 'not covered', 'noncovered']

    # Iterate through each result
    for i in range(total_results):
        if processed_all_articles:
            break  # If all articles are processed, exit the loop
        
        print(f"Processing article {i + 1} of {total_results} for CPT code: {cpt_code}")
        time.sleep(3)

        # Re-fetch article elements after each iteration
        article_elements = driver.find_elements(By.CLASS_NAME, 'table-title-col')
        if i < len(article_elements):
            try:
                # Click on the article
                article_elements[i].click()
                time.sleep(1)  # Allow time for the article to load

                # Handle initial pop-ups (e.g., accept cookies)
                try:
                    accept_button = wait.until(EC.element_to_be_clickable((By.ID, 'btnAcceptLicense')))
                    accept_button.click()
                except TimeoutException:
                    pass

                # Wait for the article content to load and extract paragraphs
                wait.until(EC.presence_of_element_located((By.ID, 'h3ArticleGuidanceHeader')))
                
                # Use BeautifulSoup to parse the article page
                soup = BeautifulSoup(driver.page_source, 'html.parser')
                paragraphs = soup.find_all('p')  # Extract all paragraphs
                
                similar_paragraphs = []
                for paragraph in paragraphs:
                    for keyword in keywords:
                        if re.search(r'\b{}\b'.format(re.escape(keyword)), paragraph.get_text(), re.IGNORECASE):
                            similar_paragraphs.append(paragraph.get_text())
                            break  # Stop after finding the first matching keyword in a paragraph

                # If any similar paragraphs are found, save them
                if similar_paragraphs:
                    article_id = driver.find_element(By.ID, 'lblTitleId').text
                    print(f"Found denied/non-covered article: {article_id}")
                    similar_paragraphs_str = '\n\n'.join(similar_paragraphs)
                    results.append({'cpt_code': cpt_code, 'article_id': article_id, 'similar_paragraphs': similar_paragraphs_str})

            except Exception as e:
                print(f"Error processing article {i + 1}: {e}")

            finally:
                # After processing each article, check if all results are processed
                if i + 1 == total_results:
                    processed_all_articles = True  # We have processed all articles
                else:
                    # Return to search results page if not the last article
                    driver.back()
                    print(f"Returned to search results for CPT code: {cpt_code}")
                    time.sleep(0.1)

                    # Explicit wait to ensure the page reloads
                    wait.until(EC.visibility_of_element_located((By.ID, 'searchResultsDiv')))
                    time.sleep(1)
        else:
            print(f"No article element found for index {i}. Skipping.")

    # If no articles are found for this CPT code, ensure it gets added with an empty article ID
    if not results:
        results.append({'cpt_code': cpt_code, 'article_id': '', 'similar_paragraphs': ''})

    return results


def main():
    input_file = 'input.xlsx'
    output_file = 'output.xlsx'

    # Load the input Excel file
    df = pd.read_excel(input_file)

    # Ensure the CPT_CODE column is treated as a string
    df['CPT_CODE'] = df['CPT_CODE'].astype(str)

    # Setup Selenium WebDriver
    driver = webdriver.Chrome()  # Adjust the driver if needed (e.g., Edge, Firefox)

    # Process each CPT code
    all_results = []
    for _, row in df.iterrows():
        cpt_code = row['CPT_CODE']
        driver.get('https://www.cms.gov/medicare-coverage-database/search.aspx')
        results = process_cpt_code(driver, cpt_code)
        if results:  # Check if results are not empty
            all_results.extend(results)

    # Save results to a new Excel file
    results_df = pd.DataFrame(all_results)
    results_df.to_excel(output_file, index=False)
    print("Results written to Excel successfully!")

    # Close the driver
    driver.quit()
    print("Process completed successfully!")


if __name__ == '__main__':
    main()

Submitted CPT code: L4631
Search results loaded successfully.
Total results for CPT code L4631: 2
Processing article 1 of 2 for CPT code: L4631
Found denied/non-covered article: A52457
Returned to search results for CPT code: L4631
Processing article 2 of 2 for CPT code: L4631
Error processing article 2: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF69B90FB05+28789]
	(No symbol) [0x00007FF69B8786E0]
	(No symbol) [0x00007FF69B71592A]
	(No symbol) [0x00007FF69B76930E]
	(No symbol) [0x00007FF69B7695FC]
	(No symbol) [0x00007FF69B7B28A7]
	(No symbol) [0x00007FF69B78F47F]
	(No symbol) [0x00007FF69B7AF654]
	(No symbol) [0x00007FF69B78F1E3]
	(No symbol) [0x00007FF69B75A938]
	(No symbol) [0x00007FF69B75BAA1]
	GetHandleVerifier [0x00007FF69BC4933D+3410093]
	GetHandleVerifier [0x00007FF69BC5E7DD+3497293]
	GetHandleVerifier [0x00007FF69BC52A73+3448803]
	GetHandleVerifier [0x00007FF69B9D7BBB+848171]
	(No symbol) [0x00007FF69B883C3F]
	(No symbol) [0x00007FF69B87F6E4]
	(No symbol) [0x00007FF69B8

In [23]:
import time
import pandas as pd
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup

def process_cpt_code(driver, cpt_code):
    wait = WebDriverWait(driver, 20)  # Increased timeout
    results = []  # Initialize the results list

    # Enter the CPT code into the search box
    time.sleep(5)
    try:
        search_box = wait.until(EC.presence_of_element_located((By.ID, 'tbxSearchBox')))
        search_box.clear()
        search_box.send_keys(cpt_code)
    except TimeoutException:
        print(f"Search box not found for CPT code: {cpt_code}")
        results.append({'cpt_code': cpt_code, 'article_id': '', 'keyword': '', 'similar_paragraphs': ''})  # Add CPT code with no article ID
        return results
    
    # Click the search button
    submit_button = driver.find_element(By.ID, 'btnSubmitSearch')
    submit_button.click()
    print(f"Submitted CPT code: {cpt_code}")
    
    time.sleep(10)

    # Wait for the search results to load
    try:
        wait.until(EC.visibility_of_element_located((By.ID, 'searchResultsDiv')))
        print("Search results loaded successfully.")
    except TimeoutException:
        print(f"Search results not loaded for CPT code: {cpt_code}")
        results.append({'cpt_code': cpt_code, 'article_id': '', 'keyword': '', 'similar_paragraphs': ''})  # Add CPT code with no article ID
        return results

    # Get the total number of results
    try:
        total_results_element = driver.find_element(By.ID, 'lblTotalResults')
        total_results = int(total_results_element.text)
        print(f"Total results for CPT code {cpt_code}: {total_results}")
    except ValueError:
        print(f"Invalid total results value for CPT code: {cpt_code}")
        results.append({'cpt_code': cpt_code, 'article_id': '', 'keyword': '', 'similar_paragraphs': ''})  # Add CPT code with no article ID
        return results

    # Flag to track if we have processed all articles
    processed_all_articles = False

    # Keywords to search within the article
    keywords = ['denied', 'non-covered', 'not covered', 'noncovered']

    # Iterate through each result
    for i in range(total_results):
        if processed_all_articles:
            break  # If all articles are processed, exit the loop
        
        print(f"Processing article {i + 1} of {total_results} for CPT code: {cpt_code}")
        time.sleep(3)

        # Re-fetch article elements after each iteration
        article_elements = driver.find_elements(By.CLASS_NAME, 'table-title-col')
        if i < len(article_elements):
            try:
                # Scroll the article element into view to ensure it's clickable
                driver.execute_script("arguments[0].scrollIntoView();", article_elements[i])
                time.sleep(1)  # Allow some time for the page to settle
                
                # Try clicking the article using JavaScript to avoid interception
                driver.execute_script("arguments[0].click();", article_elements[i])
                time.sleep(1)  # Allow time for the article to load

                # Handle initial pop-ups (e.g., accept cookies)
                try:
                    accept_button = wait.until(EC.element_to_be_clickable((By.ID, 'btnAcceptLicense')))
                    accept_button.click()
                except TimeoutException:
                    pass

                # Wait for the article content to load and extract paragraphs
                wait.until(EC.presence_of_element_located((By.ID, 'h3ArticleGuidanceHeader')))
                
                # Use BeautifulSoup to parse the article page
                soup = BeautifulSoup(driver.page_source, 'html.parser')
                paragraphs = soup.find_all('p')  # Extract all paragraphs
                
                similar_paragraphs = []
                for paragraph in paragraphs:
                    for keyword in keywords:
                        if re.search(r'\b{}\b'.format(re.escape(keyword)), paragraph.get_text(), re.IGNORECASE):
                            similar_paragraphs.append(paragraph.get_text())
                            # Store the keyword that matched
                            results.append({
                                'cpt_code': cpt_code, 
                                'article_id': driver.find_element(By.ID, 'lblTitleId').text, 
                                'keyword': keyword, 
                                'similar_paragraphs': paragraph.get_text()
                            })
                            break  # Stop after finding the first matching keyword in a paragraph

            except Exception as e:
                print(f"Error processing article {i + 1}: {e}")

            finally:
                # After processing each article, check if all results are processed
                if i + 1 == total_results:
                    processed_all_articles = True  # We have processed all articles
                else:
                    # Return to search results page if not the last article
                    driver.back()
                    print(f"Returned to search results for CPT code: {cpt_code}")
                    time.sleep(0.1)

                    # Explicit wait to ensure the page reloads
                    wait.until(EC.visibility_of_element_located((By.ID, 'searchResultsDiv')))
                    time.sleep(1)
        else:
            print(f"No article element found for index {i}. Skipping.")

    # If no articles are found for this CPT code, ensure it gets added with an empty article ID
    if not results:
        results.append({'cpt_code': cpt_code, 'article_id': '', 'keyword': '', 'similar_paragraphs': ''})

    return results


def main():
    input_file = 'input.xlsx'
    output_file = 'output.xlsx'

    # Load the input Excel file
    df = pd.read_excel(input_file)

    # Ensure the CPT_CODE column is treated as a string
    df['CPT_CODE'] = df['CPT_CODE'].astype(str)

    # Setup Selenium WebDriver
    driver = webdriver.Chrome()  # Adjust the driver if needed (e.g., Edge, Firefox)

    # Process each CPT code
    all_results = []
    for _, row in df.iterrows():
        cpt_code = row['CPT_CODE']
        driver.get('https://www.cms.gov/medicare-coverage-database/search.aspx')
        results = process_cpt_code(driver, cpt_code)
        if results:  # Check if results are not empty
            all_results.extend(results)

    # Save results to a new Excel file
    results_df = pd.DataFrame(all_results)
    results_df.to_excel(output_file, index=False)
    print("Results written to Excel successfully!")

    # Close the driver
    driver.quit()
    print("Process completed successfully!")


if __name__ == '__main__':
    main()

Submitted CPT code: L4631
Search results loaded successfully.
Total results for CPT code L4631: 2
Processing article 1 of 2 for CPT code: L4631
Returned to search results for CPT code: L4631
Processing article 2 of 2 for CPT code: L4631
Error processing article 2: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF69B90FB05+28789]
	(No symbol) [0x00007FF69B8786E0]
	(No symbol) [0x00007FF69B71592A]
	(No symbol) [0x00007FF69B76930E]
	(No symbol) [0x00007FF69B7695FC]
	(No symbol) [0x00007FF69B7B28A7]
	(No symbol) [0x00007FF69B78F47F]
	(No symbol) [0x00007FF69B7AF654]
	(No symbol) [0x00007FF69B78F1E3]
	(No symbol) [0x00007FF69B75A938]
	(No symbol) [0x00007FF69B75BAA1]
	GetHandleVerifier [0x00007FF69BC4933D+3410093]
	GetHandleVerifier [0x00007FF69BC5E7DD+3497293]
	GetHandleVerifier [0x00007FF69BC52A73+3448803]
	GetHandleVerifier [0x00007FF69B9D7BBB+848171]
	(No symbol) [0x00007FF69B883C3F]
	(No symbol) [0x00007FF69B87F6E4]
	(No symbol) [0x00007FF69B87F87D]
	(No symbol) [0x00007FF69B86ED49]


In [24]:
import time
import pandas as pd
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup

def process_cpt_code(driver, cpt_code):
    wait = WebDriverWait(driver, 20)  # Increased timeout
    results = []  # Initialize the results list

    # Enter the CPT code into the search box
    time.sleep(5)
    try:
        search_box = wait.until(EC.presence_of_element_located((By.ID, 'tbxSearchBox')))
        search_box.clear()
        search_box.send_keys(cpt_code)
    except TimeoutException:
        print(f"Search box not found for CPT code: {cpt_code}")
        results.append({'cpt_code': cpt_code, 'article_id': '', 'keyword': '', 'similar_paragraphs': ''})  # Add CPT code with no article ID
        return results
    
    # Click the search button
    submit_button = driver.find_element(By.ID, 'btnSubmitSearch')
    submit_button.click()
    print(f"Submitted CPT code: {cpt_code}")
    
    time.sleep(10)

    # Wait for the search results to load
    try:
        wait.until(EC.visibility_of_element_located((By.ID, 'searchResultsDiv')))
        print("Search results loaded successfully.")
    except TimeoutException:
        print(f"Search results not loaded for CPT code: {cpt_code}")
        results.append({'cpt_code': cpt_code, 'article_id': '', 'keyword': '', 'similar_paragraphs': ''})  # Add CPT code with no article ID
        return results

    # Get the total number of results
    try:
        total_results_element = driver.find_element(By.ID, 'lblTotalResults')
        total_results = int(total_results_element.text)
        print(f"Total results for CPT code {cpt_code}: {total_results}")
    except ValueError:
        print(f"Invalid total results value for CPT code: {cpt_code}")
        results.append({'cpt_code': cpt_code, 'article_id': '', 'keyword': '', 'similar_paragraphs': ''})  # Add CPT code with no article ID
        return results

    # Flag to track if we have processed all articles
    processed_all_articles = False

    # Keywords to search within the article
    keywords = ['denied', 'non-covered', 'not covered', 'noncovered']

    # Initialize dictionaries to hold aggregated results
    article_ids = []
    keywords_found = []
    similar_paragraphs = []

    # Iterate through each result
    for i in range(total_results):
        if processed_all_articles:
            break  # If all articles are processed, exit the loop
        
        print(f"Processing article {i + 1} of {total_results} for CPT code: {cpt_code}")
        time.sleep(3)

        # Re-fetch article elements after each iteration
        article_elements = driver.find_elements(By.CLASS_NAME, 'table-title-col')
        if i < len(article_elements):
            try:
                # Scroll the article element into view to ensure it's clickable
                driver.execute_script("arguments[0].scrollIntoView();", article_elements[i])
                time.sleep(1)  # Allow some time for the page to settle
                
                # Try clicking the article using JavaScript to avoid interception
                driver.execute_script("arguments[0].click();", article_elements[i])
                time.sleep(1)  # Allow time for the article to load

                # Handle initial pop-ups (e.g., accept cookies)
                try:
                    accept_button = wait.until(EC.element_to_be_clickable((By.ID, 'btnAcceptLicense')))
                    accept_button.click()
                except TimeoutException:
                    pass

                # Wait for the article content to load and extract paragraphs
                wait.until(EC.presence_of_element_located((By.ID, 'h3ArticleGuidanceHeader')))
                
                # Use BeautifulSoup to parse the article page
                soup = BeautifulSoup(driver.page_source, 'html.parser')
                paragraphs = soup.find_all('p')  # Extract all paragraphs
                
                # Track whether any keywords were found in the article
                article_keywords = []
                article_paragraphs = []
                for paragraph in paragraphs:
                    for keyword in keywords:
                        if re.search(r'\b{}\b'.format(re.escape(keyword)), paragraph.get_text(), re.IGNORECASE):
                            article_keywords.append(keyword)
                            article_paragraphs.append(paragraph.get_text())
                            break  # Stop after finding the first matching keyword in a paragraph

                if article_keywords:
                    article_id = driver.find_element(By.ID, 'lblTitleId').text
                    article_ids.append(article_id)
                    keywords_found.extend(article_keywords)
                    similar_paragraphs.extend(article_paragraphs)

            except Exception as e:
                print(f"Error processing article {i + 1}: {e}")

            finally:
                # After processing each article, check if all results are processed
                if i + 1 == total_results:
                    processed_all_articles = True  # We have processed all articles
                else:
                    # Return to search results page if not the last article
                    driver.back()
                    print(f"Returned to search results for CPT code: {cpt_code}")
                    time.sleep(0.1)

                    # Explicit wait to ensure the page reloads
                    wait.until(EC.visibility_of_element_located((By.ID, 'searchResultsDiv')))
                    time.sleep(1)
        else:
            print(f"No article element found for index {i}. Skipping.")

    # Aggregate results into single row for the CPT code
    if article_ids:
        results.append({
            'cpt_code': cpt_code, 
            'article_id': ', '.join(article_ids), 
            'keyword': ', '.join(keywords_found), 
            'similar_paragraphs': '\n\n'.join(similar_paragraphs)
        })
    else:
        results.append({'cpt_code': cpt_code, 'article_id': '', 'keyword': '', 'similar_paragraphs': ''})

    return results


def main():
    input_file = 'input.xlsx'
    output_file = 'output.xlsx'

    # Load the input Excel file
    df = pd.read_excel(input_file)

    # Ensure the CPT_CODE column is treated as a string
    df['CPT_CODE'] = df['CPT_CODE'].astype(str)

    # Setup Selenium WebDriver
    driver = webdriver.Chrome()  # Adjust the driver if needed (e.g., Edge, Firefox)

    # Process each CPT code
    all_results = []
    for _, row in df.iterrows():
        cpt_code = row['CPT_CODE']
        driver.get('https://www.cms.gov/medicare-coverage-database/search.aspx')
        results = process_cpt_code(driver, cpt_code)
        if results:  # Check if results are not empty
            all_results.extend(results)

    # Save results to a new Excel file
    results_df = pd.DataFrame(all_results)
    results_df.to_excel(output_file, index=False)
    print("Results written to Excel successfully!")

    # Close the driver
    driver.quit()
    print("Process completed successfully!")


if __name__ == '__main__':
    main()


Submitted CPT code: L4631
Search results loaded successfully.
Total results for CPT code L4631: 2
Processing article 1 of 2 for CPT code: L4631
Returned to search results for CPT code: L4631
Processing article 2 of 2 for CPT code: L4631
Error processing article 2: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF69B90FB05+28789]
	(No symbol) [0x00007FF69B8786E0]
	(No symbol) [0x00007FF69B71592A]
	(No symbol) [0x00007FF69B76930E]
	(No symbol) [0x00007FF69B7695FC]
	(No symbol) [0x00007FF69B7B28A7]
	(No symbol) [0x00007FF69B78F47F]
	(No symbol) [0x00007FF69B7AF654]
	(No symbol) [0x00007FF69B78F1E3]
	(No symbol) [0x00007FF69B75A938]
	(No symbol) [0x00007FF69B75BAA1]
	GetHandleVerifier [0x00007FF69BC4933D+3410093]
	GetHandleVerifier [0x00007FF69BC5E7DD+3497293]
	GetHandleVerifier [0x00007FF69BC52A73+3448803]
	GetHandleVerifier [0x00007FF69B9D7BBB+848171]
	(No symbol) [0x00007FF69B883C3F]
	(No symbol) [0x00007FF69B87F6E4]
	(No symbol) [0x00007FF69B87F87D]
	(No symbol) [0x00007FF69B86ED49]


In [25]:
import time
import pandas as pd
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup

def process_cpt_code(driver, cpt_code):
    wait = WebDriverWait(driver, 20)  # Increased timeout
    results = []  # Initialize the results list

    # Enter the CPT code into the search box
    time.sleep(5)
    try:
        search_box = wait.until(EC.presence_of_element_located((By.ID, 'tbxSearchBox')))
        search_box.clear()
        search_box.send_keys(cpt_code)
    except TimeoutException:
        print(f"Search box not found for CPT code: {cpt_code}")
        results.append({'cpt_code': cpt_code, 'article_id': '', 'keyword': '', 'similar_paragraphs': ''})  # Add CPT code with no article ID
        return results
    
    # Click the search button
    submit_button = driver.find_element(By.ID, 'btnSubmitSearch')
    submit_button.click()
    print(f"Submitted CPT code: {cpt_code}")
    
    time.sleep(10)

    # Wait for the search results to load
    try:
        wait.until(EC.visibility_of_element_located((By.ID, 'searchResultsDiv')))
        print("Search results loaded successfully.")
    except TimeoutException:
        print(f"Search results not loaded for CPT code: {cpt_code}")
        results.append({'cpt_code': cpt_code, 'article_id': '', 'keyword': '', 'similar_paragraphs': ''})  # Add CPT code with no article ID
        return results

    # Get the total number of results
    try:
        total_results_element = driver.find_element(By.ID, 'lblTotalResults')
        total_results = int(total_results_element.text)
        print(f"Total results for CPT code {cpt_code}: {total_results}")
    except ValueError:
        print(f"Invalid total results value for CPT code: {cpt_code}")
        results.append({'cpt_code': cpt_code, 'article_id': '', 'keyword': '', 'similar_paragraphs': ''})  # Add CPT code with no article ID
        return results

    # Flag to track if we have processed all articles
    processed_all_articles = False

    # Keywords to search within the article
    keywords = ['denied', 'non-covered', 'not covered', 'noncovered']

    # Initialize dictionaries to hold aggregated results
    article_ids = []
    keywords_found = []
    similar_paragraphs = []

    # Iterate through each result
    for i in range(total_results):
        if processed_all_articles:
            break  # If all articles are processed, exit the loop
        
        print(f"Processing article {i + 1} of {total_results} for CPT code: {cpt_code}")
        time.sleep(3)

        # Re-fetch article elements after each iteration
        article_elements = driver.find_elements(By.CLASS_NAME, 'table-title-col')
        if i < len(article_elements):
            try:
                # Scroll the article element into view to ensure it's clickable
                driver.execute_script("arguments[0].scrollIntoView();", article_elements[i])
                time.sleep(1)  # Allow some time for the page to settle
                
                # Try clicking the article using JavaScript to avoid interception
                driver.execute_script("arguments[0].click();", article_elements[i])
                time.sleep(1)  # Allow time for the article to load

                # Handle initial pop-ups (e.g., accept cookies)
                try:
                    accept_button = wait.until(EC.element_to_be_clickable((By.ID, 'btnAcceptLicense')))
                    accept_button.click()
                except TimeoutException:
                    pass

                # Wait for the article content to load and extract paragraphs
                wait.until(EC.presence_of_element_located((By.ID, 'h3ArticleGuidanceHeader')))
                
                # Use BeautifulSoup to parse the article page
                soup = BeautifulSoup(driver.page_source, 'html.parser')
                paragraphs = soup.find_all('p')  # Extract all paragraphs
                
                # Track whether any keywords were found in the article
                article_keywords = []
                article_paragraphs = []
                for paragraph in paragraphs:
                    for keyword in keywords:
                        if re.search(r'\b{}\b'.format(re.escape(keyword)), paragraph.get_text(), re.IGNORECASE):
                            article_keywords.append(keyword)
                            article_paragraphs.append(paragraph.get_text())
                            break  # Stop after finding the first matching keyword in a paragraph

                if article_keywords:
                    article_id = driver.find_element(By.ID, 'lblTitleId').text
                    article_ids.append(article_id)
                    keywords_found.extend(article_keywords)
                    similar_paragraphs.extend(article_paragraphs)

            except Exception as e:
                print(f"Error processing article {i + 1}: {e}")

            finally:
                # After processing each article, check if all results are processed
                if i + 1 == total_results:
                    processed_all_articles = True  # We have processed all articles
                else:
                    # Return to search results page if not the last article
                    driver.back()
                    print(f"Returned to search results for CPT code: {cpt_code}")
                    time.sleep(0.1)

                    # Explicit wait to ensure the page reloads
                    wait.until(EC.visibility_of_element_located((By.ID, 'searchResultsDiv')))
                    time.sleep(1)
        else:
            print(f"No article element found for index {i}. Skipping.")

    # Aggregate results into single row for the CPT code
    if article_ids:
        results.append({
            'cpt_code': cpt_code, 
            'article_id': ', '.join(article_ids), 
            'keyword': ', '.join(keywords_found), 
            'similar_paragraphs': '\n\n'.join(similar_paragraphs)
        })
    else:
        results.append({'cpt_code': cpt_code, 'article_id': '', 'keyword': '', 'similar_paragraphs': ''})

    return results


def main():
    input_file = 'input.xlsx'
    output_file = 'output.xlsx'

    # Load the input Excel file
    df = pd.read_excel(input_file)

    # Ensure the CPT_CODE column is treated as a string
    df['CPT_CODE'] = df['CPT_CODE'].astype(str).apply(lambda x: x.zfill(5))

    # Setup Selenium WebDriver
    driver = webdriver.Chrome()  # Adjust the driver if needed (e.g., Edge, Firefox)

    # Process each CPT code
    all_results = []
    for _, row in df.iterrows():
        cpt_code = row['CPT_CODE']
        driver.get('abc.com')
        results = process_cpt_code(driver, cpt_code)
        if results:  # Check if results are not empty
            all_results.extend(results)

    # Save results to a new Excel file
    results_df = pd.DataFrame(all_results)
    results_df.to_excel(output_file, index=False)
    print("Results written to Excel successfully!")

    # Close the driver
    driver.quit()
    print("Process completed successfully!")


if __name__ == '__main__':
    main()


Submitted CPT code: L4631
Search results loaded successfully.
Total results for CPT code L4631: 2
Processing article 1 of 2 for CPT code: L4631
Returned to search results for CPT code: L4631
Processing article 2 of 2 for CPT code: L4631
Error processing article 2: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF69B90FB05+28789]
	(No symbol) [0x00007FF69B8786E0]
	(No symbol) [0x00007FF69B71592A]
	(No symbol) [0x00007FF69B76930E]
	(No symbol) [0x00007FF69B7695FC]
	(No symbol) [0x00007FF69B7B28A7]
	(No symbol) [0x00007FF69B78F47F]
	(No symbol) [0x00007FF69B7AF654]
	(No symbol) [0x00007FF69B78F1E3]
	(No symbol) [0x00007FF69B75A938]
	(No symbol) [0x00007FF69B75BAA1]
	GetHandleVerifier [0x00007FF69BC4933D+3410093]
	GetHandleVerifier [0x00007FF69BC5E7DD+3497293]
	GetHandleVerifier [0x00007FF69BC52A73+3448803]
	GetHandleVerifier [0x00007FF69B9D7BBB+848171]
	(No symbol) [0x00007FF69B883C3F]
	(No symbol) [0x00007FF69B87F6E4]
	(No symbol) [0x00007FF69B87F87D]
	(No symbol) [0x00007FF69B86ED49]


NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=131.0.6778.205)
Stacktrace:
	GetHandleVerifier [0x00007FF69B90FB05+28789]
	(No symbol) [0x00007FF69B8786E0]
	(No symbol) [0x00007FF69B71592A]
	(No symbol) [0x00007FF69B6EF505]
	(No symbol) [0x00007FF69B796477]
	(No symbol) [0x00007FF69B7AEF42]
	(No symbol) [0x00007FF69B78F1E3]
	(No symbol) [0x00007FF69B75A938]
	(No symbol) [0x00007FF69B75BAA1]
	GetHandleVerifier [0x00007FF69BC4933D+3410093]
	GetHandleVerifier [0x00007FF69BC5E7DD+3497293]
	GetHandleVerifier [0x00007FF69BC52A73+3448803]
	GetHandleVerifier [0x00007FF69B9D7BBB+848171]
	(No symbol) [0x00007FF69B883C3F]
	(No symbol) [0x00007FF69B87F6E4]
	(No symbol) [0x00007FF69B87F87D]
	(No symbol) [0x00007FF69B86ED49]
	BaseThreadInitThunk [0x00007FFAF6A97374+20]
	RtlUserThreadStart [0x00007FFAF741CC91+33]
