In [104]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from bs4 import BeautifulSoup
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
import json
import re


In [105]:
def search_dogs(URL, location, driver):
    try:
        # Navigating to the website
        driver.get(URL)

        # Giving the website some time to load (to ensure "Dogs" will set in the animal type search bar) 
        time.sleep(2)

        # Waiting for animal type search bar to load and be ready for interaction 
        form_type = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, 'simpleSearchAnimalType'))
        )
        form_type.clear()
        form_type.send_keys("Dogs")

        # Finding the location search bar element and ensure it is ready for interaction 
        form_location = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, 'simpleSearchLocation'))
        )
        form_location.clear()
        form_location.send_keys(location)
        
        # Allowing the location to be set before clicking the search button
        time.sleep(2)

        # Clicking the search button to submit the form
        search_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, 'petSearchBarSearchButton'))
        )
        # search_button = driver.find_element(By.ID, 'petSearchBarSearchButton')
        search_button.click()

    except Exception as e:
        # Handling any errors that occur during the search process
        print(f"An error occurred while searching for adoptable dogs: {str(e)}")

In [94]:
driver = webdriver.Chrome()
URL = "https://www.petfinder.com"
location = "Atlanta, GA"
search_dogs(URL, location, driver)

In [129]:
def get_info(driver):
    try:

        # Ensuring the dog cards have time to load and be clicked upon 
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//a[@class="petCard-link"]'))
        )
        dog_cards = driver.find_elements(By.XPATH, '//a[@class="petCard-link"]')

        # Limit the number of dogs to process
        num_on_page = len(dog_cards)
        processed_dogs = 0

        dog_data_list = []

        # Looping through each dog card to extract information 
        while processed_dogs < num_on_page:
            try:
                # Re-finding the dog cards to avoid stale references
                WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.XPATH, '//a[@class="petCard-link"]'))
                )
                dog_cards = driver.find_elements(By.XPATH, '//a[@class="petCard-link"]')

                # Click on the current dog card
                card = dog_cards[processed_dogs]
                driver.execute_script("arguments[0].scrollIntoView();", card)
                card.click()

                # Wait for the dog's detailed page to load
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, '//h1[@data-test="Pet_Detail_Block"]'))
                )

                # Finding the <pf-ad> element that contains the dogs details 
                pf_ad = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, '//pf-ad[contains(@id, "PetDetail")]'))
                )

                # Extracting 'targeting' attribute and load to a json file 
                targeting_data = pf_ad.get_attribute("targeting")
                dog_info = json.loads(targeting_data)

                # Extracted desired field names from dog_info 
                pet_id = dog_info.get('Pet_ID', 'N/A')
                pet_name = dog_info.get('Pet_Name', 'N/A')
                primary_breed = dog_info.get('Primary_Breed', 'N/A')
                secondary_breed = dog_info.get('Secondary_Breed', 'N/A')
                mixed_breed = dog_info.get('Mixed_Breed', 'N/A')
                age = dog_info.get('Age', 'N/A')
                gender = dog_info.get('Gender', 'N/A')
                size = dog_info.get('Size', 'N/A')
                primary_colour = dog_info.get('Primary_color', 'N/A')
                secondary_colour = dog_info.get('Secondary_color', 'N/A')
                coat_length = dog_info.get('Coat_length', 'N/A')
                shelter_name = dog_info.get('Shelter_Name', 'N/A')
                shelter_id = dog_info.get('Shelter_ID', 'N/A')
                zip_code = dog_info.get('Zip_Code', 'N/A')
                num_photos = dog_info.get('Number_of_photos_in_profile', 'N/A')
                children = dog_info.get('Good_with_children', 'N/A')
                cats = dog_info.get('Good_with_cats', 'N/A')
                other_dogs = dog_info.get('Good_with_dogs', 'N/A')
                other_animals = dog_info.get('Good_with_other_animals', 'N/A')
                fee_waived = dog_info.get('Adoption_fee_waived', 'N/A')

                # Extracting desired field names not in dog_info 
                try:
                    pet_location = driver.find_element('xpath', '//span[@data-test="Pet_Location"]').text
                except:
                    pet_location = 'N/A'

                try:
                    characteristics = driver.find_element('xpath', '//dt[contains(text(), "Characteristics")]/following-sibling::dd').text
                except:
                    characteristics = 'N/A'

                try:
                    house_trained = driver.find_element('xpath', '//dt[contains(text(), "House-trained")]/following-sibling::dd').text
                except:
                    house_trained = 'N/A'

                try:
                    health = driver.find_element('xpath', '//dt[contains(text(), "Health")]/following-sibling::dd').text
                except:
                    health = 'N/A'

                try:
                    adoption_fee = driver.find_element('xpath', '//dt[contains(text(), "Adoption fee")]/following-sibling::dd').text
                except:
                    try:
                        pet_story_section = driver.find_element('xpath', '//div[@data-test="Pet_Story_Section"]').text
                        adoption_fee_search = re.search(r'Adoption fee (\d+)', pet_story_section, re.IGNORECASE)

                        if adoption_fee_search:
                            adoption_fee = adoption_fee_search.group(1)
                        else:
                            adoption_fee = 'N/A'  # If the regex doesn't find a match
                    except:
                        # If neither method works, set the adoption fee to 'N/A'
                        adoption_fee = 'N/A'

                dog_data_list.append({
                    'pet_id': pet_id,
                    'pet_name': pet_name,
                    'primary_breed': primary_breed,
                    'secondary_breed': secondary_breed,
                    'mixed_breed': mixed_breed,
                    'age': age,
                    'gender': gender,
                    'size': size,
                    'primary_colour': primary_colour,
                    'secondary_colour': secondary_colour,
                    'coat_length': coat_length,
                    'shelter_name': shelter_name,
                    'shelter_id': shelter_id,
                    'zip_code': zip_code,
                    'num_photos': num_photos,
                    'children': children,
                    'cats': cats,
                    'other_dogs': other_dogs,
                    'other_animals': other_animals,
                    'fee_waived': fee_waived,
                    'pet_location': pet_location,
                    'characteristics': characteristics,
                    'house_trained': house_trained,
                    'health': health,
                    'adoption_fee': adoption_fee
                })

                # Navigate back to the results page
                driver.back()

                # Wait for the results page to reload
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, '//a[@class="petCard-link"]'))
                )
                
                # Increment the counter for processed dogs
                processed_dogs += 1

            except Exception as e:
                print(f"Error processing dog {processed_dogs}: {e}")
                break
        return dog_data_list
    finally:
        # Close the browser once finished
        driver.quit()

In [130]:
driver = webdriver.Chrome()
URL = "https://www.petfinder.com"
location = "Atlanta, GA"
search_dogs(URL, location, driver)

# Giving the dog page 5 seconds to load before clicking in to each card and extracting the information 
time.sleep(5)

dog_data_list = get_info(driver)


Error processing dog 39: list index out of range


In [120]:
print(dog_data_list)

[{'pet_id': '72096072', 'pet_name': 'Jackson', 'primary_breed': 'Boxer', 'secondary_breed': 'American Bulldog', 'mixed_breed': True, 'age': 'Senior', 'gender': 'Male', 'size': 'Medium', 'primary_colour': 'Brindle', 'secondary_colour': '', 'coat_length': 'Short', 'shelter_name': 'Freedom Bridge Animal Rescue', 'shelter_id': 'GA988', 'zip_code': '30303', 'num_photos': 5, 'children': '', 'cats': False, 'other_dogs': False, 'other_animals': '', 'fee_waived': False, 'pet_location': 'Atlanta , GA', 'characteristics': 'Friendly, Affectionate, Smart, Athletic, Food Motivated, Dog Reactive When On Leash', 'house_trained': 'Yes', 'health': 'Vaccinations up to date, spayed / neutered.', 'adoption_fee': 'N/A'}, {'pet_id': '73529097', 'pet_name': 'Marshmallow', 'primary_breed': 'Great Pyrenees', 'secondary_breed': '', 'mixed_breed': True, 'age': 'Baby', 'gender': 'Female', 'size': 'Large', 'primary_colour': '', 'secondary_colour': '', 'coat_length': '', 'shelter_name': 'Carolina German Shepherd Res

In [121]:
df_dog_info = pd.DataFrame(dog_data_list)

In [122]:
display(df_dog_info)

Unnamed: 0,pet_id,pet_name,primary_breed,secondary_breed,mixed_breed,age,gender,size,primary_colour,secondary_colour,...,children,cats,other_dogs,other_animals,fee_waived,pet_location,characteristics,house_trained,health,adoption_fee
0,72096072,Jackson,Boxer,American Bulldog,True,Senior,Male,Medium,Brindle,,...,,False,False,,False,"Atlanta , GA","Friendly, Affectionate, Smart, Athletic, Food ...",Yes,"Vaccinations up to date, spayed / neutered.",
1,73529097,Marshmallow,Great Pyrenees,,True,Baby,Female,Large,,,...,True,True,True,,False,"Atlanta , GA",,,Vaccinations up to date.,500
2,73491201,Bowie,Labrador Retriever,Mixed Breed,True,Baby,Male,Large,,,...,True,,True,,False,"Atlanta , GA",,Yes,Vaccinations up to date.,450
3,70952437,Ezra - Pending,Italian Greyhound,American Bulldog,True,Adult,Male,Medium,,,...,,True,False,,False,"Atlanta, GA",,Yes,"Vaccinations up to date, spayed / neutered.",
4,73327856,Athena,Great Dane,Black Labrador Retriever,True,Young,Female,Extra Large,Black,,...,True,False,True,False,False,"Atlanta, GA",,Yes,"Vaccinations up to date, spayed / neutered.",$600.00
5,73327823,Sven,Great Dane,Doberman Pinscher,True,Adult,Male,Extra Large,Brown / Chocolate,,...,True,,True,,False,"Atlanta, GA",,Yes,"Vaccinations up to date, spayed / neutered.",$600.00
6,72600711,Storm,Great Dane,,False,Adult,Male,Extra Large,Merle (Blue),,...,False,False,True,,False,"Atlanta, GA",,Yes,"Vaccinations up to date, spayed / neutered.",$250.00
7,72455199,Dio,Black Labrador Retriever,Great Dane,True,Adult,Male,Large,Black,,...,True,,True,,False,"Atlanta, GA","Affectionate, Athletic, Curious, Friendly, Fun...",Yes,"Vaccinations up to date, spayed / neutered.",$300.00
8,59347792,Freddy,Plott Hound,,True,Adult,Male,Medium,Brindle,,...,True,,True,,False,"Atlanta, GA","Friendly, Affectionate, Loyal, Playful, Smart,...",Yes,"Vaccinations up to date, spayed / neutered.",$200.00
9,73641063,Eleanor,Treeing Walker Coonhound,Black Mouth Cur,True,Young,Female,Medium,Brindle,Golden,...,True,False,True,,False,"Atlanta, GA","Affectionate, Playful, Loves, Friendly, Gentle...",Yes,"Vaccinations up to date, spayed / neutered.",$250.00


In [146]:
def get_info3(driver):
    try:

        # Ensuring the dog cards have time to load and be clicked upon 
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//a[@class="petCard-link"]'))
        )
        dog_cards = driver.find_elements(By.XPATH, '//a[@class="petCard-link"]')

        # Limit the number of dogs to process
        max_pages = 10  # You can adjust this number as needed
        page_counter = 0
        processed_dogs = 0

        dog_data_list = []

        # Looping through each dog card to extract information 
        while True:
            # Check if the maximum number of pages has been reached
            if page_counter >= max_pages:
                print(f"Reached the maximum of {max_pages} pages. Stopping.")
                break

            # Your code for processing the current page
            print(f"Processing page {page_counter + 1}...")
            try:
                # Re-finding the dog cards to avoid stale references
                WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.XPATH, '//a[@class="petCard-link"]'))
                )
                dog_cards = driver.find_elements(By.XPATH, '//a[@class="petCard-link"]')

                # Click on the current dog card
                card = dog_cards[processed_dogs]
                driver.execute_script("arguments[0].scrollIntoView();", card)
                card.click()

                # Wait for the dog's detailed page to load
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, '//h1[@data-test="Pet_Detail_Block"]'))
                )

                # Finding the <pf-ad> element that contains the dogs details 
                pf_ad = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, '//pf-ad[contains(@id, "PetDetail")]'))
                )

                # Extracting 'targeting' attribute and load to a json file 
                targeting_data = pf_ad.get_attribute("targeting")
                dog_info = json.loads(targeting_data)

                # Extracted desired field names from dog_info 
                pet_id = dog_info.get('Pet_ID', 'N/A')
                pet_name = dog_info.get('Pet_Name', 'N/A')
                primary_breed = dog_info.get('Primary_Breed', 'N/A')
                secondary_breed = dog_info.get('Secondary_Breed', 'N/A')
                mixed_breed = dog_info.get('Mixed_Breed', 'N/A')
                age = dog_info.get('Age', 'N/A')
                gender = dog_info.get('Gender', 'N/A')
                size = dog_info.get('Size', 'N/A')
                primary_colour = dog_info.get('Primary_color', 'N/A')
                secondary_colour = dog_info.get('Secondary_color', 'N/A')
                coat_length = dog_info.get('Coat_length', 'N/A')
                shelter_name = dog_info.get('Shelter_Name', 'N/A')
                shelter_id = dog_info.get('Shelter_ID', 'N/A')
                zip_code = dog_info.get('Zip_Code', 'N/A')
                num_photos = dog_info.get('Number_of_photos_in_profile', 'N/A')
                children = dog_info.get('Good_with_children', 'N/A')
                cats = dog_info.get('Good_with_cats', 'N/A')
                other_dogs = dog_info.get('Good_with_dogs', 'N/A')
                other_animals = dog_info.get('Good_with_other_animals', 'N/A')
                fee_waived = dog_info.get('Adoption_fee_waived', 'N/A')

                # Extracting desired field names not in dog_info 
                try:
                    pet_location = driver.find_element('xpath', '//span[@data-test="Pet_Location"]').text
                except:
                    pet_location = 'N/A'

                try:
                    characteristics = driver.find_element('xpath', '//dt[contains(text(), "Characteristics")]/following-sibling::dd').text
                except:
                    characteristics = 'N/A'

                try:
                    house_trained = driver.find_element('xpath', '//dt[contains(text(), "House-trained")]/following-sibling::dd').text
                except:
                    house_trained = 'N/A'

                try:
                    health = driver.find_element('xpath', '//dt[contains(text(), "Health")]/following-sibling::dd').text
                except:
                    health = 'N/A'

                try:
                    adoption_fee = driver.find_element('xpath', '//dt[contains(text(), "Adoption fee")]/following-sibling::dd').text
                except:
                    try:
                        pet_story_section = driver.find_element('xpath', '//div[@data-test="Pet_Story_Section"]').text
                        adoption_fee_search = re.search(r'Adoption fee (\d+)', pet_story_section, re.IGNORECASE)

                        if adoption_fee_search:
                            adoption_fee = adoption_fee_search.group(1)
                        else:
                            adoption_fee = 'N/A'  # If the regex doesn't find a match
                    except:
                        # If neither method works, set the adoption fee to 'N/A'
                        adoption_fee = 'N/A'

                dog_data_list.append({
                    'pet_id': pet_id,
                    'pet_name': pet_name,
                    'primary_breed': primary_breed,
                    'secondary_breed': secondary_breed,
                    'mixed_breed': mixed_breed,
                    'age': age,
                    'gender': gender,
                    'size': size,
                    'primary_colour': primary_colour,
                    'secondary_colour': secondary_colour,
                    'coat_length': coat_length,
                    'shelter_name': shelter_name,
                    'shelter_id': shelter_id,
                    'zip_code': zip_code,
                    'num_photos': num_photos,
                    'children': children,
                    'cats': cats,
                    'other_dogs': other_dogs,
                    'other_animals': other_animals,
                    'fee_waived': fee_waived,
                    'pet_location': pet_location,
                    'characteristics': characteristics,
                    'house_trained': house_trained,
                    'health': health,
                    'adoption_fee': adoption_fee
                })

                # Navigate back to the results page
                driver.back()

                # Wait for the results page to reload
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, '//a[@class="petCard-link"]'))
                )

                page_counter += 1
                processed_dogs += 1
                # Click to the next page, if applicable
                next_button = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.XPATH, '//button[span[@class="fieldBtn-label" and text()="Next"]]'))
                )

                driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
                next_button.click()

            except Exception as e:
                print(f"Error processing dog {page_counter}: {e}")
                break
        return dog_data_list
    finally:
        # Close the browser once finished
        driver.quit()

In [147]:
driver = webdriver.Chrome()
URL = "https://www.petfinder.com"
location = "Atlanta, GA"
search_dogs(URL, location, driver)

# Giving the dog page 5 seconds to load before clicking in to each card and extracting the information 
time.sleep(5)

dog_data_list2 = get_info3(driver)

Processing page 1...
CHECK1
CHECK2
CHECK3
CHECK4
CHECK5
CHECK6
CHECK7
Processing page 2...
CHECK1
CHECK2
CHECK3
CHECK4
CHECK5
CHECK6
CHECK7
Processing page 3...
CHECK1
CHECK2
CHECK3
CHECK4
CHECK5
CHECK6
CHECK7
Processing page 4...
CHECK1
CHECK2
CHECK3
CHECK4
CHECK5
CHECK6
CHECK7
Processing page 5...
CHECK1
CHECK2
CHECK3
CHECK4
CHECK5
CHECK6
CHECK7
Processing page 6...
CHECK1
CHECK2
CHECK3
CHECK4
CHECK5
CHECK6
CHECK7
Processing page 7...
CHECK1
CHECK2
CHECK3
CHECK4
CHECK5
CHECK6
CHECK7
Processing page 8...
CHECK1
CHECK2
CHECK3
CHECK4
CHECK5
CHECK6
CHECK7
Processing page 9...
CHECK1
CHECK2
CHECK3
CHECK4
CHECK5
CHECK6
CHECK7
Processing page 10...
CHECK1
CHECK2
CHECK3
CHECK4
CHECK5
CHECK6
CHECK7
Reached the maximum of 10 pages. Stopping.


In [152]:
def get_info4(driver):
    try:
        max_pages = 2  # Adjust the number of pages to process
        page_counter = 0

        dog_data_list = []

        while page_counter < max_pages:
            print(f"Processing page {page_counter + 1}...")

            # Re-finding the dog cards to avoid stale references
            WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, '//a[@class="petCard-link"]'))
            )
            dog_cards = driver.find_elements(By.XPATH, '//a[@class="petCard-link"]')

            # Loop through each dog card on the page
            for i in range(len(dog_cards)):
                try:
                    # Re-fetch the dog cards after navigating back to avoid stale elements
                    dog_cards = driver.find_elements(By.XPATH, '//a[@class="petCard-link"]')
                    card = dog_cards[i]
                    # Click on each dog card
                    driver.execute_script("arguments[0].scrollIntoView();", card)
                    card.click()

                    # Wait for the dog's detail page to load
                    WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located((By.XPATH, '//h1[@data-test="Pet_Detail_Block"]'))
                    )
                    
                    # Finding the <pf-ad> element that contains the dogs details 
                    pf_ad = WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located((By.XPATH, '//pf-ad[contains(@id, "PetDetail")]'))
                    )

                    # Extracting 'targeting' attribute and load to a json file 
                    targeting_data = pf_ad.get_attribute("targeting")
                    dog_info = json.loads(targeting_data)

                    # Extracted desired field names from dog_info 
                    pet_id = dog_info.get('Pet_ID', 'N/A')
                    pet_name = dog_info.get('Pet_Name', 'N/A')
                    primary_breed = dog_info.get('Primary_Breed', 'N/A')
                    secondary_breed = dog_info.get('Secondary_Breed', 'N/A')
                    mixed_breed = dog_info.get('Mixed_Breed', 'N/A')
                    age = dog_info.get('Age', 'N/A')
                    gender = dog_info.get('Gender', 'N/A')
                    size = dog_info.get('Size', 'N/A')
                    primary_colour = dog_info.get('Primary_color', 'N/A')
                    secondary_colour = dog_info.get('Secondary_color', 'N/A')
                    coat_length = dog_info.get('Coat_length', 'N/A')
                    shelter_name = dog_info.get('Shelter_Name', 'N/A')
                    shelter_id = dog_info.get('Shelter_ID', 'N/A')
                    zip_code = dog_info.get('Zip_Code', 'N/A')
                    num_photos = dog_info.get('Number_of_photos_in_profile', 'N/A')
                    children = dog_info.get('Good_with_children', 'N/A')
                    cats = dog_info.get('Good_with_cats', 'N/A')
                    other_dogs = dog_info.get('Good_with_dogs', 'N/A')
                    other_animals = dog_info.get('Good_with_other_animals', 'N/A')
                    fee_waived = dog_info.get('Adoption_fee_waived', 'N/A')

                    # Extracting desired field names not in dog_info 
                    try:
                        pet_location = driver.find_element('xpath', '//span[@data-test="Pet_Location"]').text
                    except:
                        pet_location = 'N/A'

                    try:
                        characteristics = driver.find_element('xpath', '//dt[contains(text(), "Characteristics")]/following-sibling::dd').text
                    except:
                        characteristics = 'N/A'

                    try:
                        house_trained = driver.find_element('xpath', '//dt[contains(text(), "House-trained")]/following-sibling::dd').text
                    except:
                        house_trained = 'N/A'

                    try:
                        health = driver.find_element('xpath', '//dt[contains(text(), "Health")]/following-sibling::dd').text
                    except:
                        health = 'N/A'

                    try:
                        adoption_fee = driver.find_element('xpath', '//dt[contains(text(), "Adoption fee")]/following-sibling::dd').text
                    except:
                        try:
                            pet_story_section = driver.find_element('xpath', '//div[@data-test="Pet_Story_Section"]').text
                            adoption_fee_search = re.search(r'Adoption fee (\d+)', pet_story_section, re.IGNORECASE)

                            if adoption_fee_search:
                                adoption_fee = adoption_fee_search.group(1)
                            else:
                                adoption_fee = 'N/A'  # If the regex doesn't find a match
                        except:
                            # If neither method works, set the adoption fee to 'N/A'
                            adoption_fee = 'N/A'

                    dog_data_list.append({
                        'pet_id': pet_id,
                        'pet_name': pet_name,
                        'primary_breed': primary_breed,
                        'secondary_breed': secondary_breed,
                        'mixed_breed': mixed_breed,
                        'age': age,
                        'gender': gender,
                        'size': size,
                        'primary_colour': primary_colour,
                        'secondary_colour': secondary_colour,
                        'coat_length': coat_length,
                        'shelter_name': shelter_name,
                        'shelter_id': shelter_id,
                        'zip_code': zip_code,
                        'num_photos': num_photos,
                        'children': children,
                        'cats': cats,
                        'other_dogs': other_dogs,
                        'other_animals': other_animals,
                        'fee_waived': fee_waived,
                        'pet_location': pet_location,
                        'characteristics': characteristics,
                        'house_trained': house_trained,
                        'health': health,
                        'adoption_fee': adoption_fee
                    })


                    # Navigate back to the results page
                    driver.back()

                    # Wait for the results page to reload
                    WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located((By.XPATH, '//a[@class="petCard-link"]'))
                    )

                except Exception as e:
                    print(f"Error processing dog {i + 1} on page {page_counter + 1}: {e}")
                    continue  # Continue with the next dog if there's an error

            # Move to the next page
            try:
                # Find and click the Next button
                next_button = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.XPATH, '//button[span[@class="fieldBtn-label" and text()="Next"]]'))
                )
                driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
                next_button.click()

                # Wait for the next page to load
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, '//a[@class="petCard-link"]'))
                )

            except Exception as e:
                print(f"Error moving to the next page: {e}")
                break  # Stop if you can't move to the next page

            # Increment the page counter after successfully moving to the next page
            page_counter += 1

        return dog_data_list

    finally:
        # Close the browser once finished
        driver.quit()

In [153]:
driver = webdriver.Chrome()
URL = "https://www.petfinder.com"
location = "Atlanta, GA"
search_dogs(URL, location, driver)

# Giving the dog page 5 seconds to load before clicking in to each card and extracting the information 
time.sleep(5)

dog_data_list2 = get_info4(driver)

Processing page 1...
Error processing dog 40 on page 1: list index out of range
Processing page 2...
Error processing dog 40 on page 2: list index out of range


In [None]:
dog_cards = driver.find_elements(By.XPATH, '//a[@class="petCard-link"]')

# Limit the number of dogs to process
max_dogs = 10

# Step 2: Loop through each card, click on it, extract info, and go back (limit to 10)
for i, card in enumerate(dog_cards[:max_dogs]):
    # Scroll into view and click on the card
    driver.execute_script("arguments[0].scrollIntoView();", card)
    card.click()

    # Step 3: Wait for the dog's detailed page to load
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//h1[@data-test="Pet_Detail_Block"]'))
    )

    # Step 4: Extract relevant information with try-except blocks to handle missing data
    try:
        pet_name = driver.find_element(By.XPATH, '//span[@data-test="Pet_Name"]').text
    except:
        pet_name = 'N/A'

    try:
        pet_breeds = driver.find_element(By.XPATH, '//span[@data-test="Pet_Breeds"]').text
    except:
        pet_breeds = 'N/A'

    try:
        pet_location = driver.find_element(By.XPATH, '//span[@data-test="Pet_Location"]').text
    except:
        pet_location = 'N/A'

    try:
        pet_age = driver.find_element(By.XPATH, '//span[@data-test="Pet_Age"]').text
    except:
        pet_age = 'N/A'

    try:
        pet_sex = driver.find_element(By.XPATH, '//span[@data-test="Pet_Sex"]').text
    except:
        pet_sex = 'N/A'

    try:
        pet_size = driver.find_element(By.XPATH, '//span[@data-test="Pet_Full_Grown_Size"]').text
    except:
        pet_size = 'N/A'

    try:
        pet_color = driver.find_element(By.XPATH, '//span[@data-test="Pet_Primary_Color"]').text
    except:
        pet_color = 'N/A'

    try:
        characteristics = driver.find_element(By.XPATH, '//dt[contains(text(), "Characteristics")]/following-sibling::dd').text
    except:
        characteristics = 'N/A'

    try:
        coat_length = driver.find_element(By.XPATH, '//dt[contains(text(), "Coat length")]/following-sibling::dd').text
    except:
        coat_length = 'N/A'

    try:
        house_trained = driver.find_element(By.XPATH, '//dt[contains(text(), "House-trained")]/following-sibling::dd').text
    except:
        house_trained = 'N/A'

    try:
        health = driver.find_element(By.XPATH, '//dt[contains(text(), "Health")]/following-sibling::dd').text
    except:
        health = 'N/A'

    try:
        prefers_without = driver.find_element(By.XPATH, '//dt[contains(text(), "Prefers a home without")]/following-sibling::dd').text
    except:
        prefers_without = 'N/A'

    # Print or store the extracted details
    print(f"Pet Name: {pet_name}")
    print(f"Breeds: {pet_breeds}")
    print(f"Location: {pet_location}")
    print(f"Age: {pet_age}")
    print(f"Sex: {pet_sex}")
    print(f"Size: {pet_size}")
    print(f"Color: {pet_color}")
    print(f"Characteristics: {characteristics}")
    print(f"Coat Length: {coat_length}")
    print(f"House-Trained: {house_trained}")
    print(f"Health: {health}")
    print(f"Prefers a Home Without: {prefers_without}")
    print("=" * 40)

    # Step 5: Navigate back to the search results page
    driver.back()

    # Wait for the results page to reload and find the dog cards again
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//a[@class="petCard-link"]'))
    )
    
    # Re-find the cards since the page was refreshed after clicking back
    dog_cards = driver.find_elements(By.XPATH, '//a[@class="petCard-link"]')


Pet Name: Jackson
Breeds: Boxer & American Bulldog Mix
Location: Atlanta , GA
Age: Senior
Sex: Male
Size: Medium
Color: Brindle
Characteristics: Friendly, Affectionate, Smart, Athletic, Food Motivated, Dog Reactive When On Leash
Coat Length: Short
House-Trained: Yes
Health: Vaccinations up to date, spayed / neutered.
Prefers a Home Without: Other dogs, cats.


StaleElementReferenceException: Message: stale element reference: stale element not found
  (Session info: chrome=129.0.6668.103); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#stale-element-reference-exception
Stacktrace:
0   chromedriver                        0x00000001032f8500 cxxbridge1$str$ptr + 1917112
1   chromedriver                        0x00000001032f0890 cxxbridge1$str$ptr + 1885256
2   chromedriver                        0x0000000102f00538 cxxbridge1$string$len + 89424
3   chromedriver                        0x0000000102f0ded8 cxxbridge1$string$len + 145136
4   chromedriver                        0x0000000102f05c80 cxxbridge1$string$len + 111768
5   chromedriver                        0x0000000102f05d94 cxxbridge1$string$len + 112044
6   chromedriver                        0x0000000102f045f8 cxxbridge1$string$len + 106000
7   chromedriver                        0x0000000102f0700c cxxbridge1$string$len + 116772
8   chromedriver                        0x0000000102f7f7ac cxxbridge1$string$len + 610244
9   chromedriver                        0x0000000102f7eb7c cxxbridge1$string$len + 607124
10  chromedriver                        0x0000000102f39374 cxxbridge1$string$len + 322444
11  chromedriver                        0x0000000102f39fc4 cxxbridge1$string$len + 325596
12  chromedriver                        0x00000001032bfd2c cxxbridge1$str$ptr + 1685732
13  chromedriver                        0x00000001032c4530 cxxbridge1$str$ptr + 1704168
14  chromedriver                        0x00000001032a4e08 cxxbridge1$str$ptr + 1575360
15  chromedriver                        0x00000001032c4e00 cxxbridge1$str$ptr + 1706424
16  chromedriver                        0x0000000103295f94 cxxbridge1$str$ptr + 1514316
17  chromedriver                        0x00000001032e162c cxxbridge1$str$ptr + 1823204
18  chromedriver                        0x00000001032e17ac cxxbridge1$str$ptr + 1823588
19  chromedriver                        0x00000001032f0530 cxxbridge1$str$ptr + 1884392
20  libsystem_pthread.dylib             0x00000001804eb034 _pthread_start + 136
21  libsystem_pthread.dylib             0x00000001804e5e3c thread_start + 8


In [90]:
driver = webdriver.Chrome()
URL = "https://www.petfinder.com"
location = "Atlanta, GA"
search_dogs(URL, location, driver)

# Giving the dog page 5 seconds to load before clicking in to each card and extracting the information 
time.sleep(5)

get_info(driver)

KeyboardInterrupt: 

In [74]:
def adoptable_dogs(URL, location):
    try:
        # Setting up webdriver
        driver = webdriver.Chrome()

        # Navigating to the website
        driver.get(URL)

        # Giving website some time to load (fixing time)
        time.sleep(2)

        # Having the driver wait animal type search bar is visible (searching for element by its id)
        form_type = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.ID, 'simpleSearchAnimalType'))
        )
        # Clearing the search bar just in case there is previous text 
        form_type.clear()
        # Inputing "Dogs" into the search bar for animal type 
        form_type.send_keys("Dogs")

        
        # Searching for the location search bar element by its id
        form_location = driver.find_element(By.ID, 'simpleSearchLocation') 

        # form_location = WebDriverWait(driver, 10).until(
        #     EC.element_to_be_clickable((By.ID, 'simpleSearchLocation'))
        # )
        # Clearing the search bar just in case there is previous text 
        form_location.clear()
        # Inputing the location parameter into the search bar for animal type 
        form_location.send_keys(location)
        
        # Giving the location a second to settle into the search bar  
        time.sleep(2)


        search_button = driver.find_element(By.ID, 'petSearchBarSearchButton') 
        # Click the search button
        # search_button = WebDriverWait(driver, 10).until(
        #     EC.element_to_be_clickable((By.ID, 'petSearchBarSearchButton'))
        # )
        search_button.click()

    except Exception as e:
        # Custom exception message with the actual error that occurred
        print(f"An error occurred while trying to search for adoptable dogs: {str(e)}")


In [68]:
def get_info(Driver):
# Step 1: Locate the dog cards
    dog_cards = driver.find_elements(By.XPATH, '//a[@class="petCard-link"]')

    # Limit the number of dogs to process
    max_dogs = 10
    processed_dogs = 0  # Keep track of how many dogs have been processed

    # Step 2: Loop through each card, click on it, extract info, and go back (limit to 10)
    while processed_dogs < max_dogs:
        try:
            # Step 2a: Re-locate the dog cards on each loop iteration to avoid stale references
            dog_cards = driver.find_elements(By.XPATH, '//a[@class="petCard-link"]')

            # Click on the dog card based on the index
            card = dog_cards[processed_dogs]
            
            # Scroll into view and click on the card
            driver.execute_script("arguments[0].scrollIntoView();", card)
            card.click()

            # Step 3: Wait for the dog's detailed page to load
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, '//h1[@data-test="Pet_Detail_Block"]'))
            )

            # Step 4: Extract relevant information with try-except blocks to handle missing data
            try:
                pet_name = driver.find_element(By.XPATH, '//span[@data-test="Pet_Name"]').text
            except:
                pet_name = 'N/A'

            try:
                pet_breeds = driver.find_element(By.XPATH, '//span[@data-test="Pet_Breeds"]').text
            except:
                pet_breeds = 'N/A'

            try:
                pet_location = driver.find_element(By.XPATH, '//span[@data-test="Pet_Location"]').text
            except:
                pet_location = 'N/A'

            try:
                pet_age = driver.find_element(By.XPATH, '//span[@data-test="Pet_Age"]').text
            except:
                pet_age = 'N/A'

            try:
                pet_sex = driver.find_element(By.XPATH, '//span[@data-test="Pet_Sex"]').text
            except:
                pet_sex = 'N/A'

            try:
                pet_size = driver.find_element(By.XPATH, '//span[@data-test="Pet_Full_Grown_Size"]').text
            except:
                pet_size = 'N/A'

            try:
                pet_color = driver.find_element(By.XPATH, '//span[@data-test="Pet_Primary_Color"]').text
            except:
                pet_color = 'N/A'

            try:
                characteristics = driver.find_element(By.XPATH, '//dt[contains(text(), "Characteristics")]/following-sibling::dd').text
            except:
                characteristics = 'N/A'

            try:
                coat_length = driver.find_element(By.XPATH, '//dt[contains(text(), "Coat length")]/following-sibling::dd').text
            except:
                coat_length = 'N/A'

            try:
                house_trained = driver.find_element(By.XPATH, '//dt[contains(text(), "House-trained")]/following-sibling::dd').text
            except:
                house_trained = 'N/A'

            try:
                health = driver.find_element(By.XPATH, '//dt[contains(text(), "Health")]/following-sibling::dd').text
            except:
                health = 'N/A'

            try:
                prefers_without = driver.find_element(By.XPATH, '//dt[contains(text(), "Prefers a home without")]/following-sibling::dd').text
            except:
                prefers_without = 'N/A'

            try:
                prefers_with = driver.find_element(By.XPATH, '//dt[contains(text(), "Good in a home with")]/following-sibling::dd').text
            except:
                prefers_with = 'N/A'

            try:
                adoption_fee = driver.find_element(By.XPATH, '//dt[contains(text(), "Adoption fee")]/following-sibling::dd').text
            except:
                adoption_fee = 'N/A'

            # Print or store the extracted details
            print(f"Pet Name: {pet_name}")
            print(f"Breeds: {pet_breeds}")
            print(f"Location: {pet_location}")
            print(f"Age: {pet_age}")
            print(f"Sex: {pet_sex}")
            print(f"Size: {pet_size}")
            print(f"Color: {pet_color}")
            print(f"Characteristics: {characteristics}")
            print(f"Coat Length: {coat_length}")
            print(f"House-Trained: {house_trained}")
            print(f"Health: {health}")
            print(f"Prefers a Home Without: {prefers_without}")
            print("=" * 40)

            # Step 5: Navigate back to the search results page
            driver.back()

            # Wait for the results page to reload
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, '//a[@class="petCard-link"]'))
            )

            # Increment the processed dogs counter
            processed_dogs += 1

        except Exception as e:
            print(f"Error processing dog {processed_dogs}: {e}")
            break

# Close the driver once done
driver.quit()

Pet Name: Jackson
Breeds: Boxer & American Bulldog Mix
Location: Atlanta , GA
Age: Senior
Sex: Male
Size: Medium
Color: Brindle
Characteristics: Friendly, Affectionate, Smart, Athletic, Food Motivated, Dog Reactive When On Leash
Coat Length: Short
House-Trained: Yes
Health: Vaccinations up to date, spayed / neutered.
Prefers a Home Without: Other dogs, cats.
Pet Name: Marshmallow
Breeds: Great Pyrenees Mix
Location: Atlanta , GA
Age: Puppy
Sex: Female
Size: Large
Color: N/A
Characteristics: N/A
Coat Length: N/A
House-Trained: N/A
Health: Vaccinations up to date.
Prefers a Home Without: N/A
Pet Name: Candy
Breeds: Chiweenie Mix
Location: Atlanta , GA
Age: Puppy
Sex: Female
Size: Small
Color: N/A
Characteristics: N/A
Coat Length: N/A
House-Trained: N/A
Health: Vaccinations up to date.
Prefers a Home Without: N/A
Pet Name: Ezra - Pending
Breeds: Italian Greyhound & American Bulldog Mix
Location: Atlanta, GA
Age: Adult
Sex: Male
Size: Medium
Color: N/A
Characteristics: N/A
Coat Length: N/A

In [73]:
# Step 1: Locate the dog cards
dog_cards = driver.find_elements(By.XPATH, '//a[@class="petCard-link"]')

# Limit the number of dogs to process (adjust as needed)
max_dogs = 10
processed_dogs = 0  # Counter to track processed dogs

# Step 2: Loop through each card, click on it, extract info, and go back (limit to max_dogs)
while processed_dogs < max_dogs:
    try:
        # Step 2a: Re-locate the dog cards to avoid stale elements
        dog_cards = driver.find_elements(By.XPATH, '//a[@class="petCard-link"]')

        # Click on the dog card based on the index
        card = dog_cards[processed_dogs]

        # Scroll into view and click on the card
        driver.execute_script("arguments[0].scrollIntoView();", card)
        card.click()

        # Step 3: Wait for the dog's detailed page to load
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//h1[@data-test="Pet_Detail_Block"]'))
        )

        # Step 4: Locate the <pf-ad> element containing dog details
        pf_ad = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//pf-ad[contains(@id, "PetDetail")]'))
        )

        # Step 5: Extract the 'targeting' attribute and parse it
        targeting_data = pf_ad.get_attribute("targeting")
        decoded_data = html.unescape(targeting_data)

        # Parse the decoded data into a dictionary
        dog_info = json.loads(decoded_data)

        # Step 6: Extract individual fields from the dictionary (using get() for missing data)
        pet_id = dog_info.get('Pet_ID', 'N/A')
        pet_name = dog_info.get('Pet_Name', 'N/A')
        pet_type = dog_info.get('Type', 'N/A')
        species = dog_info.get('Species', 'N/A')
        primary_breed = dog_info.get('Primary_Breed', 'N/A')
        secondary_breed = dog_info.get('Secondary_Breed', 'N/A')
        mixed_breed = dog_info.get('Mixed_Breed', 'N/A')
        age = dog_info.get('Age', 'N/A')
        gender = dog_info.get('Gender', 'N/A')
        characteristics = dog_info.get('Characteristics', 'N/A')
        size = dog_info.get('Size', 'N/A')
        primary_color = dog_info.get('Primary_color', 'N/A')
        coat_length = dog_info.get('Coat_length', 'N/A')
        shelter_name = dog_info.get('Shelter_Name', 'N/A')
        shelter_id = dog_info.get('Shelter_ID', 'N/A')
        zip_code = dog_info.get('Zip_Code', 'N/A')
        adoption_fee = dog_info.get('Adoption_fee', 'N/A')

        # Print the extracted information (you can save or store it as needed)
        print(f"Pet ID: {pet_id}")
        print(f"Pet Name: {pet_name}")
        print(f"Type: {pet_type}")
        print(f"Species: {species}")
        print(f"Primary Breed: {primary_breed}")
        print(f"Secondary Breed: {secondary_breed}")
        print(f"Mixed Breed: {mixed_breed}")
        print(f"Age: {age}")
        print(f"Gender: {gender}")
        print(f"Characteristics: {characteristics}")
        print(f"Size: {size}")
        print(f"Primary Color: {primary_color}")
        print(f"Coat Length: {coat_length}")
        print(f"Shelter Name: {shelter_name}")
        print(f"Shelter ID: {shelter_id}")
        print(f"Zip Code: {zip_code}")
        print(f"Adoption Fee: {adoption_fee}")
        print("=" * 40)

        # Step 7: Navigate back to the search results page
        driver.back()

        # Wait for the search results page to reload
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//a[@class="petCard-link"]'))
        )

        # Increment the processed dogs counter
        processed_dogs += 1

    except Exception as e:
        print(f"Error processing dog {processed_dogs}: {e}")
        break

# Close the driver once done
driver.quit()

Error processing dog 0: 'str' object has no attribute 'unescape'


In [75]:

# Step 1: Locate the dog cards
dog_cards = driver.find_elements(By.XPATH, '//a[@class="petCard-link"]')

# Limit the number of dogs to process (adjust as needed)
max_dogs = 10
processed_dogs = 0  # Counter to track processed dogs

# Step 2: Loop through each card, click on it, extract info, and go back (limit to max_dogs)
while processed_dogs < max_dogs:
    try:
        # Step 2a: Re-locate the dog cards to avoid stale elements
        dog_cards = driver.find_elements(By.XPATH, '//a[@class="petCard-link"]')

        # Click on the dog card based on the index
        card = dog_cards[processed_dogs]

        # Scroll into view and click on the card
        driver.execute_script("arguments[0].scrollIntoView();", card)
        card.click()

        # Step 3: Wait for the dog's detailed page to load
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//h1[@data-test="Pet_Detail_Block"]'))
        )

        # Step 4: Locate the <pf-ad> element containing dog details
        pf_ad = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//pf-ad[contains(@id, "PetDetail")]'))
        )

        # Step 5: Extract the 'targeting' attribute and parse it (no unescape needed)
        targeting_data = pf_ad.get_attribute("targeting")

        # Parse the targeting data directly into a dictionary
        dog_info = json.loads(targeting_data)

        # Step 6: Extract individual fields from the dictionary (using get() for missing data)
        pet_id = dog_info.get('Pet_ID', 'N/A')
        pet_name = dog_info.get('Pet_Name', 'N/A')
        pet_type = dog_info.get('Type', 'N/A')
        species = dog_info.get('Species', 'N/A')
        primary_breed = dog_info.get('Primary_Breed', 'N/A')
        secondary_breed = dog_info.get('Secondary_Breed', 'N/A')
        mixed_breed = dog_info.get('Mixed_Breed', 'N/A')
        age = dog_info.get('Age', 'N/A')
        gender = dog_info.get('Gender', 'N/A')
        characteristics = dog_info.get('Characteristics', 'N/A')
        size = dog_info.get('Size', 'N/A')
        primary_color = dog_info.get('Primary_color', 'N/A')
        coat_length = dog_info.get('Coat_length', 'N/A')
        shelter_name = dog_info.get('Shelter_Name', 'N/A')
        shelter_id = dog_info.get('Shelter_ID', 'N/A')
        zip_code = dog_info.get('Zip_Code', 'N/A')


            try:
                pet_story_section = driver.find_element(By.XPATH, '//div[@data-test="Pet_Story_Section"]').text
                # Use regular expression to find "Adoption fee" followed by a number
                adoption_fee_search = re.search(r'Adoption fee (\d+)', pet_story_section, re.IGNORECASE)
                if adoption_fee_search:
                    adoption_fee = adoption_fee_search.group(1)
                else:
                    adoption_fee = "Not Available"
            except:
                adoption_fee = "Not Available"

        
        try:
            adoption_fee = driver.find_element(By.XPATH, '//dt[contains(text(), "Adoption fee")]/following-sibling::dd').text
        except:
            adoption_fee = 'N/A'


        # Print the extracted information (you can save or store it as needed)
        print(f"Pet ID: {pet_id}")
        print(f"Pet Name: {pet_name}")
        print(f"Type: {pet_type}")
        print(f"Species: {species}")
        print(f"Primary Breed: {primary_breed}")
        print(f"Secondary Breed: {secondary_breed}")
        print(f"Mixed Breed: {mixed_breed}")
        print(f"Age: {age}")
        print(f"Gender: {gender}")
        print(f"Characteristics: {characteristics}")
        print(f"Size: {size}")
        print(f"Primary Color: {primary_color}")
        print(f"Coat Length: {coat_length}")
        print(f"Shelter Name: {shelter_name}")
        print(f"Shelter ID: {shelter_id}")
        print(f"Zip Code: {zip_code}")
        print("=" * 40)

        # Step 7: Navigate back to the search results page
        driver.back()

        # Wait for the search results page to reload
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//a[@class="petCard-link"]'))
        )

        # Increment the processed dogs counter
        processed_dogs += 1

    except Exception as e:
        print(f"Error processing dog {processed_dogs}: {e}")
        break

# Close the driver once done
driver.quit()

Pet ID: 72096072
Pet Name: Jackson
Type: Dog
Species: Dog
Primary Breed: Boxer
Secondary Breed: American Bulldog
Mixed Breed: True
Age: Senior
Gender: Male
Characteristics: House trained,Spay/Neuter,Shots Current
Size: Medium
Primary Color: Brindle
Coat Length: Short
Shelter Name: Freedom Bridge Animal Rescue
Shelter ID: GA988
Zip Code: 30303
Adoption Fee: consumer.animal_detail.adoption_fee.label.fee
Pet ID: 73529097
Pet Name: Marshmallow
Type: Dog
Species: Dog
Primary Breed: Great Pyrenees
Secondary Breed: 
Mixed Breed: True
Age: Baby
Gender: Female
Characteristics: Shots Current
Size: Large
Primary Color: 
Coat Length: 
Shelter Name: Carolina German Shepherd Rescue
Shelter ID: SC425
Zip Code: 30303
Adoption Fee: consumer.animal_detail.adoption_fee.label.fee
Pet ID: 73491201
Pet Name: Bowie
Type: Dog
Species: Dog
Primary Breed: Labrador Retriever
Secondary Breed: Mixed Breed
Mixed Breed: True
Age: Baby
Gender: Male
Characteristics: House trained,Shots Current
Size: Large
Primary Colo

In [None]:
from selenium.common.exceptions import StaleElementReferenceException

def get_info5(driver):
    try:
        # Maximize the browser window
        driver.maximize_window()

        max_pages = 2  # Number of pages to process
        page_counter = 0

        dog_data_list = []

        while page_counter < max_pages:
            print(f"Processing page {page_counter + 1}...")

            # Waiting for the dog cards to be clickable
            WebDriverWait(driver, 20).until(
                EC.element_to_be_clickable((By.XPATH, '//a[@class="petCard-link"]'))
            )

            dog_cards = driver.find_elements(By.XPATH, '//a[@class="petCard-link"]')
            print(f"Found {len(dog_cards)} dog cards on page {page_counter + 1}.")

            # Looping through each dog card on the page
            for i in range(len(dog_cards)):
                try:
                    # Re-fetch dog cards after navigating back to avoid stale elements
                    dog_cards = driver.find_elements(By.XPATH, '//a[@class="petCard-link"]')
                    
                    if i >= len(dog_cards):
                        print(f"Skipping dog {i + 1}: card not found after refetching.")
                        continue

                    # Setting the current card
                    card = dog_cards[i]
                    print(f"Processing dog {i + 1}: {card.text}")

                    # Check if the card is visible and clickable
                    if card.is_displayed() and card.is_enabled():
                        print(f"Dog {i + 1} is visible and clickable.")
                    else:
                        print(f"Dog {i + 1} is not visible or clickable. Skipping.")
                        continue

                    # Click the card
                    card.click()

                    # Wait for the dog's detail page to load
                    WebDriverWait(driver, 20).until(
                        EC.presence_of_element_located((By.XPATH, '//pf-ad[contains(@id, "PetDetail")]'))
                    )

                    # Process the dog's details...

                    # Navigate back to the list of dogs
                    driver.back()

                    # Ensure the page is fully loaded before refetching
                    WebDriverWait(driver, 40).until(
                        EC.element_to_be_clickable((By.XPATH, '//a[@class="petCard-link"]'))
                    )

                    # Optional: Add a small wait to ensure all elements are properly loaded
                    time.sleep(2)

                except StaleElementReferenceException:
                    print(f"Stale element encountered for dog {i + 1}, refetching...")
                    continue  # If the element is stale, continue to the next dog
                except Exception as e:
                    print(f"Error processing dog {i + 1}: {e}")
                    continue

            # Move to the next page
            try:
                next_button = WebDriverWait(driver, 20).until(
                    EC.element_to_be_clickable((By.XPATH, '//button[span[@class="fieldBtn-label" and text()="Next"]]'))
                )
                driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
                next_button.click()

                # Wait for the next page's dog cards to load
                WebDriverWait(driver, 20).until(
                    EC.presence_of_element_located((By.XPATH, '//a[@class="petCard-link"]'))
                )
            except Exception as e:
                print(f"Error moving to the next page: {e}")
                break

            # Increment the page counter after successfully moving to the next page
            page_counter += 1

        # Return the collected dog data
        return dog_data_list

    finally:
        driver.quit()
