In [1]:
from selenium import webdriver
from selenium.webdriver import Chrome, ChromeOptions
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import  WebDriverWait
import pandas as pd 
import time
import re
import os
import requests
from datetime import datetime

In [2]:
def save_full_screenshot(driver, regnumber=""):
    try:
        os.makedirs("screenshots", exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"screenshots/{regnumber}_{timestamp}.png"
        driver.save_screenshot(filename)
        print(f"üì∏ Screenshot saved: {filename}")
    except Exception as e:
        print(f"‚ùå Screenshot failed: {e}")

In [3]:
def scrape(path):
    options = ChromeOptions()
    options.headless=True
    # make use of chrome for scraping
    service = Service(ChromeDriverManager().install())
    # create a driver using chrome
    driver = Chrome(service=service, options=options)
    # run the driver through url
    driver.get(path)
    
    
    # get the username tab
    try:
        provided_u_name = "Mohsin7865"
        user_name = WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH,'.//input[@name="ctl00$ContentPlaceHolder1$txtUsername"]')))   
        user_name.send_keys(provided_u_name)
    except Exception as e:
        print(f"No username tab found and the error is {e}")
    
    # get password tab
    try:
        provided_pass = "Muhssan5687"
        password = WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH, './/input[@name="ctl00$ContentPlaceHolder1$txtPassword"]')))
        driver.execute_script("arguments[0].scrollIntoView();", password)
        password.send_keys(provided_pass)
    except Exception as e:
        print(f"No password tab found and the error is {e}") 
    
    # get the check done
    try:
        check = WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH, './/input[@name="ctl00$ContentPlaceHolder1$btnLogin"]')))
        if check:
            driver.execute_script("arguments[0].scrollIntoView();", check)
            check.click()
        
    except Exception as e:
        print(f"No check tab found and the error is {e}")
    # ======================================================
    
    # total cars --> done
    try:
        cars = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, './/span[@id ="ContentPlaceHolder1_ContentSearch_ucSearchResults_lblResults"]'))).text.strip()
        if cars:
            match = re.search(r"(\d+)", cars)

            if match:
                total_cars= int(match.group(1))  # Convert to integer
                print(f"{total_cars} number of cars found")
        else:
            print("No cars found")
    except Exception as e:
        print(f"No cars found and error is {e}")
        
    
    # list to append the values
    results = []
    
    count = 0
    
    # while loop 
    while count < total_cars:
        # find per page cars
        try:
            per_page_cars = WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.XPATH, './/td[@style="width:75px"]')))
            
            # loop around the number of cars
            for i in range(len(per_page_cars)): # total_cars
                if count >= total_cars:
                    break
                # get the page cars and click on each of them
                try:

                    driver.execute_script("arguments[0].scrollIntoView();", per_page_cars[i])
                    time.sleep(1)
                    per_page_cars[i].click()

                    if not driver.window_handles:
                        print("Browser window closed. Stopping script.")
                        break
                    # dict to store the values
                    details = {}
                    
                    # title --> done
                    try:
                        title = WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_lblTitle"]'))).text.strip()
                        if title:
                            details['Title'] = title
                        else:
                            details['Title'] = 'na'
                    except Exception as e:
                        print("No car title")
                        
                    # veh all info
                    try:
                        veh_btn_main =WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH, './/div[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_header"]'))) 
                        
                        # veh information button
                        veh_button = WebDriverWait(veh_btn_main, 2).until(EC.presence_of_element_located((By.XPATH, './/span[text()="Vehicle"]')))
                        if veh_button:
                            # time.sleep()
                            veh_button.click()
                            
                            veh_main_card =WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH, './/div[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle"]')))
                            
                            # imgs --> done
                            try:
                                img_card_1 = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/table[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_rptImageList_rptImages_0"]')))
                                img_card_2 = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/table[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_rptImageList_rptImages_1"]')))
                                img_card_3 = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/table[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_rptImageList_rptImages_1"]')))
                                if img_card_1 or img_card_2 or img_card_3:
                                    
                                    img_trs_1 = WebDriverWait(img_card_1, 2).until(EC.presence_of_all_elements_located((By.TAG_NAME, 'tr')))
                                    img_trs_2 = WebDriverWait(img_card_2, 2).until(EC.presence_of_all_elements_located((By.TAG_NAME, 'tr')))
                                    img_trs_3 = WebDriverWait(img_card_3, 2).until(EC.presence_of_all_elements_located((By.TAG_NAME, 'tr')))
                                    imgs_lst = []
                                    for img_tr_1 in img_trs_1:
                                        imgs_1 = img_tr_1.find_elements(By.TAG_NAME, 'a')
                                        for img_1 in imgs_1:
                                            imgs_lst.append(img_1.get_attribute("href"))
                                        
                                    for img_tr_2 in img_trs_2:
                                        imgs_2 = img_tr_2.find_elements(By.TAG_NAME, 'a')
                                        for img_2 in imgs_2:
                                            imgs_lst.append(img_2.get_attribute("href"))
                                            
                                    for img_tr_3 in img_trs_3:
                                        imgs_3 = img_tr_3.find_elements(By.TAG_NAME, 'a')
                                        for img_3 in imgs_3:
                                            imgs_lst.append(img_3.get_attribute("href"))
                                    
                                    imgs_str = ", ".join(imgs_lst)
                                    details['Images'] = imgs_str
                                else:
                                    print("No images from the first card")
                                    
                            except Exception as e:
                                print(f"Error processing images")
                    

                            # veh dets --> done
                            try:
                                model = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblModel"]'))).text.strip()
                                if model:
                                    details['Model'] = model
                                else:
                                    details['Model'] = 'na'
                                    
                                var = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblVariant"]'))).text.strip()
                                if var:
                                    details['Variant'] = var
                                else:
                                    details['Variant'] = 'na'
                                    
                                reg_no = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblRegNo"]'))).text.strip()
                                if reg_no:
                                    details['reg_no'] =reg_no
                                else:
                                    details['reg_no'] = 'na'
                                save_full_screenshot(driver,details['reg_no'])
                                
                                reg_date= WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblRegDate"]'))).text.strip()
                                if reg_date:
                                    details['reg_date'] = reg_date
                                else:
                                    details['reg_date'] = 'na'
                                
                                mileage = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblMileage"]'))).text.strip()
                                if mileage:
                                    details['mileage'] = mileage
                                else:
                                    details['mileage'] ='na'
                                    
                                color = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblColour"]'))).text.strip()
                                if color:
                                    details['color'] = color
                                else:
                                    details['color'] = 'na'
                                    
                                trans = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblTransmissionType"]'))).text.strip()
                                if trans:
                                    details['Transmission_type'] = trans
                                else:
                                    details['Transmission_type'] = 'na'
                                    
                                fuel = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblFuelType"]'))).text.strip()
                                if fuel:
                                    details['fuel'] = fuel
                                else:
                                    details['fuel'] = 'na'
                                    
                                co2 = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblCO2"]'))).text.strip()
                                if co2:
                                    details['CO2'] = co2
                                else:
                                    details['CO2'] = 'na'
                                    
                                doors = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblDoors"]'))).text.strip()
                                if doors:
                                    details['doors'] = doors
                                else:
                                    details['doors']= 'na'
                                    
                                start = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblAuctionStartDate"]'))).text.strip()
                                if start:
                                    details['Start_time'] = start
                                else:
                                    details['Start_time'] = 'na'
                                    
                                ending = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblAuctionEndDate"]'))).text.strip()
                                if ending:
                                    details['Ending_time'] = ending
                                else:
                                    details['Ending_time']= 'na'
                                    
                                trade_no = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblTradeNo"]'))).text.strip()
                                if trade_no:
                                    details['Trade_no'] = trade_no
                                else:
                                    details['Trade_no'] = trade_no
                                    
                                lot_no = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblLotNumber"]'))).text.strip()
                                if lot_no:
                                    details['Lot_no'] = lot_no
                                else:
                                    details['Lot_no'] = 'na'
                                    
                                curr_bid= WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblCurrentBid"]'))).text.strip()
                                if curr_bid:
                                    details['Current_bid'] = curr_bid
                                else:
                                    details['Current_bid'] = 'na'
                                    
                                auc_fee = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblAuctionFee"]'))).text.strip()
                                if auc_fee:
                                    details['Auction_fee'] = auc_fee
                                else:
                                    details['Auction_fee']='na'
                                    
                                no_bids = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblNoBids"]'))).text.strip()
                                if no_bids:
                                    details['No_of_bids'] = no_bids
                                else:
                                    details['No_of_bids'] = 'na'
                                
                                # bid hist
                                bid_hist = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/a[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lnkViewBids"]')))
                                if bid_hist:
                                    bid_hist.click()
                                    bid_main = WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH, './/div[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_ucViewBids_updBids"]/div/table/tbody')))
                                    if bid_main:
                                        bidder =WebDriverWait(bid_main, 2).until(EC.presence_of_element_located((By.XPATH, '''.//tr[@onmouseover="javascript:this.style.backgroundColor='#FBFBA3'"]'''))).text.strip() 
                                        if bidder:
                                            details['Bidder'] = bidder.split(" ¬£")[0]
                                            details['Price'] = bidder.split(" ¬£")[1].split(" ")[0]
                                            details['Time'] = bidder.split(" ¬£")[1].split(" ")[1:]
                                            close_bid = WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH, './/input[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_btnCloseViewBids"]')))
                                            close_bid.click()
                                        else:
                                            print("No bids here")
                                    
                                else:
                                    print("no bids hist")
                                    
                                no_watchs = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblNoWatchers"]'))).text.strip()
                                if no_watchs:
                                    details['No_of_watchers'] = no_watchs
                                else:
                                    details['No_of_watchers'] = 'na'
                                    
                                close = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblModel"]'))).text.strip()
                                if close:
                                    details['Closes'] = close
                                else:
                                    details['Closes'] = 'na'
                                    
                                buy_price = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblBuyItNow"]'))).text.strip()
                                if buy_price:
                                    details['Buy_It_Now_Price'] = buy_price
                                else:
                                    details['Buy_It_Now_Price']= 'na'
                                    
                                cap_clean = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_ucGuidePricing_lblCAPConditionValuation"]'))).text.strip()
                                if cap_clean:
                                    details['Cap_clean'] = cap_clean
                                else:
                                    details['Cap_clean'] = 'na'
                                    
                                cap_ret = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_ucGuidePricing_lblCAPRetailValuation"]'))).text.strip()
                                if cap_ret:
                                    details['Cap_retail'] = cap_ret
                                else:
                                    details['Cap_retail'] = 'na'
                                
                                glasses = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_ucGuidePricing_lblGlasses"]'))).text.strip()
                                if glasses:
                                    details['glasses'] = glasses
                                else:
                                    details['glasses'] = 'na'
                                
                                del_charges = WebDriverWait(veh_main_card, 2).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabVehicle_lblDeliveryCharge"]'))).text.strip()
                                if del_charges:
                                    details['Delivery_charges'] = del_charges
                                else:
                                    details['Delivery_charges'] = 'na'         
                            except Exception as e:
                                print(f"No veh det found")        
                                
                        else:
                            print("no veh info button")                                        
                    
                        # Additional Info --> done
                        add_info_button = WebDriverWait(veh_btn_main, 2).until(EC.presence_of_element_located((By.XPATH, './/span[text()="Additional Info"]')))
                        if add_info_button:
                            time.sleep(2)
                            add_info_button.click()
                            
                            try:
                                add_main = WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH, './/div[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabAdditionalInfo"]')))
                                if add_main:
                                    add_rows = add_main.find_elements(By.TAG_NAME, 'tr')
                                    for add_row in add_rows:
                                        add_tds = add_row.find_elements(By.TAG_NAME, 'td')
                                        
                                        if len(add_tds) == 2:
                                            add_lbl = add_tds[0]
                                            add_val  =add_tds[1]
                                            details[add_lbl.text.strip()] = add_val.text.strip()

                                    serv_hist =WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH, './/table[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_tabAdditionalInfo_gdvServiceHistory"]'))) 
                                    if serv_hist:
                                        serv_rows = serv_hist.find_elements(By.TAG_NAME, 'tr')[1:]
                                        type_values = [row.find_elements(By.TAG_NAME, 'td')[0].text for row in serv_rows]
                                        details['Type'] = type_values
                                        date_values = [row.find_elements(By.TAG_NAME, 'td')[1].text for row in serv_rows]
                                        details['Date']  =date_values
                                        miles_values = [row.find_elements(By.TAG_NAME, 'td')[2].text for row in serv_rows]
                                        details['Miles'] = miles_values
                                        dealership_values = [row.find_elements(By.TAG_NAME, 'td')[3].text for row in serv_rows]
                                        details['Dealership'] = dealership_values                 
                                else:
                                    print("No add info found")
                                    
                            except Exception as e:
                                print("No add info found")
                        else:
                            print("No add info button")
                            
                        
                        
                        # Specification --> done
                        spec_info_button = WebDriverWait(veh_btn_main, 2).until(EC.presence_of_element_located((By.XPATH, './/span[text()="Specification"]')))
                        if spec_info_button:
                            time.sleep(2)
                            spec_info_button.click()
                            

                            # standard specs --> done
                            try:
                                stnd_spec_card = WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH, './/div[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_TabPanel1_divSpec"]')))
                                if stnd_spec_card:
                                    std_uls = WebDriverWait(stnd_spec_card, 3).until(EC.presence_of_all_elements_located((By.XPATH, './/ul[starts-with(@style, "list-style-image: url(images/bullet.gif)")]')))
                                    std_val_lst = []
                                    for std_ul in std_uls:
                                        std_vals = WebDriverWait(std_ul, 3).until(EC.presence_of_all_elements_located((By.XPATH, './/li[starts-with(@style, "margin-left:13px")]')))
                                        # std_val_lst = [std_val.text.strip() for std_val in std_vals]
                                        for std_val in std_vals:
                                            std_val_lst.append(std_val.text.strip())
                                    details['Standard_specs'] = std_val_lst
                                else:
                                    print("No standard specs")
                            except Exception as e:
                                print(f"No stnadard specs")
                            
                            # additional specs --> check
                            try:
                                add_hist_spec_card = WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.XPATH, './/div[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_TabPanel1_divDFA"]')))
                                if add_hist_spec_card:
                                    add_hist_uls = WebDriverWait(add_hist_spec_card, 3).until(EC.presence_of_all_elements_located((By.XPATH, './/ul[starts-with(@style, "list-style-image: url(images/bullet.gif)")]')))
                                    add_hist_val_lst = []
                                    for add_hist_ul in add_hist_uls:
                                        add_hist_vals = WebDriverWait(add_hist_ul, 3).until(EC.presence_of_all_elements_located((By.XPATH, './/li[starts-with(@style, "margin-left:13px")]')))
                                        # std_val_lst = [std_val.text.strip() for std_val in std_vals]
                                        for add_hist_val in add_hist_vals:
                                            add_hist_val_lst.append(add_hist_val.text.strip())
                                    details['Additional_specs'] = add_hist_val_lst
                                else:
                                    print("No add specs")
                            except Exception as e:
                                print(f"No add specs")
                        else:
                            print("No specs button to clk")
                        
                        
                        # Condition --> done
                        cond_info_button = WebDriverWait(veh_btn_main, 3).until(EC.presence_of_element_located((By.XPATH, './/span[text()="Condition"]')))
                        if cond_info_button:
                            time.sleep(2)
                            cond_info_button.click()
                            
                            
                            try:
                                cond_det_card = WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.XPATH, './/div[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_TabPanel2"]')))
                                if cond_det_card:
                                    
                                    # get img --> done
                                    try:
                                        cond_img = WebDriverWait(cond_det_card, 3).until(EC.presence_of_element_located((By.XPATH, './/img[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_TabPanel2_imgCarSplat"]'))).get_attribute("src")
                                        if cond_img:
                                            details['Cond_image'] = cond_img
                                        else:
                                            details['Cond_image'] = 'na'
                                    except Exception as e:
                                        print(f"No cond image")
                                        
                                    # get other info --> done
                                    try:
                                        cond_info_upper = WebDriverWait(cond_det_card, 3).until(EC.presence_of_element_located((By.XPATH, './/td[@style="width: 325px; vertical-align:top;"]')))
                                        if cond_info_upper:
                                            cond_upper_trs = cond_info_upper.find_elements(By.TAG_NAME, 'tr')
                                            for cond_tr in cond_upper_trs:
                                                cond_up_lbl = cond_tr.find_elements(By.TAG_NAME, 'td')[0].text.strip()
                                                cond_up_val = cond_tr.find_elements(By.TAG_NAME, 'td')[1].text.strip()
                                                details[cond_up_lbl] = cond_up_val
                                            
                                        else:
                                            print("No cond upper vals")    
                                        cond_info_mid = WebDriverWait(cond_det_card, 3).until(EC.presence_of_element_located((By.XPATH, './/td[@style="width: 325px; vertical-align:top"]')))
                                        if cond_info_mid:
                                            cond_mid_trs = cond_info_mid.find_elements(By.TAG_NAME, 'tr')
                                            for cond_mid_tr in cond_mid_trs:
                                                cond_mid_lbl = cond_mid_tr.find_elements(By.TAG_NAME, 'td')[0].text.strip()
                                                cond_mid_val = cond_mid_tr.find_elements(By.TAG_NAME, 'td')[1].text.strip()
                                                details[cond_mid_lbl] = cond_mid_val
                                        else:
                                            print("No middle information")
                                        
                                        cond_note =WebDriverWait(cond_det_card, 3).until(EC.presence_of_element_located((By.XPATH, './/span[@id="ContentPlaceHolder1_ContentSearch_tbcSelling_TabPanel2_lblConditionNotes"]'))).text.strip()
                                        if cond_note:
                                            details['Condition_Notes'] = cond_note
                                        else:
                                            details['Condition_Notes'] = "na"
                                    except Exception as e:
                                        print(f"No upper vals")
                                else:
                                    print("No cond main card")
                            except Exception as e:
                                print(f"No cond main card")        
                            
                    except Exception as e:
                        print(f"No veh button")       
                    count+=1  
                    results.append(details)
                    driver.back()
                except Exception as e:
                    print("No cars found")
                              
                    
            if count % 10 ==0:
                try:
                    next_link = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, '(//a[text() = "Next"])[2]')))
                    if next_link:
                        driver.execute_script("arguments[0].scrollIntoView();", next_link)
                        next_link.click()
                        WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, './/td[@style="width:75px"]')))
                        
                except Exception as e:
                    print("No next found")
                    break
        except Exception as e:
            print("No cars found")
            break
        
    
    df = pd.DataFrame.from_dict(results)
    df.to_csv("agnew.csv", index=False)
    # time.sleep(5)
    driver.quit()
path = "https://www.agnewtradecentre.com/vehicles.aspx?filter=cars"
scrape(path)

21 number of cars found
üì∏ Screenshot saved: screenshots/LK68EPJ_20251110_182237.png
No veh det found
No add specs
üì∏ Screenshot saved: screenshots/RRZ6046_20251110_182253.png
No add specs
No veh button
No cars found
No cars found
No cars found
No cars found
No cars found
No cars found
No cars found
No cars found
No cars found
No cars found


In [4]:
df=pd.read_csv("agnew.csv")
df

Unnamed: 0,Title,Images,Model,Variant,reg_no,reg_date,mileage,color,Transmission_type,fuel,...,Bidder,Price,Time,No_of_watchers,Closes,Buy_It_Now_Price,Cap_clean,Cap_retail,glasses,Delivery_charges
0,Mercedes-Benz Gla Diesel Hatchback,https://www.agnewtradecentre.com/images/vehicl...,Gla Diesel Hatchback,Gla 200d 4matic Sport Premium 5dr Auto,LK68EPJ,27/01/2019,82178,BLACK,Automatic,Diesel,...,,,,,,,,,,
1,Land Rover Range Rover Evoque Hatchback,https://www.agnewtradecentre.com/images/vehicl...,Range Rover Evoque Hatchback,2.0 P200 R-Dynamic Se 5dr Auto,RRZ6046,24/07/2020,22020,GREY,Automatic,Petrol,...,[Other Bidder],18000.0,"['07', 'Nov', '2025', '14:37']",1.0,Range Rover Evoque Hatchback,"¬£20,250","¬£20,840","¬£23,940",,¬£349


In [5]:
r = '[Other Bidder] ¬£6,637 14 Apr 2025 13:21'
rr = r.split(" ¬£")[1].split(" ")[1:]
rr

['14', 'Apr', '2025', '13:21']

### images download

In [6]:
reg_img = df[['reg_no', "Images", "Cond_image"]]

from urllib.parse import urlparse, urljoin
from PIL import Image, ImageDraw, ImageFont

def add_watermark_to_image(image_path, text="Sourced from Agnew Trade Centre"):
    try:
        image = Image.open(image_path).convert("RGBA")
        txt_layer = Image.new("RGBA", image.size, (255, 255, 255, 0))
        draw = ImageDraw.Draw(txt_layer)
        font_size = max(15, image.width // 50)
        try:
            font = ImageFont.truetype("arial.ttf", font_size)
        except:
            font = ImageFont.load_default()

        margin = int(font_size * 0.6)
        bbox = draw.textbbox((0, 0), text, font=font)
        text_width = bbox[2] - bbox[0]
        text_height = bbox[3] - bbox[1]
        x = image.width - text_width - margin
        y = image.height - text_height - margin

        box_padding = int(font_size * 0.4)
        draw.rectangle(
            [x - box_padding, y - box_padding,
             x + text_width + box_padding, y + text_height + box_padding],
            fill=(0, 0, 0, 180)
        )
        draw.text((x, y), text, font=font, fill=(255, 255, 255, 240))
        watermarked = Image.alpha_composite(image, txt_layer).convert("RGB")
        watermarked.save(image_path)
        print(f"‚úÖ Watermark added to {image_path}")

    except Exception as e:
        print(f"‚ùå Failed to watermark {image_path}: {e}")


def download_images(data, main_folder="Images"):
    os.makedirs(main_folder, exist_ok=True)

    for index, row in data.iterrows():
        reg_no = row["reg_no"]

        image_urls = row["Images"].split(", ") if pd.notna(row["Images"]) else []
        damaged_urls = row["Cond_image"].split(", ") if pd.notna(row["Cond_image"]) else []

        reg_folder = os.path.join(main_folder, reg_no)
        os.makedirs(reg_folder, exist_ok=True)

        images_folder = os.path.join(reg_folder, "Original_images")
        damaged_folder = os.path.join(reg_folder, "Cond_image")
        os.makedirs(images_folder, exist_ok=True)
        os.makedirs(damaged_folder, exist_ok=True)

        # üîπ Download regular images
        for idx, url in enumerate(image_urls):
            url = url.strip()
            if not url.startswith(("http://", "https://")):
                url = urljoin("https://", url)

            parsed_url = urlparse(url)
            if not parsed_url.scheme or not parsed_url.netloc:
                print(f"Invalid URL skipped: {url}")
                continue

            try:
                response = requests.get(url, stream=True)
                response.raise_for_status()

                full_file_name = os.path.join(images_folder, f"{reg_no}_{idx + 1}.jpg")
                with open(full_file_name, 'wb') as f:
                    for chunk in response.iter_content(1024):
                        f.write(chunk)

                print(f"‚úÖ Downloaded: {full_file_name}")

                # üîπ Add watermark after saving
                add_watermark_to_image(full_file_name)

            except Exception as e:
                print(f"‚ùå Failed to download {url} for {reg_no}: {e}")

        # üîπ Download damaged images
        for idx, url in enumerate(damaged_urls):
            url = url.strip()
            if not url.startswith(("http://", "https://")):
                url = urljoin("https://", url)

            parsed_url = urlparse(url)
            if not parsed_url.scheme or not parsed_url.netloc:
                print(f"Invalid URL skipped: {url}")
                continue

            try:
                response = requests.get(url, stream=True)
                response.raise_for_status()

                full_file_name = os.path.join(damaged_folder, f"{reg_no}_{idx + 1}.jpg")
                with open(full_file_name, 'wb') as f:
                    for chunk in response.iter_content(1024):
                        f.write(chunk)

                print(f"‚úÖ Downloaded: {full_file_name}")

                # üîπ Add watermark after saving
                add_watermark_to_image(full_file_name)

            except Exception as e:
                print(f"‚ùå Failed to download {url} for {reg_no}: {e}")



download_images(reg_img)


‚úÖ Downloaded: Images\LK68EPJ\Original_images\LK68EPJ_1.jpg
‚úÖ Watermark added to Images\LK68EPJ\Original_images\LK68EPJ_1.jpg
‚úÖ Downloaded: Images\LK68EPJ\Original_images\LK68EPJ_2.jpg
‚úÖ Watermark added to Images\LK68EPJ\Original_images\LK68EPJ_2.jpg
‚úÖ Downloaded: Images\LK68EPJ\Original_images\LK68EPJ_3.jpg
‚úÖ Watermark added to Images\LK68EPJ\Original_images\LK68EPJ_3.jpg
‚úÖ Downloaded: Images\LK68EPJ\Original_images\LK68EPJ_4.jpg
‚úÖ Watermark added to Images\LK68EPJ\Original_images\LK68EPJ_4.jpg
‚úÖ Downloaded: Images\LK68EPJ\Original_images\LK68EPJ_5.jpg
‚úÖ Watermark added to Images\LK68EPJ\Original_images\LK68EPJ_5.jpg
‚úÖ Downloaded: Images\LK68EPJ\Original_images\LK68EPJ_6.jpg
‚úÖ Watermark added to Images\LK68EPJ\Original_images\LK68EPJ_6.jpg
‚úÖ Downloaded: Images\LK68EPJ\Original_images\LK68EPJ_7.jpg
‚úÖ Watermark added to Images\LK68EPJ\Original_images\LK68EPJ_7.jpg
‚úÖ Downloaded: Images\LK68EPJ\Original_images\LK68EPJ_8.jpg
‚úÖ Watermark added to Images\LK68EP