In [5]:
import time, os, threading, cv2
import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException

In [6]:
def save_full_screenshot(driver, prefix="bidding_status"):
    try:
        os.makedirs("screenshots", exist_ok=True)
        filename = f"screenshots/{prefix}.png"
        driver.save_screenshot(filename)
        print(f"üì∏ Screenshot saved: {filename}")
    except Exception as e:
        print(f"‚ùå Screenshot failed: {e}")
def scrape(path, headless=False):
    chrome_options = Options()
    if headless:
        chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--hide-scrollbars")
    
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)
    driver.get(path)
    driver.maximize_window()
    
    # get the username tab
    try:
        provided_u_name = "fourbrotherstrading@icloud.com"
        user_name = WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.ID,'username')))   
        user_name.send_keys(provided_u_name)
    except Exception as e:
        print(f"No username tab found and the error is {e}")
    
    # get password tab
    try:
        provided_pass = "Muhssan7865"
        password = WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.ID, 'password')))
        driver.execute_script("arguments[0].scrollIntoView();", password)
        password.send_keys(provided_pass)
    except Exception as e:
        print(f"No password tab found and the error is {e}") 
    
    # get the check done
    try:
        check = WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH, './/button[text() = "Sign in"]')))
        if check:
            # driver.execute_script("arguments[0].scrollIntoView();", check)
            check.click()
    except Exception as e:
        print(f"No check tab found and the error is {e}")
        
    # popup
    try:
        popup = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, './/button[@id="start-btn"]')))
        if popup:
            popup.click()
    except Exception as e:
        print("No popup found")
        
        
    return driver

# def record_website_screen(driver, output_file="Loughborough_rec.avi", fps=8):
#     try:
#         img = driver.get_screenshot_as_png()
#         nparr = np.frombuffer(img, np.uint8)
#         frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
#         height, width, _ = frame.shape
#         fourcc = cv2.VideoWriter_fourcc(*'XVID')
#         out = cv2.VideoWriter(output_file, fourcc, fps, (width, height))

#         frame_count = 0
#         while True:
#             img = driver.get_screenshot_as_png()
#             nparr = np.frombuffer(img, np.uint8)
#             frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
#             frame_count += 1
#             out.write(frame)
#             time.sleep(1/fps)
#     except Exception as e:
#         print(f"Error in screen recording: {e}")
#     finally:
#         out.release()
#         print(f"Recording saved, captured {frame_count} frames.")

# def start_recording(driver):
#     thread = threading.Thread(target=record_website_screen, args=(driver,))
#     thread.daemon = True
#     thread.start()

if __name__ == "__main__":
    path = "https://www.protruckauctions.co.uk/portal/auction/buyer/webauction/auction/413"
    driver = scrape(path, headless=False)

    print("Starting data extraction...\n")
    results = []
    previous_title = ""
    previous_last_bid = ""

    try:
        while True:
            if not driver.window_handles:
                print("Browser closed. Exiting loop.")
                break

            details = {}

            try:
                # === Extract vehicle title ===
                title_el = WebDriverWait(driver, 5).until(
                    EC.presence_of_element_located((By.XPATH, './/div[@class="col-lg-9"]/h1'))
                )
                current_title = title_el.text.strip()

                # Only scrape if the lot has changed
                if current_title != previous_title:
                    previous_title = current_title
                    details["Title"] = current_title

                    # === Extract vehicle table data ===
                    try:
                        tables = driver.find_elements(By.XPATH, '//table[@class="table table-condensed vehicle-table"]/tbody')
                        for body in tables:
                            rows = body.find_elements(By.TAG_NAME, 'tr')
                            for tr in rows:
                                lbl = tr.find_element(By.TAG_NAME, 'th').text.strip()
                                val = tr.find_element(By.TAG_NAME, 'td').text.strip()
                                details[lbl] = val
                    except Exception as e:
                        print(f"‚ö†Ô∏è Table parsing error: {e}")

                    # === Extract Registration if available ===
                    reg_value = details.get("Registration", "N/A")
                    details["Registration"] = reg_value

                    # === Extract Bidding History ===
                    try:
                        bidding_prices = driver.find_elements(By.XPATH, '//ul[@id="biddinghistory"]/li')
                        bid_texts = [b.text.strip() for b in bidding_prices if b.text.strip()]
                        details["Bidding History"] = " | ".join(bid_texts)

                        # Check if item sold
                        for b in bid_texts:
                            if "SOLD" in b.upper():
                                save_full_screenshot(driver, prefix=reg_value)
                                break
                    except Exception as e:
                        print(f"‚ö†Ô∏è Bidding history error: {e}")

                    # === Get current bid amount ===
                    try:
                        current_bid = driver.find_element(By.ID, "currentbid").text.strip()
                        next_bid = driver.find_element(By.ID, "nextbid").text.strip()
                        details["Current Bid"] = f"¬£{current_bid}"
                        details["Next Bid"] = f"¬£{next_bid}"
                    except:
                        details["Current Bid"] = ""
                        details["Next Bid"] = ""

                    # === Append and save to CSV ===
                    results.append(details)
                    df = pd.DataFrame(results)
                    df.to_csv("protruck_live.csv", index=False)
                    print(f"‚úÖ Saved data for: {current_title} ({reg_value})")

                # Short wait before checking again
                time.sleep(3)

            except StaleElementReferenceException:
                print("‚ôªÔ∏è Stale element, retrying...")
                continue
            except Exception as e:
                print(f"‚ö†Ô∏è Error while extracting data: {e}")

    except KeyboardInterrupt:
        print("üõë Stopped by user.")
    finally:
        driver.quit()
        print("Browser closed.")

Starting data extraction...

‚ö†Ô∏è Table parsing error: Message: no such element: Unable to locate element: {"method":"tag name","selector":"th"}
  (Session info: chrome=141.0.7390.123); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#nosuchelementexception
Stacktrace:
	GetHandleVerifier [0x0x44fe43+66515]
	GetHandleVerifier [0x0x44fe84+66580]
	(No symbol) [0x0x23dc48]
	(No symbol) [0x0x288704]
	(No symbol) [0x0x288aab]
	(No symbol) [0x0x27dc71]
	(No symbol) [0x0x2ab214]
	(No symbol) [0x0x27db74]
	(No symbol) [0x0x2ab384]
	(No symbol) [0x0x2ccba7]
	(No symbol) [0x0x2aafc6]
	(No symbol) [0x0x27c2ca]
	(No symbol) [0x0x27d154]
	GetHandleVerifier [0x0x6a7353+2521315]
	GetHandleVerifier [0x0x6a22d3+2500707]
	GetHandleVerifier [0x0x477c94+229924]
	GetHandleVerifier [0x0x4681f8+165768]
	GetHandleVerifier [0x0x46ecad+193085]
	GetHandleVerifier [0x0x458158+100072]
	GetHandleVerifier [0x0x4582f0+100480]
	GetHandleVerifier [0

InvalidSessionIdException: Message: invalid session id: session deleted as the browser has closed the connection
from disconnected: not connected to DevTools
  (Session info: chrome=141.0.7390.123); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#invalidsessionidexception
Stacktrace:
	GetHandleVerifier [0x0x44fe43+66515]
	GetHandleVerifier [0x0x44fe84+66580]
	(No symbol) [0x0x23dc48]
	(No symbol) [0x0x22b9a4]
	(No symbol) [0x0x22c937]
	(No symbol) [0x0x23e6a3]
	(No symbol) [0x0x21bcf5]
	(No symbol) [0x0x2ba708]
	(No symbol) [0x0x2ab193]
	(No symbol) [0x0x27c2ca]
	(No symbol) [0x0x27d154]
	GetHandleVerifier [0x0x6a7353+2521315]
	GetHandleVerifier [0x0x6a22d3+2500707]
	GetHandleVerifier [0x0x477c94+229924]
	GetHandleVerifier [0x0x4681f8+165768]
	GetHandleVerifier [0x0x46ecad+193085]
	GetHandleVerifier [0x0x458158+100072]
	GetHandleVerifier [0x0x4582f0+100480]
	GetHandleVerifier [0x0x4425aa+11066]
	BaseThreadInitThunk [0x0x759d7ba9+25]
	RtlInitializeExceptionChain [0x0x77ccc3ab+107]
	RtlClearBits [0x0x77ccc32f+191]


In [None]:
top_car_df = pd.read_csv("protruck_live.csv")
top_car_live_cleaned_df = pd.read_csv("protruck_data.csv")

# Rename 'Name' to 'Title' in Top_car dataset to match
top_car_df.rename(columns={'Registration': 'Reg'}, inplace=True)

merged_df = top_car_live_cleaned_df.merge(top_car_df[['Reg',"Bid History"]], on='Reg', how='left')
merged_df= merged_df[merged_df['Bid History'].notna()]
# Save the updated dataset
output_path = "protruck.csv"
merged_df.to_csv(output_path, index=False)

# df = pd.read_csv("protruck_data_merged_final.csv")
# df = df.sort_values(by='Make', ascending=True)
# df.to_csv("Final_Protruck", index=False)

# os.remove("protruck_data.csv")
# os.remove("protruck_live.csv")
# os.remove("protruck_data_merged_final.csv")

In [None]:
df = pd.read_csv("protruck.csv")
df['Lot'] = df['Lot'].astype(int)
r = df['Bid History'].iloc[-1].split("[")[1].split("]")[0]
parts = r.split("Lot ")[1:]
parts = [e for e in parts if "changed:" not in e]
all_lot_info = []
for p in parts[1:]:
    lot_num = int(p.split()[0])
    bid_status = p.split()[1]
    bid_hist = p.split()[1:]
    bids = " ".join(bid_hist)
    all_lot_info.append({"Lot": lot_num, "Bidding History": bids, "Bidding status": bid_status})
lot_df = pd.DataFrame(all_lot_info)
lot_df['Last Bid'] = lot_df['Bidding History'].apply(lambda x: x.split("¬£")[1].split("'")[0] if "¬£" in x else None)
final_df = pd.merge(df, lot_df, on="Lot", how="left")
final_df.to_csv("Final_Protruck.csv", index=False)
final_df