In [1]:
import time, os, threading, cv2
import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [2]:
def scrape(path, headless=False):
    chrome_options = Options()
    if headless:
        chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--hide-scrollbars")
    
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)
    driver.get(path)
    driver.maximize_window()
    
    try:
        cookie = WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.XPATH, './/button[@id="ccc-notify-accept"]')))
        if cookie:
            cookie.click()
    except:
        pass

    # get the username tab
    try:
        provided_u_name = "fourbrotherstrading@icloud.com"
        user_name = WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.ID,'username')))   
        user_name.send_keys(provided_u_name)
    except Exception as e:
        print(f"No username tab found and the error is {e}")
    
    # get password tab
    try:
        provided_pass = "Sultanmirza1501#"
        password = WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.ID, 'password')))
        driver.execute_script("arguments[0].scrollIntoView();", password)
        password.send_keys(provided_pass)
    except Exception as e:
        print(f"No password tab found and the error is {e}") 
    
    # get the check done
    try:
        check = WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH, './/button[text() = "Sign in"]')))
        if check:
            # driver.execute_script("arguments[0].scrollIntoView();", check)
            check.click()
    except Exception as e:
        print(f"No check tab found and the error is {e}") 
        
    # popup
    try:
        popup = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, './/button[@id="start-btn"]')))
        if popup:
            popup.click()
    except Exception as e:
        print("No popup found")
        
        
    return driver

def record_website_screen(driver, output_file="live_rec.avi", fps=7):
    try:
        img = driver.get_screenshot_as_png()
        nparr = np.frombuffer(img, np.uint8)
        frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        height, width, _ = frame.shape
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_file, fourcc, fps, (width, height))

        frame_count = 0
        while True:
            img = driver.get_screenshot_as_png()
            nparr = np.frombuffer(img, np.uint8)
            frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            frame_count += 1
            out.write(frame)
            time.sleep(1/fps)
    except Exception as e:
        print(f"Error in screen recording: {e}")
    finally:
        out.release()
        print(f"Recording saved, captured {frame_count} frames.")

def start_recording(driver):
    thread = threading.Thread(target=record_website_screen, args=(driver,))
    thread.daemon = True
    thread.start()

if __name__ == "__main__":
    path = "https://www.centralcarauctions.com/portal/auction/buyer/webauction/auction/801"
    driver = scrape(path, headless=False)
    print("Starting screen recording...")
    start_recording(driver)
    
    print("Starting data extraction...")
    
    
    results = []
    previous_title = ""

    # Start continuous scraping loop
    try:
        while True:
            if not driver.window_handles:
                print("Browser window closed. Stopping script.")
                break
            details = {}
            try:
                # Extract Vehicle Title (.//table[@class="table table-condensed vehicle-table"])[1]/tbody
                reg = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, './/h1[@class="mb0"]')))
                current_lot = reg.text.strip()

                # If title changes, reset bidding prices
                if current_lot != previous_title:
                    previous_title = current_lot  # Update last seen title
                    details["Title"] = current_lot
                    details['Lot'] = current_lot.split(":")[0].split(" ")[-1]
                    
                    try:
                        for index in [1, 2]:
                            body =WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, f'(.//table[@class="table table-condensed vehicle-table"])[{index}]/tbody')))
                            if body:
                                trs = body.find_elements(By.TAG_NAME, 'tr')
                                for tr in trs:
                                    lbl = tr.find_element(By.TAG_NAME, 'th').text.strip()
                                    value=  tr.find_element(By.TAG_NAME, 'td').text.strip()
                                    if lbl  and value:
                                        details[lbl] = value
                    except:
                        pass 
                     
                    
                    # Extract Bidding Prices
                    bidding_prices = driver.find_elements(By.XPATH, './/ul[@id="biddinghistory"]/li')
                    details["Bid History"] = [price.text.strip() for price in bidding_prices]

                    results.append(details)

                    # Convert results to DataFrame and save to CSV
                    df = pd.DataFrame.from_dict(results)
                    df.to_csv("CCA_LIVE_data.csv", index=False)

                else:
                    print("No data card found.")

                time.sleep(3)  # Wait before checking for updates
                
            except Exception as e:
                print(f"Error while extracting data: {e}")
                    
    except KeyboardInterrupt:
        print("Stopped by user.")
    finally:
        # df = pd.DataFrame(results, columns=["Title", "Prices", "Sale text"])
        # df.to_csv("Loughborough_LIVE_data.csv", index=False)
        driver.quit()
        print("Browser closed.")


Starting screen recording...
Starting data extraction...
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No data card found.
No 

InvalidSessionIdException: Message: invalid session id: session deleted as the browser has closed the connection
from disconnected: not connected to DevTools
  (Session info: chrome=141.0.7390.123); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#invalidsessionidexception
Stacktrace:
	GetHandleVerifier [0x0x105fe43+66515]
	GetHandleVerifier [0x0x105fe84+66580]
	(No symbol) [0x0xe4dc48]
	(No symbol) [0x0xe3b9a4]
	(No symbol) [0x0xe3c937]
	(No symbol) [0x0xe4e6a3]
	(No symbol) [0x0xe2bcf5]
	(No symbol) [0x0xeca708]
	(No symbol) [0x0xebb193]
	(No symbol) [0x0xe8c2ca]
	(No symbol) [0x0xe8d154]
	GetHandleVerifier [0x0x12b7353+2521315]
	GetHandleVerifier [0x0x12b22d3+2500707]
	GetHandleVerifier [0x0x1087c94+229924]
	GetHandleVerifier [0x0x10781f8+165768]
	GetHandleVerifier [0x0x107ecad+193085]
	GetHandleVerifier [0x0x1068158+100072]
	GetHandleVerifier [0x0x10682f0+100480]
	GetHandleVerifier [0x0x10525aa+11066]
	BaseThreadInitThunk [0x0x75b37ba9+25]
	RtlInitializeExceptionChain [0x0x7726c3ab+107]
	RtlClearBits [0x0x7726c32f+191]


In [None]:
top_car_df = pd.read_csv("CCA_LIVE_data.csv")
top_car_live_cleaned_df = pd.read_csv("cca_data.csv")
# Rename 'Name' to 'Title' in Top_car dataset to match
top_car_df.rename(columns={'Registration': 'Reg'}, inplace=True)
merged_df = top_car_live_cleaned_df.merge(top_car_df[['Reg',"Bid History"]], on='Reg', how='left')
merged_df= merged_df[merged_df['Bid History'].notna()]
# Save the updated dataset
output_path = "CCA.csv"
merged_df.to_csv(output_path, index=False)

# df = pd.read_csv("cca_data_merged_final.csv")
# df = df.sort_values(by='Make', ascending=True)
# df.to_csv("cca_data_merged_final_sorted", index=False)

# os.remove("cca_data.csv")
# os.remove("CCA_LIVE_data.csv")
# os.remove("cca_data_merged_final.csv")

In [None]:
# df = pd.read_csv("CCA.csv")
# # df['Lot'] = df['Lot'].astype(int)
# r = df['Bid History'].iloc[-1].split("[")[1].split("]")[0]
# parts = r.split("Lot ")[1:]
# parts = [e for e in parts if "changed:" not in e]
# all_lot_info = []
# for p in parts[1:]:
#     lot_num = int(p.split()[0])
#     bid_status = p.split()[1]
#     bid_hist = p.split()[1:]
#     bids = " ".join(bid_hist)
#     all_lot_info.append({"Lot": lot_num, "Bidding History": bids, "Bidding status": bid_status})
# lot_df = pd.DataFrame(all_lot_info)
# lot_df['Last Bid'] = lot_df['Bidding History'].apply(lambda x: x.split("£")[1].split("'")[0] if "£" in x else None)
# final_df = pd.merge(df, lot_df, on="Lot", how="left")
# final_df.to_csv("Final_CCA.csv", index=False)



df = pd.read_csv("CCA.csv")

r = df['Bid History'].iloc[-1]
if "[" in r and "]" in r:
    r = r.split("[")[1].split("]")[0]
else:
    r = r.strip("[]")

parts = r.split("Lot ")[1:]
parts = [e for e in parts if "changed:" not in e]

all_lot_info = []
for p in parts:
    lot_num = int(p.split()[0])
    bid_status = p.split()[1]
    bids = " ".join(p.split()[1:])
    all_lot_info.append({"Lot": lot_num, "Bidding History": bids, "Bidding status": bid_status})

lot_df = pd.DataFrame(all_lot_info)
lot_df['Last Bid'] = lot_df['Bidding History'].apply(lambda x: x.split("£")[1].split("'")[0] if "£" in x else None)

# Combine both since df has no Lot column
final_df = pd.concat([df, lot_df], axis=1)
final_df.to_csv("Final_CCA.csv", index=False)
