In [30]:
import time
import pandas as pd
import matplotlib.pyplot as plt
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from openpyxl import Workbook
chrome_driver_path = r'C:\\Users\\forty\\Downloads\\chromedriver-win64\\chromedriver-win64\\chromedriver.exe'
chrome_options = Options()
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.62 Safari/537.36")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--headless")  # Run in headless mode (no GUI)

# Initialize the Chrome WebDriver
service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)
driver.set_page_load_timeout(60)
url = "https://www.barchart.com/futures"
driver.get(url)
try:
    WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.XPATH, "//div[contains(@class, 'bc-datatable-header-tooltip')]")))

    # Extract the table headers (column names)
    headers = driver.find_elements(By.XPATH, "//div[contains(@class, 'bc-datatable-header-tooltip')]")
    header_names = [header.text.strip() for header in headers]

    # Extract table rows
    rows = driver.find_elements(By.XPATH, "//div[@role='row']")

    print(f"Number of rows fetched: {len(rows)}")

    data = []

    # Extract data from each row
    for row in rows[1:]:  # Skip the first row as it contains headers
        cells = row.find_elements(By.XPATH, ".//div[@role='cell']")  # Use relative XPath to get the cells in the row
        if len(cells) == len(header_names):  # Make sure the row has the correct number of cells
            row_data = [cell.text.strip() for cell in cells]
            data.append(row_data)

    # Convert the list of rows into a Pandas DataFrame
    df = pd.DataFrame(data, columns=header_names)
    print(df.head())  # Display the first few rows for verification

    # Step 4: Data analysis
    # a) Create a new column "Mean" as the average of "High" and "Low"
    df['High'] = pd.to_numeric(df['High'], errors='coerce')  # Convert "High" to numeric values
    df['Low'] = pd.to_numeric(df['Low'], errors='coerce')  # Convert "Low" to numeric values
    df['Mean'] = (df['High'] + df['Low']) / 2  # Calculate the Mean

    # b) Plot "High", "Low", and "Mean" in a single linear graph
    plt.figure(figsize=(10,6))
    plt.plot(df['Contract Name'], df['High'], label='High', marker='o')
    plt.plot(df['Contract Name'], df['Low'], label='Low', marker='o')
    plt.plot(df['Contract Name'], df['Mean'], label='Mean', marker='x')
    plt.xlabel('Contract Name')
    plt.ylabel('Price')
    plt.title('High, Low, and Mean Prices')
    plt.xticks(rotation=90)  # Rotate x-axis labels for better visibility
    plt.legend()
    plt.tight_layout()
    plt.show()

    # c) Find the row with the largest "Change" and extract the Contract Name and Last Price
    df['Change'] = pd.to_numeric(df['Change'], errors='coerce')  # Ensure "Change" is numeric
    largest_change_row = df.loc[df['Change'].idxmax()]
    print(f"Contract Name with largest change: {largest_change_row['Contract Name']}")
    print(f"Last Price: {largest_change_row['Last']}")

    # d) Save the data into an Excel file with the sheet name "Raw Data"
    output_path = r"C:\\Users\\forty\\Downloads\\futures_data.xlsx"
    with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
        df.to_excel(writer, sheet_name='Raw Data', index=False)

    print(f"Data has been saved to {output_path}")

except Exception as e:
    print(f"Error: {e}")

finally:
    driver.quit()  # Close the WebDriver

TimeoutException: Message: timeout: Timed out receiving message from renderer: 59.399
  (Session info: chrome=131.0.6778.86)
Stacktrace:
	GetHandleVerifier [0x00007FF739C26CF5+28821]
	(No symbol) [0x00007FF739B93880]
	(No symbol) [0x00007FF739A3578A]
	(No symbol) [0x00007FF739A23992]
	(No symbol) [0x00007FF739A2365A]
	(No symbol) [0x00007FF739A21274]
	(No symbol) [0x00007FF739A21D1F]
	(No symbol) [0x00007FF739A3084E]
	(No symbol) [0x00007FF739A4492D]
	(No symbol) [0x00007FF739A4B10A]
	(No symbol) [0x00007FF739A22484]
	(No symbol) [0x00007FF739A44753]
	(No symbol) [0x00007FF739ACF75A]
	(No symbol) [0x00007FF739AAF0A3]
	(No symbol) [0x00007FF739A7A778]
	(No symbol) [0x00007FF739A7B8E1]
	GetHandleVerifier [0x00007FF739F5FCED+3408013]
	GetHandleVerifier [0x00007FF739F7745F+3504127]
	GetHandleVerifier [0x00007FF739F6B63D+3455453]
	GetHandleVerifier [0x00007FF739CEBDFB+835995]
	(No symbol) [0x00007FF739B9EB9F]
	(No symbol) [0x00007FF739B9A854]
	(No symbol) [0x00007FF739B9A9ED]
	(No symbol) [0x00007FF739B8A1D9]
	BaseThreadInitThunk [0x00007FF8E7E0259D+29]
	RtlUserThreadStart [0x00007FF8E89EAF38+40]
