In [1]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import (
    NoSuchElementException,
    TimeoutException,
    StaleElementReferenceException,
    WebDriverException,
    ElementClickInterceptedException,
)
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import random

# ZIP codes to look up on the grain-bids page; main() submits one scrape_zip() job per entry.
zip_list = ['15004', '15005', '15006', '15010', '15014', '15066', '15310', '15376', '15411', '15443', '15445', '15501', '15502', '15522', '15532', '15533', '15534', '15536', '15539', '15544', '15551', '15552', '15557', '15564', '15601', '15613', '15618', '15620', '15861', '15906', '16038', '16101', '16102', '16105', '16110', '16111', '16112', '16113', '16114', '16125', '16127', '16133', '16134', '16136', '16140', '16143', '16154', '16312', '16314', '16326', '16328', '16329', '16345', '16351', '16362', '16401', '16402', '16411', '16415', '16421', '16611', '16701', '16910', '16926', '17003', '17006', '17007', '17201', '17211', '17212', '17345', '17353', '17506', '17517', '17527', '17720', '18917', '18976', '19001', '19362', '19506', '19507']




# Page that hosts the ZIP-code search form; scrape_zip() loads it for every lookup.
url = "https://www.dtnpf.com/agriculture/web/ag/markets/local-grain-bids"

# Chrome options shared by every driver created in create_driver():
# headless, no GPU/extensions, plus the sandbox/shm flags.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")

# Content-setting values: 2 blocks the resource type, 1 allows it.
prefs = {
    "profile.managed_default_content_settings.images": 2,
    "profile.managed_default_content_settings.stylesheets": 2,
    "profile.default_content_setting_values.notifications": 2,
    # Keep JavaScript enabled so the site works
    "profile.managed_default_content_settings.javascript": 1,
}
# Register the prefs through the public API instead of mutating the dict
# returned by the ``experimental_options`` property.
chrome_options.add_experimental_option("prefs", prefs)


def create_driver(page_load_timeout=20):
    """Start a Chrome session configured with the shared ``chrome_options``.

    Args:
        page_load_timeout: Value passed to ``set_page_load_timeout`` (seconds).
            Defaults to 20, the value that was previously hard-coded.

    Returns:
        The new ``webdriver.Chrome`` instance. The caller owns it and must
        call ``quit()`` when finished.

    Raises:
        Whatever ``webdriver.Chrome`` or ``set_page_load_timeout`` raises; in
        the latter case the freshly started browser is shut down first.
    """
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.set_page_load_timeout(page_load_timeout)
    except Exception:
        # The browser is already running at this point; do not leak it.
        driver.quit()
        raise
    return driver


def close_cookie_banner(driver):
    """Accept the cookie banner if it appears; do nothing otherwise.

    Waits up to 5 seconds for the accept button (id
    ``onetrust-accept-btn-handler``) to become clickable, clicks it, then
    waits up to 3 seconds for the button group (id
    ``onetrust-button-group-parent``) to become invisible.

    Args:
        driver: Selenium driver whose current page may show the banner.

    Returns:
        None. A ``TimeoutException`` or ``NoSuchElementException`` raised by
        any step is swallowed; other exceptions propagate.
    """
    accept_locator = (By.ID, "onetrust-accept-btn-handler")
    banner_locator = (By.ID, "onetrust-button-group-parent")

    try:
        button = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable(accept_locator)
        )
        button.click()
        WebDriverWait(driver, 3).until(
            EC.invisibility_of_element_located(banner_locator)
        )
    except (TimeoutException, NoSuchElementException):
        # No cookie banner present
        return

    print("[Info] Cookie banner accepted and closed.")


def click_with_retry(driver, element, retries=3):
    """Scroll ``element`` into view and click it via JavaScript, retrying on interception.

    Each attempt scrolls the element into view, waits up to 5 seconds for the
    ``.js-simpleLocationView-submit`` element to be clickable, then clicks
    ``element`` with ``execute_script``.

    Args:
        driver: Selenium driver that owns ``element``.
        element: The element to scroll to and click.
        retries: Maximum number of attempts.

    Returns:
        True as soon as an attempt completes; False when every attempt raised
        ``ElementClickInterceptedException`` (or ``retries`` is 0). Any other
        exception, including the wait timing out, propagates to the caller.
    """
    submit_locator = (By.CSS_SELECTOR, ".js-simpleLocationView-submit")

    for attempt_no in range(1, retries + 1):
        try:
            driver.execute_script("arguments[0].scrollIntoView(true);", element)
            WebDriverWait(driver, 5).until(EC.element_to_be_clickable(submit_locator))
            driver.execute_script("arguments[0].click();", element)
        except ElementClickInterceptedException:
            print(f"[Warning] Click intercepted, retry {attempt_no}/{retries}")
            # Brief pause before the next attempt.
            time.sleep(1)
        else:
            return True

    return False


def _collect_bid_rows(driver, zip_code):
    """Read every bid row from the ``div.box`` elements on the current page.

    Args:
        driver: Selenium driver already showing the results for ``zip_code``.
        zip_code: ZIP code stored in each record's ``"zip"`` field.

    Returns:
        A list of dicts with keys ``zip``, ``location``, ``commodity``,
        ``price``, ``basis`` and ``date``. ``location`` is the box's
        ``div.box-hd h4`` text, or None when the box has no such header.
        Rows with fewer than four ``td`` cells are skipped, as are boxes and
        rows that go stale while being read.
    """
    records = []
    box_count = len(driver.find_elements(By.CSS_SELECTOR, "div.box"))
    for box_index in range(box_count):
        try:
            # Re-find on every pass to avoid holding stale element handles.
            box = driver.find_elements(By.CSS_SELECTOR, "div.box")[box_index]

            try:
                header = box.find_element(By.CSS_SELECTOR, "div.box-hd h4").text.strip()
            except NoSuchElementException:
                header = None

            row_count = len(box.find_elements(By.CSS_SELECTOR, "tbody tr"))
            for row_index in range(row_count):
                try:
                    row = box.find_elements(By.CSS_SELECTOR, "tbody tr")[row_index]
                    cells = row.find_elements(By.TAG_NAME, "td")
                    if len(cells) >= 4:
                        records.append(
                            {
                                "zip": zip_code,
                                "location": header,
                                "commodity": cells[0].text.strip(),
                                "price": cells[1].text.strip(),
                                "basis": cells[2].text.strip(),
                                "date": cells[3].text.strip(),
                            }
                        )
                except StaleElementReferenceException:
                    continue

        except StaleElementReferenceException:
            continue

    return records


def scrape_zip(zip_code, max_retries=3):
    """Scrape the grain bids listed for one ZIP code, retrying on failure.

    Every attempt starts a fresh driver, loads ``url``, dismisses the cookie
    banner, submits ``zip_code`` through the ``postalCode`` input and reads
    the resulting tables. The driver is always quit, whatever the outcome.

    Args:
        zip_code: ZIP code to type into the search form.
        max_retries: Maximum number of attempts.

    Returns:
        The list of row dicts from the first successful attempt (see
        ``_collect_bid_rows``). Returns ``[]`` when the submit button cannot
        be clicked or when every attempt fails. Rows gathered by a failed
        attempt are discarded, so a retry never yields duplicates.
    """
    for attempt in range(1, max_retries + 1):
        driver = None
        try:
            driver = create_driver()
            driver.get(url)
            wait = WebDriverWait(driver, 15)

            close_cookie_banner(driver)

            wait.until(EC.presence_of_element_located((By.NAME, "postalCode")))
            zip_input = driver.find_element(By.NAME, "postalCode")
            zip_input.clear()
            zip_input.send_keys(zip_code)

            go_button = driver.find_element(By.CSS_SELECTOR, ".js-simpleLocationView-submit")

            if not click_with_retry(driver, go_button):
                print(f"[Error] Could not click submit button for ZIP: {zip_code}")
                return []

            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.box")))

            # Rows are accumulated per attempt and only returned on success.
            return _collect_bid_rows(driver, zip_code)

        except TimeoutException:
            print(f"[Timeout] Attempt {attempt} for ZIP: {zip_code}")
        except WebDriverException as e:
            print(f"[WebDriverError] Attempt {attempt} for ZIP: {zip_code} - {e}")
        except Exception as e:
            print(f"[Error] Attempt {attempt} for ZIP: {zip_code} - {e}")

        finally:
            if driver:
                driver.quit()

        # Randomized pause between attempts; pointless after the last one.
        if attempt < max_retries:
            time.sleep(random.uniform(2, 5))

    print(f"[Skipped] ZIP: {zip_code} after {max_retries} attempts")
    return []


def main(max_workers=4):
    """Scrape every ZIP in ``zip_list`` concurrently and publish the rows as ``df``.

    Args:
        max_workers: Size of the thread pool; each worker runs ``scrape_zip``
            for one ZIP at a time. Adjust to your CPU/network constraints.

    Side effects:
        Rebinds the module-level ``df`` to a DataFrame with the columns
        ``zip, location, commodity, price, basis, date``. The columns are
        present even when nothing was scraped, so later cells that index by
        column name keep working. Prints a summary line, plus one line per ZIP
        whose job raised.
    """
    global df
    columns = ["zip", "location", "commodity", "price", "basis", "date"]
    all_results = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(scrape_zip, zip_code): zip_code for zip_code in zip_list}

        for future in as_completed(futures):
            zip_code = futures[future]
            try:
                data = future.result()
                if data:
                    all_results.extend(data)
            except Exception as exc:
                # One failing ZIP must not abort the remaining jobs.
                print(f"[Error] ZIP {zip_code} generated an exception: {exc}")

    # Create pandas DataFrame for further use in notebook
    df = pd.DataFrame(all_results, columns=columns)
    print(f"Scraping completed. {len(df)} records collected.")


# Run the scrape when this cell/script is executed directly. main() stores its
# result in the module-level DataFrame ``df`` that the following cells read.
if __name__ == "__main__":
    main()


[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and closed.
[Info] Cookie banner accepted and 

In [2]:
# Two rows are duplicates when every field except the queried ZIP matches,
# so a bid returned for several ZIP codes is kept only once (first occurrence).
dedupe_columns = [name for name in df.columns if name != 'zip']
df_unique = df.drop_duplicates(subset=dedupe_columns)
df_unique

Unnamed: 0,zip,location,commodity,price,basis,date
0,15006,CENTERRA COOP-GENE COOPER FARMS - SLIPPERY ROC...,Corn,$3.38,-0.45,8/8/2025
1,15006,CENTERRA COOP-GENE COOPER FARMS - SLIPPERY ROC...,Soybeans,$9.16,-0.75,8/8/2025
2,15006,CENTERRA COOP-GENE COOPER FARMS - SLIPPERY ROC...,Corn,$3.63,-0.20,8/8/2025
3,15006,CENTERRA COOP-GENE COOPER FARMS - SLIPPERY ROC...,Soybeans,$9.51,-0.40,8/8/2025
6,15006,"AG CENTRAL - NEW CASTLE, PA",Corn,$3.63,-0.20,8/8/2025
...,...,...,...,...,...,...
677,17506,"FM BROWN SONS INC - BIRDSBORO, PA",Soybeans,$9.61,-0.11 (est.),8/8/2025
678,17506,"FM BROWN SONS INC - BIRDSBORO, PA",Soybeans,$10.16,0.25,8/8/2025
686,17527,"FM BROWN SONS INC - BIRDSBORO, PA",Corn,$4.82,0.97 (est.),8/8/2025
687,17527,"FM BROWN SONS INC - BIRDSBORO, PA",Soybeans,$9.6,-0.11 (est.),8/8/2025


In [3]:
import os

# Destination file in the current user's Desktop folder ("~" is expanded).
desktop_path = os.path.expanduser("~/Desktop/grain_bids.csv")

# Export the de-duplicated bids; the row index is left out of the file.
df_unique.to_csv(path_or_buf=desktop_path, index=False)

print(f"File saved to: {desktop_path}")


File saved to: /Users/nshaffer/Desktop/grain_bids.csv


In [None]:
import pandas as pd

# Lift pandas' display limits so frames are rendered in full.
pd.options.display.max_rows = None
pd.options.display.max_columns = None

# Keep only the corn bids, ordered by the ZIP code they were scraped under.
is_corn = df_unique["commodity"] == "Corn"
df_corn = df_unique[is_corn].sort_values(by="zip")
df_corn

Unnamed: 0,zip,location,commodity,price,basis,date
25,15004,"HERITAGE COOPERATIVE - EAST LIVERPOOL, OH",Corn,$3.63,-0.20,8/8/2025
29,15004,"HERITAGE COOPERATIVE - LISBON, OH",Corn,$3.63,-0.20,8/8/2025
17,15005,"HERITAGE COOPERATIVE - EAST LIVERPOOL, OH",Corn,$3.64,-0.20,8/8/2025
18,15005,"HERITAGE COOPERATIVE - EAST LIVERPOOL, OH",Corn,$3.39,-0.45,8/8/2025
22,15005,CENTERRA COOP-GENE COOPER FARMS - SLIPPERY ROC...,Corn,$3.39,-0.45,8/8/2025
0,15006,CENTERRA COOP-GENE COOPER FARMS - SLIPPERY ROC...,Corn,$3.38,-0.45,8/8/2025
2,15006,CENTERRA COOP-GENE COOPER FARMS - SLIPPERY ROC...,Corn,$3.63,-0.20,8/8/2025
6,15006,"AG CENTRAL - NEW CASTLE, PA",Corn,$3.63,-0.20,8/8/2025
8,15010,"AG CENTRAL - NEW CASTLE, PA",Corn,$3.64,-0.20,8/8/2025
14,15010,"HERITAGE COOPERATIVE - LISBON, OH",Corn,$3.64,-0.20,8/8/2025


In [16]:


import pandas as pd


def _price_to_number(prices):
    """Turn bid strings such as "$3.63" into floats (NaN when unparseable).

    Used as the sort key so prices are ordered numerically; sorting the raw
    strings is lexicographic and would place "$10.16" below "$3.63".
    """
    return pd.to_numeric(prices.str.replace(r"[$,]", "", regex=True), errors="coerce")


# NOTE: the variable names say "5" but each frame holds 10 rows (head(10));
# the names are kept so any later cell that uses them still works.

# 10 highest rows by price
highest_5 = df_corn.sort_values(by='price', ascending=False, key=_price_to_number).head(10)

# 10 lowest rows by price
lowest_5 = df_corn.sort_values(by='price', ascending=True, key=_price_to_number).head(10)

# Combine both DataFrames
combined_df = pd.concat([highest_5, lowest_5])

combined_df


Unnamed: 0,zip,location,commodity,price,basis,date
591,16926,"TRIPLE M FARMS - LEBANON, PA",Corn,$5.2,1.35 (est.),8/8/2025
596,16926,"FM BROWN SONS INC - BIRDSBORO, PA",Corn,$5.2,1.35 (est.),8/8/2025
601,17003,"TRIPLE M FARMS - LEBANON, PA",Corn,$5.19,1.35 (est.),8/8/2025
619,17007,"PERDUE FARMS - MARIETTA, PA",Corn,$5.19,1.35 (est.),8/8/2025
569,16910,"PENNSYLVANIA GRAIN PROCESSING - CLEARFIELD, PA",Corn,$5.19,1.35 (est.),8/8/2025
574,16910,"FM BROWN SONS INC - BIRDSBORO, PA",Corn,$5.19,1.35 (est.),8/8/2025
691,17527,"TRIPLE M FARMS - LEBANON, PA",Corn,$4.82,0.97 (est.),8/8/2025
686,17527,"FM BROWN SONS INC - BIRDSBORO, PA",Corn,$4.82,0.97 (est.),8/8/2025
224,15861,"TRIPLE M FARMS - LEBANON, PA",Corn,$4.81,0.97 (est.),8/8/2025
219,15861,"PENNSYLVANIA GRAIN PROCESSING - CLEARFIELD, PA",Corn,$4.81,0.97 (est.),8/8/2025


In [23]:
import requests
from bs4 import BeautifulSoup

# Deliberately NOT named ``url``: scrape_zip() reads the global ``url``, so
# reassigning it here would point any re-run of the scraper at the AAA page.
gas_url = "https://gasprices.aaa.com/?state=PA"

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0 Safari/537.36'
}

# A timeout keeps the cell from hanging forever on an unresponsive server.
response = requests.get(gas_url, headers=headers, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.text, 'html.parser')

table = soup.find('table', class_='table-mob')

last_value_beneath_current_avg = None

if table:
    # Search the table's rows directly: html.parser does not insert a missing
    # <tbody>, and rows without <td> cells (e.g. header rows) are skipped.
    for row in table.find_all('tr'):
        cells = row.find_all('td')
        if len(cells) >= 2 and cells[0].text.strip() == "Current Avg.":
            # The last column of the "Current Avg." row is used as the diesel price.
            last_value_beneath_current_avg = cells[-1].text.strip()
            break

if last_value_beneath_current_avg:
    print(f"Deisel price PA 'Current Avg.' is: {last_value_beneath_current_avg}")
else:
    # Fail with a clear message instead of crashing later on None.replace().
    raise ValueError("Could not find the 'Current Avg.' row or the table.")

AVG_MPG = 6  # assumed fuel economy, miles per gallon
print(f"Avg mpg={AVG_MPG}")

# Convert price string (e.g. "$4.021") to a float number
diesel_price = float(last_value_beneath_current_avg.replace('$', ''))

# Fuel cost one way and for the round trip ("and back") at AVG_MPG
for miles in (200, 100, 50):
    one_way_cost = diesel_price * miles / AVG_MPG
    print(f"{miles} miles = ${one_way_cost:.2f} and back = ${2 * one_way_cost:.2f}")


Deisel price PA 'Current Avg.' is: $4.021
Avg mpg=6
200 miles = $134.03 and back = $268.07
100 miles = $67.02 and back = $134.03
50 miles = $33.51 and back = $67.02


In [34]:
# Bid prices in $ per bushel; the spread is Upper_bid - Lower_bid.
Lower_bid = 3.74

Upper_bid = 4.49

# Distance in miles; doubled below, matching the "and back" figures printed earlier.
Miles = 200

Driver_pay = 50 # per hour
Hours = 5

Avg_mpg = 6  # same fuel-economy assumption as the diesel price cell

# Compute each quantity once instead of repeating the expression in every print.
fuel_cost = Miles * 2 / Avg_mpg * diesel_price
spread = Upper_bid - Lower_bid
shipping_cost = Driver_pay * Hours + fuel_cost

print(f"total deisel cost ${fuel_cost:.2f}")
# Formatted to cents so float noise (e.g. 0.7500000000000004) never shows.
print(f"spread per bushel ${spread:.2f}")
print(f"shipping cost $ {shipping_cost:.2f}")
if spread > 0:
    print(f"breakeven bushels {shipping_cost / spread:.2f}")
else:
    # No positive spread means no volume can cover the shipping cost.
    print("breakeven bushels n/a (spread is not positive)")
print(f"1000 bu net profit ${spread * 1000 - shipping_cost:.2f}")

total deisel cost $268.07
spread per bushel $0.75
shipping cost $ 518.07
breakeven bushels 690.76
1000 bu net profit $231.93
