In [7]:
### Library Imports

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import time

In [12]:
### Gather User Input for Search Parameters

keyword = input("What frame would you like to search for: ")

formatted_keyword = keyword.replace(" ", "+")

# eBay URL
url = "https://www.ebay.com/sch/i.html?_from=R40&_nkw=oakley+" + formatted_keyword + "&_sacat=0&rt=nc&LH_Sold=1&LH_Complete=1"
print(url)


https://www.ebay.com/sch/i.html?_from=R40&_nkw=oakley+M+Frame&_sacat=0&rt=nc&LH_Sold=1&LH_Complete=1


In [5]:
### Complete eBay Scraper -> title, condition, price, sold_date, num_bids, buy_it_now, best_offer

# Set up Chrome WebDriver
driver = webdriver.Chrome(executable_path="/Users/mburley/chromedriver/chromedriver-mac-x64/chromedriver")  # Replace with the path to your chromedriver

# Open the eBay page
driver.get(url)

# Wait for the content to load
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.CLASS_NAME, "s-item__info")))

data = []
prev_page_number = 0

while True:
    # Get the page source after waiting for dynamic content
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'html5lib')

    print(f"Extracting page: {driver.current_url}")

    for product in soup.select(".s-item"):
        # Title of the Item
        title_element = product.select_one(".s-item__title span")
        title = title_element.text.strip() if title_element else None

        # Condition of each Item (Pre-Owned or Brand New)
        condition_element = product.select_one(".s-item__subtitle")
        condition = condition_element.text.strip() if condition_element else None

        # Price of each Item
        price_element = product.select_one(".s-item__price")
        price = price_element.text.strip() if price_element else None

        # Date each Item was Sold
        sold_date_element = product.select_one(".s-item__title--tag")
        sold_date = sold_date_element.text.strip() if sold_date_element else None
        #print(sold_date)

        # Number of Bids on Each Item if it Sold via Auction
        bids_element = product.select_one(".s-item__bids")
        num_bids = bids_element.text.strip() if bids_element else None

        buy_it_now_element = product.select_one(".s-item__purchase-options")
        buy_it_now_text = buy_it_now_element.text.strip() if buy_it_now_element else None
        buy_it_now_condition = "Buy It Now" in buy_it_now_text or "or Best Offer" in buy_it_now_text if buy_it_now_text else False

        best_offer_element = product.select_one(".s-item__purchase-options")
        best_offer_text = best_offer_element.text.strip() if best_offer_element else None
        best_offer = "Yes" if best_offer_text and "Best offer accepted" in best_offer_text else "No"

        buy_it_now = "Yes" if buy_it_now_condition else "No"

        data.append({
            "title": title,
            "condition": condition,
            "price": price,
            "sold_date": sold_date,
            "num_bids": num_bids,
            "buy_it_now": buy_it_now,
            "best_offer": best_offer
        })

    try:
        # Click the next page button
        driver.find_element(By.CLASS_NAME, 'pagination__next').click()

        # Wait for the new page to load
        time.sleep(3)  # Adjust the sleep duration as needed

        # Extract the current page number
        current_page_number = int(driver.current_url.split("_pgn=")[1].split("&")[0])

        # Break if the page number does not change
        if current_page_number == prev_page_number:
            break

        prev_page_number = current_page_number

    except NoSuchElementException:
        # Break if no next page button is found
        break

# Save to CSV
pd.DataFrame(data=data).to_csv("ebay_products_selenium.csv", index=False)

# Close the WebDriver
driver.quit()


Extracting page: https://www.ebay.com/sch/i.html?_from=R40&_nkw=oakley+M+Frame&_sacat=0&rt=nc&LH_Sold=1&LH_Complete=1
Extracting page: https://www.ebay.com/sch/i.html?_from=R40&_nkw=oakley+M+Frame&_sacat=0&rt=nc&LH_Sold=1&LH_Complete=1&_pgn=2
Extracting page: https://www.ebay.com/sch/i.html?_from=R40&_nkw=oakley+M+Frame&_sacat=0&rt=nc&LH_Sold=1&LH_Complete=1&_pgn=3
Extracting page: https://www.ebay.com/sch/i.html?_from=R40&_nkw=oakley+M+Frame&_sacat=0&rt=nc&LH_Sold=1&LH_Complete=1&_pgn=4
Extracting page: https://www.ebay.com/sch/i.html?_from=R40&_nkw=oakley+M+Frame&_sacat=0&rt=nc&LH_Sold=1&LH_Complete=1&_pgn=5
Extracting page: https://www.ebay.com/sch/i.html?_from=R40&_nkw=oakley+M+Frame&_sacat=0&rt=nc&LH_Sold=1&LH_Complete=1&_pgn=6
Extracting page: https://www.ebay.com/sch/i.html?_from=R40&_nkw=oakley+M+Frame&_sacat=0&rt=nc&LH_Sold=1&LH_Complete=1&_pgn=7
Extracting page: https://www.ebay.com/sch/i.html?_from=R40&_nkw=oakley+M+Frame&_sacat=0&rt=nc&LH_Sold=1&LH_Complete=1&_pgn=8
Extract

In [13]:
### Load in and Clean Scraped Data

data = pd.read_csv("ebay_products_selenium.csv")

# Convert the 'sold_date' column to string
data['sold_date'] = data['sold_date'].astype(pd.StringDtype())

# Covert the sold_date col to the correct format
data['sold_date'] = data['sold_date'].replace(['Sold', 'Item'], '', regex=True)
data['sold_date'] = pd.to_datetime(data['sold_date'])

data = data.dropna(subset=['sold_date'])

# Remove '$' and remove rows with a price < 25
data['price'] = data['price'].astype(pd.StringDtype())
data['price'] = pd.to_numeric(data['price'].str.replace('[^\d.]', '', regex=True), errors='coerce')
data = data[data['price'] >= 30]

# Ensure the 'num_bids' column is treated as strings and 'bids' label is removed
data['num_bids'] = data['num_bids'].astype(str)
data['num_bids'] = pd.to_numeric(data['num_bids'].str.replace(r'\D', ''), errors='coerce').astype(pd.Int64Dtype())

# Convert All Other Cols to Strings
data['title'] = data['title'].astype(pd.StringDtype())
data['condition'] = data['condition'].astype(pd.StringDtype())
data['buy_it_now'] = data['buy_it_now'].astype(pd.StringDtype())
data['best_offer'] = data['best_offer'].astype(pd.StringDtype())

data.dtypes


title                 string
condition             string
price                float64
sold_date     datetime64[ns]
num_bids               Int64
buy_it_now            string
best_offer            string
dtype: object

In [14]:
### Drop all rows that do not contained the stored keyword

# All Lowercase
keyword_lower = keyword.lower()
print(keyword_lower)

# Add a hyphen for any space in the keyword
other_keyword = keyword.replace(" ", "-")
print(other_keyword)

desired_values = [keyword, keyword_lower, other_keyword]
print(desired_values)

# Use the contains method to filter rows
condition = data['title'].str.contains('|'.join(desired_values), case=False)
filtered_data = data[condition]

filtered_data.head(15)

m frame
M-Frame
['M Frame', 'm frame', 'M-Frame']


Unnamed: 0,title,condition,price,sold_date,num_bids,buy_it_now,best_offer
2,Oakley M Frame Sunglasses,Pre-Owned,72.85,2023-12-10,12.0,No,No
4,Vintage OAKLEY M Frame Sunglasses MADE IN USA ...,Pre-Owned,94.99,2023-12-10,,Yes,No
5,Oakley M Frame Men's Wrap Sunglasses - vintage...,Pre-Owned,87.0,2023-12-10,22.0,No,No
6,New ListingVINTAGE 90s OAKLEY Sunglasses Bundl...,Pre-Owned,149.0,2023-12-10,1.0,No,No
7,"Oakley ""New"" M frame blue",Pre-Owned,75.0,2023-12-10,1.0,No,No
8,Oakley M Frame Sunglasses,Pre-Owned,41.0,2023-12-10,10.0,No,No
9,RARE COLOR Oakley M Frames Pro Polished Alumin...,Pre-Owned,150.0,2023-12-10,,No,Yes
11,Oakley SI Ballistic M Frame 2.0 Strike Safety ...,Pre-Owned,49.99,2023-12-10,,Yes,No
13,New ListingVINTAGE 90s OAKLEY Sunglasses Bundl...,Pre-Owned,149.0,2023-12-09,,Yes,No
14,OAKLEY M-FRAME OO 11-162 Matte Black / Grey Sh...,Brand New,89.99,2023-12-09,,Yes,No


In [17]:
### Get auction data

auction_data = filtered_data.dropna()
auction_data

Unnamed: 0,title,condition,price,sold_date,num_bids,buy_it_now,best_offer
2,Oakley M Frame Sunglasses,Pre-Owned,72.85,2023-12-10,12,No,No
5,Oakley M Frame Men's Wrap Sunglasses - vintage...,Pre-Owned,87.00,2023-12-10,22,No,No
6,New ListingVINTAGE 90s OAKLEY Sunglasses Bundl...,Pre-Owned,149.00,2023-12-10,1,No,No
7,"Oakley ""New"" M frame blue",Pre-Owned,75.00,2023-12-10,1,No,No
8,Oakley M Frame Sunglasses,Pre-Owned,41.00,2023-12-10,10,No,No
...,...,...,...,...,...,...,...
891,Vintage Oakley Pro M Frame RED w/ 4 Total Lens...,Pre-Owned,66.00,2023-09-14,29,No,No
893,Oakley M Frame Cobalt 2 Gen. with Red iridium ...,Pre-Owned,69.00,2023-09-14,1,No,No
895,Oakley M Frame,Pre-Owned,50.00,2023-09-14,1,No,No
902,NEW OAKLEY MILITARY SI M-FRAME 2.0 SUNGLASSES ...,Brand New,76.00,2023-09-13,13,No,No


In [18]:
### Get Buy It Now data

# Select rows with 'Yes' in either 'buy_it_now' or 'best_offer'
buy_it_now_data = filtered_data.loc[(filtered_data['buy_it_now'] == 'Yes') | (filtered_data['best_offer'] == 'Yes')]
buy_it_now_data

Unnamed: 0,title,condition,price,sold_date,num_bids,buy_it_now,best_offer
4,Vintage OAKLEY M Frame Sunglasses MADE IN USA ...,Pre-Owned,94.99,2023-12-10,,Yes,No
9,RARE COLOR Oakley M Frames Pro Polished Alumin...,Pre-Owned,150.00,2023-12-10,,No,Yes
11,Oakley SI Ballistic M Frame 2.0 Strike Safety ...,Pre-Owned,49.99,2023-12-10,,Yes,No
13,New ListingVINTAGE 90s OAKLEY Sunglasses Bundl...,Pre-Owned,149.00,2023-12-09,,Yes,No
14,OAKLEY M-FRAME OO 11-162 Matte Black / Grey Sh...,Brand New,89.99,2023-12-09,,Yes,No
...,...,...,...,...,...,...,...
909,M Frames 2.0 Oakley Matte Black Sunglasses Fra...,Pre-Owned,52.00,2023-09-12,,Yes,No
910,Oakley M Frame VR28 Iridium Vented Strike Lens...,Pre-Owned,59.99,2023-09-12,,No,Yes
911,Oakley M Frame Sunglasses 2 Pair,Pre-Owned,44.99,2023-09-12,,Yes,No
913,Oakley SI Ballistic M Frame 2.0 Strike II,Brand New,80.00,2023-09-12,,Yes,No


In [31]:
### Single Frame Analysis

## Compute Avg Price
avg_price = buy_it_now_data['price'].mean()
print("Avg Sold Price for all pairs is: $" + str(avg_price))

## Compute Avg Price based on condition
cond_avg_price = buy_it_now_data.groupby('condition')['price'].mean()
cond_avg_price

# Assuming 'price' is the column containing the prices
avg_price_by_month = buy_it_now_data.groupby(buy_it_now_data['sold_date'].dt.strftime('%B'))['price'].mean().sort_values(ascending = False)
avg_price_by_month



Avg Sold Price for all pairs is: $117.41537777777765


sold_date
September    138.631364
October      124.231087
December     112.187273
November      97.248836
May           74.950000
Name: price, dtype: float64