In [1]:

import pandas as pd  
import requests as req
from bs4 import BeautifulSoup
import re
import os

from selenium import webdriver 
from selenium.webdriver.common.by import By
from selenium.webdriver.edge.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

import tkinter as tk
from tkinter import Toplevel, ttk
from PIL import Image, ImageTk

def results(dataframe, ideal_price, minimum_price, maximum_price):
    """Open a window that lists *dataframe* and the three per-100g price figures.

    Args:
        dataframe: pandas DataFrame; each column becomes a table column and
            each row a table line.
        ideal_price: value shown as "Ideal Price for 100g".
        minimum_price: value shown as "Minimum Price for 100g".
        maximum_price: value shown as "Maximum Price for 100g".

    The three prices are formatted with two decimals, so they must be numeric.
    """
    background = "#f5e4da"
    accent = "#871F78"
    summary_font = ("Helvetica", 12, "italic")

    # Secondary window that hosts the table and the summary.
    window = Toplevel()
    window.title("Scraping Result")
    window.geometry("1100x500")
    window.configure(bg=background)

    table_frame = tk.Frame(window, bg=background)
    table_frame.pack(pady=10)

    # Table look and feel (applies to the ttk "Treeview" style).
    theme = ttk.Style()
    theme.theme_use('clam')
    theme.configure(
        "Treeview",
        background="#ffffff",
        foreground="black",
        rowheight=25,
        font=("Helvetica", 10),
    )
    theme.configure(
        "Treeview.Heading",
        background=accent,
        foreground="white",
        font=("Helvetica", 12, "bold"),
    )
    theme.map("Treeview", background=[('selected', '#d5a6d6'), ('', '#ffffff')])

    column_names = list(dataframe.columns)
    table = ttk.Treeview(table_frame, columns=column_names, show='headings')
    table.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

    for name in column_names:
        table.heading(name, text=name)
        table.column(name, anchor="center")

    # One table line per DataFrame row, in DataFrame order.
    for _, record in dataframe.iterrows():
        table.insert('', 'end', values=list(record))

    vertical_bar = ttk.Scrollbar(table_frame, orient="vertical", command=table.yview)
    vertical_bar.pack(side=tk.RIGHT, fill=tk.Y)
    table.configure(yscroll=vertical_bar.set)

    footer = tk.Frame(window, bg=background)
    footer.pack(pady=10, fill=tk.X)

    # Three summary lines, stacked in this order.
    summary = (
        ("Ideal", ideal_price),
        ("Minimum", minimum_price),
        ("Maximum", maximum_price),
    )
    for caption, amount in summary:
        tk.Label(
            footer,
            text=f'{caption} Price for 100g: ${amount:.2f}',
            bg=background,
            font=summary_font,
            fg=accent,
        ).pack(pady=5)

    dismiss = tk.Button(
        footer,
        text="Close",
        command=window.destroy,
        bg=accent,
        fg="white",
        font=("Helvetica", 10, "bold"),
    )
    dismiss.pack(pady=10)


def scrapeWithBS4(product):
    """Scrape ethicalsuperstore.com search results for *product* and display them.

    Fetches the search page, reads the name, brand and price attributes of at
    most 20 product links, derives each product's weight in grams from its
    name, computes the price per 100 g, and passes the table together with
    the mean / min / max per-100g price to ``results``.

    Args:
        product: search term typed by the user.

    Raises:
        requests.exceptions.RequestException: if the request fails or takes
            longer than the timeout.
    """
    products_max = 20
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0'}

    # `params` lets requests URL-encode the search term (spaces, '&', ...).
    # The User-Agent header was previously built but never sent, and without
    # a timeout a stalled server would freeze the GUI callback indefinitely.
    response = req.get(
        "https://www.ethicalsuperstore.com/search/",
        params={"search": product},
        headers=headers,
        timeout=30,
    )
    page = BeautifulSoup(response.content, 'html.parser')

    # Collect product links from every listing grid, then cap the total.
    articles = []
    for grid in page.find_all('div', {'class': 'grid cat-listing-wrapper'}):
        articles.extend(
            grid.find_all('a', {'class': 'cat-list__thumb media__img view_product_link Z'})
        )
    articles = articles[:products_max]

    d1 = pd.DataFrame({
        'Product name': [a.get('data-product-name') for a in articles],
        'Product brand': [a.get('data-product-brand') for a in articles],
        'Price': [a.get('data-product-price') for a in articles],
    })

    def extract_weight(title):
        # A link without the name attribute yields None; treat it as "no weight"
        # instead of letting re.search raise TypeError.
        if not isinstance(title, str):
            return None
        match = re.search(r'(\d+)g', title)
        return int(match.group(1)) if match else None

    d1['Weight (grams)'] = pd.to_numeric(
        d1['Product name'].apply(extract_weight), errors='coerce'
    )
    # A missing or malformed price becomes NaN rather than aborting the scrape.
    d1['Price'] = pd.to_numeric(d1['Price'], errors='coerce')
    # Only rows with both a weight and a price can yield a per-100g price.
    d1 = d1.dropna(subset=['Weight (grams)', 'Price'])
    d1['100g Price'] = ((d1['Price'] / d1['Weight (grams)']) * 100).round(2)

    ideal_price = d1['100g Price'].mean()
    minimum_price = d1['100g Price'].min()
    maximum_price = d1['100g Price'].max()

    results(d1, ideal_price, minimum_price, maximum_price)

def scrapeWithSelenium(URL):
    """Search Target with Selenium (Edge) and display per-100g prices.

    The search term is read from the module-level ``product_entry`` widget,
    URL-encoded and appended to *URL*. The page load is retried up to three
    times on timeout. Up to 20 product cards are read; unit prices of the
    form ``$X/ounce`` are converted to a price per 100 g, other unit-price
    texts are kept as-is and dropped later if they are not numeric. The table
    and its mean / min / max per-100g price are passed to ``results``.

    Args:
        URL: search URL prefix; the encoded search term is appended to it.

    Returns:
        None. Returns early, without opening a result window, when every
        attempt timed out.
    """
    # Function-scope imports: neither name is imported at the top of the file.
    # Without TimeoutException in scope the `except` clause itself raised
    # NameError on the first timeout.
    from urllib.parse import quote_plus
    from selenium.common.exceptions import TimeoutException

    products_max = 20
    max_retries = 3
    wait_seconds = 40
    link_selector = "a[aria-label][href]"

    options = Options()
    options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0')

    product = product_entry.get()
    driver = webdriver.Edge(options=options)

    rows = []
    try:
        for attempt in range(1, max_retries + 1):
            try:
                print(f"Searching for {product} in Target...")
                driver.get(URL + quote_plus(product))

                WebDriverWait(driver, wait_seconds).until(
                    lambda d: d.execute_script('return document.readyState') == 'complete'
                )
                products_grid = WebDriverWait(driver, wait_seconds).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "div[data-test='product-grid']"))
                )
                # Wait until at least 9 product links are present in the grid.
                WebDriverWait(driver, wait_seconds).until(
                    lambda d: len(products_grid.find_elements(By.CSS_SELECTOR, link_selector)) >= 9
                )

                time.sleep(5)

                products_elements = products_grid.find_elements(By.CSS_SELECTOR, link_selector)
                products_brand = products_grid.find_elements(By.CSS_SELECTOR, "a[data-test='@web/ProductCard/ProductCardBrandAndRibbonMessage/brand']")
                products_prices = products_grid.find_elements(By.CSS_SELECTOR, "span[data-test='current-price']")
                unit_prices = products_grid.find_elements(By.CSS_SELECTOR, "span[data-test='unit-price']")

                # Start from a clean list on every attempt so a retry can
                # never duplicate rows collected by a failed attempt.
                rows = []
                cards = zip(products_elements, products_brand, products_prices, unit_prices)
                for element, brand, prix, unit_price in cards:
                    if len(rows) >= products_max:
                        break

                    unit_price_text = unit_price.text
                    match = re.search(r"\$(\d*\.?\d+)/ounce", unit_price_text)
                    if match:
                        # 1 ounce = 28.35 g, so scale the per-ounce price to 100 g.
                        price_per_100g = (float(match.group(1)) / 28.35) * 100
                        unit_value = f"{price_per_100g:.2f}"
                    else:
                        unit_value = unit_price_text

                    rows.append((
                        element.get_attribute("aria-label"),
                        brand.text,
                        prix.text,
                        unit_value,
                    ))
                break

            except TimeoutException:
                print(f"Timeout occurred. Retrying... ({attempt}/{max_retries})")
        else:
            # Every attempt timed out.
            print("Failed to load the page after multiple attempts.")
            return
    finally:
        # Always shut the browser down exactly once: previously the window was
        # closed between retries (breaking the next driver.get) and was never
        # closed at all after a successful scrape.
        driver.quit()

    d2 = pd.DataFrame(rows, columns=["Product name", "Brand", "Price", "100g Price"])

    # Unit prices that were not "$X/ounce" are non-numeric text; drop those rows.
    d2['100g Price'] = pd.to_numeric(d2['100g Price'], errors='coerce')
    d2 = d2.dropna(subset=['100g Price'])
    ideal_price = d2['100g Price'].mean()
    minimum_price = d2['100g Price'].min()
    maximum_price = d2['100g Price'].max()

    results(d2, ideal_price, minimum_price, maximum_price)

# ---------------------------------------------------------------------------
# Main window: banner image, product entry field and the two scrape buttons.
# The widgets are packed in creation order, so statement order defines layout.
# ---------------------------------------------------------------------------
root = tk.Tk()
root.title("Price Generator")
root.geometry("750x600")
root.configure(bg="#f5e4da")  

# Banner image; the relative path is resolved against the current working
# directory.
#image_path = r"C:\Users\21355\GoMyCode\banniere.png" 
image_path = 'banniere.png'

root.photo = tk.PhotoImage(file=image_path)
image_label = tk.Label(root, image=root.photo, bg="#f5e4da")
image_label.pack(pady=10)
# Second attribute pointing at the same PhotoImage object -- presumably kept
# so the image is not garbage-collected while displayed (redundant with
# root.photo; TODO confirm it can be removed).
root.image_ref = root.photo

input_frame = tk.Frame(root, bg="#f5e4da")
input_frame.pack(pady=10)

tk.Label(input_frame, text="Enter Product Name:", bg="#f5e4da", font=("Helvetica", 12, "italic"), fg="#871F78").pack(pady=5)
# Module-level entry widget: scrapeWithSelenium reads it directly via
# product_entry.get(), and the BeautifulSoup button passes its text as argument.
product_entry = tk.Entry(input_frame, width=30, font=("Helvetica", 12))
product_entry.pack(pady=5)

warning_label = tk.Label(input_frame, text="*Please enter only products that have a weight.", bg="#f5e4da", font=("Helvetica", 10, "italic"), fg="red")
warning_label.pack(pady=5)

button_frame = tk.Frame(root, bg="#f5e4da")
button_frame.pack(pady=10)

# The lambdas defer the call until the button is clicked, so the entry text is
# read at click time rather than at widget-creation time.
bs4_button = tk.Button(button_frame, text="Scrape with BeautifulSoup", command=lambda: scrapeWithBS4(product_entry.get()), bg="#871F78", fg="white", font=("Helvetica", 10, "bold"))
bs4_button.pack(side=tk.LEFT, padx=10)

# The Selenium scraper receives only the search URL prefix and appends the
# search term itself.
selenium_button = tk.Button(button_frame, text="Scrape with Selenium", command=lambda: scrapeWithSelenium("https://www.target.com/s?searchTerm="), bg="#871F78", fg="white", font=("Helvetica", 10, "bold"))
selenium_button.pack(side=tk.LEFT, padx=10)

# Start the Tk event loop; this call blocks until the main window is closed.
root.mainloop()



Searching for jam in Target...
