In [85]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [86]:
# Chrome flags for running without a display (e.g. in a container or on CI).
chrome_options = webdriver.ChromeOptions()
for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
    chrome_options.add_argument(flag)

In [87]:
# Shared browser session used by the scraping function below; webdriver_manager
# resolves the chromedriver binary that the Service wraps.
driver = webdriver.Chrome(
    options=chrome_options,
    service=Service(ChromeDriverManager().install()),
)

In [88]:
def _element_text_or(by, value, default):
    """Return the ``.text`` of the first element matching ``(by, value)``, or ``default``.

    Any lookup failure yields ``default`` so that an optional page section does
    not abort the whole scrape.
    """
    try:
        return driver.find_element(by, value).text
    except Exception:  # not a bare except: KeyboardInterrupt/SystemExit still propagate
        return default


def get_amazon_product_data(url, timeout=10):
    """Scrape title, price, feature bullets and description from a product page.

    Uses the module-level ``driver``.

    Args:
        url: Address of the product page to load.
        timeout: Maximum number of seconds to wait for the ``productTitle``
            element to appear before giving up.

    Returns:
        On success, a dict with the keys ``"Title"``, ``"Price"``,
        ``"Features"`` and ``"Description"``; the last three fall back to an
        "... not available" placeholder when the page lacks them.
        If the title cannot be located (or anything else fails after the page
        load), a dict ``{"Error": <message>}``.
    """
    driver.get(url)
    try:
        # Explicit wait instead of a fixed sleep: returns as soon as the title
        # is in the DOM, and tolerates pages slower than the old 3-second pause.
        title_element = WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.ID, 'productTitle'))
        )
        title = title_element.text

        # Read the price in a single script call; a missing node yields null
        # rather than relying on a JavaScript TypeError to signal absence.
        try:
            price_text = driver.execute_script(
                'var el = document.querySelector(".a-price.a-text-price span.a-offscreen");'
                ' return el ? el.textContent : null;'
            )
        except Exception:
            price_text = None
        price = price_text.strip() if price_text else "Price not available"

        features = _element_text_or(By.ID, 'feature-bullets', "Features not available")
        description = _element_text_or(By.ID, 'productDescription', "Description not available")

        return {
            "Title": title,
            "Price": price,
            "Features": features,
            "Description": description
        }

    except Exception as e:
        return {"Error": str(e)}


In [89]:
# Scrape one product page and show the result.
url = 'https://www.amazon.com/dp/B08SVZ775L'
try:
    product_data = get_amazon_product_data(url)
    print(product_data)
finally:
    # Always shut the browser down, even if loading the page raised, so no
    # headless Chrome process is left running.
    driver.quit()

{'Title': 'Amazon Fire TV 50" 4-Series, 4K UHD smart TV with Alexa Voice Remote Enhanced, stream live TV without cable', 'Price': '$449.99', 'Features': 'Bring movies and shows to life with support for vivid 4K Ultra HD, HDR 10, HLG, and Dolby Digital Plus.\n4K Ultra HD, HDR 10, and HLG deliver a clearer and more vibrant picture with brighter colors compared to 1080p Full HD.\nPress and ask Alexa to easily find, launch, and control your content, or check the weather, sports scores, and more.\nStream over 1.5 million movies and TV episodes with subscriptions to Netflix, Prime Video, Disney+, and more. Subscription fees may apply.\nWatch live and free TV, play video games, and stream music.\nFire TV adds new Alexa skills, features, smart home capabilities, and voice functionality all the time.\nUse the 4 HDMI inputs to connect all your gaming, cable and audio equipment.', 'Description': 'Description not available'}


In [90]:
from kafka import KafkaProducer
import json

# Producer for the local broker; each record is serialised as UTF-8 encoded JSON.
producer = KafkaProducer(
    value_serializer=lambda record: json.dumps(record).encode('utf-8'),
    bootstrap_servers='localhost:9092',
)

def send_to_kafka(product_data, topic='product-prices', timeout=10):
    """Publish one record with the module-level ``producer`` and confirm delivery.

    Args:
        product_data: JSON-serialisable record to publish.
        topic: Destination topic name.
        timeout: Seconds to wait for the delivery result.

    Raises:
        Whatever the future returned by ``producer.send`` raises when the
        record could not be delivered, so a failure is not reported as success.
    """
    delivery = producer.send(topic, product_data)
    producer.flush()
    # send() is asynchronous: the outcome is only visible through the returned
    # future, so resolve it instead of discarding it.
    delivery.get(timeout=timeout)

# get_amazon_product_data returns {"Error": ...} when scraping fails; do not
# publish that to the price topic as if it were a product record.
if "Error" in product_data:
    print(f"Not sent to Kafka; scraping failed: {product_data['Error']}")
else:
    send_to_kafka(product_data)
    print("Data sent to Kafka!")

Data sent to Kafka!


In [91]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *

# Reuse the active Spark session if one exists, otherwise start a new one.
spark = (
    SparkSession.builder
    .appName("Ecommerce Price Trackings")
    .getOrCreate()
)

# Streaming source over the topic the producer cell writes to.
# startingOffsets=earliest: the record is published before this query starts,
# and the streaming default ("latest") would only pick up later messages.
kafka_df = (
    spark.readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", "localhost:9092")
    .option("subscribe", "product-prices")
    .option("startingOffsets", "earliest")
    .load()
)

# Decode the Kafka value bytes as a JSON string and parse it into a struct.
# The field names must match the keys of the published dict (Title, Price,
# Features, Description); fields missing from the JSON are parsed as null.
product_df = (
    kafka_df
    .selectExpr("CAST(value AS STRING)")
    .select(
        from_json(
            "value",
            "struct<Title: string, Price: string, Features: string, Description: string>",
        ).alias("product")
    )
)

# Print the parsed records to the console sink.
query = (
    product_df.select("product.*")
    .writeStream
    .outputMode("append")
    .format("console")
    .start()
)
try:
    # Bounded wait (seconds): an unbounded awaitTermination() never returns for
    # a healthy stream, so the cells below could not run in a Run All.
    query.awaitTermination(60)
finally:
    # Stop the streaming query so it does not keep running in the background.
    query.stop()

In [92]:
from pymongo import MongoClient

# Handles for the local MongoDB instance: database "ecommerce", collection "productprice".
client = MongoClient('mongodb://localhost:27017/')
db = client.ecommerce
collection = db.productprice

# get_amazon_product_data returns {"Error": ...} when scraping fails; such a
# record has no "Price" and must not be stored as a price observation.
if "Error" in product_data:
    print(f"Not inserted; scraping failed: {product_data['Error']}")
else:
    # insert_one adds an "_id" key to the dict it receives. Insert a copy so
    # product_data stays JSON-serialisable and this cell can be re-run without
    # a duplicate-key error.
    collection.insert_one(dict(product_data))
    print("Data inserted successfully!")


Data inserted successfully!


In [93]:
# List every stored observation: name, price, then a 30-dash separator line.
for product in collection.find():
    print(
        f"Product Name: {product.get('Title')}",
        f"Price: {product.get('Price')}",
        "-" * 30,
        sep="\n",
    )

Product Name: Amazon Fire TV 50" 4-Series, 4K UHD smart TV with Alexa Voice Remote Enhanced, stream live TV without cable
Price: $149.99
------------------------------
Product Name: Amazon Fire TV 50" 4-Series, 4K UHD smart TV with Alexa Voice Remote Enhanced, stream live TV without cable
Price: $449.99
------------------------------


In [97]:
import re

def parse_price(price_text):
    """Return the dollar amount found in ``price_text`` as a float, or None.

    Accepts thousands separators ("$1,299.99" -> 1299.99). Returns None when
    ``price_text`` is not a string or contains no "$<number>" pattern, e.g.
    the "Price not available" placeholder.
    """
    if not isinstance(price_text, str):
        return None
    match = re.search(r'\$(\d[\d,]*(?:\.\d+)?)', price_text)
    if match is None:
        return None
    return float(match.group(1).replace(',', ''))


# Last parsed price seen per product title, so that observations of different
# products are never compared with each other.
last_price_by_product = {}

# NOTE(review): this relies on find() returning documents in insertion order —
# confirm, or sort explicitly if the comparison must be chronological.
for product in collection.find():
    product_name = product.get('Title')
    current_price = product.get('Price')

    current_amount = parse_price(current_price)
    if current_amount is None:
        # No usable price in this document: skip it instead of reusing the
        # amount parsed from a previous document.
        continue

    previous_price = last_price_by_product.get(product_name)
    if previous_price is not None:
        # Round to cents so binary floating-point noise is not reported as a change.
        pricechange = round(current_amount - previous_price, 2)
        if pricechange != 0:
            alert_subject = f"Price Alert for {product_name}"
            alert_body = f"The price for {product_name} has changed. Previous Price: {previous_price}, Current Price: {current_price}. Change: {pricechange}"
            print(alert_subject)
            print(alert_body)
    last_price_by_product[product_name] = current_amount

Price Alert for Amazon Fire TV 50" 4-Series, 4K UHD smart TV with Alexa Voice Remote Enhanced, stream live TV without cable
The price for Amazon Fire TV 50" 4-Series, 4K UHD smart TV with Alexa Voice Remote Enhanced, stream live TV without cable has changed. Previous Price: 149.99, Current Price: $449.99. Change: 300.0


In [98]:
import streamlit as st
import pandas as pd

# Build the table from the stored observations (`data` was never defined in
# this notebook). The "_id" field is excluded by the projection so that only
# the scraped fields are displayed.
df = pd.DataFrame(list(collection.find({}, {"_id": 0})))
st.title('E-commerce Price Tracker')
st.write('Tracked Products and Prices:')
st.dataframe(df)

In [None]:
# NOTE(review): this hands ipykernel_launcher.py to `streamlit run` through a
# machine-specific absolute path. `streamlit run` expects the path of the app
# script itself — TODO: point this at a .py file containing the st.* calls.
! streamlit run C:\Users\dagas\anaconda3\Lib\site-packages\ipykernel_launcher.py