# MAIN WORKFLOW
> -  ├── Initialize Application
> -  ├── TRY: Data Scraping Process
> -  │   ├── Build Data Table
> -  │   ├── Data Scraping Sequence
> -  │   │   ├── Browser Initialization
> -  │   │   └── Do-While Loop (Pagination Handler)
> -  │   │       ├── Search Navigation
> -  │   │       ├── While Loop (Page Processing)
> -  │   │       │       ├── Data Extraction
> -  │   │       │       ├── Data Processing
> -  │   │       │       └── Pagination Logic
> -  │   │       └── Loop Continuation
> -  │   └── Data Export
> -  │
> -  ├── CATCH: Exception Handling
> -  │   ├── Navigation Errors
> -  │   ├── Extraction Errors
> -  │   ├── Browser Errors
> -  │   └── System Errors
> -  │
> -  └── FINALLY: Cleanup & Reporting


# Sequence: Initialize_Application
> - ├── Build Data Table: Products_DataTable
> - │   ├── Columns:
> - │   │   ├─ ProductName (String)
> - │   │   ├─ Price (String)
> - │   │   ├─ Description (String)
> - │   │   ├─ Rating (String)
> - │   │   ├─ ProductURL (String)
> - │   │   └─ PageNumber (Int32)
> - │   └─ Output: ProductsDT
> - │
> - ├── Assign: Initialize_Variables
> - │   ├─ CurrentPage = 1
> - │   ├─ MaxPages = 5
> - │   ├─ NextPageExists = True
> - │   ├─ SearchQuery = "laptops"
> - │   ├─ BrowserInstance = Nothing
> - │   └─ ExtractedData = Nothing
> - │
> - └── Log Message
> -     └─ "Application initialized successfully"

In [11]:
from parsel import Selector
import requests
import json

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'none',
    'Sec-Fetch-User': '?1',
    'Priority': 'u=0, i',
}

In [12]:
def clean_prices(raw_price):
    if not raw_price: return 0.0
    
    raw_price = "".join(filter(str.isdigit,raw_price))
    cleaned_string = (
            raw_price.replace('₹','')
            .replace('$','')
            .replace(',','')
        )

    try:
        final_price = float(cleaned_string)
        return final_price
    except ValueError:
        return 0.0
    
def clean_data(raw_data):
    if not raw_data: return ''

    cleaned_data = raw_data.strip()

    return cleaned_data

def scrap(responce):
          
        data = Selector(responce.text)
        print(data)

        PRODUCT_TITLE = "//span[@id='productTitle']/text()"
        CURRENT_PRICE =   "//span[contains(@class, 'priceToPay')]//span[@class='a-price-whole']/text()"
        LIST_PRICE = "//span[contains(@class, 'basisPrice')]//span[@class = 'a-offscreen']/text()"
        SHIPING_INFO = "//div[@id = 'deliveryBlock_feature_div']/text()"
        OFFER_DETAILS = "//div[contains(@class, 'vsx__offers')]//div[@class = 'offers-items']/text()"
        
        title = clean_data(data.xpath(PRODUCT_TITLE).get())
        price = clean_prices(data.xpath(CURRENT_PRICE).get())
        listPrice = clean_prices(data.xpath(LIST_PRICE).get())
        shipinginfo =clean_data(data.xpath(SHIPING_INFO).get())
        couponinfo = clean_data(data.xpath(OFFER_DETAILS).get())


        product_details = {
            'ProductTitle' : title ,
            'BrandName' : '',
            'CompetitorSKU' : '',
            'CurrentPrice' : price,
            'ListPrice' : listPrice,
            'ShippingInfo' : shipinginfo,
            'CouponInfo' : couponinfo,
            'StockStatus' : '',
            'EstimatedDelivery' : '',
            'SoldBy' : '',
            'OverallRating' : '',
            'ReviewCount' : '',
            'ScrapeTimestamp' : '',
            'ProductURL' : '',
            'CompetitorWebsite' : '',
        }
        
        return product_details
    


    


In [13]:
links = 'https://amzn.in/d/fIaNEtn'
responce = requests.get(links,headers=HEADERS)


In [17]:
data = Selector(responce.text)
OFFER_DETAILS = "//div[@class = 'vsx__offers']/text()"
        
title = clean_data(data.xpath(OFFER_DETAILS).get())
print(title)




In [15]:
scrap(responce=responce)

<html lang="en-in" class="a-no-js" data-19ax5a9jf="dingo"><!-- sp:feature:head-start -->
<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8">
<!-- sp:end-feature:head-start -->
<!-- sp:feature:csm:head-open-part1 -->

<script type="text/javascript">var ue_t0=ue_t0||+new Date();</script>
<!-- sp:end-feature:csm:head-open-part1 -->
<!-- sp:feature:cs-optimization -->
<meta http-equiv="x-dns-prefetch-control" content="on">
<link rel="dns-prefetch" href="https://images-eu.ssl-images-amazon.com">
<link rel="dns-prefetch" href="https://m.media-amazon.com">
<link rel="dns-prefetch" href="https://completion.amazon.com">
<!-- sp:end-feature:cs-optimization -->
<!-- sp:feature:csm:head-open-part2 -->
<script type="text/javascript">
window.ue_ihb = (window.ue_ihb || window.ueinit || 0) + 1;
if (window.ue_ihb === 1) {

var ue_csm = window,
    ue_hob = +new Date();
(function(d){var e=d.ue=d.ue||{},f=Date.now||function(){return+new Date};e.d=function(b){return f()-(

{'ProductTitle': 'iQOO Neo 10 (Inferno Red, 12GB RAM, 256GB Storage) | Snapdragon 8s Gen 4 Processor & SuperComputing Chip Q1 | 7000 mAh Battery | Segment’s Highest 144 FPS Gaming Smartphone',
 'BrandName': '',
 'CompetitorSKU': '',
 'CurrentPrice': 35998.0,
 'ListPrice': 40999.0,
 'ShippingInfo': '',
 'CouponInfo': '',
 'StockStatus': '',
 'EstimatedDelivery': '',
 'SoldBy': '',
 'OverallRating': '',
 'ReviewCount': '',
 'ScrapeTimestamp': '',
 'ProductURL': '',
 'CompetitorWebsite': ''}

In [41]:
links = 'https://amzn.in/d/fIaNEtn'
responce = requests.get(links,headers=HEADERS)
data = Selector(responce.text)
# Targets the full text of each offer card in the carousel
OFFER_DETAILS = "//li[@class = 'a-carousel-card']//text()"
        
title = data.xpath(OFFER_DETAILS).get()
if title: print(title) 
else: print("empty")

 
