## Frame & Lens ETL pipeline

In [None]:
# import required libraries
# web scarping librabris
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

# import manipulation libraries
import pandas as pd


# database libraries
from sqlalchemy import create_engine
import psycopg2

# evn
from dotenv import load_dotenv
import os 

import time

## Loading the page

In [None]:
url = "https://www.glasses.com/gl-us/eyeglasses"

# set up selenium webdriver
options = Options()
options.headless = False
service = Service(ChromeDriverManager().install())

# initialize the webdriver 
driver = webdriver.Chrome(service=service,options=options)

#load the webpage
driver.get(url)

#wait for the page to load 
time.sleep(20)


## Handling infinite scroll

In [None]:


scroll_pause_time = 10

# Get initial height
last_height = driver.execute_script("return document.body.scrollHeight")

while True:
    # Scroll down
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    
    # Wait for page to load
    time.sleep(scroll_pause_time)
    
    # Get new height
    new_height = driver.execute_script("return document.body.scrollHeight")
    
    if new_height == last_height:
        print("No more content to load")
        break
    
    print("Scrolling down for more content...")
    last_height = new_height

# After scrolling is complete
page_source = driver.page_source
print("Scraping successfully")

# Quit AFTER loop finishes
driver.quit()


In [None]:
soup = BeautifulSoup(page_source,"html.parser")
soup

In [None]:
glasses = soup.find_all("a", {'class':"product-tile"})
glasses


In [None]:
glasses_data = []

In [None]:
# extract data for each data
for glass in glasses:
    
    
    code_element = glass.find('div',{'class':'product-code'})
    product_code = code_element.text.strip() if code_element else 'N/A'
    
    
    brand_element = glass.find('div',{'class':'product-brand'})
    product_brand = brand_element.text.strip() if brand_element else 'N/A'
    
    price_element = glass.find(class_='product-price')
    price = price_element.get_text(strip=True) if price_element else 'N/A'

    
    
    glasses_data.append({
        'product_code':product_code,
        'product_brand':product_brand,
        'price':price
    })
    
    

In [None]:
glasses_data

In [None]:
glasses_df = pd.DataFrame(glasses_data)
glasses_df

In [43]:
glasses_df

Unnamed: 0,product_code,product_brand,price
0,RB6335 Optics,Ray-Ban,
1,OX3184 TinCup™,Oakley,
2,PO3292V,Persol,
3,RB7140 Optics,Ray-Ban,
4,OX8060 Overhead,Oakley,
...,...,...,...
73,RB2242V Wayfarer Oval Optics Change,Ray-Ban,
74,RB6645 Explorer IV Optics,Ray-Ban,
75,OX8046 Airdrop™,Oakley,
76,TF2232U,Tiffany,


In [44]:
glasses_df.to_csv("glasses_data.csv", index=False)