In [75]:
import requests
from bs4 import BeautifulSoup
from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime
from sqlalchemy.orm import sessionmaker, declarative_base
from datetime import datetime
from sqlalchemy.dialects.postgresql import ARRAY
from postgre_cred import database_url
import json
import random

In [86]:
# Load the store configuration. `alta` is later iterated as
# (category name, listing URL) pairs, and its keys double as table names.
# JSON is read as UTF-8 explicitly so the result does not depend on the
# platform's default locale encoding.
with open('links.json', 'r', encoding='utf-8') as f:
    stores = json.load(f)
alta = stores['alta']

In [77]:
# HTTP headers sent with scraping requests: a desktop Chrome User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}

In [88]:
def _parse_price(tag):
    """Convert a price element into a float, or None if that is not possible.

    Only digits and '.' are kept from the element's text, so currency symbols,
    spaces and thousands separators such as ',' are dropped.

    Args:
        tag: Element exposing a ``.text`` attribute, or None.

    Returns:
        The parsed price as a float, or None when ``tag`` is None, the text
        contains no digits, or the remaining characters are not a valid number.
    """
    if tag is None:
        return None
    cleaned = ''.join(c for c in tag.text.strip() if c.isdigit() or c == '.')
    try:
        return float(cleaned)
    except ValueError:
        return None


def get_details(items):
    """Extract product records from product-card elements.

    Args:
        items: Iterable of elements supporting ``select_one(css)`` and
            ``select(css)`` (e.g. BeautifulSoup tags), one per product card.

    Returns:
        A list of dicts with the keys ``name`` (str), ``images`` (list of str),
        ``old_price`` (float or None), ``current_price`` (float) and
        ``product_url`` (str). Cards without a title link or without a
        parsable current price are skipped rather than raising.
    """
    result = []
    for item in items:
        title = item.select_one(".product-title")
        if title is None:
            # Not a product card (or unexpected markup): nothing to record.
            continue

        product_url = title.get("href")
        if not product_url:
            continue

        # Both prices go through the same parser so they are stored as floats.
        current_price = _parse_price(item.select_one(".ty-price-num"))
        if current_price is None:
            continue
        old_price = _parse_price(item.select_one(".ty-list-price"))

        # Prefer "src"; fall back to "data-src". Images carrying neither
        # attribute are ignored.
        images = []
        for img in item.select(".ty-pict"):
            source = img.attrs.get("src", img.attrs.get("data-src"))
            if source is not None:
                images.append(source)

        result.append({
            "name": title.text.strip(),
            "images": images,
            "old_price": old_price,
            "current_price": current_price,
            "product_url": product_url})
    return result

In [83]:
def get_products(url, timeout=30):
    """Download one category listing page and return its parsed products.

    A random integer between 2234 and 4532 is appended to ``url`` before the
    request is made. (The recorded output shows URLs ending in
    ``?items_per_page=<n>``, so this presumably sets the page size — TODO
    confirm against links.json.)

    Args:
        url: Listing URL prefix to which the random number is appended.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The list of product dicts produced by ``get_details``.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx response (instead of silently parsing an error page and
            returning no products).
    """
    url = f'{url}{random.randint(2234, 4532)}'
    resp = requests.get(url, headers=headers, timeout=timeout)
    # Log the status before raising so failed requests still show up in output.
    print(url, '\n', resp.status_code)
    resp.raise_for_status()

    soup = BeautifulSoup(resp.text, "html.parser")
    items = soup.select('#pagination_contents > div.grid-list > div')
    return get_details(items)

In [80]:
# Declarative base that every dynamically generated category model inherits from.
Base = declarative_base()
# Engine built from the connection URL imported from postgre_cred.
engine = create_engine(database_url)

In [81]:
def _category_columns():
    """Return the column definitions for one category table.

    New Column objects are built on every call, so each generated model class
    receives its own set.
    """
    return {
        "item_id": Column(Integer, primary_key=True, autoincrement=True),
        "name": Column(String, nullable=False),
        "images": Column(ARRAY(String), nullable=True),
        "old_price": Column(Float, nullable=True),
        "current_price": Column(Float, nullable=False),
        "update_date": Column(DateTime, default=datetime.now, onupdate=datetime.now),
        "store_id": Column(String, nullable=False, default='alta'),
        "product_url": Column(String, nullable=False),
    }


# Maps category name -> generated model class (one table per category).
category_classes = {}

for category_name in alta.keys():
    # Never build a second class for a category that already has one.
    if category_name in category_classes:
        continue
    class_name = f"{category_name.capitalize()}Table"
    # The table is named after the category itself.
    class_attributes = {"__tablename__": category_name, **_category_columns()}
    category_classes[category_name] = type(class_name, (Base,), class_attributes)

# Issue CREATE TABLE for every model registered on Base.
Base.metadata.create_all(engine)

# One session, shared by the insert loop that follows.
Session = sessionmaker(bind=engine)
session = Session()

In [89]:
# Scrape every category and store its products, one transaction per category.
for category, url in alta.items():
    try:
        # Look up the model generated earlier for this category.
        model = category_classes[category]
        rows = [model(**product) for product in get_products(url)]
        session.add_all(rows)
        # A single commit covers all rows of the category.
        session.commit()
        print(f"Added products for {category}")
    except Exception as e:
        # Discard the category's pending rows and move on to the next one.
        session.rollback()
        print(f"Error occurred: {e}")
    finally:
        session.close()

https://alta.ge/computers-and-office/computer-accessories.html?items_per_page=4309 
 200
Added products for computer_accessories
https://alta.ge/smart-home-and-car-tools/smart-home.html?items_per_page=2268 
 200
Added products for smart_home
