# ShopList

In [None]:
from components.dev.shop.shop_product_card_list.list_scraper import ShopListScrapMachine
from components.dev.utils.browser_controller import PwBrowserController
# Build the browser -> page controller -> page chain with the project's
# PwBrowserController helper (top-level await: this is a notebook cell).
browser_controller = await PwBrowserController().create()
page_controller = await browser_controller.create_page_controller()
page = await page_controller.get_page()


# Open the Seven Store adidas brand listing; later cells read from `page`.
await page.goto('https://www.sevenstore.com/brands/adidas/')

In [None]:
import json
from typing import List, Dict, Any
from playwright.async_api import Page
from components.dev.shop.shop_product_card_list.schema import ListConfig, ListScrapData
from bs4 import BeautifulSoup, Tag
from components.dev.shop.shop_list.parent_class import PwShopList

class PwSevenStoreList(PwShopList):
    """Product-list scraper for Seven Store listing pages.

    Supplies the shop-specific pieces on top of ``PwShopList``: the shop
    name, the listing configuration, extraction of the product-card HTML
    from a page, and conversion of one card into a ``ListScrapData`` record.
    """

    def __name__(self) -> str:
        """Return the shop identifier written into ``ListScrapData.shop_name``."""
        return "seven_store"

    def config(self) -> ListConfig:
        """Return the listing configuration used for this shop."""
        # NOTE(review): not_found_xpath targets the listing container itself;
        # combined with reverse_not_found_result=True this presumably means
        # "not found" when the container is missing - confirm in ListConfig.
        return ListConfig(
            scroll_on=True,
            reverse_not_found_result=True,
            page_reload_after_cookies=False,
            cookie_button_xpath=[
                '//button[@class="btn btn-level1 accept-all-cookies"]'
            ],
            not_found_xpath='//div[contains(@id,"listing-list")]',
            max_scroll=30,
        )

    async def extract_card_html(self, page) -> List[Tag] | None:
        """Return the product-card tags found on the current listing page.

        Args:
            page: Page object exposing an awaitable ``query_selector``.

        Returns:
            The elements with class ``nodecor`` that also carry an ``id``
            attribute, or ``None`` when the listing container is not found.

        Raises:
            AssertionError: The listing container exists but holds no card
                with an ``id`` attribute.
        """
        listing = await page.query_selector('//div[contains(@id,"listing-list")]')
        if not listing:
            return None

        soup = BeautifulSoup(await listing.inner_html(), "html.parser")
        # Promotional tiles are sometimes mixed in with the product cards;
        # they carry the "nodecor" class but have no id, so drop them.
        cards = [
            card
            for card in soup.find_all(attrs={"class": "nodecor"})
            if card.get("id") is not None
        ]
        # Raised explicitly instead of using ``assert`` so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        if not cards:
            raise AssertionError("load_product_card : No product cards found")
        return cards

    def extract_info(self, card: Tag, brand_name: str) -> ListScrapData:
        """Convert one product-card tag into a ``ListScrapData`` record.

        Args:
            card: A product-card tag as returned by ``extract_card_html``.
            brand_name: Brand name stored on the record as-is.

        Returns:
            The record built from the card's name link, ``data-nq-product``
            attribute, price element and image attributes.
        """
        product_name = card.find("a", class_="f-hover-decor").text  # type: ignore
        shop_product_name = product_name + " - " + card["data-nq-product"]  # type: ignore
        # Keep only the text before " RRP" in the price element.
        price = card.find(attrs={"data-listing": "price"}).text.split(" RRP")[0]  # type: ignore

        return ListScrapData(
            shop_name=self.__name__(),
            brand_name=brand_name,
            shop_product_name=shop_product_name,
            shop_product_img_url=card.img["src"],  # type: ignore
            product_url=card.img["data-url"],  # type: ignore
            price=price,
        )

    async def get_next_page(self, page: Page, page_num: int) -> bool:
        """Always return ``False``; no pagination step is implemented here."""
        return False

## Cookie modals ...

## extract card html

In [None]:
# Instantiate the Seven Store list scraper defined above.
module = PwSevenStoreList()

# Collect the product-card tags from the listing currently open in `page`
# (None when the listing container is not found).
cards = await module.extract_card_html(page)

In [None]:
# Parse the first card into a ListScrapData record, passing "adidas" as the brand name.
data = module.extract_info(cards[0], "adidas")

In [None]:
from model.db_model_shop import ShopProductCardSchema
from components.dev.shop.currency import Currency
from datetime import datetime



def _preprocess_list_data(
    cards_info: List[ListScrapData],
) -> List[ShopProductCardSchema]:
    """Turn scraped list records into ``ShopProductCardSchema`` rows.

    For every record the price string is passed to three ``Currency``
    methods (original price info, custom USD, buying won). The record's
    dumped fields are printed and then combined with those values and an
    update timestamp.

    Args:
        cards_info: Scraped records; each must expose ``price`` and
            ``model_dump()``.

    Returns:
        One ``ShopProductCardSchema`` per input record, in input order.
    """
    converter = Currency()
    processed = []

    for scraped in cards_info:
        raw_price = scraped.price

        # Each helper returns a 3-tuple; only the named positions are used.
        _, currency_name, original_amount = converter.get_price_info(raw_price)
        _, _, usd_amount = converter.change_currency_to_custom_usd(raw_price)
        _, _, won_amount = converter.change_currency_to_buying_won(raw_price)

        fields = scraped.model_dump()
        print(fields)

        row = ShopProductCardSchema(
            **fields,
            original_price_currency=currency_name,
            original_price=original_amount,
            us_price=usd_amount,
            # Won price is rounded to the nearest thousand.
            kor_price=int(round(won_amount, -3)),
            # Timestamp is truncated to whole seconds.
            updated_at=datetime.now().replace(microsecond=0),
        )
        processed.append(row)

    return processed

# Run the conversion on the single record scraped in the earlier cell.
_preprocess_list_data([data])

# ShopPage

In [None]:
from components.dev.shop.shop_product_card_list.list_scraper import ShopListScrapMachine
from components.dev.utils.browser_controller import PwBrowserController
# Build a fresh browser -> page controller -> page chain; this rebinds the
# `browser_controller`, `page_controller` and `page` names from the earlier cell.
browser_controller = await PwBrowserController().create()
page_controller = await browser_controller.create_page_controller()
page = await page_controller.get_page()


# Open a single Seven Store product page; later cells read from `page`.
await page.goto('https://www.sevenstore.com/tops/hoodies/human-made-grey-heart-logo-printed-hoodie/')

In [None]:
import json
from typing import List, Dict, Any
from playwright.async_api import Page,expect
from components.dev.shop.shop_product_card_list.schema import ListConfig, ListScrapData
from bs4 import BeautifulSoup, Tag
from components.dev.shop.shop_list.parent_class import PwShopPage

class PwSevenStorePage(PwShopPage):
    """Product-page scraper for Seven Store: size list and product id."""

    def __name__(self) -> str:
        """Return the shop identifier."""
        return "seven_store"

    def get_cookie_button_xpath(self) -> List[str]:
        """Return the cookie-button XPaths; here a single empty string."""
        return [""]

    async def get_size_info(self, page: Page) -> List[Dict[str, Any]]:
        """Return the sizes listed on the product page.

        Waits up to 10 s for the ``.product-sizes-title`` element to contain
        "Sizes", then reads the text of every ``size-wrapper`` div.

        Args:
            page: Playwright page showing the product.

        Returns:
            One dict per size with keys ``shop_product_size`` (text as shown)
            and ``kor_product_size`` (the same text, prefixed with ``"UK "``
            when it parses as a number below 15). A single placeholder entry
            with ``"-"`` values is returned when no size element is found.
        """
        locator = page.locator(".product-sizes-title")
        await expect(locator).to_contain_text("Sizes", timeout=10000)

        size_query = await page.query_selector_all(
            '//div[contains(@class, "size-wrapper")]',
        )

        size_list = [await s.inner_text() for s in size_query]

        if not size_list:
            return [{"shop_product_size": "-", "kor_product_size": "-"}]

        sizes = []
        for size in size_list:
            # Non-numeric labels fail float() and are kept unchanged; only
            # that expected ValueError is caught instead of a bare except.
            try:
                is_small_number = float(size) < 15
            except ValueError:
                is_small_number = False

            kor_size = "UK " + size if is_small_number else size
            sizes.append({"shop_product_size": size, "kor_product_size": kor_size})

        return sizes

    async def get_product_id(self, page: Page) -> str:
        """Return the product id parsed from the description meta tag.

        The id is the text between the first and second ``":"`` of the
        tag's ``content`` attribute, with spaces removed and upper-cased.

        Args:
            page: Playwright page showing the product.

        Returns:
            The upper-cased id, or ``"-"`` when the tag, its content or the
            ``":"`` separator is missing or the lookup fails.
        """
        meta = await page.query_selector(
            '//meta[contains(@name, "description")]',
        )

        # Best-effort parse. ``except Exception`` replaces a bare ``except``
        # so that asyncio.CancelledError and KeyboardInterrupt (both
        # BaseException subclasses) still propagate out of the await.
        try:
            content = await meta.get_attribute("content")  # type: ignore
            product_id = content.split(":")[1].replace(" ", "")  # type: ignore
        except Exception:
            product_id = "-"

        return product_id.upper()

In [None]:
# Instantiate the product-page scraper defined above (rebinds `module`).
module = PwSevenStorePage()

# NOTE: despite its name, `cards` now holds the size dicts returned by get_size_info.
cards = await module.get_size_info(page)
cards

In [None]:
# NOTE(review): the name `id` shadows the builtin `id()` at module level -
# consider renaming (e.g. `product_id`) if no other cell relies on it.
id = await module.get_product_id(page)
id

In [None]:
from components.dev.shop.table_data_loader import ShopTableDataLoader, TableType, SearchType
from db.dev_db import session_local as dev_session
from db.production_db import session_local as prod_session

# Open one session per database; both are handed to the loader below.
# NOTE(review): neither session is closed in the cells visible here - confirm
# whether cleanup happens elsewhere.
dev_db = dev_session()
prod_db=prod_session()


# Loader configured with TableType.CANDIDATE_TABLE and SearchType.BRAND_NAME,
# then run for the search value 'adidas'.
loader = ShopTableDataLoader(admin_db=dev_db,prod_db=prod_db,table_type=TableType.CANDIDATE_TABLE,search_type=SearchType.BRAND_NAME)
await loader.extract_data('adidas')

In [None]:
from components.dev.utils.temp_file_manager import TempFileManager
import os
import pandas as pd
from datetime import datetime
tfm = TempFileManager("shop_list")

# Timestamp (yymmdd-HHMMSS) used as the output file's base name.
time_now = datetime.now().strftime("%y%m%d-%H%M%S")
# Load the temp file named "product_card_list" through the manager.
list_data = await tfm.load_temp_file("product_card_list")
# Output directory is the current working directory.
save_path = os.path.join('./')
if not os.path.exists(save_path):
    os.makedirs(save_path, exist_ok=True)

# Target path for the parquet export written in a later cell.
file_path = os.path.join(save_path, f"{time_now}.parquet.gzip")

# pd.DataFrame(list_data).drop_duplicates(subset="shop_product_name").to_parquet(
#     path=file_path, compression="gzip"
# )

In [None]:
# Display the data loaded from the temp file.
list_data

In [None]:
# from itertools import chain

# list_data = list(chain(*list_data))

# Tabulate the loaded records; `df` is displayed here and exported in the next cell.
df = pd.DataFrame(list_data)

df

In [None]:
# Drop rows with a repeated shop_product_name, renumber the index, and write
# the result to `file_path` as gzip-compressed parquet.
df.drop_duplicates(subset="shop_product_name").reset_index(drop=True).to_parquet(path=file_path, compression="gzip")