In [80]:
import re
import logging
import pandas as pd

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup, ResultSet
from functools import partial
from datetime import datetime

logging.basicConfig(level=logging.INFO)

In [85]:
class MKBag(object):

    def __init__(self):
        chrome_options = Options()
        chrome_options.add_argument("--headless=new")
        chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36")
        self.driver = webdriver.Chrome(options=chrome_options)

    def browse_web(
        self,
        url: str = 'https://www.michaelkors.global/hk/en/women/handbags/?pmin=1.00&start=0&sz={max_item}',
        item_count: int = 50,
    ) -> ResultSet:
        self.driver.get(url.format(max_item=item_count))
        return BeautifulSoup(self.driver.page_source, 'html.parser')
    
    def cleanup(self):
        self.driver.quit()

    @staticmethod
    def extract_item_info(raw_result: ResultSet, path: str, get_item: str) -> list:
        found = raw_result.select(path)
        if not found:
            return []
        if get_item == 'text':
            return [txt.text for txt in found]
        else:
            return [item.get(get_item) for item in found]

    def item_data_constructor(self, raw_result: ResultSet, get_all_details: bool) -> dict:

        extract_info_partial = partial(self.extract_item_info, raw_result=raw_result)
        product_link=f"https://www.michaelkors.global/{extract_info_partial(path='div.pdp-link a', get_item='href')[0]}"

        brand = extract_info_partial(path='div.product-brand a', get_item='text')
        item_name=extract_info_partial(path='div.pdp-link a', get_item='text')[0]

        logging.info(f'Pulling Basic information of item: {item_name}')

        basic_details = dict(
            brand=brand[0] if brand else 'MICHAEL Michael Kors',
            item_name=item_name,
            product_link=product_link,
            default_price=max(extract_info_partial(path='span.default-price .value', get_item='content')),
            current_price=min(extract_info_partial(path='span.default-price .value', get_item='content')),
            colors=extract_info_partial(path='div.swatches img', get_item='title'),
            product_images=extract_info_partial(path='div.image-container img', get_item='src'),
            timestamp=datetime.now(),
        )

        if get_all_details:
            logging.info(f'Pulling dtailed information of item: {item_name}')
            extra_details = self.get_item_details(product_link)
            basic_details.update(extra_details)
        
        return basic_details

    def get_all_bags(self, get_all_details: bool = False) -> list:

        total_bags = self.browse_web().select_one('span.results-count-value').text

        logging.info(f'Start pulling total {total_bags} bags data')
 
        lst_bags = self.browse_web(item_count=total_bags).findAll('div', class_='product-tile')

        bags_data = []

        for idx, bags in enumerate(lst_bags):
            logging.info(f'Working on {idx+1}/{total_bags} bags')
            bags_data.append(self.item_data_constructor(bags, get_all_details))

        return bags_data
    
    def get_item_details(self, product_link: str) -> dict:
        raw_product_detail = self.browse_web(product_link)

        extra_data = dict(
            desceiption=raw_product_detail.select('div.col-12.value.content')[0].text.strip(),
            availability=raw_product_detail.select('div.availability')[0].get('data-available'),
            product_details=raw_product_detail.select('div.col-sm-12.col-md-8.col-lg-12.value.content')[0].text.strip().split('\n')
        )
        dim_expression = r'(\d+(?:\.\d+)?)\S\s?([WHD])'
        dimension = [
            re.findall(dim_expression, details) 
            for details in extra_data['product_details'] 
            if re.findall(dim_expression, details)
        ]

        logging.info(dimension) ##
      
        extra_data['dimension'] = {item[1]: item[0] for item in dimension[0]} if dimension else {}

        return extra_data

        

In [90]:
mk_obj = MKBag()
bags_data = mk_obj.get_all_bags(get_all_details=True)
mk_obj.cleanup()

INFO:root:Start pulling total 401 bags data
INFO:root:Working on 1/401 bags
INFO:root:Pulling Basic information of item: Ruthie Large Pebbled Leather Satchel
INFO:root:Pulling dtailed information of item: Ruthie Large Pebbled Leather Satchel
INFO:root:[[('13.75', 'W'), ('9.75', 'H'), ('4.25', 'D')]]
INFO:root:Working on 2/401 bags
INFO:root:Pulling Basic information of item: Ruthie Large Signature Logo Satchel
INFO:root:Pulling dtailed information of item: Ruthie Large Signature Logo Satchel
INFO:root:[[('13.75', 'W'), ('9.75', 'H'), ('4.25', 'D')]]
INFO:root:Working on 3/401 bags
INFO:root:Pulling Basic information of item: Jet Set Large Leather Crossbody Bag
INFO:root:Pulling dtailed information of item: Jet Set Large Leather Crossbody Bag
INFO:root:[[('8.25', 'W'), ('5', 'H'), ('1.75', 'D')]]
INFO:root:Working on 4/401 bags
INFO:root:Pulling Basic information of item: Jet Set Large Smartphone Convertible Crossbody Bag
INFO:root:Pulling dtailed information of item: Jet Set Large Smar

In [91]:
pd.DataFrame(bags_data)

Unnamed: 0,brand,item_name,product_link,default_price,current_price,colors,product_images,timestamp,desceiption,availability,product_details,dimension
0,MICHAEL Michael Kors,Ruthie Large Pebbled Leather Satchel,https://www.michaelkors.global//hk/en/ruthie-l...,3400.00,3400.00,[],[https://michaelkors.scene7.com/is/image/Micha...,2024-08-11 01:22:07.033340,"Designed to hold a day’s worth of essentials, ...",true,"[• Satchel, • Pebbled leather, • 100% leather,...","{'W': '13.75', 'H': '9.75', 'D': '4.25'}"
1,MICHAEL Michael Kors,Ruthie Large Signature Logo Satchel,https://www.michaelkors.global//hk/en/ruthie-l...,3400.00,3400.00,"[BRN/ACORN, VANILLA/ACORN]",[https://michaelkors.scene7.com/is/image/Micha...,2024-08-11 01:22:08.050930,"Designed to hold a day’s worth of essentials, ...",true,"[• Satchel, • Logo-print canvas, • 90% coated ...","{'W': '13.75', 'H': '9.75', 'D': '4.25'}"
2,MICHAEL Michael Kors,Jet Set Large Leather Crossbody Bag,https://www.michaelkors.global//hk/en/jet-set-...,4200.00,4200.00,"[BLACK, LUGGAGE, LT CREAM, POWDER BLUSH]",[https://michaelkors.scene7.com/is/image/Micha...,2024-08-11 01:22:08.991778,Sporty chic meets modern minimalism on this Je...,true,"[• Crossbody bag, • Leather, • 100% leather, •...","{'W': '8.25', 'H': '5', 'D': '1.75'}"
3,MICHAEL Michael Kors,Jet Set Large Smartphone Convertible Crossbody...,https://www.michaelkors.global//hk/en/jet-set-...,4100.00,4100.00,"[VANILLA, BROWN]",[https://michaelkors.scene7.com/is/image/Micha...,2024-08-11 01:22:09.756097,Our Jet Set crossbody wallet is a small wonder...,true,"[• Smartphone crossbody bag, • Logo-print canv...","{'W': '8.25', 'H': '5', 'D': '1.75'}"
4,MICHAEL Michael Kors,Chantal Medium Pebbled Leather Satchel,https://www.michaelkors.global//hk/en/chantal-...,4600.00,4600.00,[],[https://michaelkors.scene7.com/is/image/Micha...,2024-08-11 01:22:11.661178,A spacious interior and timeless silhouette ma...,true,"[• Satchel, • Pebbled leather, • 100% leather ...","{'W': '12.75', 'H': '9.75', 'D': '6.25'}"
...,...,...,...,...,...,...,...,...,...,...,...,...
396,MICHAEL Michael Kors,Jet Set Medium Pebbled Leather Crossbody Bag,https://www.michaelkors.global//hk/en/jet-set-...,2650.00,2650.00,"[BLACK, NAVY]","[data:image/png;base64,iVBORw0KGgoAAAANSUhEUgA...",2024-08-11 01:39:00.578994,The Jet Set crossbody bag is the perfect balan...,false,[• 100% leather from tanneries meeting the hig...,"{'W': '8.25', 'H': '5.5', 'D': '2.25'}"
397,MICHAEL Michael Kors,Heather Extra-Small Leather Crossbody Bag,https://www.michaelkors.global//hk/en/heather-...,3400.00,3400.00,"[BLACK, LUGGAGE, SOFT PINK, OPTIC WHITE]","[data:image/png;base64,iVBORw0KGgoAAAANSUhEUgA...",2024-08-11 01:39:03.891314,"A smart and polished piece for every day, the ...",false,"[• Crossbody bag, • Leather, • 100% leather, •...","{'W': '7.75', 'H': '4.75', 'D': '2.25'}"
398,MICHAEL Michael Kors,Ruby Large Saffiano Leather Tote Bag,https://www.michaelkors.global//hk/en/ruby-lar...,4600.00,1840.00,[],"[data:image/png;base64,iVBORw0KGgoAAAANSUhEUgA...",2024-08-11 01:39:06.738921,Meet Ruby: your new on-the-go carryall for day...,false,"[• Tote bag, • Saffiano leather, • 100% leathe...","{'W': '15', 'H': '10.75', 'D': '5.75'}"
399,MICHAEL Michael Kors,Chantal Extra-Small Logo Messenger Bag,https://www.michaelkors.global//hk/en/chantal-...,3200.00,3200.00,[],"[data:image/png;base64,iVBORw0KGgoAAAANSUhEUgA...",2024-08-11 01:39:07.992742,With its structured silhouette and glamorous h...,true,"[• Messenger bag, • Logo-print canvas, • 90% c...","{'W': '8.5', 'H': '6', 'D': '3.25'}"
