In [1]:
import requests
import shopify
from bs4 import BeautifulSoup
import logging
import re
from dotenv import load_dotenv
import os


logging.basicConfig(level=logging.ERROR)

shop_url = "https://429eef-90.myshopify.com/"
api_version = '2024-01'
# state = binascii.b2a_hex(os.urandom(15)).decode("utf-8")
# redirect_uri = "http://myapp.com/auth/shopify/callback"
# scopes = ['read_apps', 'write_files', 'read_files', 'write_products', 'read_products', 'read_content', 'write_content', 'write_product_feeds', 'read_product_feeds', 'write_product_listings', 'read_product_listings']

load_dotenv()
access_token = (os.getenv('access_token'))

session = shopify.Session(shop_url, api_version, access_token)
shopify.ShopifyResource.activate_session(session)

In [2]:
def get_search_results(url, max_retries=3):
    """
    Fetches and returns the HTML content of a search results page, handling errors and retrying as needed.

    Args:
        url (str): The URL of the search results page.
        max_retries (int): The maximum number of retries before giving up (default: 3).

    Returns:
        str or None: The HTML content of the page on success, or None on failure.
    """

    logger = logging.getLogger(__name__)
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36"

    # Create a request with the custom user agent header
    headers = {"User-Agent": user_agent}

    for retry_num in range(max_retries + 1):
        try:
            with requests.get(url, headers=headers) as response:
                response.raise_for_status()  # Raise an exception if the status code is not 200
                logger.info("Successfully fetched search results page: %s", url)
                return response.text
        except requests.HTTPError as e:
            logger.error(
                "HTTP error fetching search results page: %s (status code: %d)", url, e.response.status_code
            )
             

            if retry_num < max_retries:
                logger.info("Retrying request for search results page: %s", url)
                continue  # Retry the request
            else:
                return None
                raise  # Re-raise the exception after all retries are exhausted
        except requests.ConnectionError as e:
            logger.error("Connection error fetching search results page: %s", url)
            if retry_num < max_retries:
                logger.info("Retrying request for search results page: %s", url)
                continue
            else:
                raise  # Re-raise the exception after all retries are exhausted
    return None  # If all retries fail, return None



In [3]:
def sear_Nprod(title):
    """
    Извлекает ссылки на продукты из HTML-кода страницы с результатами поиска.

    Gel Polish Deluxe Series D114 (dark, gray-lilac, enamel), 8 ml

    """
    # title = "Gel Polish Deluxe Series D119 (dark, gray-lilac, enamel), 8 ml"

    print('Search')

    name4sear = re.sub(r"\[.*?\]|\(.*?\), ", "", title).lower().replace(" 8ml","").replace("gel polish ","").replace("cat’s","cat").replace(" ","+")
    print(name4sear)
    url = f"https://newbeautylab.shop/?product_cat=&s={name4sear}"

    print(url)
    html = get_search_results(url)
    if html:
        soup = BeautifulSoup(html, 'html.parser')
        entry_title = soup.find('h2', class_='entry-title')

        # Check if element is found
        if entry_title:
            # Find the a element within the h2
            link = entry_title.find('a')
            
            # Check if link is found
            if link:
                # Extract and return the href attribute
                href = link.get('href')

                html = get_search_results(href)
                if html:
                    soup = BeautifulSoup(html, 'html.parser')
                    links = soup.find_all('span',class_='sku')
                    if links:
                        barcode = links[0].text
                        print(barcode)
                        return barcode
    print ('No barcode')
    return None


In [28]:
class Product:
    def __init__(self, name, spu, imgs, url, detail=None):
        self.name = name
        self.detail = str(detail) if detail else ""
        self.spu = spu
        self.imgs = imgs
        self.url = url
        if '60ml' in name:
            self.price = '20.00'
            self.compare_at_price = '22.00'
            self.grams = 70
            self.weight = 0.070
            self.barcode = None
        elif '15' in name:
            self.price = '15.00'
            self.compare_at_price = '17.00'
            self.grams = 20
            self.weight = 0.020
            self.barcode = None
        self.vendor = 'Passione'
        self.product_type = 'AcrilGel'

    def __str__(self):
        return f"Product: {self.name}, SPU: {self.spu}"

In [5]:
def extract_product_links(url):
    """
    Извлекает ссылки на продукты из HTML-кода страницы с результатами поиска.
    """
    html = get_search_results(url)
    if html:
        soup = BeautifulSoup(html, 'html.parser')
        links = soup.find_all('a',class_='product-item-link')
        hrefs = [link['href'] for link in links if 'href' in link.attrs]
        return hrefs
    return None


In [6]:
extract_product_links('https://www.passionebeauty.com/es_es/catalogsearch/result/?search=on&q=AcrilGel+tube')

['https://www.passionebeauty.com/es_es/acrilgel-tube-cover-lilac-60-ml.html',
 'https://www.passionebeauty.com/es_es/acrilgel-tube-cover-candy-60-ml.html',
 'https://www.passionebeauty.com/es_es/acrilgel-tube-cover-makeup-60-ml.html',
 'https://www.passionebeauty.com/es_es/acrilgel-tube-cover-blush-60-ml.html',
 'https://www.passionebeauty.com/es_es/acrilgel-tube-clear-60-ml.html',
 'https://www.passionebeauty.com/es_es/acrilgel-tube-white-60-ml.html',
 'https://www.passionebeauty.com/es_es/acrilgel-tube-cover-natural-60-ml.html',
 'https://www.passionebeauty.com/es_es/acrilgel-tube-cover-pink-60-ml.html',
 'https://www.passionebeauty.com/es_es/acrilgel-tube-milky-pink-60-ml.html',
 'https://www.passionebeauty.com/es_es/acrilgel-tube-soft-white-60-ml.html',
 'https://www.passionebeauty.com/es_es/acrilgel-tube-cover-phard-60-ml.html',
 'https://www.passionebeauty.com/es_es/acrilgel-tube-crystal-natural-60-ml.html',
 'https://www.passionebeauty.com/es_es/acrilgel-tube-diamond-pink-60-ml.

In [7]:
def UkRefSearch(sku):
    """
    Извлекает ссылки на продукты из HTML-кода страницы с результатами поиска.
    """
    url = 'https://reformacosmetics.com/search/?q='+str(sku)+'&s=%D0%9F%D0%BE%D1%88%D1%83%D0%BA'

    html = get_search_results(url)
    if html:
        soup = BeautifulSoup(html, 'html.parser')
        links = soup.find_all('a', class_='nm')
        return [link['href'] for link in links]
    return None


In [8]:
def letsgo(url):
    print()

    # url = 'https://reforma.top/catalogsearch/result/?q=' + SPU + '&product_list_limit=144'
    # url = 'https://reforma.top/shop/nails/gel-polish/base-tops/?product_list_limit=144&p='+str(page)
    print(url)
    product_links = extract_product_links(url)
    if product_links:
        print("List created")
        return product_links
        

In [49]:
def extract_data_from_url(url):
    """
    Extracts product data from the provided URL.
    """
    try:
        # Extract data from the URL
        html = get_search_results(url)
        if not html:
            raise ValueError("Failed to fetch HTML content from the URL.")

        soup = BeautifulSoup(html, 'html.parser')

        # Convert the remaining div element to a string
        detail = soup.find('div',class_='product attribute description')
        spu = None
        title_tag = soup.find('span', class_='base')
        title = title_tag.text.strip()  # strip() removes leading and trailing whitespace
        imgs = [img.get('src') for img in soup.find_all('img', class_="gallery-placeholder__image")]
        # imgs =soup
        
        return Product(title, spu, imgs, url, detail=detail)
    except Exception as e:
        logging.error(f"Failed to extract data from URL: {str(e)}")
        return None

In [51]:
extract_data_from_url('https://www.passionebeauty.com/es_es/acrilgel-tube-cover-lilac-60-ml.html').imgs

['https://d249tcpqnhqbel.cloudfront.net/media/catalog/product/cache/f6109f10f2b14d58b39e739c49b2275a/a/g/ag180cll_acrilgel-60-ml-cover-lilac.jpg']

In [11]:
def update_product(shopify_product, barcode):
    """
    Updates an existing Shopify product with new title and handle if needed.
    """

    # print(shopify_product.title)

    variant = shopify_product.variants[0]

    variant.barcode = barcode
    variant.save()
    shopify_product.save()

In [60]:
def create_new_product(product, new_handle):
    """
    Creates a new Shopify product with details and default variant.
    """
    try:
        shopify_product = shopify.Product.create({
            'title': product.name,
            'handle': new_handle,
            'body_html': product.detail,
            'vendor': product.vendor,
            'product_type': product.product_type,
            'Status': 'draft'
        })

        default_variant = {
            'title': 'Default Title',
            'price': product.price,
            'compare_at_price': product.compare_at_price,
            'inventory_policy': 'deny',
            'fulfillment_service': 'manual',
            'inventory_management': 'shopify',
            'option1': 'Default Title',
            'taxable': True,
            'grams': product.grams,
            'weight': product.weight,
            'weight_unit': 'kg',
            'requires_shipping': True
        }

        variant = shopify.Variant(default_variant)
        shopify_product.variants = [variant]

        shopify_product.images = [{'src': img_url, 'position': index + 1, 'alt': product.name} for index, img_url in enumerate(product.imgs)]


        if shopify_product.save():
            logging.info(f"Product created successfully! (ID: {shopify_product.id})")
            return shopify_product
        else:
            raise Exception(f"Failed to save new product: {shopify_product.errors.full_messages()}")

    except Exception as e:
        logging.error(f"Failed to create product: {str(e)}")
        return None

In [57]:
def create_product(product):
    """
    Creates a new product on Shopify with the provided data, 
    handling creation, updates, and errors.
    """
    try:
        Handle = product.url.split('/')[-1].replace('.html','')
        print(Handle)
        shopify_product = shopify.Product.find(handle=Handle)

        if not shopify_product:
            shopify_product = create_new_product(product, Handle)
            logging.info(f"Product created successfully! (ID: {shopify_product.id})")
            print("Product created successfully!")
        else:
            # Update existing product if titles differ
            shopify_product = shopify_product[0]
            print(shopify_product.title)
            print('Already exist!')
            if product.barcode:
                update_product(shopify_product, product.barcode)
                logging.info(f"Product updated successfully! (ID: {shopify_product.id})")
                # print(shopify_product.title)
                print("Product barcode added successfully!")
            else:
                logging.info(f"Product already exists with matching title: {shopify_product.title} (ID: {shopify_product.id})")
                # print(shopify_product.title)
                print("Barcode not exists!")

        return shopify_product

    except Exception as e:
        logging.error(f"Failed to create product: {str(e)}")
        return None

In [14]:
def create_or_get_collection(col_handle, col_name):
    # Find the collection by handle
    collections = shopify.CustomCollection.find(handle=col_handle)

    # If the collection doesn't exist, create a new one
    if not collections:
        collection = shopify.CustomCollection.create({
            'title':col_name,
            'handle':col_handle}
        )
        print(f"Collection {col_name} created")
        print()
    else:
        # Get the first collection from the PaginatedCollection
        collection = collections[0]
        print(f"Collection {col_name} exists")
    return collection


def add_product_to_collection(collection, product):
    new_collect = shopify.Collect.create({
        "collection_id": collection.id,
        "product_id": product.id
    })
    print(f"Product added to collection")


In [62]:
def allProdCol(colURL):
    # Get the collection name
    col_handle = colURL.split('=')[-2].replace('+', ' ')
    # col_name = col_handle.replace('-', ' ').upper()
    

    # Create or get the collection
    # create_or_get_collection(col_handle, col_name)

    # Loop through pages
    for i in range(1, 35):    

        # Extract product links from the page
        prod_links = extract_product_links(colURL)

        if prod_links:
            print(f'{col_handle}: Page {i}: {len(prod_links)} products')
            print()
            
            # Add each product to the collection
            for index, prod_link in enumerate(prod_links, start=1):
                data = extract_data_from_url(prod_link)
                create_product(data)
                print(f"{index}/{len(prod_links)}\n")
        else:
            print(f'{col_handle}: Page {i}: No products')
            break

listCol = [
        
           
'https://www.passionebeauty.com/es_es/catalogsearch/result/?search=on&q=AcrilGel+tube'
           
           



           
        
           ]

for colURL in listCol:

    allProdCol(colURL)

on&q: Page 1: 20 products

acrilgel-tube-cover-lilac-60-ml
Product created successfully!
1/20

acrilgel-tube-cover-candy-60-ml
Product created successfully!
2/20

acrilgel-tube-cover-makeup-60-ml
Product created successfully!
3/20

acrilgel-tube-cover-blush-60-ml
Product created successfully!
4/20

acrilgel-tube-clear-60-ml
Product created successfully!
5/20

acrilgel-tube-white-60-ml
Product created successfully!
6/20

acrilgel-tube-cover-natural-60-ml
Product created successfully!
7/20

acrilgel-tube-cover-pink-60-ml
Product created successfully!
8/20

acrilgel-tube-milky-pink-60-ml
Product created successfully!
9/20

acrilgel-tube-soft-white-60-ml
Product created successfully!
10/20

acrilgel-tube-cover-phard-60-ml
Product created successfully!
11/20

acrilgel-tube-crystal-natural-60-ml
Product created successfully!
12/20

acrilgel-tube-diamond-pink-60-ml
Product created successfully!
13/20

acrilgel-tube-cover-babyboomer-60-ml
Product created successfully!
14/20

acrilgel-tube-spar

KeyboardInterrupt: 

In [None]:
prod_links = [
    'https://komilfo.ua/en/product/komilfo-glitter-top-top-for-gel-polish-with-glitter-without-sticky-layer-15-ml/'
    

    
]
for prod_link in prod_links:
    data = extract_data_from_url(prod_link)

    create_product(data)

KeyboardInterrupt: 

In [None]:
extract_product_links('https://komilfo.ua/en/product-category/komilfo-gel-polish/')

In [None]:
extract_product_links('https://komilfo.ua/ru/product-category/komilfo-gel-polish-ru/french-collection/')

In [None]:
Collection.title = "My Collection"
Collection.save()

In [None]:
# col_name = 'fuch yoo ooo'
# col_handle = 'fuch yoo ooo'
# collection = shopify.SmartCollection.create({
#     'title': col_name,
#     'handle': col_handle,
#     'rules': [
#         {
#             "column": "title",
#             "relation": "contains",
#             "condition": col_name.split(' ')[0]
#         },
#         {
#             "column": "vendor",
#             "relation": "equals",
#             "condition": 'Komilfo'
#         }
#     ]
# })
