In [113]:
import http.client
import ssl
import gzip
from io import BytesIO
import json
import pandas as pd
import openpyxl

In [114]:
# Category references to scrape. The values are already percent-encoded
# (e.g. "%20" for a space, "%C3%89" for "É").
categories = [
    "ALIMENTOS%20Y%20DESPENSA",
    "CONGELADOS",
    "BEBIDAS",
    "ASEO%20HOGAR",
    "ASEO%20Y%20CUIDADO%20PERSONAL",
    "BEB%C3%89",
    "L%C3%81CTEOS",
    "MASCOTAS",
]

# Store reference per city. Only Bogotá is filled in; the other cities are
# still empty placeholders.
# Alternative Bogotá references noted by the author:
#   option 2 Bogota: 12522
#   option 3 Bogota: 11808
stores = {
    "Bogotá": "12165",
    "Medellin": "",
    "Barranquilla": "",
    "Bucaramanga": "",
}

In [115]:
# GraphQL document sent with every ``GetProductsByCategory`` request.
# The text (including its indentation) is kept verbatim so the request body
# is unchanged.
_GET_PRODUCTS_BY_CATEGORY_QUERY = """fragment CategoryFields on CategoryModel {
            active
            boost
            hasChildren
            categoryNamesPath
            isAvailableInHome
            level
            name
            path
            reference
            slug
            photoUrl
            imageUrl
            shortName
            isFeatured
            isAssociatedToCatalog
            __typename
        }
        fragment CatalogProductTagModel on CatalogProductTagModel {
            description
            enabled
            textColor
            filter
            tagReference
            backgroundColor
            name
            __typename
        }
        fragment CatalogProductFormatModel on CatalogProductFormatModel {
            format
            equivalence
            unitEquivalence
            clickMultiplier
            minQty
            maxQty
            __typename
        }
        fragment Taxes on ProductTaxModel {
            taxId
            taxName
            taxType
            taxValue
            taxSubTotal
            __typename
        }
        fragment PromotionCondition on PromotionCondition {
            quantity
            price
            priceBeforeTaxes
            taxTotal
            taxes {
                ...Taxes
                __typename
            }
            __typename
        }
        fragment Promotion on Promotion {
            type
            isActive
            conditions {
                ...PromotionCondition
                __typename
            }
            description
            endDateTime
            startDateTime
            __typename
        }
        fragment PromotedModel on PromotedModel {
            isPromoted
            onLoadBeacon
            onClickBeacon
            onViewBeacon
            onBasketChangeBeacon
            onWishlistBeacon
            __typename
        }
        fragment SpecificationModel on SpecificationModel {
            title
            values {
                label
                value
                __typename
            }
            __typename
        }
        fragment NutritionalDetailsInformation on NutritionalDetailsInformation {
            servingName
            servingSize
            servingUnit
            servingsPerPortion
            nutritionalTable {
                nutrientName
                quantity
                unit
                quantityPerPortion
                dailyValue
                __typename
            }
            bottomInfo
            __typename
        }
        fragment CatalogProductModel on CatalogProductModel {
            name
            price
            photosUrl
            unit
            subUnit
            subQty
            description
            sku
            ean
            maxQty
            minQty
            clickMultiplier
            nutritionalDetails
            isActive
            slug
            brand
            stock
            securityStock
            boost
            isAvailable
            location
            priceBeforeTaxes
            taxTotal
            promotion {
                ...Promotion
                __typename
            }
            taxes {
                ...Taxes
                __typename
            }
            categories {
                ...CategoryFields
                __typename
            }
            categoriesData {
                ...CategoryFields
                __typename
            }
            formats {
                ...CatalogProductFormatModel
                __typename
            }
            tags {
                ...CatalogProductTagModel
                __typename
            }
            specifications {
                ...SpecificationModel
                __typename
            }
            promoted {
                ...PromotedModel
                __typename
            }
            score
            relatedProducts
            ingredients
            stockWarning
            nutritionalDetailsInformation {
                ...NutritionalDetailsInformation
                __typename
            }
            productVariants
            isVariant
            isDominant
            __typename
        }
        fragment CategoryWithProductsModel on CategoryWithProductsModel {
            name
            reference
            level
            path
            hasChildren
            active
            boost
            isAvailableInHome
            slug
            photoUrl
            categoryNamesPath
            imageUrl
            shortName
            isFeatured
            products {
                ...CatalogProductModel
                __typename
            }
            __typename
        }
        fragment PaginationTotalModel on PaginationTotalModel {
            value
            relation
            __typename
        }
        fragment PaginationModel on PaginationModel {
            page
            pages
            total {
                ...PaginationTotalModel
                __typename
            }
            __typename
        }
        fragment AggregateBucketModel on AggregateBucketModel {
            min
            max
            key
            docCount
            __typename
        }
        fragment AggregateModel on AggregateModel {
            name
            docCount
            buckets {
                ...AggregateBucketModel
                __typename
            }
            __typename
        }
        fragment BannerModel on BannerModel {
            id
            storeId
            title
            desktopImage
            mobileImage
            targetUrl
            targetUrlInfo {
                type
                url
                __typename
            }
            targetCategory
            index
            categoryId
            __typename
        }
        query GetProductsByCategory($getProductsByCategoryInput: GetProductsByCategoryInput!) {
            getProductsByCategory(getProductsByCategoryInput: $getProductsByCategoryInput) {
                category {
                    ...CategoryWithProductsModel
                    __typename
                }
                pagination {
                    ...PaginationModel
                    __typename
                }
                aggregates {
                    ...AggregateModel
                    __typename
                }
                banners {
                    ...BannerModel
                    __typename
                }
                promoted {
                    ...PromotedModel
                    __typename
                }
                __typename
            }
        }"""


def fetch_products(category_reference, page, store_reference="12165", timeout=60):
    """Fetch one page of products for a category from the GraphQL endpoint.

    Sends a single-operation batch (a JSON list with one entry) as a POST to
    ``https://nextgentheadless.instaleap.io/api/v3`` and returns the decoded
    JSON reply.

    Parameters
    ----------
    category_reference : str
        Value sent as ``categoryReference``.
    page : int
        Value sent as ``currentPage``.
    store_reference : str, optional
        Value sent as ``storeReference``. Defaults to ``"12165"``, the value
        that was previously hard-coded.
    timeout : float, optional
        Socket timeout in seconds, so a stalled server cannot block forever.

    Returns
    -------
    object
        The parsed JSON body. No validation of its structure is done here.

    Raises
    ------
    RuntimeError
        If the server answers with a non-2xx HTTP status.
    ValueError
        If the body is not valid JSON (``json.JSONDecodeError``) or not valid
        UTF-8 (``UnicodeDecodeError``).
    OSError
        On connection problems or timeouts.
    """
    payload = json.dumps([{
        "operationName": "GetProductsByCategory",
        "variables": {
            "getProductsByCategoryInput": {
                "categoryReference": category_reference,
                # NOTE(review): this is the string "null", not JSON null.
                # Kept as-is; confirm that is what the API expects.
                "categoryId": "null",
                "clientId": "D1",
                "storeReference": store_reference,
                "currentPage": page,
                "pageSize": 100,
                "filters": {},
                "googleAnalyticsSessionId": ""
            }
        },
        "query": _GET_PRODUCTS_BY_CATEGORY_QUERY
    }])

    # These headers look like they were copied from a browser session
    # (presumably required by the endpoint -- TODO confirm which are needed).
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0",
        'Accept': "*/*",
        'Accept-Language': "en-US,en;q=0.5",
        # Only advertise encodings this function can decode. http.client does
        # not decompress bodies itself, so offering br/zstd/deflate could
        # yield a reply that fails at the UTF-8/JSON step below.
        'Accept-Encoding': "gzip",
        'content-type': "application/json",
        'apollographql-client-name': "Ecommerce",
        'apollographql-client-version': "3.49.57",
        'token': "",
        'dpl-api-key': "",
        'sentry-trace': "1f01abbaf2644743935bd975bd0bef8d-b10fe0e8b44a15c3",
        'baggage': "sentry-environment=production,sentry-release=3.49.57,sentry-public_key=b29a7fe3cf5f486b9520c5b3de53a5a7,sentry-trace_id=1f01abbaf2644743935bd975bd0bef8d",
        'Origin': "https://domicilios.tiendasd1.com",
        'Connection': "keep-alive",
        'Referer': "https://domicilios.tiendasd1.com/",
        'Sec-Fetch-Dest': "empty",
        'Sec-Fetch-Mode': "cors",
        'Sec-Fetch-Site': "cross-site",
        'Priority': "u=4",
        'TE': "trailers"
    }

    # NOTE(review): certificate verification is disabled here (and through a
    # private ssl helper). Kept to preserve existing behaviour; prefer
    # ssl.create_default_context() if the environment allows it.
    conn = http.client.HTTPSConnection(
        "nextgentheadless.instaleap.io",
        timeout=timeout,
        context=ssl._create_unverified_context(),
    )
    try:
        conn.request("POST", "/api/v3", payload, headers)
        res = conn.getresponse()
        data = res.read()
        status = res.status
        reason = res.reason
        content_encoding = (res.getheader('Content-Encoding') or '').strip().lower()
    finally:
        # Always release the socket, including when the request or read fails.
        conn.close()

    if content_encoding == 'gzip':
        data = gzip.decompress(data)

    if not 200 <= status < 300:
        raise RuntimeError(
            f"GetProductsByCategory request failed with HTTP {status} {reason}: "
            f"{data[:200]!r}"
        )

    return json.loads(data.decode('utf-8'))


def fetch_all_pages(category_reference):
    """Collect every product of a category into a DataFrame.

    Calls ``fetch_products(category_reference, page)`` for page 1, 2, ...
    and stops at the first page whose product list is empty or missing.

    Parameters
    ----------
    category_reference : str
        Category reference forwarded unchanged to ``fetch_products``.

    Returns
    -------
    pandas.DataFrame
        One row per product with the columns ``Link``, ``Category``,
        ``Name`` and ``Price Without Discount`` (in that order). The frame
        is empty, but keeps these columns, when the first page has no
        products.

    Raises
    ------
    RuntimeError
        If a response does not contain
        ``[0]['data']['getProductsByCategory']['category']`` (for example a
        GraphQL error reply). The message names the category and page.
    """
    columns = ['Link', 'Category', 'Name', 'Price Without Discount']
    rows = []
    page = 1

    while True:
        response = fetch_products(category_reference, page)

        # The request is a one-element batch, so the reply is indexed with [0].
        try:
            category = response[0]['data']['getProductsByCategory']['category']
        except (KeyError, IndexError, TypeError):
            category = None
        if category is None:
            # Fail with context instead of an opaque TypeError/KeyError.
            raise RuntimeError(
                f"Unexpected response for category {category_reference!r}, "
                f"page {page}: {repr(response)[:300]}"
            )

        products = category['products']
        if not products:
            break

        category_name = category['name']
        rows.extend(
            {
                'Link': 'https://domicilios.tiendasd1.com/p/' + product['slug'],
                'Category': category_name,
                'Name': product['name'],
                'Price Without Discount': product['price'],
            }
            for product in products
        )

        page += 1

    # Explicit columns keep the schema stable even when no rows were collected.
    return pd.DataFrame(rows, columns=columns)


In [116]:
# Scrape every configured category, then concatenate once. Re-concatenating
# inside the loop recopies all accumulated rows on each iteration, and
# concatenating with empty frames relies on pandas behaviour that is deprecated.
frames = []
for category in categories:
    category_frame = fetch_all_pages(category)
    if not category_frame.empty:
        frames.append(category_frame)

if frames:
    df = pd.concat(frames, ignore_index=True)
else:
    # Nothing was scraped: keep the expected schema so later cells still work.
    df = pd.DataFrame(columns=['Link','Category', 'Name', 'Price Without Discount'])

In [118]:
# Write the combined product table to an Excel workbook in the working
# directory; index=False leaves the row index out of the sheet.
df.to_excel("products.xlsx", index=False)