In [1]:
!pip install requests



In [2]:
import requests
import time
import json
from collections import Counter
from types import SimpleNamespace
from urllib.parse import urljoin

In [3]:
class Category:
    """A node of the category tree that receives a sequential numeric id.

    Every instance stores a reference to its parent ``Category`` (or ``None``
    for a root), the raw category value, a tag and a display title.
    """

    # Class-level running counter: holds the id handed out most recently.
    # Instances shadow it with their own ``id_`` attribute.
    id_ = 0

    def __init__(self, parent, value, tag, title):
        """Take the next id from the class counter and store the given fields."""
        Category.id_ = Category.id_ + 1
        self.id_ = Category.id_
        self.parent, self.value = parent, value
        self.tag, self.title = tag, title

    def to_dict(self):
        """Return a dict with ``id``, ``parent_id`` (``None`` without a parent) and ``title``."""
        parent_id = None
        if self.parent:
            parent_id = self.parent.id_
        return dict(id=self.id_, parent_id=parent_id, title=self.title)
    

class Offer:
    """A single product offer: its title, its image list and its category."""

    def __init__(self, title, images, category):
        """Store the title, the images and the owning category object as given."""
        self.title, self.images, self.category = title, images, category

    def to_dict(self):
        """Return a dict with ``title``, ``images`` and the category's ``id_`` as ``category_id``."""
        return dict(title=self.title,
                    images=self.images,
                    category_id=self.category.id_)

In [4]:
# HTTP headers passed along with the requests issued by this notebook.
headers = {
    'User-agent': 'targetapp_android_21',
}

In [5]:
# Category values accepted at nesting levels 0 and 1; anything else found at
# those levels is dropped together with its subtree.
included_categories = [
    'ladies',
    'men',
    'shop-by-product',
]

# Category values that are walked through without producing a category of
# their own: their children are attached to the skipped node's parent instead.
skipped_categories = [
    'shop-by-product',
]

# Category values dropped (with their subtrees) at nesting levels above 1.
excluded_categories = [
    'view-all',
    'hm-plus',
    'beauty',
    'maternity-wear',
    'accessories',
    'lingerie',
    'premium-selection',
    'underwear',
]

def create_category(parent, category):
    """Build a ``Category`` from a raw category object.

    Args:
        parent: the already-built parent ``Category`` or ``None``.
        category: raw object exposing ``CategoryValue``, ``tagCodes`` and
            ``CatName``; a falsy value yields ``None``.

    Returns:
        A new ``Category`` whose tag is the first entry of ``tagCodes``
        (``None`` when ``tagCodes`` is empty), or ``None`` for a falsy input.
    """
    if not category:
        return None

    value = category.CategoryValue
    # Only the first tag code is kept; an empty collection gives None.
    first_tag = next(iter(category.tagCodes), None)
    title = category.CatName
    return Category(parent, value, first_tag, title)
    

def get_processed_categories(categories, raw_parent=None, processed_parent=None, level=0):
    """Flatten the raw category tree into a filtered, pre-ordered list of ``Category`` objects.

    Args:
        categories: iterable of raw top-level categories; only read when
            ``raw_parent`` is falsy (i.e. on the outermost call).
        raw_parent: raw node whose ``CategoriesArray`` (if present) is walked.
        processed_parent: ``Category`` that newly created nodes are attached to.
        level: current nesting depth; selects which filter list applies.

    Returns:
        A list of ``Category`` objects, each parent appearing before its
        descendants.

    Side effects:
        The outermost call resets ``Category.id_`` to 0 so that ids restart
        from 1 on every full run.
    """
    if not raw_parent:
        Category.id_ = 0
        children = categories
    else:
        # Nodes without a CategoriesArray attribute are leaves.
        children = getattr(raw_parent, 'CategoriesArray', [])

    collected = []
    for child in children:
        # Levels 0 and 1 use the include list, deeper levels the exclude list.
        if level <= 1:
            wanted = child.CategoryValue in included_categories
        else:
            wanted = child.CategoryValue not in excluded_categories
        if not wanted:
            continue

        if child.CategoryValue in skipped_categories:
            # Skipped nodes emit nothing; their children hang off the current parent.
            attach_to = processed_parent
        else:
            attach_to = create_category(processed_parent, child)
            collected.append(attach_to)

        collected += get_processed_categories(None, child, attach_to, level + 1)

    return collected

def get_categories(timeout=30):
    """Download the category tree and return it as a filtered, flat list.

    Args:
        timeout: seconds to wait for the server before ``requests`` gives up.
            Without a timeout the call could block indefinitely.

    Returns:
        The list produced by ``get_processed_categories`` for the first entry
        of ``Categories`` in the first element of the JSON response.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within ``timeout``.
    """
    response = requests.get('https://app2.hm.com/content/hmonline/en_us/search-results.hm.v1.json',
                            headers=headers,
                            timeout=timeout)
    # Fail with a clear HTTP error instead of trying to parse an error page as JSON.
    response.raise_for_status()
    # object_hook turns every JSON object into a SimpleNamespace for attribute access.
    payload = json.loads(response.content, object_hook=lambda d: SimpleNamespace(**d))
    return get_processed_categories(payload[0].Categories[0])

In [40]:
def create_offer(offer, category):
    """Build an ``Offer`` from a raw offer object.

    Args:
        offer: raw object exposing ``name`` and ``galleryImages`` (a sequence
            of objects with a ``baseUrl`` attribute).
        category: the category object the offer is assigned to.

    Returns:
        An ``Offer`` carrying the name, the ``baseUrl`` of at most the first
        three gallery images, and ``category``.
    """
    title = offer.name
    image_urls = []
    for image in offer.galleryImages[:3]:
        image_urls.append(image.baseUrl)
    return Offer(title, image_urls, category)

def get_offers(categories, page_size=10, delay=1, timeout=30):
    """Fetch the first page of offers for every leaf category.

    A leaf is a category whose id is not the parent id of any other category
    in ``categories``. Offers are de-duplicated by their ``pk`` across all
    categories; the first category that returns an offer keeps it.

    Args:
        categories: iterable of category objects exposing ``id_``, ``parent``,
            ``tag`` and ``title``.
        page_size: number of offers requested per category.
        delay: seconds to sleep after each request.
        timeout: seconds to wait for each response before ``requests`` gives
            up. Without a timeout a stalled connection would hang the loop.

    Returns:
        A list of offers built with ``create_offer``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if a response does not arrive within ``timeout``.
    """
    offers = []
    # Ids referenced as a parent by at least one category, i.e. non-leaf ids.
    parent_ids = {x.parent.id_ for x in categories if x.parent}
    seen_pks = set()

    for category in categories:
        if category.id_ in parent_ids:
            continue
        if category.tag is None:
            # A category built from empty tag codes has no tag; querying the
            # literal string "None" would be meaningless, so skip it.
            print(f'Skipping category {category.title}: no tag to query')
            continue

        print(f'Processing category {category.tag}')
        params = {'currentPage': 0,
                  'pageSize': page_size,
                  'q': f':stock:category:{category.tag}'}
        response = requests.get('https://app2.hm.com/hmwebservices/service/products/plp/hm-us/Online/en',
                                headers=headers,
                                params=params,
                                timeout=timeout)
        # Fail with a clear HTTP error instead of parsing an error page as JSON.
        response.raise_for_status()
        payload = json.loads(response.content, object_hook=lambda d: SimpleNamespace(**d))

        for offer in payload.results:
            if offer.pk not in seen_pks:
                offers.append(create_offer(offer, category))
                seen_pks.add(offer.pk)

        # Pause between requests.
        time.sleep(delay)
    return offers

In [25]:
# Download the filtered category list and store it as a JSON array of dicts.
categories = get_categories()
with open("categories.json", "w", encoding="utf-8") as f:
    json.dump([category.to_dict() for category in categories], f)

In [41]:
# Collect the offers of the leaf categories and store them as a JSON array of dicts.
offers = get_offers(categories)
with open("offers.json", "w", encoding="utf-8") as f:
    json.dump([offer.to_dict() for offer in offers], f)

Processing category ladies_dresses_cutout
Processing category ladies_dresses_sleeveless
Processing category ladies_dresses_puffsleeve
Processing category ladies_dresses_halterneck
Processing category ladies_dresses_weddingguest
Processing category ladies_dresses_camidresses
Processing category ladies_dresses_knitted
Processing category ladies_dresses_denim
Processing category ladies_dresses_shortdresses
Processing category ladies_dresses_mididresses
Processing category ladies_dresses_maxidresses
Processing category ladies_dresses_bodycon
Processing category ladies_dresses_party
Processing category ladies_dresses_cocktail
Processing category ladies_dresses_shirt
Processing category ladies_dresses_sequin
Processing category Ladies_dresses_wrap
Processing category ladies_dresses_sweater
Processing category ladies_tops_collared
Processing category ladies_tops_turtleneck
Processing category ladies_tops_halterneck
Processing category ladies_tops_cutout
Processing category ladies_tops_tube
Pr