In [30]:
import requests
from bs4 import BeautifulSoup as bs
from pprint import pprint
import pandas as pd
import datetime
import re

# Root URL of the shop. It is passed to getCtalogRefs, which downloads this
# page and prefixes every menu link it finds with this value.
mainRef = "https://health-tehnika.ru"




In [83]:
def getPrices(ref):
    """Scrape product names and current prices from one catalog page.

    Parameters
    ----------
    ref : str
        URL of the catalog page to download.

    Returns
    -------
    dict
        Maps product name to its price, kept as the cleaned text taken from
        the page. Left out are: products labelled 'Предзаказ', products whose
        price is 0, product cards without a title link or a current-price
        element, and products whose price text is not an integer.
    """
    # A timeout keeps one stalled connection from hanging the whole run.
    responce = requests.get(ref, timeout=30).text
    html = bs(responce, "lxml")

    prices = {}
    for tag in html.find_all("div", class_="product_preview-form_container row"):
        title = tag.find("div", class_="product_preview-title")
        link = title.find("a") if title is not None else None
        priceTag = tag.find("div", class_="prices-current")
        if link is None or priceTag is None:
            # Incomplete product card: there is nothing reliable to record.
            continue

        # U+2010 HYPHEN is normalised to an ASCII hyphen.
        name = link.getText().strip().replace(u'\u2010', "-")

        # Trim the characters 'о', 'т', 'р' and whitespace from both ends
        # (presumably an "от" prefix and a "р" currency suffix -- confirm
        # against the live markup), then drop non-breaking spaces.
        price = priceTag.getText().strip("отр \n\r\t").replace("\xa0", "")

        if price == 'Предзаказ':
            continue

        try:
            value = int(price)
        except ValueError:
            # Any other non-numeric label used to abort the whole scrape;
            # report it and move on to the next product instead.
            print("Skipping %r: cannot parse price %r" % (name, price))
            continue

        if value != 0:
            prices[name] = price
    return prices

In [84]:
def getPageRefs(ref):
    """List the URLs of all pages of a paginated catalog section.

    Parameters
    ----------
    ref : str
        URL of the first page of the section.

    Returns
    -------
    list of str
        ``ref`` followed by ``ref + "?page=N"`` for N from 2 up to the page
        count found in the pagination summary. Only ``[ref]`` is returned
        when the page has no ``div.results`` element or when its text does
        not match the expected summary format.
    """
    # A timeout keeps one stalled connection from hanging the whole run.
    responce = requests.get(ref, timeout=30).text
    html = bs(responce, "lxml")

    pageNumTags = html.find("div", class_="results")
    if pageNumTags is None:
        return [ref]

    res = re.search(
        r"Показано с \d+ по \d+ из \d+ \(всего (\d+) страниц\)",
        pageNumTags.getText(),
    )
    if res is None:
        # The summary is present but worded differently: treat the section
        # as a single page rather than failing on ``None.group``.
        return [ref]

    lastPageNum = int(res.group(1))

    # Page 1 is the bare URL; numbered pages start at 2.
    return [ref] + [ref + "?page=" + str(x) for x in range(2, lastPageNum + 1)]

In [85]:
def getAllPrices(refs):
    """Merge the price tables scraped from several catalog pages.

    Parameters
    ----------
    refs : iterable of str
        Page URLs, each handed to ``getPrices`` in the given order.

    Returns
    -------
    dict
        Product name -> price. When the same name appears on more than one
        page, the value from the later page is kept.
    """
    return {
        productName: productPrice
        for pageUrl in refs
        for productName, productPrice in getPrices(pageUrl).items()
    }

In [86]:
def printToFile(allPrices, fname):
    """Write the collected prices to a CSV file, sorted by product name.

    Parameters
    ----------
    allPrices : dict
        Product name -> price.
    fname : str
        Path of the CSV file to create or overwrite.

    The file has a ``name;price`` header, uses ``;`` as separator and is
    encoded as cp1251. An empty ``allPrices`` produces a header-only file.
    """
    # Naming the columns in the constructor (instead of assigning
    # ``data.columns`` afterwards) also works for an empty dict, where the
    # frame would otherwise have zero columns and the assignment would fail.
    data = pd.DataFrame(list(allPrices.items()), columns=["name", "price"])

    data.sort_values("name").to_csv(fname, index=False, sep=";", encoding='cp1251')


In [87]:
def getAll(ref):
    """Scrape every page of one catalog section.

    Parameters
    ----------
    ref : str
        URL of the section's first page.

    Returns
    -------
    dict
        The result of ``getAllPrices`` applied to the page URLs that
        ``getPageRefs`` returns for ``ref``.
    """
    return getAllPrices(getPageRefs(ref))

In [88]:
def getCtalogRefs(ref):
    """Collect the URLs of the top-level catalog sections from the main menu.

    Parameters
    ----------
    ref : str
        URL of the page that carries the main menu. Each menu link's
        ``href`` is appended to this value as-is.

    Returns
    -------
    set of str
        One URL per first-level menu entry, excluding the entries named
        'Акции', 'Подарочные сертификаты' and 'Бренды'. The set is empty
        when the menu element is not found on the page.
    """
    # Menu entries excluded from scraping (promotions, gift certificates,
    # brands).
    skippedNames = ("Акции", "Подарочные сертификаты", "Бренды")

    refs = set()

    # A timeout keeps one stalled connection from hanging the whole run.
    responce = requests.get(ref, timeout=30).text
    html = bs(responce, "lxml")

    megaCat = html.find("ul", class_="menu menu--main menu--main_lvl_1 menu--horizontal")
    if megaCat is None:
        # No main menu on the page: nothing to collect.
        return refs

    colBlockTags = megaCat.find_all("li", class_="menu-node menu-node--main_lvl_1 js-menu-wrapper")
    for tag in colBlockTags:
        atag = tag.find("a")
        if atag is None:
            continue

        href = atag.get("href")
        if href is None:
            # Anchor without a target cannot be turned into a URL.
            continue

        if atag.getText().strip() in skippedNames:
            continue

        refs.add(ref + href)

    return refs

In [89]:
if __name__ == '__main__':
    # The output file name carries today's date as DD-MM-YYYY.
    now = datetime.datetime.now()
    fname = "HealthTehnika_" + now.strftime("%d-%m-%Y") + ".csv"

    catRefs = getCtalogRefs(mainRef)

    allPrices = {}

    # catRefs is a set, so its iteration order can differ from run to run.
    # Sorting makes the progress output reproducible and fixes which section
    # wins when the same product name shows up in more than one of them.
    for ref in sorted(catRefs):
        print(ref)
        allPrices.update(getAll(ref))

    printToFile(allPrices, fname)
    
    

https://health-tehnika.ru/collection/blendery
https://health-tehnika.ru/collection/komplektuyuschie-i-aksessuary
https://health-tehnika.ru/collection/vakuumnye-upakovschiki
https://health-tehnika.ru/collection/kuhonnaya-tehnika
https://health-tehnika.ru/collection/pribory-dlya-zdorovya
https://health-tehnika.ru/collection/posuda-green-pan
https://health-tehnika.ru/collection/proraschivateli
https://health-tehnika.ru/collection/sokovyzhimalki
https://health-tehnika.ru/collection/degidratory-sushilki
https://health-tehnika.ru/collection/produkty-dlya-zdorovya
