In [97]:
import requests
from bs4 import BeautifulSoup as bs
from pprint import pprint
import pandas as pd
import datetime
import re

mainRef = "https://madeindream.com"




In [141]:
def getPrices(ref):
    responce = requests.get(ref).text
    html = bs(responce, "lxml")

    tags = html.find_all("div", class_="product-thumb clearfix grid_3")
    prices = {}
    for tag in tags:
        nameTag = tag.find("div", class_="name")
        name = nameTag.find("a").getText().strip()
        name = name.replace(u'\u2010',"-")
        price = tag.find("div", class_="price").getText().strip("₽ \n\r")
        res = re.search(r"(\d+)₽\n\d+",price)
        if(res):
            price = res.group(1)
            
        if (int(price) != 0):
            prices[name] = price
    return(prices)

In [142]:
def getPageRefs(ref):
    responce = requests.get(ref).text
    html = bs(responce, "lxml")
    
    pageNumTags = html.find("div", class_="results")
    
    if pageNumTags == None:
        return([ref])
    
    res = re.search(r"Показано с \d+ по \d+ из \d+ \(всего (\d+) страниц\)", pageNumTags.getText())
    
    
    lastPageNum = int(res.group(1))

    refs = [ref + "?page=" + str(x) for x in range(2,lastPageNum + 1)]
    refs = [ref] + refs
    return(refs)

In [143]:
def getAllPrices(refs):
    allPrices = {}

    for pageRef in refs:
        prices = getPrices(pageRef)
        allPrices.update(prices)
    return(allPrices)

In [144]:
def printToFile(allPrices, fname):

    data = pd.DataFrame(list(allPrices.items()))
    data.columns = ["name","price"]
    
    data.sort_values("name").to_csv(fname, index = False, sep = ";", encoding='cp1251')


In [145]:
def getAll(ref):
        
    refs = getPageRefs(ref)
    
    allPrices = getAllPrices(refs)
    
    return(allPrices)

In [146]:
def getCtalogRefs(ref):
    refs = set()
    
    responce = requests.get(ref).text
    html = bs(responce, "lxml")
    
    megaCat = html.find("ul", class_="mega-category")
    
    colBlockTags = megaCat.findAll("li")
    for tag in colBlockTags:
        atag = tag.find("a")
        
        if atag['href'].find("?filter_tag=")>0:
            continue
        result = re.search(r'(.*/)[^/]+/$', atag['href'])
        cat = result.group(1)
        
        if cat in refs:
            continue

        refs.add(atag['href'])
    
    return(refs)

In [150]:
if __name__ == '__main__':    
    now = datetime.datetime.now()
    fname = "MadeInDream_" + now.strftime("%d-%m-%Y") + ".csv"
    
    catRefs = getCtalogRefs(mainRef)

    
    allPrices = {}

    for ref in catRefs:

        print(ref)
        allPrices.update(getAll(ref))
    
    printToFile(allPrices, fname)
    
    

https://madeindream.com/ekotestery/
https://madeindream.com/ucenennye-tovary/
https://madeindream.com/degidratory/
https://madeindream.com/suvidy/
https://madeindream.com/kuhonnye-kombainy/
https://madeindream.com/zapchasti/
https://madeindream.com/drugoe-kupit/
https://madeindream.com/sprautery/
https://madeindream.com/tovary-gigieny/
https://madeindream.com/vakuumnye-upakovshhiki/
https://madeindream.com/maslopressy/
https://madeindream.com/banki-mason-jar/
https://madeindream.com/jogurtnicy/
https://madeindream.com/aksessuary/
https://madeindream.com/melanzhery/
https://madeindream.com/planetarnye-miksery/
https://madeindream.com/melnicy/
https://madeindream.com/pylesosy/
https://madeindream.com/terki/
https://madeindream.com/blendery/
https://madeindream.com/sokovyzhimalki/
https://madeindream.com/zdorovye/
https://madeindream.com/ionizatory/
https://madeindream.com/ochistiteli-vody/
