In [1]:
from requests import Session
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import sys, os, re
from bs4 import BeautifulSoup as bs
class RetrySession(Session):
    __attrs__ = [
        'headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify',
        'cert', 'prefetch', 'adapters', 'stream', 'trust_env',
        'max_redirects', 'retries', 'backoff', 'timeout', 'set_encoding',
        'mount', 'encoding'
    ]

    def __init__(self, retries=3, backoff=0.3, timeout=5, **kwargs):
        """ Session with retrying adapter

        param:

            retries: maximum number of retries (default: 3)

            backoff: sleep for {backoff} * 2 ** {n_retries}, (default: 0.3)

            timeout: request timeout

            encoding: if given, set response encoding after receiving requests
        """
        self.retries = retries
        self.backoff = backoff
        self.timeout = timeout
        self.set_encoding = ('encoding' in kwargs)
        self.encoding = None
        if self.set_encoding:
            self.encoding = kwargs['encoding']

        super().__init__()
        retry = Retry(retries, retries, retries, backoff_factor=backoff)
        adapter = HTTPAdapter(max_retries=retry)
        self.mount('https://', adapter)
        self.mount('http://', adapter)

        self.headers['User-Agent'] = 'SNU IDS Lab (http://ids.snu.ac.kr/)'

    def request(self, method, url, **kwargs):
        if 'timeout' not in kwargs:
            kwargs['timeout'] = self.timeout

        r = super().request(method, url, **kwargs)

        if self.set_encoding:
            r.encoding = self.encoding

        return r.text

s = RetrySession()

In [4]:
resp = s.get('http://madhead42.cafe24.com/product/%EC%83%98%ED%94%8C%EC%83%81%ED%92%88-2/10/category/24/display/1/')
soup = bs(resp, 'html.parser')
metas = soup.find_all("meta")
len(metas)

13

In [17]:
hints = {
    'title': {'og:title'},
    'site': {'og:site_name'},
    'type': {'og:type'},
    'price': {'product:price:amount'},
    'sale_price': {'product:sale_price:amount'},
    'currency': {'product:price:currency'},
}

data = {}
for meta in metas:
    attrs = meta.attrs
    if 'property' in attrs and 'content' in attrs:
        for k, hints in hints.items():
            if attrs['property'] in hints:
                data[k] = attrs['content']
data

NameError: name 'x' is not defined