In [1]:
import json


class PostDetais(object):
    """Pulls individual fields of a post out of its parsed HTML.

    All ``get_*`` accessors except ``get_body`` are best-effort: they look at
    the first element returned by ``find_all``, return ``""`` when nothing
    matches, and on any exception print a JSON error record (``link``,
    ``method``, ``message``) and return ``""``.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find_all``.
        link: URL of the post; only used in the printed error records.
    """

    def __init__(self, soup, link=None):
        self.page_soup = soup
        self.link = link

    @staticmethod
    def _tag_text(tag):
        """Return the ``text`` attribute of a matched element."""
        return tag.text

    def _log_error(self, method_name, error):
        """Print a JSON record describing a failure in ``method_name``."""
        error_trace = {
            "link": self.link,
            "method": method_name,
            "message": str(error),
        }
        print(json.dumps(error_trace, indent=4))

    def _first_match(self, method_name, extract, *find_args, **find_kwargs):
        """Apply ``extract`` to the first element ``find_all`` yields.

        Args:
            method_name: Name of the public accessor, used in the error record.
            extract: Callable taking the matched element and returning a value.
            *find_args, **find_kwargs: Forwarded unchanged to ``find_all``.

        Returns:
            ``extract(first_element)``, or ``""`` when there is no match or an
            exception was raised (the exception is logged, not propagated).
        """
        try:
            for tag in self.page_soup.find_all(*find_args, **find_kwargs):
                return extract(tag)
        except Exception as e:
            self._log_error(method_name, e)
        return ""

    def get_title(self):
        """Return the text of the first title heading, or ``""``."""
        class_names = ['graf graf--h3 graf-after--figure graf--title',
                       'graf graf--h3 graf--leading graf--title',
                       'graf graf--h3 graf-after--figure graf--trailing graf--title']
        # `True` as the name filter lets any tag match; only the class decides.
        return self._first_match("get_title", self._tag_text,
                                 True, {'class': class_names})

    def get_author_name(self):
        """Return the text of the first author link, or ``""``."""
        class_names = "ds-link ds-link--styleSubtle ui-captionStrong u-inlineBlock link link--darken link--darker"
        return self._first_match("get_author_name", self._tag_text,
                                 class_=class_names)

    def get_date(self):
        """Return the text of the first ``<time>`` element, or ``""``."""
        # Positional string argument: this selects by tag name, not by class.
        return self._first_match("get_date", self._tag_text, 'time')

    def get_read(self):
        """Return the ``title`` attribute of the first ``readingTime`` element.

        Returns ``""`` when no element matches. The attribute lookup uses
        ``.get('title')``, so a matched element without that attribute yields
        whatever ``.get`` returns for a missing key.
        """
        return self._first_match("get_read", lambda tag: tag.get('title'),
                                 class_="readingTime")

    def get_upvote(self):
        """Return the text of the first recommend-count ``<span>``, or ``""``."""
        class_names = 'u-relative u-background js-actionMultirecommendCount u-marginLeft5'
        return self._first_match("get_upvote", self._tag_text,
                                 'span', {'class': class_names})

    def get_body(self):
        """Concatenate the stripped text of every matching paragraph.

        Unlike the other accessors this one does not catch exceptions; any
        error raised by ``find_all`` propagates to the caller.
        """
        paragraphs = self.page_soup.find_all(
            class_='graf graf--p graf-after--p')
        return "".join(paragraph.text.strip() for paragraph in paragraphs)

    def get_post_content(self):
        """Return the text of the first ``section-content`` element, or ``""``."""
        return self._first_match("get_post_content", self._tag_text,
                                 class_="section-content")

    def get_response(self):
        """Return the text of the first responses ``<button>``, or ``""``."""
        class_names = 'button button--chromeless u-baseColor--buttonNormal u-marginRight12'
        return self._first_match("get_response", self._tag_text,
                                 'button', {'class': class_names})


In [2]:
import time
from bs4 import BeautifulSoup
from selenium import webdriver
import requests


class MediumScrapper(object):
    def __init__(self, CHROME_DRIVER_PATH='/home/mrx/Downloads/chromedriver'):
        self.CHROME_DRIVER_PATH = CHROME_DRIVER_PATH
        content = self.get_intial_content()
        self.parsed_data = BeautifulSoup(content, 'lxml')

    def get_intial_content(self,
                           base_url="https://medium.com/search?q=metamask"):
        driver = webdriver.Chrome(self.CHROME_DRIVER_PATH)
        driver.get(base_url)
        scrolls = 40
        while scrolls > 0:
            driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight-1000);")
            time.sleep(15)
            scrolls -= 1
        # driver.implicitly_wait(30)
        time.sleep(30)
        content = driver.execute_script(
            "return document.documentElement.outerHTML")
        driver.quit()
        return content

    def get_post_links(self):
        links = []
        class_names = "button button--smaller button--chromeless u-baseColor--buttonNormal"
        for my_tag in self.parsed_data.find_all(class_=class_names):
            links.append(my_tag.get('href'))
        return links

    def get_post_contents(self):
        links = self.get_post_links()
        data = []
        headers = requests.utils.default_headers()
        headers.update({
            'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
        })
        for link in links:
            try:
                print("Scrapping link: {}".format(link))
                time.sleep(15)
                request_link = requests.get(link, headers=headers)
                request_content = BeautifulSoup(request_link.content,
                                                'html.parser')
                post_details = PostDetais(request_content, link)
                post_title = post_details.get_title()
                author_name = post_details.get_author_name()
                post_date = post_details.get_date()
                post_readtime = post_details.get_read()
                post_upvotes = post_details.get_upvote()
                post_contents = post_details.get_post_content()
                post_responses = post_details.get_response()
                single_post = {
                    "title": post_title,
                    "author_name": author_name,
                    "link": link,
                    "post_date": post_date,
                    "readtime": post_readtime,
                    "upvotes": post_upvotes,
                    "content": post_contents,
                    "responses": post_responses
                }
                data.append(single_post)
            except Exception as e:
                print("Error in scrapping link: {}".format(link))
                print(str(e))
        return data


In [5]:
if __name__ == '__main__':
    import os

    # The driver location differs per machine: take it from the
    # CHROME_DRIVER_PATH environment variable when set, otherwise fall back
    # to the original default path.
    CHROME_DRIVER_PATH = os.environ.get('CHROME_DRIVER_PATH',
                                        '/home/mrx/Downloads/chromedriver')
    # Constructing the scraper already opens the browser and loads the
    # search page.
    scrapper = MediumScrapper(CHROME_DRIVER_PATH=CHROME_DRIVER_PATH)
    output_filename = 'metamask.json'
    data = scrapper.get_post_contents()
    # Explicit encoding so the file does not depend on the platform default.
    with open(output_filename, 'w', encoding='utf-8') as fp:
        json.dump(data, fp)
    print("Check JSON file: {}".format(output_filename))
    print("Total posts: {}".format(len(data)))

Scrapping link: https://medium.com/metamask/metamasks-new-ui-has-begun-to-roll-out-74dba32cc7f7?source=search_post---------0
Scrapping link: https://hackernoon.com/never-use-passwords-again-with-ethereum-and-metamask-b61c7e409f0d?source=search_post---------1
Scrapping link: https://medium.com/ubex/how-to-create-a-wallet-on-metamask-and-buy-ubex-tokens-using-it-58ff8d07e380?source=search_post---------2
Scrapping link: https://medium.com/metamask/developing-ethereum-dapps-with-truffle-and-metamask-aa8ad7e363ba?source=search_post---------3
Scrapping link: https://medium.com/blockchain-cuties/metamask-not-as-untraceable-as-you-thought-37e09e1acb47?source=search_post---------4
Scrapping link: https://medium.com/teamspectreai/guide-to-converting-your-sxs-tokens-to-sxu-and-sxd-tokens-with-myetherwallet-metamask-compatible-eb5615c5821e?source=search_post---------5
Scrapping link: https://blog.hellobloom.io/a-starter-guide-to-using-the-bloom-mainnet-dapp-metamask-signing-up-77f403d13f9b?source=

Scrapping link: https://medium.com/bidali/improved-user-experience-and-metamask-b58ee15a89c3?source=search_post
Scrapping link: https://medium.com/blockcat/guide-purchase-cat-using-metamask-google-chrome-742b2884ac7d?source=search_post
Scrapping link: https://blog.chronologic.network/como-ver-os-tokens-day-com-a-wallet-metamask-f6ac3c6bc624?source=search_post
Scrapping link: https://medium.com/@jinbitoken/how-to-setup-a-metamask-wallet-for-jnb-tokens-70ea9a5bba66?source=search_post
Scrapping link: https://medium.com/@bankexcom/creating-a-smart-asset-using-metamask-plugin-195fde1b2f71?source=search_post
Scrapping link: https://media.consensys.net/metamask-brings-ethereum-to-your-browser-a327f87c47ce?source=search_post
Scrapping link: https://blog.chronologic.network/how-to-see-your-day-tokens-via-metamask-17b3a1b066f3?source=search_post
Scrapping link: https://medium.com/@danfinlay/hi-there-dan-from-the-metamask-team-here-the-metamask-column-has-a-few-deceptive-marks-a890b7727b08?source

Scrapping link: https://medium.com/@merunasgrincalaitis/to-execute-the-function-do-7ec3f58c66ea?source=search_post
Scrapping link: https://medium.com/@danfinlay/hi-chris-thanks-for-the-quick-follow-up-i-absolutely-believe-you-werent-trying-to-disparage-us-8a28728162d7?source=search_post
Scrapping link: https://medium.com/@roomdao/how-to-add-rdc-token-to-eth-wallet-41b5af0ccbbd?source=search_post
Scrapping link: https://medium.com/@WeiFund/braid-contribution-instructions-63a7be9ef587?source=search_post
Scrapping link: https://medium.com/@iotex/%EC%95%84%EC%9D%B4%EC%98%A4%ED%85%8D%EC%8A%A4-iotex-%EB%8C%80%ED%91%9C-%ED%94%84%EB%A1%9C%EA%B7%B8%EB%9E%A8%EC%9D%84-%EC%9C%84%ED%95%9C-%EB%AA%A8%EB%B0%94%EC%9D%BC-%ED%88%AC%ED%91%9C-dapp%EA%B0%80-%EC%B6%9C%EC%8B%9C-e340ee6cee48?source=search_post
Scrapping link: https://medium.com/@davecraige/hi-sunk818-a1db3c345147?source=search_post
Scrapping link: https://medium.com/@Amplify/thats-great-that-you-tried-cent-but-you-really-don-t-need-to-buy-cryp

Scrapping link: https://medium.com/@mail.bahurudeen/setup-a-metamask-ethereum-wallet-and-use-it-to-send-and-receive-ether-4f3b99360e4f?source=search_post
Scrapping link: https://medium.com/faast/metamask-faast-the-easiest-way-to-diversify-your-cryptocurrency-portfolio-4551ea649439?source=search_post
Scrapping link: https://medium.com/metamask/metamask-monthly-september-a2993bc2fa0f?source=search_post
Scrapping link: https://medium.com/hackoin-taiwan/%E6%99%BA%E8%83%BD%E5%90%88%E7%B4%84-smart-contract-%E6%95%99%E5%AD%B8%E7%B3%BB%E5%88%97-2-metamask-dadbe1d2ac3e?source=search_post
Scrapping link: https://medium.com/swapynetwork/integrating-metamask-with-electron-a-simple-secure-and-non-intrusive-approach-517a04da1656?source=search_post
Scrapping link: https://blog.pryze.com/how-to-create-a-private-wallet-with-metamask-1386255f2bf8?source=search_post
Scrapping link: https://medium.freecodecamp.org/every-blockchain-developer-should-know-these-web3-and-metamask-use-cases-7f93c1f139b1?source

Scrapping link: https://medium.com/coinmonks/whitepaper-and-demo-ux-for-authenticated-verified-erc20-payments-using-metamask-and-ethsigutil-7a146afcd65e?source=search_post
Scrapping link: https://medium.com/@escrowblock/how-to-claim-free-tokens-via-metamask-6fcffd77bec8?source=search_post
Scrapping link: https://medium.com/vestella/announcement-how-to-transfer-ves-from-mew-metamask-and-bitberry-to-exchanges-8d2980e59fb3?source=search_post
Scrapping link: https://medium.com/teamspectreai/how-to-withdraw-your-rewards-part-1-metamask-9c5be6d2ac90?source=search_post
Scrapping link: https://medium.com/@everfountain/%E5%9C%96%E6%96%87%E6%95%99%E5%AD%B8-%E5%A6%82%E4%BD%95%E5%8F%83%E8%88%87%E5%92%96%E5%95%A1%E5%B9%A3-bean-%E7%9A%84-ico-metamask-%E9%8C%A2%E5%8C%85%E7%AF%87-f3498cf214b8?source=search_post
Scrapping link: https://medium.com/@Hurify/metamask-installation-and-adding-hur-tokens-on-test-network-for-platform-evaluation-bb7438bf54cd?source=search_post
Scrapping link: https://hackernoon

Scrapping link: https://medium.com/@dan_43404/handling-metamask-rejections-532a4a41caf?source=search_post
Scrapping link: https://medium.com/@dtseng/%E7%B6%B2%E9%A0%81dapps%E4%B8%8A%E7%9A%84%E6%82%A0%E9%81%8A%E5%8D%A1-metamask-2-e0a55f8eaec0?source=search_post
Scrapping link: https://medium.com/@redi_official/%EB%A9%94%ED%83%80%EB%A7%88%EC%8A%A4%ED%81%AC-metamask-%EC%84%A4%EC%B9%98-%EB%B0%8F-%EC%82%AC%EC%9A%A9%ED%95%98%EA%B8%B0-bd7b91845b9?source=search_post
Scrapping link: https://medium.com/nuo-news/product-update-now-signup-with-metamask-or-web3-wallet-on-nuo-4340f1535729?source=search_post
Scrapping link: https://medium.com/@kbennett2000_46495/how-to-create-an-ethereum-wallet-using-the-metamask-chrome-extension-78c210ea735b?source=search_post
Scrapping link: https://medium.com/humanscape-ico/%ED%9C%B4%EB%A8%BC%EC%8A%A4%EC%BC%80%EC%9D%B4%ED%94%84-metamask%EC%97%90-hum-%ED%86%A0%ED%81%B0-%EC%B6%94%EA%B0%80%ED%95%98%EB%8A%94-%EB%B0%A9%EB%B2%95-54dd9f8811aa?source=search_post
Scrapping