In [1]:
import json
import textwrap
import urllib.parse
import urllib.request

from IPython.display import Image, display

In [2]:
# Globals: URL templates for the English Wikipedia API (en.wikipedia.org/w/api.php).
# Each template has a single `{title}` placeholder that is filled via str.format().
# TEXT_URL_TMPL requests `prop=extracts` with `exsentences=2`, `explaintext=1` and
# `exintro=1`, i.e. a two-sentence plain-text intro extract, returned as JSON.
TEXT_URL_TMPL = 'https://en.wikipedia.org/w/api.php?action=query&prop=extracts&exsentences=2&titles={title}&explaintext=1&exintro=1&format=json'
# THUMB_URL_TMPL requests `prop=pageimages` for the same title, returned as JSON.
THUMB_URL_TMPL = 'https://en.wikipedia.org/w/api.php?action=query&prop=pageimages&titles={title}&format=json'

In [3]:
class WikiSummarizer(object):
    """
    A stepwise processor that uses Wikipedia to summarize topics.

    To use this class, instantiate it with the topic name,
    call .process() and then .get_results().

    After .process() the instance holds:
      * ``_text``      -- the intro extract ('' if the page has none)
      * ``_thumb_url`` -- the thumbnail URL (None if the page has none)
    plus the request URLs and raw JSON responses for debugging
    (``_text_api_url``, ``_text_resp``, ``_thumb_api_url``, ``_thumb_resp``).
    """

    def __init__(self, topic):
        """Remember the topic (a Wikipedia page title); no I/O happens here."""
        self.topic = topic

    def process(self):
        """Fetch the text extract and the thumbnail URL.

        Returns:
            self, so that calls can be chained.
        """
        self._fetch_text()
        self._fetch_thumbnail()
        return self

    def get_results(self, as_text=False):
        """Return the fetched summary; .process() must have been called first.

        Args:
            as_text: if true, return a single string of the form
                '<topic> summary: <text>' instead of a WikiSummary.

        Returns:
            A str when ``as_text`` is true, otherwise a WikiSummary.
        """
        if as_text:
            return self.topic + ' summary: ' + self._text
        return WikiSummary(self.topic, self._thumb_url, self._text)

    def _fetch_text(self):
        """Query the extracts endpoint and store the intro text in ``_text``."""
        self._text_api_url = self._build_url(TEXT_URL_TMPL)
        self._text_resp = self._get_url_json(self._text_api_url)
        # Missing pages carry no 'extract' key; fall back to an empty string.
        self._text = self._first_page(self._text_resp).get('extract', '')

    def _fetch_thumbnail(self):
        """Query the pageimages endpoint and store the URL in ``_thumb_url``."""
        self._thumb_api_url = self._build_url(THUMB_URL_TMPL)
        self._thumb_resp = self._get_url_json(self._thumb_api_url)
        # Pages without an image have no 'thumbnail' entry -> store None.
        thumbnail = self._first_page(self._thumb_resp).get('thumbnail') or {}
        self._thumb_url = thumbnail.get('source')

    def _build_url(self, template):
        """Fill ``{title}`` in ``template`` with the percent-encoded topic.

        Encoding keeps spaces, '&', '#', non-ASCII characters, etc. from
        producing an invalid URL or corrupting the query string.
        """
        return template.format(title=urllib.parse.quote(str(self.topic), safe=''))

    @staticmethod
    def _first_page(resp):
        """Return the first page dict of an API response, or {} if there is none."""
        pages = resp.get('query', {}).get('pages', {})
        return next(iter(pages.values()), {})

    def _get_url_json(self, url):
        """GET ``url`` and return the UTF-8 decoded body parsed as JSON."""
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(url) as resp:
            resp_body = resp.read()
        return json.loads(resp_body.decode('utf-8'))


class WikiSummary(object):
    """Plain value object bundling a topic with its thumbnail URL and text."""

    def __init__(self, topic, thumb_url, text):
        self.topic, self.thumb_url, self.text = topic, thumb_url, text

    def __repr__(self):
        # Constructor-like form: ClassName(topic, thumb_url, text).
        fields = (self.topic, self.thumb_url, self.text)
        return '%s(%r, %r, %r)' % ((self.__class__.__name__,) + fields)

In [4]:
def multi_summarizer(articles, width=90):
    """
    Display a thumbnail and a wrapped text summary for each article title.

    Each title is run through WikiSummarizer; the thumbnail (when one is
    available) is rendered with IPython's display(), followed by the
    summary text wrapped to ``width`` columns and a separator line.

    Args:
        articles: iterable of Wikipedia page titles.
        width: maximum line length passed to textwrap.fill (default 90).

    Returns:
        None; all output goes to the notebook cell.
    """
    for item in articles:
        # Fetch the summary for this article
        summary = WikiSummarizer(item).process().get_results()

        # Show the thumbnail only if there is one; Image() needs a real URL
        if summary.thumb_url:
            display(Image(url=summary.thumb_url))
        print(textwrap.fill(summary.text, width))
        print("==============")

### Calling `multi_summarizer` with a list of Wikipedia article titles


In [5]:
# Summarize three example topics; each string is used as the page title in the API requests.
multi_summarizer(['Tea', 'Coffee', 'Machine_learning'])

Tea is an aromatic beverage commonly prepared by pouring hot or boiling water over cured
leaves of the Camellia sinensis, an evergreen shrub native to Asia. After water, it is the
most widely consumed drink in the world.


Coffee is a brewed drink prepared from roasted coffee beans, which are the seeds of
berries from the Coffea plant. The genus Coffea is native to tropical Africa, Madagascar,
and the Comoros, Mauritius and Réunion in the Indian Ocean.


Machine learning is a subfield of computer science that evolved from the study of pattern
recognition and computational learning theory in artificial intelligence. In 1959, Arthur
Samuel defined machine learning as a "Field of study that gives computers the ability to
learn without being explicitly programmed".
