In [1]:
import rouge
import torch
from transformers import pipeline
import wikipedia
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

# Helper functions
def summarize_article(article, summarizer, words=500, min_len=75, summary_max_length=250):
    wiki_page = wikipedia.page(article)
    article_content = ' '.join(wiki_page.content.split()[:words])
    return summarizer(article_content, min_length=min_len, max_length=summary_max_length)

def get_article(article_name, words=500):
    try:
        wiki_page = wikipedia.page(article_name)
        article_content = ' '.join(wiki_page.content.split()[:words])
        return article_content
    except:
        return None

# Use this to avoid pegasus blowing up GPU memory and refusing to leave 
class PegasusSummarizer():
    def __init__(self):
        model_name = 'google/pegasus-xsum'
        self.torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(self.torch_device)
        self.tokenizer = PegasusTokenizer.from_pretrained(model_name)
        self.model = PegasusForConditionalGeneration.from_pretrained(model_name).to(self.torch_device)
    def summarize(self, src_txt):
        cleaned_text = [text for text in src_text if text]
        batch = self.tokenizer(cleaned_text, truncation=True, padding='longest', return_tensors="pt").to(self.torch_device)
        translated = self.model.generate(**batch, max_length=200)
        tgt_text = self.tokenizer.batch_decode(translated, skip_special_tokens=True)
        return tgt_text

In [2]:
src_text = [
    get_article("Natural language processing"),
    get_article("Artificial intelligence"),
    get_article("Neural network"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['In our series of letters from African journalists, film-maker and columnist Ahmed Rashid looks at the history of natural language processing.',
 'Artificial intelligence (AI) is intelligence demonstrated by machines, unlike the natural intelligence displayed by humans and animals, which involves consciousness and emotionality.',
 'A neural network is a network or circuit of neurons, or in a modern sense, an artificial neural network, composed of artificial neurons or nodes.']

In [3]:
src_text = [
    get_article("Packet switching"),
    get_article("Internet"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['Packet switching is the primary basis for data communications in computer networks worldwide.',
 'Interest, in finance and economics, is payment from a borrower or deposit-taking financial institution to a lender or depositor of an amount above repayment of the principal sum (that is, the amount borrowed), at a particular rate.']

In [4]:
src_text = [
    get_article("Finance"),
    get_article("Network packet"),
    get_article("Transmission medium"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['In our series of letters from African journalists, film-maker and columnist Farai Sevenzo looks at the history of finance.',
 'A network packet is a formatted unit of data carried by a packet-switched network.',
 'A definition of a transmission medium:']

In [5]:
src_text = [
    get_article("Diana, Princess of Wales"),
    get_article("Meghan, Duchess of Sussex"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['Diana, Princess of Wales, was a member of the British royal family.',
 'Meghan Markle is the Duchess of Sussex, a member of the British royal family.']

In [6]:
src_text = [
    get_article("Algebra"),
    get_article("Calculus"),
    get_article("Geometry"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['Algeria has one of the largest militaries in Africa and the largest defence budget.',
 'Calculus, originally called infinitesimal calculus or "the calculus of infinitesimals", is a major branch of mathematics.',
 'Geometry has applications to almost all sciences, and also to art, architecture, and other activities that are related to graphics.']

In [7]:
src_text = [
    get_article("Topology"),
    get_article("Differential geometry"),
    get_article("Number theory"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['In our series of letters from African journalists, film-maker and columnist Ahmed Rashid looks at the history of topology in mathematics.',
 'Differential geometry is a mathematical discipline that uses techniques of differential calculus, integral calculus, linear algebra and multilinear algebra to study problems in geometry.',
 'Number theory (or arithmetic or higher arithmetic in older usage) is a branch of pure mathematics devoted primarily to the study of arithmetic and integer-valued functions.']

In [8]:
src_text = [
    get_article("Algorithm"),
    get_article("Statistics"),
    get_article("Game theory"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['An algorithm (listen) is a finite computation of well-defined, computer-implementable instructions, typically to solve a class of problems or to perform a task.',
 'Statistics is a branch of science that deals with the analysis of data.',
 'Game theory has been widely recognized as an important tool in many fields.']

In [2]:
src_text = [
    get_article("Fourier transform"),
    get_article("Probability"),
    #get_article("Cryptography"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['The Fourier transform of a function of time is a complex-valued function of frequency, whose magnitude (absolute value) represents the amount of that frequency present in the original function, and whose argument is the phase offset of the basic sinusoid in that frequency. The Fourier transform of a function of time is a complex-valued function of frequency, whose magnitude (absolute value) represents the amount of that frequency present in the original function, and whose argument is the phase offset of the basic sinusoid in that frequency.',
 'Probability is the branch of mathematics concerning numerical descriptions of how likely an event is to occur, or how likely it is that a proposition is true.']

In [3]:
src_text = [
    get_article("Information theory"),
    get_article("Signal processing"),
    get_article("Communication"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['Information theory is the scientific study of the quantification, storage, and communication of information.',
 'What is signal processing?',
 'Communication is a branch of science which deals with the development, transmission, and interpretation of information.']

In [4]:
src_text = [
    get_article("Nash Equilibrium"),
    get_article("Functional Analysis"),
    get_article("Hausdorff space"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['The Nash equilibrium is the most common way to define the solution of a non-cooperative game involving two or more players.',
 'A Hausdorff space is a topological space where for any two distinct points there exist neighbourhoods of each which are disjoint from each other.']

In [5]:
src_text = [
    get_article("Hilbert space"),
    get_article("Linear algebra"),
    get_article("Holomorphic function"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['Linear algebra is the branch of mathematics concerning linear equations such as: a 1 x 1 +  + a n x n = b',
 'A holomorphic function is a complex-valued function of one or more complex variables that is, at every point of its domain, complex differentiable in a neighbourhood of the point.']

In [None]:
# 27 articles so far

In [6]:
src_text = [
    get_article("Neighbourhood (mathematics)"),
    get_article("Metric space"),
    get_article("Felix Hausdorff"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['In our series of letters from African journalists, film-maker and columnist Ahmed Rashid looks at the concept of a neighbourhood.',
 'A metric space is a space in mathematics where the distance between two points is defined by a function called a metric.',
 'Felix Hausdorff was born in Leipzig, Germany, on 8 November 1868.']

In [7]:
src_text = [
    get_article("Wasserstein metric"),
    get_article("Probability measure"),
    get_article("Volume"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['The Wasserstein distance or Kantorovich-Rubinstein metric is a distance function defined between probability distributions on a given metric space M displaystyle M.',
 'A probability measure is a real-valued function defined on a set of events in a probability space that satisfies measure properties such as countable additivity.',
 'Volume is one of the basic units of measurement in the International System of Units (SI).']

In [8]:
src_text = [
    get_article("Subset"),
    get_article("Complex analysis"),
    get_article("Function (mathematics)"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['The subset relation defines a partial order on sets.',
 'Complex analysis is the branch of mathematics that investigates functions of complex numbers.',
 'A function is a process or relation that associates each element x of a set X, the domain of the function, to a single element y of another set Y (possibly the same set), the codomain of the function.']

In [2]:
src_text = [
    get_article("Codomain"),
    get_article("Surjective function"),
    get_article("Identity function"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['The codomain or set of destination of a function is the set into which all of the output of the function is constrained to fall.',
 'The identity function is a function that always returns the same value as its argument.']

In [3]:
src_text = [
    get_article("Monoid"),
    get_article("Semigroup"),
    get_article("Magma (algebra)"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['A semigroup is an algebraic structure consisting of a set together with an associative binary operation.',
 'A magma, binar or groupoid is a basic kind of algebraic structure.']

In [2]:
src_text = [
    get_article("Monad (category theory)"),
    get_article("Monad (functional programming)"),
    get_article("Monad (Gnosticism)"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['Monad theory is a branch of mathematics, a branch of mathematics, a monad (also triple, triad, standard construction and fundamental construction) is an adjoint functor (a functor mapping a to itself), together with two natural transformations required to fulfill certain coherence conditions.',
 'A monad is an abstraction that allows structuring programs generically.',
 'The Monad in gnosticism is an adaptation of concepts of the Monad in Greek philosophy to Christian gnostic belief systems.']

In [2]:
src_text = [
    get_article("Fiber Bundle"),
    get_article("Manifold"),
    get_article("Symplectic manifold"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['A fiber bundle is a topological space that is locally a product space, but globally may have a different topological structure.',
 'The concept of a manifold is central to many parts of geometry and modern mathematical physics because it allows complicated structures to be described and understood in terms of the simpler local topological properties of Euclidean space.',
 'A symplectic manifold is a smooth manifold, M displaystyle M, equipped with a closed nondegenerate differential 2-form  omega , called the symplectic form.']

In [3]:
src_text = [
    get_article("Klein bottle"),
    get_article("Immersion (mathematics)"),
    get_article("Diffeomorphism"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['The Klein bottle is a topological object.',
 'An immersion is a differentiable function between differentiable manifolds whose derivative is everywhere injective.',
 'A diffeomorphism is an invertible function that maps one differentiable manifold to another such that both the function and its inverse are smooth.']

In [4]:
src_text = [
    get_article("Differentiable manifold"),
    get_article("Homeomorphism"),
    get_article("Topological property"),
]
p = PegasusSummarizer()
p.summarize(src_text)

cuda


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['A differentiable manifold is a type of manifold that is locally similar enough to a linear space to allow one to do calculus.',
 'A homeomorphism is a continuous function between topological spaces that has a continuous inverse function.',
 'A topological property or topological invariant is a property of a topological space which is invariant under homeomorphisms.']