In [19]:
from typing import List
import spacy
import pytextrank
import pandas as pd
import numpy as np
from operator import itemgetter
from itertools import islice
from math import sqrt
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import normalize

In [1]:
# Sample abstract fed to the pipeline below; adjacent literals are concatenated
# into one string, one literal per sentence for readability.
text = (
    "Compatibility of systems of linear constraints over the set of natural numbers. "
    "Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. "
    "Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given. "
    "These criteria and the corresponding algorithms for constructing a minimal supporting set of solutions can be used in solving all the considered types systems and systems of mixed types."
)

In [11]:
# Load the spaCy "en_core_web_md" model as the base pipeline.
nlp = spacy.load("en_core_web_md")
# Create a pytextrank TextRank object and append its PipelineComponent as the
# last pipeline stage, registered under the name "textrank".
# NOTE(review): summarise() reads doc._.phrases, presumably set by this stage — confirm.
# NOTE(review): passing a callable to add_pipe looks like the spaCy 2.x calling
# convention — confirm against the installed spaCy / pytextrank versions.
tr = pytextrank.TextRank()
nlp.add_pipe(tr.PipelineComponent, name="textrank", last=True)

In [12]:
# Run the pipeline (including the "textrank" stage added above) over the sample text.
doc = nlp(text)

In [17]:
def summarise(
    doc: spacy.tokens.Doc,
    top_k_keywords: int = 10,
    top_k_sentences: int = 3,
) -> List[spacy.tokens.Span]:
    """Select the sentences of ``doc`` that best cover its leading key phrases.

    Every sentence is encoded as a vector over the first ``top_k_keywords``
    entries of ``doc._.phrases``: a component holds that phrase's
    L1-normalised ``rank`` when at least one of the phrase's chunks lies
    inside the sentence, and 0 otherwise. Sentences are scored by the
    euclidean distance between this vector and the vector of all normalised
    ranks; a smaller distance means the sentence covers more rank mass.

    Args:
        doc: A parsed document exposing ``doc.sents`` and ``doc._.phrases``,
            where each phrase has a numeric ``rank`` and an iterable
            ``chunks`` of spans.
        top_k_keywords: How many leading entries of ``doc._.phrases`` to use.
        top_k_sentences: Maximum number of sentences to return.

    Returns:
        At most ``top_k_sentences`` sentence spans, ordered from smallest to
        largest distance; ties keep document order. Fewer spans are returned
        when the document has fewer sentences. If there are no phrases to
        score against, the leading sentences are returned in document order.

    Raises:
        ValueError: If ``top_k_keywords`` or ``top_k_sentences`` is negative.
    """
    if top_k_keywords < 0 or top_k_sentences < 0:
        raise ValueError("top_k_keywords and top_k_sentences must be non-negative")

    def is_part_of(chunk: spacy.tokens.Span, sentence: spacy.tokens.Span) -> bool:
        """whether the chunk is part of the sentence"""
        return sentence.start <= chunk.start <= chunk.end <= sentence.end

    # doc.sents is a generator: materialise it once instead of re-walking the
    # document for every keyword and again for every selected sentence.
    sentences = list(doc.sents)
    phrases = doc._.phrases[:top_k_keywords]

    # With nothing to score (no sentences or no phrases) every sentence is
    # tied, so fall back to document order rather than failing inside sklearn.
    if len(sentences) == 0 or len(phrases) == 0:
        return sentences[:top_k_sentences]

    # unit rank vector, shape (1, n_phrases)
    U = normalize([[p.rank for p in phrases]], norm="l1")

    # sentences as bag of keywords: 0/1 matrix of shape (n_sentences, n_phrases)
    bag_of_keywords = np.array(
        [
            [any(is_part_of(c, s) for c in p.chunks) for p in phrases]
            for s in sentences
        ],
        dtype=int,
    )

    # euclidean distance of each rank-weighted sentence vector from the unit vector
    dist = euclidean_distances(bag_of_keywords * U, U).reshape(-1)

    # Take the sentences with the lowest distance, up to the limit requested.
    # A stable argsort (rather than argpartition) gives a deterministic
    # best-first order and also works when the document has no more than
    # top_k_sentences sentences.
    best_first = np.argsort(dist, kind="stable")[:top_k_sentences]
    return [sentences[idx] for idx in best_first]

In [20]:
# Display the summary sentences extracted from the sample document.
summarise(doc)

[These criteria and the corresponding algorithms for constructing a minimal supporting set of solutions can be used in solving all the considered types systems and systems of mixed types.,
 Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered.,
 Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given.]