In [3]:
import spacy
# NOTE(review): pytextrank is never referenced by name in this cell; presumably
# importing it is what makes the "textrank" component available to
# nlp.add_pipe() below -- confirm against the PyTextRank docs.
import pytextrank

import warnings
# silence every warning for the rest of the kernel session (applies globally,
# not just to this cell)
warnings.filterwarnings("ignore")

# example text
text = "Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given. These criteria and the corresponding algorithms for constructing a minimal supporting set of solutions can be used in solving all the considered types systems and systems of mixed types."

# load a spaCy model, depending on language, scale, etc.
nlp = spacy.load("en_core_web_sm")

# add PyTextRank to the spaCy pipeline
nlp.add_pipe("textrank")
# run the full pipeline once; every later cell reads its results from `doc`
# (doc._.phrases and doc.sents)
doc = nlp(text)

In [4]:
# `ic` is used for debug-style printing throughout the rest of the notebook;
# it echoes the argument expressions together with their values.
from icecream import ic

# Dump every phrase found in the document: its rank, how many times it
# occurs (p.count) and its text, followed by the list of spans (p.chunks)
# where it occurs.
for p in doc._.phrases:
    ic(p.rank, p.count, p.text)
    ic(p.chunks)

ic| p.rank: 0.17054248030845812, p.count: 1, p.text: 'mixed types'
ic| p.chunks: [mixed types]
ic| p.rank: 0.15757771579579002
    p.count: 1
    p.text: 'minimal generating sets'
ic| p.chunks: [minimal generating sets]
ic| p.rank: 0.1573942320091846, p.count: 3, p.text: 'systems'
ic| p.chunks: [systems, systems, systems]
ic| p.rank: 0.14894241299658317
    p.count: 1
    p.text: 'nonstrict inequations'
ic| p.chunks: [nonstrict inequations]
ic| p.rank: 0.14039169904589088
    p.count: 1
    p.text: 'strict inequations'
ic| p.chunks: [strict inequations]
ic| p.rank: 0.11698198658021898, p.count: 1, p.text: 'natural numbers'
ic| p.chunks: [natural numbers]
ic| p.rank: 0.11559770516796158
    p.count: 1
    p.text: 'linear Diophantine equations'
ic| p.chunks: [linear Diophantine equations]
ic| p.rank: 0.11407086615794945, p.count: 3, p.text: 'solutions'
ic| p.chunks: [solutions, solutions, solutions]
ic| p.rank: 0.10165710454752863
    p.count: 1
    p.text: 'linear constraints'
ic| p.chu

In [5]:
# One mutable record per sentence: [start offset, end offset, phrase ids].
# The set starts empty and is filled in later with the ids of the top-ranked
# phrases that occur inside that sentence.
sent_bounds = [[sent.start, sent.end, set()] for sent in doc.sents]
sent_bounds

[[0, 13, set()], [13, 33, set()], [33, 61, set()], [61, 91, set()]]

In [6]:
# how many of the top-ranked phrases take part in the sentence scoring
limit_phrases = 4

# raw rank of each selected phrase, indexed by phrase_id
unit_vector = []

# Start from empty phrase-id sets so that re-running this cell (for example
# after changing limit_phrases) does not keep ids from a previous run.
for sent_start, sent_end, sent_vector in sent_bounds:
    sent_vector.clear()

for phrase_id, p in enumerate(doc._.phrases):
    # check the limit *before* processing, so that limit_phrases = 0 selects
    # nothing instead of falling through and selecting every phrase
    if phrase_id >= limit_phrases:
        break

    ic(phrase_id, p.text, p.rank)
    unit_vector.append(p.rank)

    for chunk in p.chunks:
        ic(chunk.start, chunk.end)

        # find the sentence whose bounds fully contain this occurrence and
        # record the phrase id there; the first match wins
        for sent_start, sent_end, sent_vector in sent_bounds:
            if chunk.start >= sent_start and chunk.end <= sent_end:
                ic(sent_start, chunk.start, chunk.end, sent_end)
                sent_vector.add(phrase_id)
                break

ic| phrase_id: 0, p.text: 'mixed types', p.rank: 0.17054248030845812
ic| chunk.start: 88, chunk.end: 90
ic| sent_start: 61, chunk.start: 88, chunk.end: 90, sent_end: 91
ic| phrase_id: 1
    p.text: 'minimal generating sets'
    p.rank: 0.15757771579579002
ic| chunk.start: 48, chunk.end: 51
ic| sent_start: 33, chunk.start: 48, chunk.end: 51, sent_end: 61
ic| phrase_id: 2, p.text: 'systems', p.rank: 0.1573942320091846
ic| chunk.start: 2, chunk.end: 3
ic| sent_start: 0, chunk.start: 2, chunk.end: 3, sent_end: 13
ic| chunk.start: 57, chunk.end: 58
ic| sent_start: 33, chunk.start: 57, chunk.end: 58, sent_end: 61
ic| chunk.start: 86, chunk.end: 87
ic| sent_start: 61, chunk.start: 86, chunk.end: 87, sent_end: 91
ic| phrase_id: 3
    p.text: 'nonstrict inequations'
    p.rank: 0.14894241299658317
ic| chunk.start: 28, chunk.end: 30
ic| sent_start: 13, chunk.start: 28, chunk.end: 30, sent_end: 33


In [7]:
# inspect the per-sentence records again: the third element of each record
# now holds the ids of the selected phrases found in that sentence
sent_bounds

[[0, 13, {2}], [13, 33, {3}], [33, 61, {1, 2}], [61, 91, {0, 2}]]

In [8]:
# print each sentence of the document, in document order, for comparison
# with the per-sentence phrase ids recorded in sent_bounds
for sent in doc.sents:
    ic(sent)

ic| sent: Compatibility of systems of linear constraints over the set of natural numbers.
ic| sent: Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered.
ic| sent: Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given.
ic| sent: These criteria and the corresponding algorithms for constructing a minimal supporting set of solutions can be used in solving all the considered types systems and systems of mixed types.


In [9]:
# raw ranks of the selected phrases (index == phrase_id), not yet normalized
unit_vector

[0.17054248030845812,
 0.15757771579579002,
 0.1573942320091846,
 0.14894241299658317]

In [10]:
# Rescale the selected phrase ranks so that they sum to 1.
# Note: despite the name `unit_vector`, this divides by the plain sum of the
# ranks, not by the Euclidean length.
sum_ranks = sum(unit_vector)

# If every selected rank is 0 the division below would raise
# ZeroDivisionError; in that case the all-zero weights are kept unchanged.
if sum_ranks != 0:
    unit_vector = [ rank/sum_ranks for rank in unit_vector ]

unit_vector

[0.2688007587877609,
 0.248366327834214,
 0.24807712961817077,
 0.23475578375985437]

In [11]:
from math import sqrt

# sent_rank maps sentence id -> score, where the sentence id is the position
# of the sentence's record in sent_bounds.
#
# The score is the square root of the summed squares of the normalized
# weights of every selected phrase that does NOT occur in the sentence, so a
# sentence containing more (and heavier) selected phrases gets a lower score.
sent_rank = {}

for sent_id, (sent_start, sent_end, sent_vector) in enumerate(sent_bounds):
    ic(sent_vector)
    sum_sq = 0.0

    for phrase_id in range(len(unit_vector)):
        ic(phrase_id, unit_vector[phrase_id])

        # only phrases missing from this sentence contribute to its score
        if phrase_id not in sent_vector:
            sum_sq += unit_vector[phrase_id]**2.0

    sent_rank[sent_id] = sqrt(sum_sq)

ic| sent_vector: {2}
ic| phrase_id: 0, unit_vector[phrase_id]: 0.2688007587877609
ic| phrase_id: 1, unit_vector[phrase_id]: 0.248366327834214
ic| phrase_id: 2, unit_vector[phrase_id]: 0.24807712961817077
ic| phrase_id: 3, unit_vector[phrase_id]: 0.23475578375985437
ic| sent_vector: {3}
ic| phrase_id: 0, unit_vector[phrase_id]: 0.2688007587877609
ic| phrase_id: 1, unit_vector[phrase_id]: 0.248366327834214
ic| phrase_id: 2, unit_vector[phrase_id]: 0.24807712961817077
ic| phrase_id: 3, unit_vector[phrase_id]: 0.23475578375985437
ic| sent_vector: {1, 2}
ic| phrase_id: 0, unit_vector[phrase_id]: 0.2688007587877609
ic| phrase_id: 1, unit_vector[phrase_id]: 0.248366327834214
ic| phrase_id: 2, unit_vector[phrase_id]: 0.24807712961817077
ic| phrase_id: 3, unit_vector[phrase_id]: 0.23475578375985437
ic| sent_vector: {0, 2}
ic| phrase_id: 0, unit_vector[phrase_id]: 0.2688007587877609
ic| phrase_id: 1, unit_vector[phrase_id]: 0.248366327834214
ic| phrase_id: 2, unit_vector[phrase_id]: 0.2480771296

In [12]:
# show the score computed for each sentence id; ic() also returns its
# argument, which is why the dict appears a second time as the cell's value
ic(sent_rank)

ic| sent_rank: {0: 0.4347987565937048,
                1: 0.4421333995145797,
                2: 0.3568811089614853,
                3: 0.3417544598254071}


{0: 0.4347987565937048,
 1: 0.4421333995145797,
 2: 0.3568811089614853,
 3: 0.3417544598254071}

In [13]:
from operator import itemgetter

# order the (sentence id, score) pairs by score, lowest score first;
# itemgetter(1) selects the score from each pair
sorted(sent_rank.items(), key=itemgetter(1)) 

[(3, 0.3417544598254071),
 (2, 0.3568811089614853),
 (0, 0.4347987565937048),
 (1, 0.4421333995145797)]

In [14]:
# how many sentences to show
limit_sentences = 2

# sentence id -> sentence text; ids follow the order of doc.sents, which is
# the same order sent_bounds (and therefore the keys of sent_rank) was built in
sent_text = {}

for sent_id, sent in enumerate(doc.sents):
    sent_text[sent_id] = sent.text

# (sentence id, score) pairs, lowest score first
ranked_sents = sorted(sent_rank.items(), key=itemgetter(1))

# A bounded slice replaces the manual counter: limit_sentences = 0 (or a
# negative value) now shows nothing, where the old "== limit" test after the
# increment never fired and every sentence was printed.
for sent_id, rank in ranked_sents[:max(limit_sentences, 0)]:
    ic(sent_id, sent_text[sent_id])

ic| sent_id: 3
    sent_text[sent_id]: ('These criteria and the corresponding algorithms for constructing a minimal '
                         'supporting set of solutions can be used in solving all the considered types '
                         'systems and systems of mixed types.')
ic| sent_id: 2
    sent_text[sent_id]: ('Upper bounds for components of a minimal set of solutions and algorithms of '
                         'construction of minimal generating sets of solutions for all types of '
                         'systems are given.')
