In [1]:
import spacy
import pytextrank

from nltk.tokenize import sent_tokenize
import tensorflow_hub as hub
from sklearn.metrics.pairwise import cosine_similarity
import networkx as nx
import re

from summarizer import Summarizer

from gensim.summarization import summarize

from sklearn.metrics import pairwise_distances_argmin_min
import numpy as np
from sklearn.cluster import KMeans

from sentence_transformers import SentenceTransformer

In [7]:
class pyTextRank():
  def __init__(self):
      self.nlp = spacy.load("en_core_web_lg")
      self.nlp.add_pipe("textrank", last=True)
      
  def summary(self, text, limit_phrases=20, limit_sentences=5):
      text = sentence_preprocess(text)
      text = ' '.join(text)
      doc = self.nlp(text)
      res = [sent.text for sent in doc._.textrank.summary(limit_phrases=limit_phrases, limit_sentences=limit_sentences)]
      return res


class pageRankUSE():
  def __init__(self):
      module_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
      self.embed = hub.load(module_url)
      
  def summary(self, text, num_sentences=5):
      sentences = sentence_preprocess(text)
      sentences_embeddings = self.embed(sentences)
      sim_matrix = cosine_similarity(sentences_embeddings)

      nx_graph = nx.from_numpy_array(sim_matrix)
      scores = nx.pagerank(nx_graph)
      
      ranked_sentences = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)

      summary = [i[1] for i in ranked_sentences[:num_sentences]]
      return summary


class bertSummarizer():
  def __init__(self):
    self.model = Summarizer()
  
  def summary(self, text, min_length=60):
    result = self.model(text, min_length=min_length)
    return [result]


class gensimSummarizer():
  def summary(self, text):
    result = summarize(text, ratio=0.4)
    return [result]


class USEWithCluster():
  def __init__(self):
    module_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
    self.embed = hub.load(module_url)

  def summary(self, text, n_clusters=10):
    sentences = sentence_preprocess(text)
    sentences_embeddings = self.embed(sentences)
    return sent_closest_centroids(sentences, sentences_embeddings, n_clusters)


class SentTransformerCluster():
  def __init__(self):
    self.model = SentenceTransformer('paraphrase-distilroberta-base-v1')

  def summary(self, text, n_clusters=10):
    sentences = sentence_preprocess(text)
    sentence_embeddings = self.model.encode(sentences)
    return sent_closest_centroids(sentences, sentence_embeddings, n_clusters)


def sentence_preprocess(sent):
  sentences = sent_tokenize(sent)
  sentences = [re.sub('\n', '', i) for i in sentences]
  return sentences


def sent_closest_centroids(sentences, embedding, n_clusters=10):
  kmeans = KMeans(n_clusters=10)
  kmeans = kmeans.fit(embedding)

  n_clusters = int(np.ceil(len(embedding)**0.6))

  avg = []
  for j in range(n_clusters):
      idx = np.where(kmeans.labels_ == j)[0]
      avg.append(np.mean(idx))
  closest, _ = pairwise_distances_argmin_min(kmeans.cluster_centers_, embedding)
  ordering = sorted(range(n_clusters), key=lambda k: avg[k])
  summary = [sentences[closest[idx]] for idx in ordering]
  return summary

In [4]:
text = """
SINCE PRESIDENT JOE Biden on March 11 directed that states make every adult eligible for a coronavirus vaccine by May 1, many states have ramped up their vaccine rollouts; 
moving up dates and announcing new eligibility to meet the president's timeline. But vaccine rollouts vary by state.
Alaska was the first state to announce and implement eligibility for all adults on March 9. 
Mississippi has since followed suit, with all individuals 16 and older becoming eligible on March 16, while West Virginia opened up eligibility to all adults on March 22. 
All Arizonans 16 and older are eligible for a vaccine on March 24, and adults in Texas are eligible as of March 29.
Still, most states are weeks away from opening up eligibility entirely. 
For the majority of states, elderly populations and health care workers have been prioritized, with eligibility being opened up to those with certain high-risk medical conditions and other essential workers more recently.
But just because states make certain populations eligible does not mean those individuals will secure a vaccine anytime soon, and some populations will continue to be prioritized above others, depending on the state's approach. 
While some states have taken on an age-based vaccine rollout, others have instituted an equity-based rollout, while others have gone for a hybrid approach. 
Rhode Island, for example, is accelerating distribution of the vaccines to those living in ZIP codes disproportionately impacted by the coronavirus, and to those with certain health conditions that make them more vulnerable.
Regardless of approach, some individuals across the country are getting vaccinated without necessarily having priority at the state level, as vaccine rollouts operate differently at the federal, state and county levels. In Delaware, for example, those 50 and older are eligible for a vaccine at local pharmacies, but not with medical providers, or at hospitals. 
And in various parts of the country others, still, are able to get a dose of a vaccine by being in the right place at the right time, such as at a grocery store as the day comes to an end, and unused vaccines run the risk of going to waste.
"""

In [8]:
textrank = pyTextRank()
textrank.summary(text)

["SINCE PRESIDENT JOE Biden on March 11 directed that states make every adult eligible for a coronavirus vaccine by May 1, many states have ramped up their vaccine rollouts; moving up dates and announcing new eligibility to meet the president's timeline.",
 'For the majority of states, elderly populations and health care workers have been prioritized, with eligibility being opened up to those with certain high-risk medical conditions and other essential workers more recently.',
 'But vaccine rollouts vary by state.',
 "But just because states make certain populations eligible does not mean those individuals will secure a vaccine anytime soon, and some populations will continue to be prioritized above others, depending on the state's approach.",
 'Regardless of approach, some individuals across the country are getting vaccinated without necessarily having priority at the state level, as vaccine rollouts operate differently at the federal, state and county levels.']

In [5]:
PR = pageRankUSE()
PR.summary(text)

INFO:absl:Using /tmp/tfhub_modules to cache modules.


['Regardless of approach, some individuals across the country are getting vaccinated without necessarily having priority at the state level, as vaccine rollouts operate differently at the federal, state and county levels.',
 "But just because states make certain populations eligible does not mean those individuals will secure a vaccine anytime soon, and some populations will continue to be prioritized above others, depending on the state's approach.",
 "SINCE PRESIDENT JOE Biden on March 11 directed that states make every adult eligible for a coronavirus vaccine by May 1, many states have ramped up their vaccine rollouts; moving up dates and announcing new eligibility to meet the president's timeline.",
 'All Arizonans 16 and older are eligible for a vaccine on March 24, and adults in Texas are eligible as of March 29.',
 'In Delaware, for example, those 50 and older are eligible for a vaccine at local pharmacies, but not with medical providers, or at hospitals.']

In [12]:
BS = bertSummarizer()
BS.summary(text)

["SINCE PRESIDENT JOE Biden on March 11 directed that states make every adult eligible for a coronavirus vaccine by May 1, many states have ramped up their vaccine rollouts; \nmoving up dates and announcing new eligibility to meet the president's timeline. Still, most states are weeks away from opening up eligibility entirely. While some states have taken on an age-based vaccine rollout, others have instituted an equity-based rollout, while others have gone for a hybrid approach."]

In [13]:
GS = gensimSummarizer()
GS.summary(text)

["SINCE PRESIDENT JOE Biden on March 11 directed that states make every adult eligible for a coronavirus vaccine by May 1, many states have ramped up their vaccine rollouts; \nMississippi has since followed suit, with all individuals 16 and older becoming eligible on March 16, while West Virginia opened up eligibility to all adults on March 22.\nFor the majority of states, elderly populations and health care workers have been prioritized, with eligibility being opened up to those with certain high-risk medical conditions and other essential workers more recently.\nBut just because states make certain populations eligible does not mean those individuals will secure a vaccine anytime soon, and some populations will continue to be prioritized above others, depending on the state's approach.\nRegardless of approach, some individuals across the country are getting vaccinated without necessarily having priority at the state level, as vaccine rollouts operate differently at the federal, state

In [9]:
USEC = USEWithCluster()
USEC.summary(text)

['Mississippi has since followed suit, with all individuals 16 and older becoming eligible on March 16, while West Virginia opened up eligibility to all adults on March 22.',
 'All Arizonans 16 and older are eligible for a vaccine on March 24, and adults in Texas are eligible as of March 29.',
 'Still, most states are weeks away from opening up eligibility entirely.',
 'While some states have taken on an age-based vaccine rollout, others have instituted an equity-based rollout, while others have gone for a hybrid approach.',
 "But just because states make certain populations eligible does not mean those individuals will secure a vaccine anytime soon, and some populations will continue to be prioritized above others, depending on the state's approach."]

In [10]:
STC = SentTransformerCluster()
STC.summary(text)

['Alaska was the first state to announce and implement eligibility for all adults on March 9.',
 'But vaccine rollouts vary by state.',
 'For the majority of states, elderly populations and health care workers have been prioritized, with eligibility being opened up to those with certain high-risk medical conditions and other essential workers more recently.',
 'All Arizonans 16 and older are eligible for a vaccine on March 24, and adults in Texas are eligible as of March 29.',
 'Rhode Island, for example, is accelerating distribution of the vaccines to those living in ZIP codes disproportionately impacted by the coronavirus, and to those with certain health conditions that make them more vulnerable.']