In [24]:
#Importing required Modules
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from heapq import nlargest

In [25]:
text = """The US federal law allows the triggering of sanctions against nations that aim to procure major defence hardware from Russia. This was brought as a response to Russia's annexation of Crimea in 2014 and its alleged meddling in the 2016 US presidential elections.

The US said in November it is yet to determine whether it will waive those sanctions with respect to the deal between India and Russia. “We have not made a determination on a potential waiver with respect to Indian arms transactions with Russia. CAATSA does not have a blanket or country-specific waiver provision,” the US state department said in a statement.

“We urge all of our allies and partners to forgo transactions with Russia that risk triggering sanctions under the Countering America's Adversaries Through Sanctions Act (CAATSA),” the US urged through its statement while highlighting that it values its strategic partnership with India and expects that the defence partnership between both nations can continue"""

In [26]:
stopwords = STOP_WORDS
stopwords

{"'d",
 "'ll",
 "'m",
 "'re",
 "'s",
 "'ve",
 'a',
 'about',
 'above',
 'across',
 'after',
 'afterwards',
 'again',
 'against',
 'all',
 'almost',
 'alone',
 'along',
 'already',
 'also',
 'although',
 'always',
 'am',
 'among',
 'amongst',
 'amount',
 'an',
 'and',
 'another',
 'any',
 'anyhow',
 'anyone',
 'anything',
 'anyway',
 'anywhere',
 'are',
 'around',
 'as',
 'at',
 'back',
 'be',
 'became',
 'because',
 'become',
 'becomes',
 'becoming',
 'been',
 'before',
 'beforehand',
 'behind',
 'being',
 'below',
 'beside',
 'besides',
 'between',
 'beyond',
 'both',
 'bottom',
 'but',
 'by',
 'ca',
 'call',
 'can',
 'cannot',
 'could',
 'did',
 'do',
 'does',
 'doing',
 'done',
 'down',
 'due',
 'during',
 'each',
 'eight',
 'either',
 'eleven',
 'else',
 'elsewhere',
 'empty',
 'enough',
 'even',
 'ever',
 'every',
 'everyone',
 'everything',
 'everywhere',
 'except',
 'few',
 'fifteen',
 'fifty',
 'first',
 'five',
 'for',
 'former',
 'formerly',
 'forty',
 'four',
 'from',
 'fron

In [27]:
nlp = spacy.load('en_core_web_sm')


In [28]:
doc = nlp(text)

In [29]:
# Getting tokens from our text
tokens = [token.text for token in doc]
print(tokens)

['The', 'US', 'federal', 'law', 'allows', 'the', 'triggering', 'of', 'sanctions', 'against', 'nations', 'that', 'aim', 'to', 'procure', 'major', 'defence', 'hardware', 'from', 'Russia', '.', 'This', 'was', 'brought', 'as', 'a', 'response', 'to', 'Russia', "'s", 'annexation', 'of', 'Crimea', 'in', '2014', 'and', 'its', 'alleged', 'meddling', 'in', 'the', '2016', 'US', 'presidential', 'elections', '.', '\n\n', 'The', 'US', 'said', 'in', 'November', 'it', 'is', 'yet', 'to', 'determine', 'whether', 'it', 'will', 'waive', 'those', 'sanctions', 'with', 'respect', 'to', 'the', 'deal', 'between', 'India', 'and', 'Russia', '.', '“', 'We', 'have', 'not', 'made', 'a', 'determination', 'on', 'a', 'potential', 'waiver', 'with', 'respect', 'to', 'Indian', 'arms', 'transactions', 'with', 'Russia', '.', 'CAATSA', 'does', 'not', 'have', 'a', 'blanket', 'or', 'country', '-', 'specific', 'waiver', 'provision', ',', '”', 'the', 'US', 'state', 'department', 'said', 'in', 'a', 'statement', '.', '\n\n', '“',

In [30]:
punctuation = punctuation + "\n"
punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\n'

In [31]:
# Calculating for Word Frequency table to get frequency of every word

word_frequencies = {}
for word in doc:
  if word.text.lower() not in stopwords:
    if word.text.lower() not in punctuation:
      if word.text not in word_frequencies.keys():
        word_frequencies[word.text] = 1
      else:
        word_frequencies[word.text] += 1

In [32]:
print(word_frequencies)

{'federal': 1, 'law': 1, 'allows': 1, 'triggering': 2, 'sanctions': 3, 'nations': 2, 'aim': 1, 'procure': 1, 'major': 1, 'defence': 2, 'hardware': 1, 'Russia': 5, 'brought': 1, 'response': 1, 'annexation': 1, 'Crimea': 1, '2014': 1, 'alleged': 1, 'meddling': 1, '2016': 1, 'presidential': 1, 'elections': 1, '\n\n': 2, 'said': 2, 'November': 1, 'determine': 1, 'waive': 1, 'respect': 2, 'deal': 1, 'India': 2, '“': 2, 'determination': 1, 'potential': 1, 'waiver': 2, 'Indian': 1, 'arms': 1, 'transactions': 2, 'CAATSA': 2, 'blanket': 1, 'country': 1, 'specific': 1, 'provision': 1, '”': 2, 'state': 1, 'department': 1, 'statement': 2, 'urge': 1, 'allies': 1, 'partners': 1, 'forgo': 1, 'risk': 1, 'Countering': 1, 'America': 1, 'Adversaries': 1, 'Sanctions': 1, 'Act': 1, 'urged': 1, 'highlighting': 1, 'values': 1, 'strategic': 1, 'partnership': 2, 'expects': 1, 'continue': 1}


In [33]:
max_frequency = max(word_frequencies.values())
max_frequency

5

In [34]:
# Normalizing Word Frequency
for word in word_frequencies.keys():
  word_frequencies[word] = word_frequencies[word]/max_frequency

In [35]:
print(word_frequencies)

{'federal': 0.2, 'law': 0.2, 'allows': 0.2, 'triggering': 0.4, 'sanctions': 0.6, 'nations': 0.4, 'aim': 0.2, 'procure': 0.2, 'major': 0.2, 'defence': 0.4, 'hardware': 0.2, 'Russia': 1.0, 'brought': 0.2, 'response': 0.2, 'annexation': 0.2, 'Crimea': 0.2, '2014': 0.2, 'alleged': 0.2, 'meddling': 0.2, '2016': 0.2, 'presidential': 0.2, 'elections': 0.2, '\n\n': 0.4, 'said': 0.4, 'November': 0.2, 'determine': 0.2, 'waive': 0.2, 'respect': 0.4, 'deal': 0.2, 'India': 0.4, '“': 0.4, 'determination': 0.2, 'potential': 0.2, 'waiver': 0.4, 'Indian': 0.2, 'arms': 0.2, 'transactions': 0.4, 'CAATSA': 0.4, 'blanket': 0.2, 'country': 0.2, 'specific': 0.2, 'provision': 0.2, '”': 0.4, 'state': 0.2, 'department': 0.2, 'statement': 0.4, 'urge': 0.2, 'allies': 0.2, 'partners': 0.2, 'forgo': 0.2, 'risk': 0.2, 'Countering': 0.2, 'America': 0.2, 'Adversaries': 0.2, 'Sanctions': 0.2, 'Act': 0.2, 'urged': 0.2, 'highlighting': 0.2, 'values': 0.2, 'strategic': 0.2, 'partnership': 0.4, 'expects': 0.2, 'continue': 

In [36]:
# Sentence Tokenizations

sentence_tokens = [sent for sent in doc.sents]
print(sentence_tokens)

[The US federal law allows the triggering of sanctions against nations that aim to procure major defence hardware from Russia., This was brought as a response to Russia's annexation of Crimea in 2014 and its alleged meddling in the 2016 US presidential elections.

, The US said in November it is yet to determine whether it will waive those sanctions with respect to the deal between India and Russia., “We have not made a determination on a potential waiver with respect to Indian arms transactions with Russia., CAATSA does not have a blanket or country-specific waiver provision,” the US state department said in a statement.

, “We urge all of our allies and partners to forgo transactions with Russia that risk triggering sanctions under the Countering America's Adversaries Through Sanctions Act (CAATSA),” the US urged through its statement while highlighting that it values its strategic partnership with India and expects that the defence partnership between both nations can continue]


In [37]:
# Add the word frequencies of every word and calculate the sentence score for 
# for every sentence to determine the important sentences
sentence_scores = {}
for sent in sentence_tokens:
  for word in sent:
    if word.text.lower() in word_frequencies.keys():
      if sent not in sentence_scores.keys():
        sentence_scores[sent] = word_frequencies[word.text.lower()]
      else:
        sentence_scores[sent] += word_frequencies[word.text.lower()]

In [38]:
sentence_scores

{The US federal law allows the triggering of sanctions against nations that aim to procure major defence hardware from Russia.: 3.2000000000000006,
 This was brought as a response to Russia's annexation of Crimea in 2014 and its alleged meddling in the 2016 US presidential elections.
 : 2.1999999999999997,
 The US said in November it is yet to determine whether it will waive those sanctions with respect to the deal between India and Russia.: 1.9999999999999998,
 “We have not made a determination on a potential waiver with respect to Indian arms transactions with Russia.: 2.2,
 CAATSA does not have a blanket or country-specific waiver provision,” the US state department said in a statement.
 : 3.1999999999999997,
 “We urge all of our allies and partners to forgo transactions with Russia that risk triggering sanctions under the Countering America's Adversaries Through Sanctions Act (CAATSA),” the US urged through its statement while highlighting that it values its strategic partnership w

In [39]:
# Determining the percentage we want to reduce our original text into / 
# No of sentences we are going to summarize the text
select_length = int(len(sentence_tokens) * 0.3)
select_length

1

In [40]:
# Got the sentences with most sentence_scores
summary = nlargest(select_length, sentence_scores, key = sentence_scores.get)

In [41]:
summary

[“We urge all of our allies and partners to forgo transactions with Russia that risk triggering sanctions under the Countering America's Adversaries Through Sanctions Act (CAATSA),” the US urged through its statement while highlighting that it values its strategic partnership with India and expects that the defence partnership between both nations can continue]

In [42]:
final_summary = [word.text for word in summary]
final_summary

["“We urge all of our allies and partners to forgo transactions with Russia that risk triggering sanctions under the Countering America's Adversaries Through Sanctions Act (CAATSA),” the US urged through its statement while highlighting that it values its strategic partnership with India and expects that the defence partnership between both nations can continue"]

In [None]:
text

In [None]:
summary = " ".join(final_summary)
summary

In [45]:
len(text)

986

In [46]:
len(summary)

361