# Text Mining: Regex, List Comprehension, Spacy

* Regular Expression
* Using Spacy for text normalization & parsing

In [1]:
# define a text variable
# either ' or " are fine

text = 'Welcome to LIS 875!'
text

'Welcome to LIS 875!'

In [2]:
# a text is just a sequence of characters (including whitespace and other punctuations)

characters = [char for char in text]
characters

['W',
 'e',
 'l',
 'c',
 'o',
 'm',
 'e',
 ' ',
 't',
 'o',
 ' ',
 'L',
 'I',
 'S',
 ' ',
 '8',
 '7',
 '5',
 '!']

In [3]:
# lowercase

text.lower()

'welcome to lis 875!'

In [4]:
# uppercase

text.upper()

'WELCOME TO LIS 875!'

In [5]:
# How to do title case? I can't remember! Let's just Google it!

text

'Welcome to LIS 875!'

In [6]:
# Tip: programming suggestion may help a lot (at least you don't need to remember every function names)
# If you using a plain Jupyter notebook, you may need to press Tab to activate the suggestions.

# put a . after text to activate suggestions in Google colab
text

'Welcome to LIS 875!'

In [7]:
# Tip: use ? for documentation

text.capitalize?

In [8]:
text.capitalize()

'Welcome to lis 875!'

In [9]:
# A few useful shortcuts:
# CTRL + Enter: run the current cell
# SHIFT + Enter: run the current cell and move to the next cell
#
# See Tools -> Keyboard Shortcuts for all shortcuts

In [10]:
# concatenate strings

'Welcome' + 'to' + 'LIS' + '501' + '!'

'WelcometoLIS501!'

In [11]:
# concatenate a list of strings with a separator

' '.join( [ 'Welcome', 'to', 'LIS', '501', '!' ] )

'Welcome to LIS 501 !'

In [12]:
# repeat a string

'abc'*3

'abcabcabc'

In [13]:
text

'Welcome to LIS 875!'

In [14]:
# concat string and other objects -- casting them to string first (otherwise it throws an exception)

text + str(12)

'Welcome to LIS 875!12'

In [15]:
# length of a string (how many characters)

len(text)

19

In [16]:
# get characters from a text

text[1]

'e'

In [17]:
# string indexing: -len to (len-1)

text[0:3]

'Wel'

In [18]:
# negative indexing is equivalent to len(text) - negative_number
text[0:-3]

'Welcome to LIS 8'

In [19]:
# split text by a separator
text.split()

['Welcome', 'to', 'LIS', '875!']

In [20]:
text.split?

## In-class Exercise 1

In [21]:
name = 'First Last'
name

'First Last'

In [22]:
# Class Exercise: get first & last name initials
# For example, if name = 'Peter Pan', you should get 'PP'

# step 1: split it
# you should aim to get [ 'First', 'Last' ]

name.split()

['First', 'Last']

In [23]:
# step 2: get the first letter of each part
# you should aim to get [ 'F', 'L' ]

[ n[0] for n in name.split() ]

['F', 'L']

In [24]:
# step 3: concat them together
# you should aim to get 'FL'

''.join( [ n[0] for n in name.split() ] )

'FL'

## Regular Expression

Just some basic examples; more tutorial can be found at: https://www.w3schools.com/python/python_regex.asp

In [25]:
import re

text = 'Welcome to LIS 501!'

# [A-Z] matches any uppercase character
# [A-Z]+ matches any uppercase character appearing one to many times
re.findall( "[A-Z]+", text ) # found all substrings that are all upercase

['W', 'LIS']

In [26]:
# [0-9] matches any digit
# [0-9]+ matches any digit appearing one to many times
re.findall( "[0-9]+", text ) # found all substrings that are all digits

['501']

In [27]:
# . matches any character
re.findall( "W.+o", text ) # found all substrings that starts with a 'W' and ends with an 'o'; "greedy" match, match as much text as possible

['Welcome to']

In [28]:
re.findall( "W.+?o", text ) # found all substrings that starts with a 'W' and ends with an 'o'; "reluctant" match, match as few text as possible

['Welco']

In [29]:
# \s matches any whitespace
re.split( "\s+", text ) # split by 1 or more whitespace

  re.split( "\s+", text ) # split by 1 or more whitespace


['Welcome', 'to', 'LIS', '501!']

In [30]:
re.sub( "\s+", '[WHITESPACE]', text ) # replace 1 or more whitespace by [WHITESPACE]

  re.sub( "\s+", '[WHITESPACE]', text ) # replace 1 or more whitespace by [WHITESPACE]


'Welcome[WHITESPACE]to[WHITESPACE]LIS[WHITESPACE]501!'

In [31]:
text = "Contact us at support@example.com or sales@example.org."
emails = re.findall(r'\b[\w\.-]+@[\w\.-]+\.\w+\b', text)
print(emails)  # Output: ['support@example.com', 'sales@example.org']

['support@example.com', 'sales@example.org']


In [32]:
text = "Hello,   world!!!  Welcome to regex."
cleaned_text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
cleaned_text = re.sub(r'\s+', ' ', cleaned_text)  # Replace multiple spaces with one
print(cleaned_text)  # Output: "Hello world Welcome to regex"

Hello world Welcome to regex


In [33]:
email_demo = "It was a crisp fall morning at the University of Wisconsin-Madison, and the campus was buzzing with excitement as students and faculty prepared for the annual Data Science Symposium. In the bustling atrium of the Discovery Building, a small group of students gathered around a table piled high with pamphlets, coffee cups, and name tags. Among them was Alex Carter, a senior majoring in Computer Science. Alex had been working tirelessly on a project that would be showcased at the event, and he was eager to share his work with anyone who would listen. As he sipped his coffee, Alex received an email notification on his phone from his project advisor, Dr. Sarah Thompson. The message read: \"Alex, please send the latest draft of your presentation slides to sarah.thompson@wisc.edu by noon. Looking forward to seeing your progress!\" Alex quickly typed a reply, making sure to attach the slides. He glanced around the room, spotting his friend and fellow data enthusiast, Priya Patel, who was intently discussing her research on machine learning models with a group of underclassmen. Priya’s research was focused on improving predictive algorithms, and she was always willing to lend a helping hand to those interested in the field. Alex made a mental note to catch up with her later and send her an email at priya.patel@wisc.edu to get her feedback on his presentation. Not far from them, Michael Rodriguez was deeply engrossed in a conversation with Dr. Emily Chen, a leading expert in natural language processing. Michael had spent the summer working in Dr. Chen’s lab, and they had developed a novel approach to sentiment analysis that they were eager to present. \"Send me the updated dataset when you get a chance,\" Dr. Chen said, handing him her card. \"You can use emily.chen@wisc.edu.\" Michael nodded, already planning his follow-up email. Meanwhile, in the corner of the room, Jessica Miller and her friend Rachel Evans were setting up their poster on ethical AI. They were both part of the university’s ethics in technology group, and their research had been well-received in past symposiums. Jessica noticed that their poster had a small typo and whispered to Rachel, \"I’ll fix it and resend it to you. What’s your email again?\" Rachel smiled and replied, \"It’s rachel.evans@wisc.edu, and don’t worry about it—our project still looks great!\" Across the room, James Lee was chatting with a visiting professor from another university, but his mind was on the workshop he was leading later that day. He had designed an interactive session on data visualization techniques, and he hoped it would be both informative and engaging. He remembered that he still needed to send the final workshop outline to his co-presenter, Natalie Brown. He quickly typed a reminder on his phone: \"Send outline to natalie.brown@wisc.edu.\" At a nearby table, Hannah Johnson was busy reviewing her notes for a panel discussion on big data challenges. As she skimmed through her slides, she realized she needed some last-minute input from her collaborator, David Wilson. She sent him a quick message: \"Can you check slide 4 and let me know your thoughts? Send any suggestions to hannah.johnson@wisc.edu.\" By the end of the day, the symposium was deemed a success, with lively discussions, valuable networking, and promising research presented. As the crowd began to disperse, Alex gathered his things and headed toward the exit, feeling proud of what he and his peers had accomplished. Before he left, he checked his phone one last time and noticed an email from his friend and mentor, Olivia Martinez. Her email, olivia.martinez@wisc.edu, had become a frequent sight in his inbox throughout the semester. \“Great job today,” the email read. “You’re on your way to doing amazing things.”"

In [34]:
email_demo

'It was a crisp fall morning at the University of Wisconsin-Madison, and the campus was buzzing with excitement as students and faculty prepared for the annual Data Science Symposium. In the bustling atrium of the Discovery Building, a small group of students gathered around a table piled high with pamphlets, coffee cups, and name tags. Among them was Alex Carter, a senior majoring in Computer Science. Alex had been working tirelessly on a project that would be showcased at the event, and he was eager to share his work with anyone who would listen. As he sipped his coffee, Alex received an email notification on his phone from his project advisor, Dr. Sarah Thompson. The message read: "Alex, please send the latest draft of your presentation slides to sarah.thompson@wisc.edu by noon. Looking forward to seeing your progress!" Alex quickly typed a reply, making sure to attach the slides. He glanced around the room, spotting his friend and fellow data enthusiast, Priya Patel, who was intent

In [35]:
# Regex pattern to extract last names
pattern = r'\b[a-z]+\.(\w+)@wisc\.edu\b'

# Extract all last names
last_names = re.findall(pattern, email_demo)

print(last_names)

['thompson', 'patel', 'chen', 'evans', 'brown', 'johnson', 'martinez']


##List Comprehension

- Concise and powerful way to create lists in Python
- Allows you to generate lists by iterating over an object (list, string, tuple)

###Basic Structure of List Comprehension

[**expression** for **item** in *iterable* if *condition*]

- **expression**: What you want each item in the new list to be. This could be just the item itself, or something derived from it.
- **for item in iterable**: The loop that goes through each item in the original iterable (like a list or range).
- **if condition** (optional): A filter that only includes items that satisfy this condition.

In [36]:
numbers = [1, 2, 3, 4, 5]
squares = [n**2 for n in numbers]

In [37]:
#This will create a list of squares of the numbers in "numbers". The expression n**2 calculates the square of each number.
squares

[1, 4, 9, 16, 25]

In [38]:
#List Comprehension with a Condition

new_numbers = [n for n in numbers if n >=3]

In [39]:
new_numbers

[3, 4, 5]

In [40]:
#Nested List Comprehension
pairs = [(x, y) for x in range(3) for y in range(3)]

In [41]:
pairs

[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2)]

## Using Spacy for Text Normalization and Parsing
- Sentence segmentation, tokenization, stop words removal, lemmatization (stemming)
- POS tagging, NP chunking, Named Entity Recognition
- https://spacy.io/

In [42]:
# make sure the required python packages are installed

# install nltk (we'll use 3.6.7 in Fall 2022)
!pip install nltk==3.6.7 --upgrade

# install spacy (we'll use 3.2.1 in Fall 2022)
!pip install spacy==3.2.1 --upgrade

# download the spacy en_core_web_sm model (3.2.0 version)
!python -m spacy download en_core_web_sm-3.2.0 --direct

Collecting nltk==3.6.7
  Downloading nltk-3.6.7-py3-none-any.whl.metadata (2.8 kB)
Downloading nltk-3.6.7-py3-none-any.whl (1.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nltk
  Attempting uninstall: nltk
    Found existing installation: nltk 3.9.1
    Uninstalling nltk-3.9.1:
      Successfully uninstalled nltk-3.9.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
textblob 0.19.0 requires nltk>=3.9, but you have nltk 3.6.7 which is incompatible.[0m[31m
[0mSuccessfully installed nltk-3.6.7
Collecting spacy==3.2.1
  Downloading spacy-3.2.1.tar.gz (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [3

In [43]:
import spacy
import en_core_web_sm

nlp = en_core_web_sm.load()

In [44]:
name = "My name is Charlie Brown."

In [45]:
[t for t in name]

['M',
 'y',
 ' ',
 'n',
 'a',
 'm',
 'e',
 ' ',
 'i',
 's',
 ' ',
 'C',
 'h',
 'a',
 'r',
 'l',
 'i',
 'e',
 ' ',
 'B',
 'r',
 'o',
 'w',
 'n',
 '.']

In [46]:
type(name)

str

In [47]:
[t for t in nlp(name)]

[My, name, is, Charlie, Brown, .]

In [48]:
doc = nlp(name)

In [49]:
type(doc) #We know this is a special type of object or collection because we were able to loop over it
#And every word in the text has an associated member or token in this collection

spacy.tokens.doc.Doc

In [50]:
t = doc[0]

In [51]:
type(t)

spacy.tokens.token.Token

In [52]:
#Now the cool thing about these tokens is that they have lots of pre-defined properties (use "dot")
#t.

In [53]:
#Another cool way to look at these properties is by visualizing them.
#This sort of displays the dependency graph of the document we gave it.

from spacy import displacy

displacy.render(doc, jupyter=True)

In [54]:
spacy.explain('poss')

'possession modifier'

In [55]:
spacy.explain('nsubj')

'nominal subject'

In [56]:
#these properties are not just available in a visualization
#they are also available in a Spacy library

#whether something is a noun or verb is called a part-of-speech
#and the dependency structure we can access by call dep_

for t in doc:
    print(t, t.pos_, t.dep_)

My PRON poss
name NOUN nsubj
is AUX ROOT
Charlie PROPN compound
Brown PROPN attr
. PUNCT punct


In [57]:
raw = "Charlie Brown is a beloved character from the comic strip Peanuts, created by Charles Schulz. Known for his kind heart and persistent optimism, Charlie often finds himself facing challenges, whether it's losing at baseball or missing his chance to kick a football. Despite his frequent failures, he continues to try again, embodying a sense of resilience and hope. His loyal friends, like Linus and Snoopy, support him, but he also deals with teasing from characters like Lucy. Charlie Brown’s relatable struggles and enduring spirit make him a timeless symbol of perseverance."
text = nlp(raw)

In [58]:
print(text)

Charlie Brown is a beloved character from the comic strip Peanuts, created by Charles Schulz. Known for his kind heart and persistent optimism, Charlie often finds himself facing challenges, whether it's losing at baseball or missing his chance to kick a football. Despite his frequent failures, he continues to try again, embodying a sense of resilience and hope. His loyal friends, like Linus and Snoopy, support him, but he also deals with teasing from characters like Lucy. Charlie Brown’s relatable struggles and enduring spirit make him a timeless symbol of perseverance.


In [59]:
# sentence segmentation

sentences = list(text.sents)
sentences[0]

Charlie Brown is a beloved character from the comic strip Peanuts, created by Charles Schulz.

In [60]:
# tokenization

[token.text for token in sentences[0]] # tokens in the first sentence

['Charlie',
 'Brown',
 'is',
 'a',
 'beloved',
 'character',
 'from',
 'the',
 'comic',
 'strip',
 'Peanuts',
 ',',
 'created',
 'by',
 'Charles',
 'Schulz',
 '.']

In [61]:
# just in case you are not familiar with list comprehension

# equivalent for loop

tokens = []
for sent in text.sents:
    for token in sent:
        tokens.append(token.text)

[ token.text for sent in text.sents for token in sent ] # tokens in all the sentences; list comprehension

['Charlie',
 'Brown',
 'is',
 'a',
 'beloved',
 'character',
 'from',
 'the',
 'comic',
 'strip',
 'Peanuts',
 ',',
 'created',
 'by',
 'Charles',
 'Schulz',
 '.',
 'Known',
 'for',
 'his',
 'kind',
 'heart',
 'and',
 'persistent',
 'optimism',
 ',',
 'Charlie',
 'often',
 'finds',
 'himself',
 'facing',
 'challenges',
 ',',
 'whether',
 'it',
 "'s",
 'losing',
 'at',
 'baseball',
 'or',
 'missing',
 'his',
 'chance',
 'to',
 'kick',
 'a',
 'football',
 '.',
 'Despite',
 'his',
 'frequent',
 'failures',
 ',',
 'he',
 'continues',
 'to',
 'try',
 'again',
 ',',
 'embodying',
 'a',
 'sense',
 'of',
 'resilience',
 'and',
 'hope',
 '.',
 'His',
 'loyal',
 'friends',
 ',',
 'like',
 'Linus',
 'and',
 'Snoopy',
 ',',
 'support',
 'him',
 ',',
 'but',
 'he',
 'also',
 'deals',
 'with',
 'teasing',
 'from',
 'characters',
 'like',
 'Lucy',
 '.',
 'Charlie',
 'Brown',
 '’s',
 'relatable',
 'struggles',
 'and',
 'enduring',
 'spirit',
 'make',
 'him',
 'a',
 'timeless',
 'symbol',
 'of',
 'pers

In [62]:
# all tokens in the text

[token.text for token in text]

['Charlie',
 'Brown',
 'is',
 'a',
 'beloved',
 'character',
 'from',
 'the',
 'comic',
 'strip',
 'Peanuts',
 ',',
 'created',
 'by',
 'Charles',
 'Schulz',
 '.',
 'Known',
 'for',
 'his',
 'kind',
 'heart',
 'and',
 'persistent',
 'optimism',
 ',',
 'Charlie',
 'often',
 'finds',
 'himself',
 'facing',
 'challenges',
 ',',
 'whether',
 'it',
 "'s",
 'losing',
 'at',
 'baseball',
 'or',
 'missing',
 'his',
 'chance',
 'to',
 'kick',
 'a',
 'football',
 '.',
 'Despite',
 'his',
 'frequent',
 'failures',
 ',',
 'he',
 'continues',
 'to',
 'try',
 'again',
 ',',
 'embodying',
 'a',
 'sense',
 'of',
 'resilience',
 'and',
 'hope',
 '.',
 'His',
 'loyal',
 'friends',
 ',',
 'like',
 'Linus',
 'and',
 'Snoopy',
 ',',
 'support',
 'him',
 ',',
 'but',
 'he',
 'also',
 'deals',
 'with',
 'teasing',
 'from',
 'characters',
 'like',
 'Lucy',
 '.',
 'Charlie',
 'Brown',
 '’s',
 'relatable',
 'struggles',
 'and',
 'enduring',
 'spirit',
 'make',
 'him',
 'a',
 'timeless',
 'symbol',
 'of',
 'pers

In [63]:
sentences[0][0].lemma_

'Charlie'

In [64]:
[ (token.text, token.is_stop) for token in sentences[0] ] # a list of tokens in the first sentence and if they are stop words

[('Charlie', False),
 ('Brown', False),
 ('is', True),
 ('a', True),
 ('beloved', False),
 ('character', False),
 ('from', True),
 ('the', True),
 ('comic', False),
 ('strip', False),
 ('Peanuts', False),
 (',', False),
 ('created', False),
 ('by', True),
 ('Charles', False),
 ('Schulz', False),
 ('.', False)]

In [65]:
[ token.text for token in sentences[0] if not token.is_stop ] # only listing the tokens that are not stop words in sentences[0]

['Charlie',
 'Brown',
 'beloved',
 'character',
 'comic',
 'strip',
 'Peanuts',
 ',',
 'created',
 'Charles',
 'Schulz',
 '.']

In [66]:
# only listing the tokens that are not stop words or punctuation
[ token.text for token in sentences[0] if not token.is_stop and not token.is_punct ]

['Charlie',
 'Brown',
 'beloved',
 'character',
 'comic',
 'strip',
 'Peanuts',
 'created',
 'Charles',
 'Schulz']

In [67]:
# list the lowercased tokens and lemma
[ (token.text, token.lower_, token.lemma_) for token in sentences[0] if not token.is_stop and not token.is_punct ]

[('Charlie', 'charlie', 'Charlie'),
 ('Brown', 'brown', 'Brown'),
 ('beloved', 'beloved', 'beloved'),
 ('character', 'character', 'character'),
 ('comic', 'comic', 'comic'),
 ('strip', 'strip', 'strip'),
 ('Peanuts', 'peanuts', 'Peanuts'),
 ('created', 'created', 'create'),
 ('Charles', 'charles', 'Charles'),
 ('Schulz', 'schulz', 'Schulz')]

In [68]:
# just in case if you are interested in using Porter Stemming

from nltk.stem.porter import *

porter = PorterStemmer()

[ (token.text, token.lower_, token.lemma_, porter.stem(token.text)) for token in sentences[0] if not token.is_stop and not token.is_punct ]

[('Charlie', 'charlie', 'Charlie', 'charli'),
 ('Brown', 'brown', 'Brown', 'brown'),
 ('beloved', 'beloved', 'beloved', 'belov'),
 ('character', 'character', 'character', 'charact'),
 ('comic', 'comic', 'comic', 'comic'),
 ('strip', 'strip', 'strip', 'strip'),
 ('Peanuts', 'peanuts', 'Peanuts', 'peanut'),
 ('created', 'created', 'create', 'creat'),
 ('Charles', 'charles', 'Charles', 'charl'),
 ('Schulz', 'schulz', 'Schulz', 'schulz')]

In [69]:
# print each token and its part-of-speech (POS) tags in sentences[0]

[ (token.text, token.tag_) for token in sentences[0] ]

[('Charlie', 'NNP'),
 ('Brown', 'NNP'),
 ('is', 'VBZ'),
 ('a', 'DT'),
 ('beloved', 'JJ'),
 ('character', 'NN'),
 ('from', 'IN'),
 ('the', 'DT'),
 ('comic', 'JJ'),
 ('strip', 'NN'),
 ('Peanuts', 'NNP'),
 (',', ','),
 ('created', 'VBN'),
 ('by', 'IN'),
 ('Charles', 'NNP'),
 ('Schulz', 'NNP'),
 ('.', '.')]

In [70]:
# print each noun phrase (NP) and its starting & ending token in sentences[0]

[ ( np.text, np.start, np.end ) for np in sentences[0].noun_chunks ]

[('Charlie Brown', 0, 2),
 ('a beloved character', 3, 6),
 ('the comic strip Peanuts', 7, 11),
 ('Charles Schulz', 14, 16)]

In [71]:
# print each entity, its entity type, and its starting & ending token in sentences[0]
# GPE stands for Geo-Political Entity
# CARDINAL stands for Cardinal Entity

[ ( ent.text, ent.label_, ent.start, ent.end ) for ent in sentences[0].ents ]

[('Charlie Brown', 'PERSON', 0, 2), ('Charles Schulz', 'PERSON', 14, 16)]

In [72]:
# you can also get the IOB-style entity tags for each token

[ ( token.text, token.ent_iob_, token.ent_type_ ) for token in sentences[0] ]

[('Charlie', 'B', 'PERSON'),
 ('Brown', 'I', 'PERSON'),
 ('is', 'O', ''),
 ('a', 'O', ''),
 ('beloved', 'O', ''),
 ('character', 'O', ''),
 ('from', 'O', ''),
 ('the', 'O', ''),
 ('comic', 'O', ''),
 ('strip', 'O', ''),
 ('Peanuts', 'O', ''),
 (',', 'O', ''),
 ('created', 'O', ''),
 ('by', 'O', ''),
 ('Charles', 'B', 'PERSON'),
 ('Schulz', 'I', 'PERSON'),
 ('.', 'O', '')]

## In-class Exercise 2
* Counting the most frequent nouns (NN, NNP, NNS, or NNPS) in the provided text.

In [73]:
raw = "Netanyahu's visit was cut short by reports late Sunday that a rocket was fired from Gaza into central Israel, wounding at least seven people. Following criticism from political opponents over what they consider the prime minister's unclear stance toward the militant political group, Israel responded with a series of strikes into Gaza against Hamas, which largely governs the contested strip. President Donald Trump tacitly endorsed the strike following his meetings with Netanyahu, calling the Hamas attack \"despicable.\""

text = nlp(raw)

In [74]:
from collections import Counter

# your solution

# write a list comprehension [] for all tokens' lemma
# use Counter([ your list comprehension ]).most_common()

Counter( [ t.lower_ for t in text if t.tag_ in ['NN', 'NNP', 'NNS', 'NNPS'] ] ).most_common()

[('netanyahu', 2),
 ('gaza', 2),
 ('israel', 2),
 ('hamas', 2),
 ('visit', 1),
 ('reports', 1),
 ('sunday', 1),
 ('rocket', 1),
 ('people', 1),
 ('criticism', 1),
 ('opponents', 1),
 ('minister', 1),
 ('stance', 1),
 ('group', 1),
 ('series', 1),
 ('strikes', 1),
 ('strip', 1),
 ('president', 1),
 ('donald', 1),
 ('trump', 1),
 ('strike', 1),
 ('meetings', 1),
 ('attack', 1)]