In [1]:
from gensim.utils import simple_preprocess
from gensim.models.phrases import Phrases, Phraser
from gensim import corpora
from gensim.similarities import Similarity
import pandas as pd
import glob
import re

## Joplin

### First filter out articles that appear in multiple outlets in the DMA

In [7]:
#Bringing in the Joplin data

files_joplin = glob.glob("/home/mazz76/jupyter/Civic_Info_Project/Final_data_Joplin/*.csv")

df = []
for f in files_joplin:
    csv = pd.read_csv(f)
    df.append(csv)
joplin_df = pd.concat(df)

#Keep only one instance of each story in the dataframe
joplin_df.drop_duplicates(subset=['text'], inplace=True)
joplin_df.to_csv('final_joplin.csv')

In [153]:
#Now we have all of the local articles from the Joplin DMA (TV + Newspaper + Radio + Magazine). We need to find the
#exact and close matches to determine an originality score for the publication

texts = []

for x in df['text']:
    texts.append(x)
    
texts_cleaned = []
texts_cleaned[:] = [x for x in texts if isinstance(x,str)]
documents = texts_cleaned

In [154]:
#Converting documents to collection of words
#Deacc=True strips the punctuation accents and numbers

words = [[text for text in simple_preprocess(doc, deacc=True)] for doc in documents]

In [155]:
#Build a bigram model -- but using the Gensim library, which only stores common bigrams (saves on time and space)

bigram = Phrases(words, min_count=1)
bigram_phraser = Phraser(bigram)

In [156]:
#Reconvert documents to collection of words/bigrams
texts_bigrams = [[text for text in bigram_phraser[ simple_preprocess(doc, deacc=True)]] for doc in documents]

In [157]:
#Using a Gensim dictionary
dictionary = corpora.Dictionary(texts_bigrams)

In [158]:
#Creating a Gensim corpus
#Basically uses numbers which speeds up the process substantially
corpus = [dictionary.doc2bow(docString) for docString in texts_bigrams]

In [159]:
#Build similarity index
#Calculates the pairwise cosine similarity of each document
index = Similarity(corpus=corpus,
                   num_features=len(dictionary),
                   output_prefix='on_disk_output')

In [160]:
doc_id = 0
similar_docs = {}
for similarities in index:
    similar_docs[doc_id] = list(enumerate(similarities))
    doc_id += 1

In [161]:
#Setting a similarity threshold
sim_threshold = 0.9


In [162]:
exact_matches = []

for doc_id, sim_doc_tuples in similar_docs.items():
    for sim_doc_tuple in sim_doc_tuples:
        sim_doc_id = sim_doc_tuple[0]
        sim_score = sim_doc_tuple[1]
        if sim_score >= sim_threshold and doc_id != sim_doc_id and sim_score==1:
            exact_matches.append(documents[doc_id])
            exact_matches.append(documents[sim_doc_id])

In [163]:
exact_matches = list(set(exact_matches))
exact_matches

[]

In [164]:
close_matches = []

for doc_id, sim_doc_tuples in similar_docs.items():
    for sim_doc_tuple in sim_doc_tuples:
        sim_doc_id = sim_doc_tuple[0]
        sim_score = sim_doc_tuple[1]
        if 0.9 <= sim_score < 0.99 and doc_id != sim_doc_id:
            close_matches.append(documents[doc_id])
            close_matches.append(documents[sim_doc_id])

In [165]:
close_matches = list(set(close_matches))
len(close_matches)

22

In [166]:
#Appending a repeated article flag to our dataframe

df['Repeated_Article'] = df.isin(close_matches).any(1).astype(int)
df

#df.to_csv('joplin_repeated_articles.csv')

Unnamed: 0.1,Unnamed: 0,text,places,long,lat,outlet,Repeated_Article
0,0,After weeks and weeks of anticipation artCentr...,1110 East Thirteenth Street carthage Missouri,-94.297779,37.165091,Carthage,0
1,1,The University of Missouri’s Southwest Researc...,"Southwest Research, Extension and Education Ce...",-93.879303,37.073132,Carthage,1
2,2,As I sat down to write this final version of t...,Jasper County Juvenile Court Missouri,-94.517166,37.085823,Carthage,0
3,3,"CARTHAGE, Mo. — A slow first quarter by the Ca...",carthage high school CARTHAGE Missouri,-94.299023,37.145331,Carthage,0
4,4,"CARTHAGE, Mo. — After a sluggish start over mu...",carthage high school CARTHAGE Missouri,-94.299023,37.145331,Carthage,0
...,...,...,...,...,...,...,...
14,14,The Vernon County Commission met in regular se...,vernon county missouri,-94.315422,37.814849,Nevada Daily Mail,0
15,15,The filing period for two expiring Nevada City...,nevada missouri city hall,-94.354672,37.839205,Nevada Daily Mail,0
16,16,Tuesday evening at Wynn Gymnasium in Nevada Hi...,nevada high school Missouri,,,Nevada Daily Mail,0
17,17,The Nevada Regional Medical Center Board of Di...,nevada regional medical center missouri,-94.360528,37.830908,Nevada Daily Mail,0


In [174]:
#Detecting presence of a quote

quote_match = df['text'].str.contains(r'“(.*?)”')
df['direct_quote'] = quote_match

#But I want to make sure that it's actually a quote and not just some term that is in quotes
#I will search for the presence of the word says or said

quote_language_match = df['text'].lower.str.contains('said|says|told|explained|noted|added|saying|notes|adds')
df['quote_language'] = quote_language_match


#Detecting presence of press release language

press_release_match = df['text'].lower.str.contains('press release|statement|release')
df['press_release_language'] = press_release_match

df

#Assessing the originality
def originality(row):
    if row['direct_quote'] == True & row['quote_language'] == True:
        val = 1
    else:
        val = 0
    return val

df['sourced_article'] = df.apply(originality, axis=1)
df

df.to_csv('joplin_repeated_articles.csv')


  return func(self, *args, **kwargs)


In [148]:
for x in df['text']:
    print(x)
    print(' ')

After weeks and weeks of anticipation artCentral’s HOLIDAY BOUTIQUE CELEBRATION WEEKEND, underwritten by OLD MISSOURI BANK, is finally here with Hyde House open for your shopping pleasure December 2 and 3, Friday and Saturday, 12:00 noon until 8:00 p. m. at 1110 East Thirteenth Street in Carthage. The HOLIDAY BOUTIQUE gala CLOSING CELEBRATION will take place Saturday, December 3, 5:00-8:00 p. m. , with holiday sweets and savories and libations, too. The SILENT AUCTION bidding concludes at 7:00 p. m. Beautiful, historic Hyde House is shining at her most elegant! While the towering holly trees stand brightly festooned with their seasonal red berries their twinkle lights sparkle all way round! On stair and porch railings accented with scarlet velvet bows, swags and wreaths of winter greenery are twinkling, too, ready to welcome you inside to artCentral’s magical galleries abundantly filled with exquisite artist-created gifting and decorating items for your family and friends and colleague

## Springfield

In [6]:
#Bringing in the Springfield data

files_springfield = glob.glob("/home/mazz76/jupyter/Civic_Info_Project/Final_data_springfield/*.csv")

df = []
for f in files_springfield:
    csv = pd.read_csv(f)
    df.append(csv)
springfield_df = pd.concat(df)

#Keep only one instance of each story in the dataframe
springfield_df.drop_duplicates(subset=['text'], inplace=True)
springfield_df.to_csv('final_springfield.csv')

In [70]:
texts = []

for x in df['text']:
    texts.append(x)
    
texts_cleaned = []
texts_cleaned[:] = [x for x in texts if isinstance(x,str)]
documents = texts_cleaned

#Converting documents to collection of words
#Deacc=True strips the punctuation accents and numbers

words = [[text for text in simple_preprocess(doc, deacc=True)] for doc in documents]

#Build a bigram model -- but using the Gensim library, which only stores common bigrams (saves on time and space)

bigram = Phrases(words, min_count=1)
bigram_phraser = Phraser(bigram)

#Reconvert documents to collection of words/bigrams
texts_bigrams = [[text for text in bigram_phraser[ simple_preprocess(doc, deacc=True)]] for doc in documents]

#Using a Gensim dictionary
dictionary = corpora.Dictionary(texts_bigrams)

#Creating a Gensim corpus
#Basically uses numbers which speeds up the process substantially
corpus = [dictionary.doc2bow(docString) for docString in texts_bigrams]

#Build similarity index
#Calculates the pairwise cosine similarity of each document
index = Similarity(corpus=corpus,
                   num_features=len(dictionary),
                   output_prefix='on_disk_output')

doc_id = 0
similar_docs = {}
for similarities in index:
    similar_docs[doc_id] = list(enumerate(similarities))
    doc_id += 1
    
#Setting a similarity threshold
sim_threshold = 0.9

exact_matches = []

for doc_id, sim_doc_tuples in similar_docs.items():
    for sim_doc_tuple in sim_doc_tuples:
        sim_doc_id = sim_doc_tuple[0]
        sim_score = sim_doc_tuple[1]
        if sim_score >= sim_threshold and doc_id != sim_doc_id and sim_score==1:
            exact_matches.append(documents[doc_id])
            exact_matches.append(documents[sim_doc_id])
            
exact_matches = list(set(exact_matches))
exact_matches

[]

In [75]:
close_matches = []

for doc_id, sim_doc_tuples in similar_docs.items():
    for sim_doc_tuple in sim_doc_tuples:
        sim_doc_id = sim_doc_tuple[0]
        sim_score = sim_doc_tuple[1]
        if 0.92 <= sim_score < 0.99 and doc_id != sim_doc_id:
            close_matches.append(documents[doc_id])
            close_matches.append(documents[sim_doc_id])

close_matches = list(set(close_matches))
len(close_matches)

25

In [73]:
#Appending a repeated article flag to our dataframe

df['Repeated_Article'] = df.isin(close_matches).any(1).astype(int)
df

df.to_csv('springfield_repeated_articles.csv')

## STL

In [5]:
#Bringing in the STL data

files_stl = glob.glob("/home/mazz76/jupyter/Civic_Info_Project/Final_data_STL/*.csv")

df = []
for f in files_stl:
    csv = pd.read_csv(f)
    df.append(csv)
stl_df = pd.concat(df)

#Keep only one instance of each story in the dataframe
stl_df.drop_duplicates(subset=['text'], inplace=True)
stl_df.to_csv('final_stl.csv')


## Trying the similarity checker with all of the articles together

In [2]:
from gensim.utils import simple_preprocess
from gensim.models.phrases import Phrases, Phraser
from gensim import corpora
from gensim.similarities import Similarity
import pandas as pd
import glob
import re

In [3]:
data = []

In [4]:
joplin = pd.read_csv("/home/mazz76/jupyter/Civic_Info_Project/Final_data_Joplin/final_joplin.csv")
springfield = pd.read_csv('/home/mazz76/jupyter/Civic_Info_Project/Final_data_springfield/final_springfield.csv')
stl = pd.read_csv('/home/mazz76/jupyter/Civic_Info_Project/Final_data_STL/final_stl.csv')


pdList = [joplin,springfield,stl]  # List of your dataframes
all_data = pd.concat(pdList)
all_data.drop_duplicates(subset=['text'], inplace=True)
all_data.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,text,places,long,lat,outlet,DMA
0,0,0,After weeks and weeks of anticipation artCentr...,1110 East Thirteenth Street carthage Missouri,-94.297779,37.165091,Carthage,joplin
1,1,1,The University of Missouri’s Southwest Researc...,"Southwest Research, Extension and Education Ce...",-93.879303,37.073132,Carthage,joplin
2,2,2,As I sat down to write this final version of t...,Jasper County Juvenile Court Missouri,-94.517166,37.085823,Carthage,joplin
3,3,3,"CARTHAGE, Mo. — A slow first quarter by the Ca...",carthage high school CARTHAGE Missouri,-94.299023,37.145331,Carthage,joplin
4,4,4,"CARTHAGE, Mo. — After a sluggish start over mu...",carthage high school CARTHAGE Missouri,-94.299023,37.145331,Carthage,joplin


In [5]:
#Now we have all of the local articles from the Joplin, Springfield and STL DMAs (TV + Newspaper + Radio + Magazine). We need to find the
#exact and close matches to determine an originality score for the publication

texts = []

for x in all_data['text']:
    texts.append(x)
    
texts_cleaned = []
texts_cleaned[:] = [x for x in texts if isinstance(x,str)]
documents = texts_cleaned

In [6]:
#Converting documents to collection of words
#Deacc=True strips the punctuation accents and numbers

words = [[text for text in simple_preprocess(doc, deacc=True)] for doc in documents]

#Build a bigram model -- but using the Gensim library, which only stores common bigrams (saves on time and space)
bigram = Phrases(words, min_count=1)
bigram_phraser = Phraser(bigram)

#Reconvert documents to collection of words/bigrams
texts_bigrams = [[text for text in bigram_phraser[ simple_preprocess(doc, deacc=True)]] for doc in documents]

#Using a Gensim dictionary
dictionary = corpora.Dictionary(texts_bigrams)

#Creating a Gensim corpus
#Basically uses numbers which speeds up the process substantially
corpus = [dictionary.doc2bow(docString) for docString in texts_bigrams]

#Build similarity index
#Calculates the pairwise cosine similarity of each document
index = Similarity(corpus=corpus,
                   num_features=len(dictionary),
                   output_prefix='on_disk_output')

doc_id = 0
similar_docs = {}
for similarities in index:
    similar_docs[doc_id] = list(enumerate(similarities))
    doc_id += 1
    
#Setting a similarity threshold
sim_threshold = 0.9

exact_matches = []

for doc_id, sim_doc_tuples in similar_docs.items():
    for sim_doc_tuple in sim_doc_tuples:
        sim_doc_id = sim_doc_tuple[0]
        sim_score = sim_doc_tuple[1]
        if sim_score >= sim_threshold and doc_id != sim_doc_id and sim_score==1:
            exact_matches.append(documents[doc_id])
            exact_matches.append(documents[sim_doc_id])

exact_matches = list(set(exact_matches))
exact_matches

KeyboardInterrupt: 

In [7]:
#Close Matches
close_matches = []

for doc_id, sim_doc_tuples in similar_docs.items():
    for sim_doc_tuple in sim_doc_tuples:
        sim_doc_id = sim_doc_tuple[0]
        sim_score = sim_doc_tuple[1]
        if 0.9 <= sim_score < 0.99 and doc_id != sim_doc_id:
            close_matches.append(documents[doc_id])
            close_matches.append(documents[sim_doc_id])

In [8]:
close_matches = list(set(close_matches))
len(close_matches)

212

In [None]:
#Appending a repeated article flag to our dataframe

all_data['Repeated_Article'] = all_data.isin(close_matches).any(1).astype(int)
#all_data.to_csv('repeated_articles.csv')

In [8]:
#Cleaned the articles to account for close matches within the same publication and to take out the original reference
all_data_cleaned = pd.read_csv('/home/mazz76/jupyter/Civic_Info_Project/repeated_articles_cleaned.csv')

In [9]:
#Finding how many repeated articles per outlet
all_data_cleaned.groupby('outlet').sum('Repeated_Article')

Unnamed: 0_level_0,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,long,lat,Repeated_Article
outlet,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Barry County Advertiser,10,14,14,-469.291966,183.360492,0
Benton County Enterprise,28831,324,324,-2048.841758,844.292276,0
Bolivar Herald,810,654,654,-3353.16562,1359.022768,0
Branson Tri-Lakes News,6201,3435,3435,-7268.237003,2863.107356,0
Carthage,120,120,120,-1508.815241,594.211087,1
Cassville Democrat,6674,1229,1229,-4412.48315,1724.18302,0
Cedar Republican,558,3,3,-281.077002,112.995649,0
Christian County Headliner,3325,171,171,-1772.053607,707.863879,0
Crane Chronicle,1728,76,76,-841.01822,331.133998,0
Daily Journal Park Hills,19110,21300,21300,-17771.189975,7419.36922,0


In [20]:
all_data_cleaned['text'] = all_data_cleaned['text'].str.lower()

#Detecting presence of a quote
quote_match = all_data_cleaned['text'].str.contains(r'“(.*?)”|"(.*?)"')
all_data_cleaned['direct_quote'] = quote_match

#But I want to make sure that it's actually a quote and not just some term that is in quotes
import numpy as np

#Counting the instances of quote language in each story
quote_language_list = ["acknowledge","acknowledged","acknowledges","acknowledging",
"assert","asserted","asserting","asserts","clarified","clarifies","clarify","clarifying","commented",
"exclaim","exclaimed","exclaiming","exclaims","explain","explained","explaining","explains",
"insist","insisted","insisting","insists","mention","mentioned","mentioning","mentions",
"note","noted","notes","proclaim","proclaimed","proclaiming","proclaims","reassert","reasserted",
"reasserting","reasserts","replied","replies","reply","replying","said","say","saying","says","shout","shouts","shouting","shouted","speak",
"speaks","speaking","spoke","stated","stating","suggest","suggested","suggesting","suggests",
"tell","telling","tells","told","tweet","tweeted","tweeting","tweets","write","writes","writing",
"wrote","we","I"]

def f(cell_value):
    return [((v[1])) for v in ((s, cell_value.count(s)) for s in search) if v]

search = quote_language_list
df_quote=all_data_cleaned['text'].apply(f)

all_data_cleaned['num_quote_language_matches'] = df_quote.apply(np.count_nonzero)

#Next to find only the most sourced articles, and to discount the articles where it's just one source from 
#a press release, we want the cases where there are two or more instances of quote language


well_sourced_list = []

for x in all_data_cleaned['num_quote_language_matches']:
    if x>1:
        well_sourced_list.append('Y')
    else:
        well_sourced_list.append('N')

all_data_cleaned['well_sourced'] = well_sourced_list
        
#Detecting presence of press release language

#Counting the instances of press release language in each story
press_release_list = ["press release","statement","release", "said in a statement"
"news release","release said","released a statement","has announced","announced"]

search = press_release_list
df_press_release=all_data_cleaned['text'].apply(f)

all_data_cleaned['num_press_release_language_matches'] = df_press_release.apply(np.count_nonzero)

press_release_source_list = []

for x in all_data_cleaned['num_press_release_language_matches']:
    if x>1:
        press_release_source_list.append('Y')
    else:
        press_release_source_list.append('N')

all_data_cleaned['press_release_based'] = press_release_source_list

all_data_cleaned.to_csv('quote_check_expanded.csv')

all_data_cleaned

  return func(self, *args, **kwargs)


Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,text,places,long,lat,outlet,DMA,Repeated_Article,direct_quote,num_quote_language_matches,well_sourced,num_press_release_language_matches,press_release_based
0,1,1,1,the university of missouri’s southwest researc...,"Southwest Research, Extension and Education Ce...",-93.879303,37.073132,Carthage,joplin,1,True,2,Y,0,N
1,89,189,189,groundbreaking held for new livestock handling...,710 W. 9th Street joplin missouri,-94.520910,37.081627,Joplin Globe,joplin,1,True,4,Y,0,N
2,218,341,345,missouri southern state university’s football ...,"missouri southern state university joplin, mis...",-94.460764,37.095902,Joplin Globe,joplin,1,True,7,Y,3,Y
3,314,26,26,"joplin, mo. a local physician pleads guilty i...",3200 block of S Ferguson joplin missouri,-94.513281,37.084227,KOAM,joplin,1,False,6,Y,2,Y
4,336,48,49,"joplin, mo. the ncaa says missouri southern s...",missouri southern joplin missouri,-94.460764,37.095902,KOAM,joplin,1,False,6,Y,5,Y
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3558,1689,27,27,airport project gets under way ray scherer a l...,hermann municipal airport Missouri,-91.491206,38.704991,Hermann County Advertiser,STL,0,True,5,Y,0,N
3559,1690,28,28,caboose #13538 is undergoing a renovation at t...,hermann train station Missouri,-91.437384,38.704212,Hermann County Advertiser,STL,0,False,5,Y,0,N
3560,1691,29,29,rvs as living quarters now subject to zoning r...,city hall hermann missouri,-91.437384,38.704212,Hermann County Advertiser,STL,0,False,3,Y,0,N
3561,1692,30,30,mu health care staffing hermann area district ...,hermann area district hospital Missouri,-91.449708,38.693219,Hermann County Advertiser,STL,0,False,2,Y,0,N


In [37]:
well_sourced_df = all_data_cleaned.groupby(by=['outlet','well_sourced']).count()
well_sourced_df.sort_values('outlet')

#well_sourced_df.to_csv('well_sourced.csv')

press_release_df = all_data_cleaned.groupby(by=['outlet','press_release_based']).count()
press_release_df.sort_values('outlet')

press_release_df.to_csv('press_release_based.csv')



#all_data_cleaned

In [60]:
#Total Local Articles
total_local_articles = pd.DataFrame(all_data_cleaned.outlet.value_counts())
total_local_articles

Unnamed: 0,outlet
Joplin Globe,272
KMOV,257
Fox2,214
KSPR,199
Daily Journal Park Hills,196
STL Post Dispatch,188
Phelps County Focus,147
Springfield News Leader,140
OzarksFirst,136
KSDK,135


In [65]:
#Total Original Articles
original_dataset = pd.read_csv('~/jupyter/Civic_Info_Project/december_data_cleaned.csv')
total_all_articles = pd.DataFrame(original_dataset.hostname.value_counts())
total_all_articles.to_csv('total_article_tallies.csv')

In [None]:
#Quote Language -- FULL LIST
#https://github.com/JournalismAI-2021-Quotes/quote-extraction/blob/main/regex_pipeline/utils/quote_verb_list.txt

accept|accepted|acclaim|acclaimed|acclaiming|acclaims|acknowledge|acknowledged|acknowledges|acknowledging|add|added|adding|adds|admit|admits|admitted|admitting|
advise|advised|advises|advising|announce|announced|annonces|announcing|answer|answered|answers|answering|argue|argued|argues|arguing|
assert|asserted|asserting|asserts|assure|assured|assures|assuring|brand|branded|brands|branding|claim|claimed|claiming|claims|
clarified|clarifies|clarify|clarifying|comment|commented|commenting|comments|conclude|concluded|concludes|concluding|confirm|
confirmed|confirming|confirms|continue|continued|continues|continuing|convince|convinced|convinces|convincing|counter|countered|
counters|countering|criticize|criticized|criticizes|criticizing|declaim|declaimed|declaiming|declaims|declare|declared|declares|
declaring|decried|decries|decry|decrying|demonstrate|demonstrated|demonstrates|demonstrating|denounce|denounced|denounces|
denouncing|describe|described|describes|describing|disclaim|disclaimed|disclaiming|disclaims|dispute|disputed|disputes|
disputing|ensure|ensured|ensures|ensuring|estimated|estimates|exclaim|exclaimed|exclaiming|exclaims|explain|explained|
explaining|explains|finding|finds|highlight|highlighted|highlighting|highlights|illustrate|illustrated|illustrates|
illustrating|indicate|indicated|indicates|indicating|inform|informed|informing|informs|insist|insisted|insisting|
insists|laugh|laughed|laughing|laughs|mention|mentioned|mentioning|mentions|note|noted|notes|notified|notifies|notify|
notifying|noting|persist|persisted|persisting|persists|point|pointed|pointing|points|preach|preached|preaches|preaching|predict|
predicted|predicting|predicts|present|presenting|presents|proclaim|proclaimed|proclaiming|proclaims|promise|promising|promised|
promises|rave|raved|raves|raving|reassert|reasserted|reasserting|reasserts|reassure|reassured|reassures|reassuring|reckon|reckoned|
reckoning|reckons|reconfirm|reconfirmed|reconfirming|reconfirms|release|released|releases|releasing|remind|reminded|reminding|
reminds|replied|replies|reply|replying|report|reported|reporting|reports|respond|responded|responding|responds|restate|
restated|restates|restating|retell|retelling|retells|retold|said|say|saying|says|shout|shouts|shouting|shouted|speak|
speaks|speaking|spoke|spoken|state|stated|states|stating|suggest|suggested|suggesting|suggests|swear|swears|swearing|swore|
tell|telling|tells|told|testified|testifies|testify|testifying|think|thinking|thinks|thought|tweet|tweeted|tweeting|
tweets|urge|urged|urging|urges|warn|warned|warning|warns|write|writes|writing|wrote|yell|yells|yelling|yelled

In [None]:
#Adapted List 

acknowledge|acknowledged|acknowledges|acknowledging|
assert|asserted|asserting|asserts|clarified|clarifies|clarify|clarifying|decried|decries|decry|decrying|denounce|denounced|denounces|denouncing|exclaim|exclaimed|exclaiming|exclaims|explain|explained|
explaining|explains|insist|insisted|insisting|insists|mention|mentioned|mentioning|mentions|note|noted|notes|proclaim|proclaimed|proclaiming|proclaims|reassert|reasserted|reasserting|reasserts|
replied|replies|reply|replying|respond|responded|responding|responds|restate|
restated|restates|restating|said|say|saying|says|shout|shouts|shouting|shouted|speak|
speaks|speaking|spoke|spoken|stated|stating|suggest|suggested|suggesting|suggests|
tell|telling|tells|told|tweet|tweeted|tweeting|
tweets|write|writes|writing|wrote|yell|yells|yelling|yelled|we|I'

In [None]:
#Press release language found from searching through corpus

"said in a statement"
"news release"
"release said"
"released a statement"
"has announced"
"release"