In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/airadomingo/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
#import packages
import re
import numpy as np
import pandas as pd
from pprint import pprint

import gensim
import gensim.corpora as corpora
from gensim.utils import simple_preprocess
from gensim.models import CoherenceModel

import spacy

import pyLDAvis
import pyLDAvis.gensim
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
#load spacy model
nlp = spacy.load("en_core_web_sm")
# spacy Stop words
from spacy.lang.en.stop_words import STOP_WORDS

stopwords = spacy.lang.en.stop_words.STOP_WORDS
nlp.Defaults.stop_words |= {"america", "-pron-", 'american', 'americans', 'democrats', 'support', 'strengthen', 'end', 'republican', 'people', 'donald', 'democratic', 'president', 'republicans', 'year', 'states', 'national', 'believe', 'preamble'}

In [5]:
#create dataframe
df = pd.DataFrame(columns=['party', 'year', 'manifesto', 'tokens'])



In [6]:
#print dataframe
print(df)

Empty DataFrame
Columns: [party, year, manifesto, tokens]
Index: []


In [7]:
#OPEN DOCUMENT
def read_file(filename):
    input_file_text = open(filename, encoding = 'utf-8').read()
    return input_file_text

In [8]:
#PREPROCESSING/CLEANING
def preprocess(document):
    analyzed_doc = nlp(document) #spacy model on document
    
    tokens_list =[]
    
    for token in analyzed_doc:
        # excludes tokens that are punctuations, numbers, and white spaces
            if token.is_stop == False and token.is_punct == False and token.is_digit == False and token.like_num == False and token.is_space == False: 
                token = token.lemma_ #gets token lemma
                tokens_list.append(token.lower()) #adds lowercase of token lemma to tokens_list

    for token in tokens_list:
        #removes '-pron-' from lemmatization and stop words
        if token in stopwords:
            tokens_list.remove(token)
        elif token == '-pron-':
            tokens_list.remove(token)       
    return tokens_list

In [9]:
#CREATE BAG Of WORDS
def create_bag_words(preprocessed_data):
    bag_dictionary = corpora.Dictionary(preprocessed_data)
    return bag_dictionary

In [10]:
#CREATE TERM DOCUMENT MATRIX CORPUS
def create_term_doc_mat(dictionary, preprocessed_data):
    matrix_corpus = [dictionary.doc2bow(doc) for doc in preprocessed_data]
    return matrix_corpus

In [11]:
#BUILD TOPIC MODEL
lda_model = gensim.models.ldamodel.LdaModel

def build_lda_model(new_corpus,dictionary):
    new_lda_model = lda_model(corpus = new_corpus,
                                        id2word = dictionary,
                                        num_topics=80, 
                                        random_state=100,
                                        update_every=100,
                                        chunksize=100,
                                        passes=100,
                                        alpha='auto',
                                        eta='auto',
                                        iterations=100,
                                        eval_every=None,
                                        per_word_topics=False)
    return new_lda_model

In [12]:
def print_topics(model):
    for idx, topic in model.show_topics(num_topics=10, formatted=False, num_words=10):
        print('Topic: {} \tWords: {}'.format(idx, '|'.join([w[0] for w in topic])))

In [13]:
# Compute Coherence Score
def comp_coherence(model, corpus, dictionary):
    coherence_model= CoherenceModel(model=model, texts=texts, 
                                               dictionary=dictionary, coherence='c_v')
    coherence = coherence_model.get_coherence()
    print('\nCoherence Score: ', coherence)

In [14]:
#ADD DOCUMENTS TO DF AND PREPROCESS EACH DOCUMENT
demo_2000 = read_file('/Users/airadomingo/Documents/Documents/NLP/txt_data/Democratic/61320_200011.txt')
clean_demo_2000 = preprocess(demo_2000)
df = df.append({'party':'democratic', 'year': '2000', 'manifesto': demo_2000, 'tokens': clean_demo_2000},ignore_index=True)

repub_2000 = read_file('/Users/airadomingo/Documents/Documents/NLP/txt_data/Republican/61620_200011.txt')
clean_repub_2000 = preprocess(repub_2000)
df = df.append({'party':'republican', 'year': '2000', 'manifesto': repub_2000, 'tokens': clean_repub_2000},ignore_index=True)

demo_2004 = read_file('/Users/airadomingo/Documents/Documents/NLP/txt_data/Democratic/61320_200411.txt')
clean_demo_2004 = preprocess(demo_2004)
df = df.append({'party':'democratic', 'year': '2004', 'manifesto': demo_2004, 'tokens': clean_demo_2004},ignore_index=True)

repub_2004 = read_file('/Users/airadomingo/Documents/Documents/NLP/txt_data/Republican/61620_200411.txt')
clean_repub_2004 = preprocess(repub_2004)
df = df.append({'party':'republican', 'year': '2004', 'manifesto': repub_2004, 'tokens': clean_repub_2004},ignore_index=True)

demo_2008 = read_file('/Users/airadomingo/Documents/Documents/NLP/txt_data/Democratic/61320_200811.txt')
clean_demo_2008 = preprocess(demo_2008)
df = df.append({'party':'democratic', 'year': '2008', 'manifesto': demo_2008, 'tokens': clean_demo_2008},ignore_index=True)

repub_2008 = read_file('/Users/airadomingo/Documents/Documents/NLP/txt_data/Republican/61620_200811.txt')
clean_repub_2008 = preprocess(repub_2008)
df = df.append({'party':'republican', 'year': '2008', 'manifesto': repub_2008, 'tokens': clean_repub_2008},ignore_index=True)

demo_2012 = read_file('/Users/airadomingo/Documents/Documents/NLP/txt_data/Democratic/61320_201211.txt')
clean_demo_2012 = preprocess(demo_2012)
df = df.append({'party':'democratic', 'year': '2012', 'manifesto': demo_2012, 'tokens': clean_demo_2012},ignore_index=True)
               
repub_2012 = read_file('/Users/airadomingo/Documents/Documents/NLP/txt_data/Republican/61620_201211.txt')
clean_repub_2012 = preprocess(repub_2012)
df = df.append({'party':'republican', 'year': '2012', 'manifesto': repub_2012, 'tokens': clean_repub_2012},ignore_index=True)

demo_2016 = read_file('/Users/airadomingo/Documents/Documents/NLP/txt_data/Democratic/61320_201611.txt')
clean_demo_2016 = preprocess(demo_2016)
df = df.append({'party':'democratic', 'year': '2016', 'manifesto': demo_2016, 'tokens': clean_demo_2016},ignore_index=True)
               
repub_2016 = read_file('/Users/airadomingo/Documents/Documents/NLP/txt_data/Republican/61620_201611.txt')
clean_repub_2016 = preprocess(repub_2016)
df = df.append({'party':'republican', 'year': '2016', 'manifesto': repub_2016, 'tokens': clean_repub_2016},ignore_index=True)

In [16]:
#CREATE PANDAS SERIES OF PREPROCESSED TOKENS FOR EACH DOCUMENT
texts = df['tokens']

In [19]:
#PRINT PREVIEW OF TEXTS
texts

0    [platform, prosperity, progress, peace, adopt,...
1    [platform, renewing, purpose, meet, remarkable...
2    [come, declare, vision, mindful, challenge, ti...
3    [introduction, preambleone, ago, gather, prote...
4    [come, defining, moment, history, nation, nati...
5    [defend, nation, heroes, secure, peacethree, d...
6    [forward, platformmoving, forwardfour, ago, in...
7    [americathis, platform, dedicate, appreciation...
8    [preamblein, meet, philadelphia, basic, belief...
9    [preamblewith, platform, party, reaffirm, prin...
Name: tokens, dtype: object

In [20]:
#call create_bag_words on preprocessed data

platforms_dict = create_bag_words(texts)

#call create_term_doc_mat using created bag of words and preprocessed data

platforms_mat_corpus = create_term_doc_mat(platforms_dict, texts)



In [22]:
#lda model using all documents
model_all = build_lda_model(platforms_mat_corpus, platforms_dict)

pprint(model_all.print_topics())

# Compute Coherence Score
all_coherence= comp_coherence(model_all, platforms_mat_corpus, platforms_dict)

[(56,
  '0.000*"government" + 0.000*"care" + 0.000*"family" + 0.000*"law" + '
  '0.000*"health" + 0.000*"new" + 0.000*"work" + 0.000*"security" + '
  '0.000*"administration" + 0.000*"federal"'),
 (42,
  '0.000*"government" + 0.000*"health" + 0.000*"law" + 0.000*"right" + '
  '0.000*"program" + 0.000*"work" + 0.000*"federal" + 0.000*"administration" + '
  '0.000*"economic" + 0.000*"country"'),
 (71,
  '0.000*"government" + 0.000*"world" + 0.000*"nation" + 0.000*"new" + '
  '0.000*"right" + 0.000*"federal" + 0.000*"need" + 0.000*"work" + '
  '0.000*"administration" + 0.000*"education"'),
 (75,
  '0.000*"family" + 0.000*"right" + 0.000*"need" + 0.000*"new" + 0.000*"law" + '
  '0.000*"government" + 0.000*"health" + 0.000*"country" + 0.000*"child" + '
  '0.000*"world"'),
 (40,
  '0.000*"government" + 0.000*"family" + 0.000*"new" + 0.000*"federal" + '
  '0.000*"health" + 0.000*"work" + 0.000*"program" + 0.000*"need" + '
  '0.000*"right" + 0.000*"public"'),
 (49,
  '0.000*"health" + 0.000*"go

In [27]:
## DEMOCRATIC 2000
corpus_demo_2000 = platforms_mat_corpus[0] #term document matrix of only democratic 2000

model_demo_2000 = build_lda_model([corpus_demo_2000], platforms_dict)

pprint(model_demo_2000.print_topics())

# Compute Coherence Score
demo_2000_coherence = comp_coherence(model_demo_2000, [corpus_demo_2000], platforms_dict)


[(4,
  '0.000*"unchecked" + 0.000*"undemocratic" + 0.000*"tribes" + '
  '0.000*"tropical" + 0.000*"trustworthy" + 0.000*"turbine" + 0.000*"turning" '
  '+ 0.000*"unavailable" + 0.000*"traumatic" + 0.000*"unconditional"'),
 (33,
  '0.000*"work" + 0.000*"security" + 0.000*"help" + 0.000*"child" + '
  '0.000*"gore" + 0.000*"prosperity" + 0.000*"family" + 0.000*"time" + '
  '0.000*"new" + 0.000*"public"'),
 (41,
  '0.000*"new" + 0.000*"child" + 0.000*"school" + 0.000*"world" + '
  '0.000*"worker" + 0.000*"high" + 0.000*"effort" + 0.000*"education" + '
  '0.000*"economy" + 0.000*"economic"'),
 (28,
  '0.000*"work" + 0.000*"child" + 0.000*"new" + 0.000*"need" + 0.000*"right" + '
  '0.000*"school" + 0.000*"gore" + 0.000*"continue" + 0.000*"worker" + '
  '0.000*"expand"'),
 (58,
  '0.000*"gore" + 0.000*"work" + 0.000*"new" + 0.000*"school" + 0.000*"worker" '
  '+ 0.000*"need" + 0.000*"child" + 0.000*"include" + 0.000*"home" + '
  '0.000*"nation"'),
 (43,
  '0.000*"new" + 0.000*"need" + 0.000*"

In [29]:
## REPUBLICAN 2000
corpus_repub_2000 = platforms_mat_corpus[1]

model_repub_2000 = build_lda_model([corpus_repub_2000], platforms_dict)

pprint(model_repub_2000.print_topics())

repub_2000_coherence = comp_coherence(model_repub_2000,[corpus_repub_2000], platforms_dict)

[(28,
  '0.000*"health" + 0.000*"government" + 0.000*"administration" + '
  '0.000*"market" + 0.000*"high" + 0.000*"time" + 0.000*"country" + '
  '0.000*"right" + 0.000*"rule" + 0.000*"encourage"'),
 (33,
  '0.000*"federal" + 0.000*"government" + 0.000*"current" + 0.000*"party" + '
  '0.000*"change" + 0.000*"nation" + 0.000*"private" + 0.000*"work" + '
  '0.000*"community" + 0.000*"economic"'),
 (58,
  '0.000*"health" + 0.000*"need" + 0.000*"united" + 0.000*"government" + '
  '0.000*"care" + 0.000*"federal" + 0.000*"child" + 0.000*"reform" + '
  '0.000*"u.s." + 0.000*"system"'),
 (34,
  '0.000*"government" + 0.000*"health" + 0.000*"new" + 0.000*"encourage" + '
  '0.000*"time" + 0.000*"threat" + 0.000*"market" + 0.000*"country" + '
  '0.000*"local" + 0.000*"right"'),
 (41,
  '0.000*"government" + 0.000*"administration" + 0.000*"care" + '
  '0.000*"program" + 0.000*"federal" + 0.000*"united" + 0.000*"new" + '
  '0.000*"family" + 0.000*"encourage" + 0.000*"trade"'),
 (19,
  '0.000*"federa

In [30]:
##DEMOCRATIC 2004
corpus_demo_2004 = platforms_mat_corpus[2]

model_demo_2004 = build_lda_model([corpus_demo_2004], platforms_dict)

pprint(model_demo_2004.print_topics())

# Compute Coherence Score
demo_2004_coherence = comp_coherence(model_demo_2004, [corpus_demo_2004], platforms_dict)



[(71,
  '0.000*"unchecked" + 0.000*"undemocratic" + 0.000*"tribes" + '
  '0.000*"tropical" + 0.000*"trustworthy" + 0.000*"turbine" + 0.000*"turning" '
  '+ 0.000*"unavailable" + 0.000*"traumatic" + 0.000*"unconditional"'),
 (79,
  '0.000*"unchecked" + 0.000*"undemocratic" + 0.000*"tribes" + '
  '0.000*"tropical" + 0.000*"trustworthy" + 0.000*"turbine" + 0.000*"turning" '
  '+ 0.000*"unavailable" + 0.000*"traumatic" + 0.000*"unconditional"'),
 (70,
  '0.000*"unchecked" + 0.000*"undemocratic" + 0.000*"tribes" + '
  '0.000*"tropical" + 0.000*"trustworthy" + 0.000*"turbine" + 0.000*"turning" '
  '+ 0.000*"unavailable" + 0.000*"traumatic" + 0.000*"unconditional"'),
 (58,
  '0.000*"unchecked" + 0.000*"undemocratic" + 0.000*"tribes" + '
  '0.000*"tropical" + 0.000*"trustworthy" + 0.000*"turbine" + 0.000*"turning" '
  '+ 0.000*"unavailable" + 0.000*"traumatic" + 0.000*"unconditional"'),
 (33,
  '0.000*"work" + 0.000*"undemocratic" + 0.000*"underemployment" + '
  '0.000*"trustworthy" + 0.000*"t

In [31]:
## REPUBLICAN 2004
corpus_repub_2004 = platforms_mat_corpus[3]

model_repub_2004 = build_lda_model([corpus_repub_2004], platforms_dict)

pprint(model_repub_2004.print_topics())

repub_2004_coherence = comp_coherence(model_repub_2004,[corpus_repub_2004], platforms_dict)

[(28,
  '0.000*"health" + 0.000*"bush" + 0.000*"child" + 0.000*"nation" + '
  '0.000*"continue" + 0.000*"increase" + 0.000*"effort" + 0.000*"work" + '
  '0.000*"congress" + 0.000*"world"'),
 (33,
  '0.000*"bush" + 0.000*"work" + 0.000*"effort" + 0.000*"nation" + '
  '0.000*"commitment" + 0.000*"world" + 0.000*"party" + 0.000*"pass" + '
  '0.000*"strong" + 0.000*"economy"'),
 (19,
  '0.000*"congress" + 0.000*"bush" + 0.000*"work" + 0.000*"united" + '
  '0.000*"nation" + 0.000*"provide" + 0.000*"effort" + 0.000*"health" + '
  '0.000*"improve" + 0.000*"respect"'),
 (34,
  '0.000*"bush" + 0.000*"help" + 0.000*"world" + 0.000*"nation" + '
  '0.000*"provide" + 0.000*"work" + 0.000*"health" + 0.000*"new" + '
  '0.000*"agreement" + 0.000*"include"'),
 (56,
  '0.000*"bush" + 0.000*"government" + 0.000*"time" + 0.000*"family" + '
  '0.000*"reform" + 0.000*"congress" + 0.000*"leadership" + 0.000*"protect" + '
  '0.000*"trade" + 0.000*"terrorist"'),
 (41,
  '0.000*"bush" + 0.000*"united" + 0.000*"

In [32]:
##DEMOCRATIC 2008
corpus_demo_2008 = platforms_mat_corpus[4]

model_demo_2008 = build_lda_model([corpus_demo_2008], platforms_dict)

pprint(model_demo_2008.print_topics())

# Compute Coherence Score
demo_2008_coherence = comp_coherence(model_demo_2008, [corpus_demo_2008], platforms_dict)

[(56,
  '0.000*"government" + 0.000*"unconditional" + 0.000*"underemployment" + '
  '0.000*"tropical" + 0.000*"trustworthy" + 0.000*"turbine" + 0.000*"turning" '
  '+ 0.000*"unavailable" + 0.000*"unchecked" + 0.000*"treatable"'),
 (33,
  '0.000*"work" + 0.000*"policy" + 0.000*"include" + 0.000*"provide" + '
  '0.000*"need" + 0.000*"help" + 0.000*"ensure" + 0.000*"nation" + 0.000*"job" '
  '+ 0.000*"education"'),
 (19,
  '0.000*"nation" + 0.000*"job" + 0.000*"security" + 0.000*"need" + '
  '0.000*"provide" + 0.000*"country" + 0.000*"help" + 0.000*"community" + '
  '0.000*"ensure" + 0.000*"family"'),
 (28,
  '0.000*"right" + 0.000*"work" + 0.000*"health" + 0.000*"lead" + '
  '0.000*"restore" + 0.000*"increase" + 0.000*"law" + 0.000*"leadership" + '
  '0.000*"include" + 0.000*"weapon"'),
 (41,
  '0.000*"work" + 0.000*"care" + 0.000*"business" + 0.000*"school" + '
  '0.000*"provide" + 0.000*"woman" + 0.000*"public" + 0.000*"reform" + '
  '0.000*"new" + 0.000*"good"'),
 (63,
  '0.000*"ensur

In [33]:
## REPUBLICAN 2008
corpus_repub_2008 = platforms_mat_corpus[5]

model_repub_2008 = build_lda_model([corpus_repub_2008], platforms_dict)

pprint(model_repub_2008.print_topics())

repub_2008_coherence = comp_coherence(model_repub_2008,[corpus_repub_2008], platforms_dict)

[(33,
  '0.000*"government" + 0.000*"unconditional" + 0.000*"underemployment" + '
  '0.000*"tropical" + 0.000*"trustworthy" + 0.000*"turbine" + 0.000*"turning" '
  '+ 0.000*"unavailable" + 0.000*"unchecked" + 0.000*"treatable"'),
 (34,
  '0.000*"health" + 0.000*"care" + 0.000*"family" + 0.000*"right" + '
  '0.000*"need" + 0.000*"federal" + 0.000*"system" + 0.000*"government" + '
  '0.000*"undemocratic" + 0.000*"unchecked"'),
 (28,
  '0.000*"right" + 0.000*"health" + 0.000*"system" + 0.000*"need" + '
  '0.000*"care" + 0.000*"federal" + 0.000*"government" + 0.000*"commitment" + '
  '0.000*"treatment" + 0.000*"community"'),
 (63,
  '0.000*"federal" + 0.000*"care" + 0.000*"right" + 0.000*"family" + '
  '0.000*"government" + 0.000*"health" + 0.000*"need" + 0.000*"nation" + '
  '0.000*"work" + 0.000*"change"'),
 (19,
  '0.000*"health" + 0.000*"government" + 0.000*"nation" + 0.000*"care" + '
  '0.000*"federal" + 0.000*"need" + 0.000*"individual" + 0.000*"party" + '
  '0.000*"remain" + 0.000*"

In [34]:
##DEMOCRATIC 2012
corpus_demo_2012 = platforms_mat_corpus[6]

model_demo_2012 = build_lda_model([corpus_demo_2012], platforms_dict)

pprint(model_demo_2012.print_topics())

# Compute Coherence Score
demo_2012_coherence = comp_coherence(model_demo_2012, [corpus_demo_2012], platforms_dict)

[(79,
  '0.000*"unchecked" + 0.000*"undemocratic" + 0.000*"tribes" + '
  '0.000*"tropical" + 0.000*"trustworthy" + 0.000*"turbine" + 0.000*"turning" '
  '+ 0.000*"unavailable" + 0.000*"traumatic" + 0.000*"unconditional"'),
 (41,
  '0.000*"continue" + 0.000*"community" + 0.000*"work" + 0.000*"help" + '
  '0.000*"job" + 0.000*"administration" + 0.000*"middle" + 0.000*"build" + '
  '0.000*"effort" + 0.000*"ensure"'),
 (19,
  '0.000*"work" + 0.000*"include" + 0.000*"effort" + 0.000*"help" + '
  '0.000*"ensure" + 0.000*"economy" + 0.000*"nation" + 0.000*"job" + '
  '0.000*"economic" + 0.000*"build"'),
 (56,
  '0.000*"party" + 0.000*"work" + 0.000*"government" + 0.000*"economic" + '
  '0.000*"family" + 0.000*"administration" + 0.000*"continue" + 0.000*"health" '
  '+ 0.000*"obama" + 0.000*"security"'),
 (58,
  '0.000*"obama" + 0.000*"work" + 0.000*"united" + 0.000*"health" + '
  '0.000*"continue" + 0.000*"economy" + 0.000*"country" + 0.000*"job" + '
  '0.000*"worker" + 0.000*"world"'),
 (4,


In [35]:
## REPUBLICAN 2012
corpus_repub_2012 = platforms_mat_corpus[7]

model_repub_2012 = build_lda_model([corpus_repub_2012], platforms_dict)

pprint(model_repub_2012.print_topics())

repub_2012_coherence = comp_coherence(model_repub_2012,[corpus_repub_2012], platforms_dict)

[(28,
  '0.000*"government" + 0.000*"right" + 0.000*"federal" + 0.000*"future" + '
  '0.000*"protect" + 0.000*"act" + 0.000*"education" + 0.000*"reform" + '
  '0.000*"great" + 0.000*"public"'),
 (34,
  '0.000*"nation" + 0.000*"world" + 0.000*"need" + 0.000*"public" + '
  '0.000*"administration" + 0.000*"country" + 0.000*"u.s." + 0.000*"family" + '
  '0.000*"right" + 0.000*"job"'),
 (33,
  '0.000*"current" + 0.000*"law" + 0.000*"government" + 0.000*"public" + '
  '0.000*"federal" + 0.000*"life" + 0.000*"encourage" + 0.000*"party" + '
  '0.000*"security" + 0.000*"future"'),
 (41,
  '0.000*"government" + 0.000*"federal" + 0.000*"program" + 0.000*"right" + '
  '0.000*"policy" + 0.000*"current" + 0.000*"nation" + 0.000*"need" + '
  '0.000*"private" + 0.000*"country"'),
 (19,
  '0.000*"federal" + 0.000*"government" + 0.000*"program" + 0.000*"need" + '
  '0.000*"service" + 0.000*"u.s." + 0.000*"administration" + 0.000*"job" + '
  '0.000*"nation" + 0.000*"law"'),
 (56,
  '0.000*"government" + 

In [36]:
##DEMOCRATIC 2016
corpus_demo_2016 = platforms_mat_corpus[8]

model_demo_2016 = build_lda_model([corpus_demo_2016], platforms_dict)

pprint(model_demo_2016.print_topics())

# Compute Coherence Score
demo_2016_coherence = comp_coherence(model_demo_2016, [corpus_demo_2016], platforms_dict)

[(56,
  '0.000*"unchecked" + 0.000*"undemocratic" + 0.000*"tribes" + '
  '0.000*"tropical" + 0.000*"trustworthy" + 0.000*"turbine" + 0.000*"turning" '
  '+ 0.000*"unavailable" + 0.000*"traumatic" + 0.000*"unconditional"'),
 (41,
  '0.000*"build" + 0.000*"high" + 0.000*"security" + 0.000*"public" + '
  '0.000*"create" + 0.000*"live" + 0.000*"provide" + 0.000*"family" + '
  '0.000*"strong" + 0.000*"indian"'),
 (34,
  '0.000*"ensure" + 0.000*"job" + 0.000*"family" + 0.000*"student" + '
  '0.000*"access" + 0.000*"public" + 0.000*"provide" + 0.000*"help" + '
  '0.000*"need" + 0.000*"expand"'),
 (19,
  '0.000*"health" + 0.000*"good" + 0.000*"pay" + 0.000*"housing" + '
  '0.000*"indian" + 0.000*"provide" + 0.000*"build" + 0.000*"public" + '
  '0.000*"new" + 0.000*"right"'),
 (58,
  '0.000*"health" + 0.000*"country" + 0.000*"work" + 0.000*"need" + '
  '0.000*"ensure" + 0.000*"education" + 0.000*"community" + 0.000*"service" + '
  '0.000*"right" + 0.000*"protect"'),
 (33,
  '0.000*"work" + 0.00

In [38]:
## REPUBLICAN 2016
corpus_repub_2016 = platforms_mat_corpus[9]

model_repub_2016 = build_lda_model([corpus_repub_2016], platforms_dict)

pprint(model_repub_2016.print_topics())

repub_2016_coherence = comp_coherence(model_repub_2016,[corpus_repub_2016], platforms_dict)


Coherence Score:  0.2496199104851365


In [26]:
print_topics(model_all)

Topic: 71 	Words: government|world|nation|new|right|federal|need|work|administration|education
Topic: 44 	Words: government|work|family|right|federal|community|country|program|provide|health
Topic: 33 	Words: work|country|government|need|law|public|family|nation|federal|ensure
Topic: 60 	Words: new|government|need|health|work|right|nation|family|world|administration
Topic: 8 	Words: government|health|right|country|federal|care|protect|work|family|community
Topic: 21 	Words: health|new|government|work|care|nation|need|help|congress|bush
Topic: 62 	Words: bush|applaud|congress|provide|terrorist|terror|effort|pass|hiv|endorse
Topic: 37 	Words: government|federal|right|law|program|current|country|public|state|nation
Topic: 3 	Words: new|health|work|care|government|nation|united|world|administration|child
Topic: 66 	Words: work|health|right|community|new|job|need|family|continue|worker


In [39]:
print_topics(model_demo_2000)

Topic: 44 	Words: new|economy|worker|provide|need|help|prosperity|high|child|world
Topic: 73 	Words: gore|new|work|need|unchecked|unconditional|undemocratic|underperform|unavailable|turbine
Topic: 4 	Words: unchecked|undemocratic|tribes|tropical|trustworthy|turbine|turning|unavailable|traumatic|unconditional
Topic: 36 	Words: new|gore|time|job|help|need|provide|public|community|security
Topic: 42 	Words: unchecked|undemocratic|tribes|tropical|trustworthy|turbine|turning|unavailable|traumatic|unconditional
Topic: 59 	Words: work|new|need|school|gore|child|worker|security|right|al
Topic: 5 	Words: new|work|school|need|continue|gore|worker|right|al|help
Topic: 1 	Words: work|gore|school|new|al|right|need|worker|continue|help
Topic: 64 	Words: new|child|school|work|need|gore|right|al|worker|continue
Topic: 66 	Words: new|work|need|gore|school|child|al|worker|continue|help


In [40]:
print_topics(model_repub_2000)

Topic: 79 	Words: new|health|government|administration|federal|governor|individual|country|right|increase
Topic: 70 	Words: administration|government|need|care|health|new|federal|unavailable|undemocratic|turning
Topic: 71 	Words: system|congress|administration|law|economic|policy|world|health|right|public
Topic: 32 	Words: government|health|new|governor|country|administration|u.s.|threat|promote|individual
Topic: 12 	Words: government|health|new|administration|care|united|need|nation|federal|law
Topic: 26 	Words: new|administration|government|health|federal|united|care|need|system|nation
Topic: 20 	Words: government|federal|health|administration|new|united|family|need|tax|program
Topic: 37 	Words: government|administration|federal|new|care|world|tax|reform|child|health
Topic: 3 	Words: dangerous|chemical|facility|voluntary|rely|major|ladder|operate|look|arm
Topic: 68 	Words: government|new|health|administration|federal|united|need|care|family|program


In [41]:
print_topics(model_demo_2004)

Topic: 71 	Words: unchecked|undemocratic|tribes|tropical|trustworthy|turbine|turning|unavailable|traumatic|unconditional
Topic: 79 	Words: unchecked|undemocratic|tribes|tropical|trustworthy|turbine|turning|unavailable|traumatic|unconditional
Topic: 70 	Words: unchecked|undemocratic|tribes|tropical|trustworthy|turbine|turning|unavailable|traumatic|unconditional
Topic: 58 	Words: unchecked|undemocratic|tribes|tropical|trustworthy|turbine|turning|unavailable|traumatic|unconditional
Topic: 36 	Words: family|work|world|administration|health|new|turning|underemployment|unconditional|unchecked
Topic: 68 	Words: work|health|family|need|job|new|child|strong|world|government
Topic: 16 	Words: work|new|job|strong|care|world|john|protect|school|child
Topic: 66 	Words: work|world|new|job|strong|family|need|security|government|care
Topic: 5 	Words: work|new|health|john|need|job|administration|strong|good|security
Topic: 15 	Words: work|new|health|strong|family|job|security|world|need|care


In [42]:
print_topics(model_repub_2004)

Topic: 79 	Words: bush|provide|law|terrorist|high|opportunity|world|child|act|congress
Topic: 70 	Words: new|bush|health|care|unconditional|trustworthy|turbine|turning|unavailable|unchecked
Topic: 8 	Words: bush|health|world|care|work|nation|effort|tax|government|congress
Topic: 36 	Words: bush|health|work|world|nation|help|congress|united|new|child
Topic: 31 	Words: bush|congress|care|trade|effort|government|help|work|health|world
Topic: 6 	Words: bush|nation|effort|work|provide|health|congress|united|new|help
Topic: 20 	Words: bush|congress|work|health|nation|world|help|government|united|provide
Topic: 5 	Words: bush|effort|congress|health|nation|work|new|provide|government|help
Topic: 37 	Words: bush|world|government|congress|nation|care|effort|new|tax|applaud
Topic: 62 	Words: bush|health|nation|work|congress|new|government|effort|world|help


In [43]:
print_topics(model_demo_2008)

Topic: 71 	Words: unchecked|undemocratic|tribes|tropical|trustworthy|turbine|turning|unavailable|traumatic|unconditional
Topic: 79 	Words: unchecked|undemocratic|tribes|tropical|trustworthy|turbine|turning|unavailable|traumatic|unconditional
Topic: 56 	Words: government|unconditional|underemployment|tropical|trustworthy|turbine|turning|unavailable|unchecked|treatable
Topic: 36 	Words: community|education|need|service|security|change|country|protect|help|family
Topic: 32 	Words: world|worker|family|need|health|provide|help|education|energy|ensure
Topic: 11 	Words: work|ensure|provide|health|government|new|right|family|country|care
Topic: 68 	Words: work|health|government|provide|new|right|need|country|family|ensure
Topic: 5 	Words: work|health|provide|new|care|right|government|ensure|country|need
Topic: 37 	Words: work|care|ensure|government|world|right|provide|security|new|education
Topic: 66 	Words: work|health|new|government|care|provide|ensure|right|country|security


In [44]:
print_topics(model_repub_2008)

Topic: 79 	Words: government|need|right|care|law|health|federal|unconditional|undemocratic|unavailable
Topic: 71 	Words: unchecked|undemocratic|tribes|tropical|trustworthy|turbine|turning|unavailable|traumatic|unconditional
Topic: 70 	Words: government|care|undemocratic|underpayment|trustworthy|turbine|turning|unavailable|unchecked|unconditional
Topic: 32 	Words: family|right|tax|need|government|health|care|undemocratic|underpayment|unchecked
Topic: 12 	Words: government|care|health|education|federal|nation|high|tax|protect|system
Topic: 26 	Words: care|government|health|right|federal|need|energy|nation|system|family
Topic: 5 	Words: government|health|care|right|federal|nation|tax|system|need|energy
Topic: 20 	Words: care|federal|government|right|family|law|tax|health|system|nation
Topic: 66 	Words: government|care|right|need|family|federal|world|health|system|state
Topic: 37 	Words: government|care|health|right|federal|tax|family|law|need|nation


In [45]:
print_topics(model_demo_2012)

Topic: 79 	Words: unchecked|undemocratic|tribes|tropical|trustworthy|turbine|turning|unavailable|traumatic|unconditional
Topic: 71 	Words: unchecked|undemocratic|tribes|tropical|trustworthy|turbine|turning|unavailable|traumatic|unconditional
Topic: 36 	Words: obama|continue|work|trustworthy|turbine|turning|unavailable|unchecked|unconditional|undemocratic
Topic: 56 	Words: party|work|government|economic|family|administration|continue|health|obama|security
Topic: 12 	Words: work|obama|health|economy|economic|continue|government|administration|job|protect
Topic: 26 	Words: continue|obama|work|health|administration|new|job|include|commit|world
Topic: 37 	Words: obama|continue|work|job|security|right|country|administration|world|government
Topic: 5 	Words: obama|continue|work|administration|job|health|effort|economy|economic|country
Topic: 62 	Words: argue|legal|northern|communities|bush|size|coalition|destroy|osama|mercury
Topic: 66 	Words: work|obama|continue|job|family|country|health|eco

In [46]:
print_topics(model_repub_2012)

Topic: 79 	Words: federal|government|party|private|current|program|service|work|include|world
Topic: 70 	Words: federal|government|unconditional|underpayment|tropical|trustworthy|turbine|turning|unavailable|unchecked
Topic: 32 	Words: government|federal|program|right|turning|unconditional|unchecked|underemployment|underperforming|unavailable
Topic: 71 	Words: law|nation|administration|current|economic|public|security|policy|service|federal
Topic: 36 	Words: federal|government|nation|current|lead|law|program|administration|include|service
Topic: 26 	Words: government|federal|administration|right|program|u.s.|nation|energy|state|ensure
Topic: 16 	Words: government|current|program|federal|job|right|administration|protect|u.s.|nation
Topic: 66 	Words: government|federal|right|world|job|service|need|current|program|public
Topic: 20 	Words: federal|government|program|right|law|administration|country|job|system|nation
Topic: 37 	Words: government|federal|current|right|program|law|administrati

In [47]:
print_topics(model_demo_2016)

Topic: 79 	Words: unchecked|undemocratic|tribes|tropical|trustworthy|turbine|turning|unavailable|traumatic|unconditional
Topic: 71 	Words: unchecked|undemocratic|tribes|tropical|trustworthy|turbine|turning|unavailable|traumatic|unconditional
Topic: 56 	Words: unchecked|undemocratic|tribes|tropical|trustworthy|turbine|turning|unavailable|traumatic|unconditional
Topic: 32 	Words: right|health|underemployment|turbine|turning|unavailable|unchecked|unconditional|undemocratic|tropical
Topic: 36 	Words: pay|work|provide|protect|public|need|family|worker|service|health
Topic: 5 	Words: health|work|public|community|right|protect|continue|ensure|provide|country
Topic: 68 	Words: health|work|community|right|country|need|public|pay|access|service
Topic: 37 	Words: familiesdemocrats|laboratory|allegation|inflation|tuition|guantánamo|body|claw|pledge|abolish
Topic: 66 	Words: korea|medicine|outdoors|track|certain|duty|walk|veterans|simple|away
Topic: 62 	Words: health|community|work|right|country|pu

In [48]:
print_topics(model_repub_2016)

Topic: 28 	Words: government|country|federal|right|growth|protect|advance|market|u.s.|security
Topic: 34 	Words: government|new|time|provide|amendment|public|act|create|protect|family
Topic: 58 	Words: federal|country|government|policy|congress|party|nation|protect|right|administration
Topic: 33 	Words: country|federal|government|law|nation|administration|need|right|state|public
Topic: 24 	Words: government|right|country|federal|policy|administration|state|nation|law|congress
Topic: 7 	Words: federal|government|country|law|right|state|nation|congress|religious|security
Topic: 68 	Words: government|federal|right|law|administration|state|congress|country|need|public
Topic: 74 	Words: government|federal|right|protect|law|policy|public|state|need|nation
Topic: 16 	Words: government|federal|right|current|country|protect|administration|new|congress|public
Topic: 37 	Words: government|federal|right|law|country|congress|administration|state|protect|public


# Republican Economy 2000


In [3]:
term_dictionary = []

In [4]:
text = read_file('C:/Users/Place Holder/Downloads/Republican/61620_200011.txt')

In [5]:
term_dictionary.append("economy")

num_docs = len(text)

contexts = []

analyzed = nlp(text)

lemmas = [token.lemma_.lower() for token in analyzed]
intersect = set(lemmas).intersection(set(term_dictionary))
if len(intersect) > 0:
    context_sentences = [token.sent.as_doc() for token in analyzed if token.lemma_.lower() in term_dictionary ]
    contexts.extend(context_sentences)
        #print(context_sentences)

In [6]:
patterns = []

def pattern_creator(context):
    print("create pattern")
    pattern = [(token.pos_, token.lemma_.lower()) for token in context]
    print(pattern)

for sentence in contexts:
    for token in sentence:
        if token.lemma_.lower() in term_dictionary:
            #print(token.text, token.i, " - ", token.pos_)
            start_token = token.i - 3
            end_token = token.i + 3
            context = sentence[start_token:end_token]
            print(" -> ", context)
            try:
                new_pattern = pattern_creator(context.as_doc())
            except Exception as e:
                print(e)
            print("*" * 25)
        

 ->  
division by zero
*************************
 ->  For The New Economy
create pattern
[('ADP', 'for'), ('DET', 'the'), ('PROPN', 'new'), ('PROPN', 'economy')]
*************************
 ->  Even though our economy, and
create pattern
[('ADV', 'even'), ('ADP', 'though'), ('DET', '-pron-'), ('NOUN', 'economy'), ('PUNCT', ','), ('CCONJ', 'and')]
*************************
 ->  ago, the economy was in
create pattern
[('ADV', 'ago'), ('PUNCT', ','), ('DET', 'the'), ('NOUN', 'economy'), ('VERB', 'be'), ('ADP', 'in')]
*************************
 ->  called the New Economy: the
create pattern
[('VERB', 'call'), ('DET', 'the'), ('PROPN', 'new'), ('NOUN', 'economy'), ('PUNCT', ':'), ('DET', 'the')]
*************************
 ->  in America's economy becomes the
create pattern
[('ADP', 'in'), ('PROPN', 'america'), ('PART', "'s"), ('NOUN', 'economy'), ('VERB', 'become'), ('DET', 'the')]
*************************
 ->  surpluses, the economy is far
create pattern
[('NOUN', 'surplus'), ('PUNCT', ','

In [7]:
patterns = [
[('DET'),('ADJ', 'global'), ('NOUN','economy'),('PUNCT','.')],
      [('DET'), ('ADJ', 'global'), ('NOUN', 'economy'), ('CCONJ', 'and')],
    [('DET'), ('ADJ','global'), ('NOUN', 'economy'), ('ADP',"***") ]
]
print(patterns)    


[['DET', ('ADJ', 'global'), ('NOUN', 'economy'), ('PUNCT', '.')], ['DET', ('ADJ', 'global'), ('NOUN', 'economy'), ('CCONJ', 'and')], ['DET', ('ADJ', 'global'), ('NOUN', 'economy'), ('ADP', '***')]]


In [8]:
text1 = read_file('C:/Users/Place Holder/Downloads/Republican/61620_200011.txt')

In [9]:
new_contexts = []

analyzed = nlp(text1)
lemmas = [token.lemma_.lower() for token in analyzed]
intersect = set(lemmas).intersection(set(term_dictionary))
if len(intersect) > 0:
    for pattern in patterns:
        in_machine = 0
        pattern_pos = 0
        pattern_find = []
        finds = []
        for token in analyzed:
            if in_machine == 0 and token.pos_ == pattern[pattern_pos] :
                in_machine = 1
                pattern_pos += 1
                pattern_find.append(token)
            elif in_machine == 1:
                if pattern_pos == len(pattern):
                    print("FOUND MATCH")
                    print(pattern_find)
                    print(pattern_find[0].sent)
                    finds.append(pattern_find)
                    in_machine = 0
                    pattern_pos = 0
                    pattern_find = []
                if pattern[pattern_pos][1] == "***":
                    if token.pos_ == pattern[pattern_pos][0]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
                elif pattern[pattern_pos][0] != "NOUN":
                    if token.pos_ == pattern[pattern_pos][0]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
                else:
                    if token.pos_ == pattern[pattern_pos][0] and token.lemma_ == pattern[pattern_pos][1]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
        
        
                    
        if len(finds) > 0:
            print(finds)

FOUND MATCH
[the, new, economy, .]
The old liberal approach - using the threat of stifling regulations to redistribute wealth and opportunity - will work no better than it ever has, and perhaps much worse, in the new economy.
FOUND MATCH
[the, global, economy, ,]
In recognition of its growing importance in the global economy, we support Taiwan's accession to the World Trade Organization, as well as its participation in the World Health Organization and other multilateral institutions.  
[[the, new, economy, .], [the, global, economy, ,]]
FOUND MATCH
[the, new, economy, and]
These initiatives are grounded in a steadfast commitment to open markets, to minimal regulations, and to reducing taxes that snuff out innovation - principles at the heart of the new economy and our party.
[[the, new, economy, and]]
FOUND MATCH
[a, global, economy, without]
Now we will bring it to completion: U.S. leadership of a global economy without limits to growth.
[[a, global, economy, without]]


# Democrat Energy 2000

In [10]:
term_dictionary = []

text = read_file('C:/Users/Place Holder/Downloads/Democrat/61320_200011.txt')

term_dictionary.append("energy")

num_docs = len(text)

contexts = []

analyzed = nlp(text)

lemmas = [token.lemma_.lower() for token in analyzed]
intersect = set(lemmas).intersection(set(term_dictionary))
if len(intersect) > 0:
    context_sentences = [token.sent.as_doc() for token in analyzed if token.lemma_.lower() in term_dictionary ]
    contexts.extend(context_sentences)
        #print(context_sentences)

In [11]:
patterns = []

def pattern_creator(context):
    print("create pattern")
    pattern = [(token.pos_, token.lemma_.lower()) for token in context]
    print(pattern)

for sentence in contexts:
    for token in sentence:
        if token.lemma_.lower() in term_dictionary:
            #print(token.text, token.i, " - ", token.pos_)
            start_token = token.i - 3
            end_token = token.i + 3
            context = sentence[start_token:end_token]
            print(" -> ", context)
            try:
                new_pattern = pattern_creator(context.as_doc())
            except Exception as e:
                print(e)
            print("*" * 25)
        

 ->  best ideas and energies of the
create pattern
[('ADJ', 'good'), ('NOUN', 'idea'), ('CCONJ', 'and'), ('NOUN', 'energy'), ('ADP', 'of'), ('DET', 'the')]
*************************
 ->  living in more energy-efficient
create pattern
[('VERB', 'live'), ('ADP', 'in'), ('ADJ', 'more'), ('NOUN', 'energy'), ('PUNCT', '-'), ('ADJ', 'efficient')]
*************************
 ->  changes in the energy sector promote
create pattern
[('NOUN', 'change'), ('ADP', 'in'), ('DET', 'the'), ('NOUN', 'energy'), ('NOUN', 'sector'), ('VERB', 'promote')]
*************************
 ->  deployment of clean energy technologies,
create pattern
[('NOUN', 'deployment'), ('ADP', 'of'), ('ADJ', 'clean'), ('NOUN', 'energy'), ('NOUN', 'technology'), ('PUNCT', ',')]
*************************
 ->  make all our energy sources cleaner
create pattern
[('VERB', 'make'), ('DET', 'all'), ('DET', '-pron-'), ('NOUN', 'energy'), ('NOUN', 'source'), ('ADV', 'cleaner')]
*************************
 ->  deployment of clean energy te

In [12]:
patterns = [
      
[('NOUN'),('ADP','of'), ('ADJ',"***"),('NOUN','energy')],
      [('NOUN'), ('ADP', "****"), ('PART', 'energy'), ('NOUN','health')],
      [('VERB'), ('DET', "****"), ('DET', "****"), ('NOUN','energy')]   
]

print (patterns)




[['NOUN', ('ADP', 'of'), ('ADJ', '***'), ('NOUN', 'energy')], ['NOUN', ('ADP', '****'), ('PART', 'energy'), ('NOUN', 'health')], ['VERB', ('DET', '****'), ('DET', '****'), ('NOUN', 'energy')]]


In [13]:
new_contexts = []

analyzed = nlp(text)
lemmas = [token.lemma_.lower() for token in analyzed]
intersect = set(lemmas).intersection(set(term_dictionary))
if len(intersect) > 0:
    for pattern in patterns:
        in_machine = 0
        pattern_pos = 0
        pattern_find = []
        finds = []
        for token in analyzed:
            if in_machine == 0 and token.pos_ == pattern[pattern_pos] :
                in_machine = 1
                pattern_pos += 1
                pattern_find.append(token)
            elif in_machine == 1:
                if pattern_pos == len(pattern):
                    print("FOUND MATCH")
                    print(pattern_find)
                    print(pattern_find[0].sent)
                    finds.append(pattern_find)
                    in_machine = 0
                    pattern_pos = 0
                    pattern_find = []
                if pattern[pattern_pos][1] == "***":
                    if token.pos_ == pattern[pattern_pos][0]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
                elif pattern[pattern_pos][0] != "NOUN":
                    if token.pos_ == pattern[pattern_pos][0]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
                else:
                    if token.pos_ == pattern[pattern_pos][0] and token.lemma_ == pattern[pattern_pos][1]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
        
        
                    
        if len(finds) > 0:
            print(finds)

FOUND MATCH
[deployment, of, clean, energy]
Democrats believe that with the right incentives to encourage the development and deployment of clean energy technologies, we can make all our energy sources cleaner, safer, and healthier for our children.
[[deployment, of, clean, energy]]


# Democrat Health 2008

In [14]:
term_dictionary = []

text = read_file('C:/Users/Place Holder/Downloads/Democrat/61320_200811.txt')


term_dictionary.append("health")

num_docs = len(text)

contexts = []

analyzed = nlp(text)

lemmas = [token.lemma_.lower() for token in analyzed]
intersect = set(lemmas).intersection(set(term_dictionary))
if len(intersect) > 0:
    context_sentences = [token.sent.as_doc() for token in analyzed if token.lemma_.lower() in term_dictionary ]
    contexts.extend(context_sentences)
        #print(context_sentences)

In [15]:
patterns = []

def pattern_creator(context):
    print("create pattern")
    pattern = [(token.pos_, token.lemma_.lower()) for token in context]
    print(pattern)

for sentence in contexts:
    for token in sentence:
        if token.lemma_.lower() in term_dictionary:
            #print(token.text, token.i, " - ", token.pos_)
            start_token = token.i - 3
            end_token = token.i + 3
            context = sentence[start_token:end_token]
            print(" -> ", context)
            try:
                new_pattern = pattern_creator(context.as_doc())
            except Exception as e:
                print(e)
            print("*" * 25)
        

 ->  quality and affordable health care is
create pattern
[('NOUN', 'quality'), ('CCONJ', 'and'), ('ADJ', 'affordable'), ('NOUN', 'health'), ('NOUN', 'care'), ('VERB', 'be')]
*************************
 ->  citizens have no health insurance while
create pattern
[('NOUN', 'citizen'), ('VERB', 'have'), ('DET', 'no'), ('NOUN', 'health'), ('NOUN', 'insurance'), ('ADP', 'while')]
*************************
 ->  , and denied health insurance to
create pattern
[('PUNCT', ','), ('CCONJ', 'and'), ('VERB', 'deny'), ('NOUN', 'health'), ('NOUN', 'insurance'), ('ADP', 'to')]
*************************
 ->  unaffordable, unavailable health care,
create pattern
[('ADJ', 'unaffordable'), ('PUNCT', ','), ('ADJ', 'unavailable'), ('NOUN', 'health'), ('NOUN', 'care'), ('PUNCT', ',')]
*************************
 ->  
division by zero
*************************
 ->  same time, health costs have
create pattern
[('ADJ', 'same'), ('NOUN', 'time'), ('PUNCT', ','), ('NOUN', 'health'), ('NOUN', 'cost'), ('VERB', 'have

In [16]:
patterns = [
      [('ADP'),('NOUN', 'health'), ('NOUN','insurance'),('NOUN',"***")],
      [('ADP'), ('NOUN', 'health'), ('NOUN', 'care'), ('CCONJ', 'and')]
]
print(patterns)




[['ADP', ('NOUN', 'health'), ('NOUN', 'insurance'), ('NOUN', '***')], ['ADP', ('NOUN', 'health'), ('NOUN', 'care'), ('CCONJ', 'and')]]


In [17]:
new_contexts = []

analyzed = nlp(text)
lemmas = [token.lemma_.lower() for token in analyzed]
intersect = set(lemmas).intersection(set(term_dictionary))
if len(intersect) > 0:
    for pattern in patterns:
        in_machine = 0
        pattern_pos = 0
        pattern_find = []
        finds = []
        for token in analyzed:
            if in_machine == 0 and token.pos_ == pattern[pattern_pos] :
                in_machine = 1
                pattern_pos += 1
                pattern_find.append(token)
            elif in_machine == 1:
                if pattern_pos == len(pattern):
                    print("FOUND MATCH")
                    print(pattern_find)
                    print(pattern_find[0].sent)
                    finds.append(pattern_find)
                    in_machine = 0
                    pattern_pos = 0
                    pattern_find = []
                if pattern[pattern_pos][1] == "***":
                    if token.pos_ == pattern[pattern_pos][0]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
                elif pattern[pattern_pos][0] != "NOUN":
                    if token.pos_ == pattern[pattern_pos][0]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
                else:
                    if token.pos_ == pattern[pattern_pos][0] and token.lemma_ == pattern[pattern_pos][1]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
        
        
                    
        if len(finds) > 0:
            print(finds)







FOUND MATCH
[of, health, insurance, plans]
Families and individuals should have the option of keeping the coverage they have or choosing from a wide array of health insurance plans, including many private health insurance options and a public plan.
[[of, health, insurance, plans]]
FOUND MATCH
[in, health, care, and]
For families making more than $250,000, we’ll ask them to give back a portion of the Bush tax cuts to invest in health care and other key priorities.
FOUND MATCH
[of, health, care, and]
Instead, we must strengthen our public programs by bringing down the cost of health care and reducing waste while making strategic investments that emphasize quality, efficiency, and prevention.
FOUND MATCH
[to, health, care, and]
We will expand access to health care and nutrition for women and reduce the burden of maternal mortality.
[[in, health, care, and], [of, health, care, and], [to, health, care, and]]


# Republican Health 2008

In [18]:
term_dictionary = []

text = read_file('C:/Users/Place Holder/Downloads/Republican/61620_200811.txt')

term_dictionary.append("health")

num_docs = len(text)

contexts = []

analyzed = nlp(text)

lemmas = [token.lemma_.lower() for token in analyzed]
intersect = set(lemmas).intersection(set(term_dictionary))
if len(intersect) > 0:
    context_sentences = [token.sent.as_doc() for token in analyzed if token.lemma_.lower() in term_dictionary ]
    contexts.extend(context_sentences)
        #print(context_sentences)

In [19]:
patterns = []

def pattern_creator(context):
    print("create pattern")
    pattern = [(token.pos_, token.lemma_.lower()) for token in context]
    print(pattern)

for sentence in contexts:
    for token in sentence:
        if token.lemma_.lower() in term_dictionary:
            #print(token.text, token.i, " - ", token.pos_)
            start_token = token.i - 3
            end_token = token.i + 3
            context = sentence[start_token:end_token]
            print(" -> ", context)
            try:
                new_pattern = pattern_creator(context.as_doc())
            except Exception as e:
                print(e)
            print("*" * 25)
        

 ->  our nation's health and safety
create pattern
[('DET', '-pron-'), ('NOUN', 'nation'), ('PART', "'s"), ('NOUN', 'health'), ('CCONJ', 'and'), ('NOUN', 'safety')]
*************************
 ->  , wreck our health care delivery
create pattern
[('PUNCT', ','), ('VERB', 'wreck'), ('DET', '-pron-'), ('NOUN', 'health'), ('NOUN', 'care'), ('NOUN', 'delivery')]
*************************
 ->  the pay, health care,
create pattern
[('DET', 'the'), ('NOUN', 'pay'), ('PUNCT', ','), ('NOUN', 'health'), ('NOUN', 'care'), ('PUNCT', ',')]
*************************
 ->  
division by zero
*************************
 ->  to the special health care needs
create pattern
[('ADP', 'to'), ('DET', 'the'), ('ADJ', 'special'), ('NOUN', 'health'), ('NOUN', 'care'), ('NOUN', 'need')]
*************************
 ->  for more mental health professionals who
create pattern
[('ADP', 'for'), ('ADJ', 'more'), ('ADJ', 'mental'), ('NOUN', 'health'), ('NOUN', 'professional'), ('PRON', 'who')]
*************************
 -> 

In [20]:
patterns = [
      [('ADP'),('NOUN', 'health'), ('NOUN','insurance'),('NOUN',"***")],
      [('ADP'), ('NOUN', 'health'), ('NOUN', 'care'), ('ADP', '***')]
]
print(patterns)

[['ADP', ('NOUN', 'health'), ('NOUN', 'insurance'), ('NOUN', '***')], ['ADP', ('NOUN', 'health'), ('NOUN', 'care'), ('ADP', '***')]]


In [21]:
new_contexts = []

analyzed = nlp(text)
lemmas = [token.lemma_.lower() for token in analyzed]
intersect = set(lemmas).intersection(set(term_dictionary))
if len(intersect) > 0:
    for pattern in patterns:
        in_machine = 0
        pattern_pos = 0
        pattern_find = []
        finds = []
        for token in analyzed:
            if in_machine == 0 and token.pos_ == pattern[pattern_pos] :
                in_machine = 1
                pattern_pos += 1
                pattern_find.append(token)
            elif in_machine == 1:
                if pattern_pos == len(pattern):
                    print("FOUND MATCH")
                    print(pattern_find)
                    print(pattern_find[0].sent)
                    finds.append(pattern_find)
                    in_machine = 0
                    pattern_pos = 0
                    pattern_find = []
                if pattern[pattern_pos][1] == "***":
                    if token.pos_ == pattern[pattern_pos][0]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
                elif pattern[pattern_pos][0] != "NOUN":
                    if token.pos_ == pattern[pattern_pos][0]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
                else:
                    if token.pos_ == pattern[pattern_pos][0] and token.lemma_ == pattern[pattern_pos][1]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
        
        
                    
        if len(finds) > 0:
            print(finds)

FOUND MATCH
[of, health, care, to]
Our plan to return control of health care to patients and providers will benefit small   business employers and employees alike.
FOUND MATCH
[of, health, care, in]
The American people rejected Democrats~ attempted government takeover of health care in 1993, and they remain skeptical of politicians who would send us down that road.
FOUND MATCH
[of, health, care, throughout]
We offer a detailed program that will improve the quality, cost, and coverage of health care throughout the nation, and we will turn that plan into reality.
FOUND MATCH
[of, health, care, as]
Frivolous lawsuits also drive up the cost of health care as health care providers are forced to practice defensive medicine, such as ordering unnecessary tests.
[[of, health, care, to], [of, health, care, in], [of, health, care, throughout], [of, health, care, as]]


# Republican Jobs 2012

In [22]:
term_dictionary = []

text = read_file('C:/Users/Place Holder/Downloads/Republican/61620_201211.txt')


term_dictionary.append("job")

num_docs = len(text)

contexts = []

analyzed = nlp(text)

lemmas = [token.lemma_.lower() for token in analyzed]
intersect = set(lemmas).intersection(set(term_dictionary))
if len(intersect) > 0:
    context_sentences = [token.sent.as_doc() for token in analyzed if token.lemma_.lower() in term_dictionary ]
    contexts.extend(context_sentences)
        #print(context_sentences)

In [23]:
patterns = []

def pattern_creator(context):
    print("create pattern")
    pattern = [(token.pos_, token.lemma_.lower()) for token in context]
    print(pattern)

for sentence in contexts:
    for token in sentence:
        if token.lemma_.lower() in term_dictionary:
            #print(token.text, token.i, " - ", token.pos_)
            start_token = token.i - 3
            end_token = token.i + 3
            context = sentence[start_token:end_token]
            print(" -> ", context)
            try:
                new_pattern = pattern_creator(context.as_doc())
            except Exception as e:
                print(e)
            print("*" * 25)
        

 ->  burden of lost jobs, lost
create pattern
[('NOUN', 'burden'), ('ADP', 'of'), ('VERB', 'lose'), ('NOUN', 'job'), ('PUNCT', ','), ('VERB', 'lose')]
*************************
 ->  growth, destroyed jobs, halted
create pattern
[('NOUN', 'growth'), ('PUNCT', ','), ('VERB', 'destroy'), ('NOUN', 'job'), ('PUNCT', ','), ('VERB', 'halt')]
*************************
 ->  business formation and job creation.
create pattern
[('NOUN', 'business'), ('NOUN', 'formation'), ('CCONJ', 'and'), ('NOUN', 'job'), ('NOUN', 'creation'), ('PUNCT', '.')]
*************************
 ->  to workers without jobs,families
create pattern
[('ADP', 'to'), ('NOUN', 'worker'), ('ADP', 'without'), ('NOUN', 'job'), ('PUNCT', ','), ('NOUN', 'family')]
*************************
 ->  
division by zero
*************************
 ->  
division by zero
*************************
 ->  temporary or artificial jobs.
create pattern
[('ADJ', 'temporary'), ('CCONJ', 'or'), ('ADJ', 'artificial'), ('NOUN', 'job'), ('PUNCT', '.')]
***

In [24]:
patterns = [
      [('ADJ'),('NOUN','job'), ('CCONJ',"***"),('ADJ',"***")],
      [('ADJ'), ('PART', "***"), ('VERB', 'create'), ('NOUN','job')],
      [('ADP'), ('NOUN', 'job'), ('ADP', 'to'), ('NOUN','job')],
      [('VERB'), ('ADP', "***"), ('NOUN', 'job'), ('NOUN',"***")]
]

print(patterns)




[['ADJ', ('NOUN', 'job'), ('CCONJ', '***'), ('ADJ', '***')], ['ADJ', ('PART', '***'), ('VERB', 'create'), ('NOUN', 'job')], ['ADP', ('NOUN', 'job'), ('ADP', 'to'), ('NOUN', 'job')], ['VERB', ('ADP', '***'), ('NOUN', 'job'), ('NOUN', '***')]]


In [25]:
new_contexts = []

analyzed = nlp(text)
lemmas = [token.lemma_.lower() for token in analyzed]
intersect = set(lemmas).intersection(set(term_dictionary))
if len(intersect) > 0:
    for pattern in patterns:
        in_machine = 0
        pattern_pos = 0
        pattern_find = []
        finds = []
        for token in analyzed:
            if in_machine == 0 and token.pos_ == pattern[pattern_pos] :
                in_machine = 1
                pattern_pos += 1
                pattern_find.append(token)
            elif in_machine == 1:
                if pattern_pos == len(pattern):
                    print("FOUND MATCH")
                    print(pattern_find)
                    print(pattern_find[0].sent)
                    finds.append(pattern_find)
                    in_machine = 0
                    pattern_pos = 0
                    pattern_find = []
                if pattern[pattern_pos][1] == "***":
                    if token.pos_ == pattern[pattern_pos][0]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
                elif pattern[pattern_pos][0] != "NOUN":
                    if token.pos_ == pattern[pattern_pos][0]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
                else:
                    if token.pos_ == pattern[pattern_pos][0] and token.lemma_ == pattern[pattern_pos][1]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
        
        
                    
        if len(finds) > 0:
            print(finds)







FOUND MATCH
[sustainable, jobs, and, economic]
Energy, Agriculture and the EnvironmentWe are the party of sustainable jobs and economic growth – through American energy, agriculture, and environmental policy.
[[sustainable, jobs, and, economic]]
FOUND MATCH
[determined, to, create, jobs]
We are determined to create jobs,spur economic growth,lower energy prices,and strengthen our energy industry.
[[determined, to, create, jobs]]
FOUND MATCH
[from, job, to, job]
Today’s highly mobile work force requires portability of insurance coverage that can go with them from job to job.
[[from, job, to, job]]
FOUND MATCH
[related, to, job, opportunities]
It is time to get back to basics and to higher education programs directly related to job opportunities.
[[related, to, job, opportunities]]


# Democrats Jobs 2012

In [26]:
term_dictionary = []

text = read_file('C:/Users/Place Holder/Downloads/Democrat/61320_201211.txt')


term_dictionary.append("job")

num_docs = len(text)

contexts = []

analyzed = nlp(text)

lemmas = [token.lemma_.lower() for token in analyzed]
intersect = set(lemmas).intersection(set(term_dictionary))
if len(intersect) > 0:
    context_sentences = [token.sent.as_doc() for token in analyzed if token.lemma_.lower() in term_dictionary ]
    contexts.extend(context_sentences)
        #print(context_sentences)

In [27]:
patterns = []

def pattern_creator(context):
    print("create pattern")
    pattern = [(token.pos_, token.lemma_.lower()) for token in context]
    print(pattern)

for sentence in contexts:
    for token in sentence:
        if token.lemma_.lower() in term_dictionary:
            #print(token.text, token.i, " - ", token.pos_)
            start_token = token.i - 3
            end_token = token.i + 3
            context = sentence[start_token:end_token]
            print(" -> ", context)
            try:
                new_pattern = pattern_creator(context.as_doc())
            except Exception as e:
                print(e)
            print("*" * 25)
        

 ->  to find a job that pays
create pattern
[('PART', 'to'), ('VERB', 'find'), ('DET', 'a'), ('NOUN', 'job'), ('DET', 'that'), ('VERB', 'pay')]
*************************
 ->  automatically translate into jobs and prosperity
create pattern
[('ADV', 'automatically'), ('VERB', 'translate'), ('ADP', 'into'), ('NOUN', 'job'), ('CCONJ', 'and'), ('NOUN', 'prosperity')]
*************************
 ->  so many their jobs, homes
create pattern
[('ADV', 'so'), ('ADJ', 'many'), ('DET', '-pron-'), ('NOUN', 'job'), ('PUNCT', ','), ('NOUN', 'home')]
*************************
 ->  are working multiple jobs and struggling
create pattern
[('VERB', 'be'), ('VERB', 'work'), ('ADJ', 'multiple'), ('NOUN', 'job'), ('CCONJ', 'and'), ('VERB', 'struggle')]
*************************
 ->  Americans lost their jobs – more
create pattern
[('PROPN', 'americans'), ('VERB', 'lose'), ('DET', '-pron-'), ('NOUN', 'job'), ('PUNCT', '–'), ('ADJ', 'more')]
*************************
 ->  created 4.5 million jobs,and
create pa

In [28]:
patterns = [
      [('VERB'), ('NUM', '4.5'), ('NUM', 'million'), ('NOUN', 'job')],
      [('ADJ'), ('PART', "***"), ('VERB', 'create'), ('NOUN','job')],
      [('ADP'), ('NOUN', 'job'), ('ADP', 'to'), ('NOUN','job')],
      [('VERB'), ('ADP', "***"), ('NOUN', 'job'), ('NOUN',"***")],
      [('ADJ'), ('NOUN', 'growth'), ('CCONJ', 'and'), ('NOUN', 'job')]
]

print(patterns)




[['VERB', ('NUM', '4.5'), ('NUM', 'million'), ('NOUN', 'job')], ['ADJ', ('PART', '***'), ('VERB', 'create'), ('NOUN', 'job')], ['ADP', ('NOUN', 'job'), ('ADP', 'to'), ('NOUN', 'job')], ['VERB', ('ADP', '***'), ('NOUN', 'job'), ('NOUN', '***')], ['ADJ', ('NOUN', 'growth'), ('CCONJ', 'and'), ('NOUN', 'job')]]


In [29]:
new_contexts = []

analyzed = nlp(text)
lemmas = [token.lemma_.lower() for token in analyzed]
intersect = set(lemmas).intersection(set(term_dictionary))
if len(intersect) > 0:
    for pattern in patterns:
        in_machine = 0
        pattern_pos = 0
        pattern_find = []
        finds = []
        for token in analyzed:
            if in_machine == 0 and token.pos_ == pattern[pattern_pos] :
                in_machine = 1
                pattern_pos += 1
                pattern_find.append(token)
            elif in_machine == 1:
                if pattern_pos == len(pattern):
                    print("FOUND MATCH")
                    print(pattern_find)
                    print(pattern_find[0].sent)
                    finds.append(pattern_find)
                    in_machine = 0
                    pattern_pos = 0
                    pattern_find = []
                if pattern[pattern_pos][1] == "***":
                    if token.pos_ == pattern[pattern_pos][0]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
                elif pattern[pattern_pos][0] != "NOUN":
                    if token.pos_ == pattern[pattern_pos][0]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
                else:
                    if token.pos_ == pattern[pattern_pos][0] and token.lemma_ == pattern[pattern_pos][1]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
        
        
                    
        if len(finds) > 0:
            print(finds)







FOUND MATCH
[create, one, million, jobs]
But Republicans in Congress blocked other provisions that independent analysis said could create one million jobs.
[[create, one, million, jobs]]
FOUND MATCH
[investing, in, job, creation]
By investing in job creation, clean energy, agriculture, and education, Democrats have built a stronger rural economy where future generations can enjoy the rural way of life.
[[investing, in, job, creation]]
FOUND MATCH
[economic, growth, and, job]
Even as President Obama remains committed to working with both parties, he and his cabinet pursued a series of executive actions to help spark economic growth and job creation,including expanding access to refinancing for families who have stayed current on their mortgages,challenging Community Health Centers to hire veterans,accelerating permitting for transportation projects,cutting waste and reducing improper payments,and enabling student loan borrowers to cap their payments at a percentage of income.
[[economic

# Democrats Energy 2016

In [30]:
term_dictionary = []

text = read_file('C:/Users/Place Holder/Downloads/Democrat/61320_201611.txt')


term_dictionary.append("energy")

num_docs = len(text)

contexts = []

analyzed = nlp(text)

lemmas = [token.lemma_.lower() for token in analyzed]
intersect = set(lemmas).intersection(set(term_dictionary))
if len(intersect) > 0:
    context_sentences = [token.sent.as_doc() for token in analyzed if token.lemma_.lower() in term_dictionary ]
    contexts.extend(context_sentences)
        #print(context_sentences)

In [31]:
patterns = []

def pattern_creator(context):
    print("create pattern")
    pattern = [(token.pos_, token.lemma_.lower()) for token in context]
    print(pattern)

for sentence in contexts:
    for token in sentence:
        if token.lemma_.lower() in term_dictionary:
            #print(token.text, token.i, " - ", token.pos_)
            start_token = token.i - 3
            end_token = token.i + 3
            context = sentence[start_token:end_token]
            print(" -> ", context)
            try:
                new_pattern = pattern_creator(context.as_doc())
            except Exception as e:
                print(e)
            print("*" * 25)
        

 ->  more of our energy from the
create pattern
[('ADJ', 'more'), ('ADP', 'of'), ('DET', '-pron-'), ('NOUN', 'energy'), ('ADP', 'from'), ('DET', 'the')]
*************************
 ->  becoming the clean energy superpower of
create pattern
[('VERB', 'become'), ('DET', 'the'), ('ADJ', 'clean'), ('NOUN', 'energy'), ('NOUN', 'superpower'), ('ADP', 'of')]
*************************
 ->  build 21st century energy and water
create pattern
[('VERB', 'build'), ('ADJ', '21st'), ('NOUN', 'century'), ('NOUN', 'energy'), ('CCONJ', 'and'), ('NOUN', 'water')]
*************************
 ->  for investments in energy, water
create pattern
[('ADP', 'for'), ('NOUN', 'investment'), ('ADP', 'in'), ('NOUN', 'energy'), ('PUNCT', ','), ('NOUN', 'water')]
*************************
 ->  -Paying Clean Energy JobsWe must
create pattern
[('PUNCT', '-'), ('VERB', 'pay'), ('PROPN', 'clean'), ('PROPN', 'energy'), ('PROPN', 'jobswe'), ('VERB', 'must')]
*************************
 ->  in global clean energy, high
create 

In [32]:
patterns = [
      
[('ADJ'),('NOUN',"***"), ('ADP',"***"),('NOUN','energy')],
      [('DET'), ('ADJ', "***"), ('NOUN', 'energy'), ('NOUN',"***")],
      [('VERB'), ('PRON', "***"), ('NOUN', 'energy'), ('NOUN',"***")], 
    
]
print(patterns)




[['ADJ', ('NOUN', '***'), ('ADP', '***'), ('NOUN', 'energy')], ['DET', ('ADJ', '***'), ('NOUN', 'energy'), ('NOUN', '***')], ['VERB', ('PRON', '***'), ('NOUN', 'energy'), ('NOUN', '***')]]


In [33]:
new_contexts = []

analyzed = nlp(text)
lemmas = [token.lemma_.lower() for token in analyzed]
intersect = set(lemmas).intersection(set(term_dictionary))
if len(intersect) > 0:
    for pattern in patterns:
        in_machine = 0
        pattern_pos = 0
        pattern_find = []
        finds = []
        for token in analyzed:
            if in_machine == 0 and token.pos_ == pattern[pattern_pos] :
                in_machine = 1
                pattern_pos += 1
                pattern_find.append(token)
            elif in_machine == 1:
                if pattern_pos == len(pattern):
                    print("FOUND MATCH")
                    print(pattern_find)
                    print(pattern_find[0].sent)
                    finds.append(pattern_find)
                    in_machine = 0
                    pattern_pos = 0
                    pattern_find = []
                if pattern[pattern_pos][1] == "***":
                    if token.pos_ == pattern[pattern_pos][0]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
                elif pattern[pattern_pos][0] != "NOUN":
                    if token.pos_ == pattern[pattern_pos][0]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
                else:
                    if token.pos_ == pattern[pattern_pos][0] and token.lemma_ == pattern[pattern_pos][1]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
        
        
                    
        if len(finds) > 0:
            print(finds)







FOUND MATCH
[new, investments, in, energy]
Democrats will fight to make sure these workers and their families get the benefits they have earned and the respect they deserve,and we will make new investments in energy-producing communities to help create jobs and build a brighter and more resilient economic future.
[[new, investments, in, energy]]
FOUND MATCH
[the, clean, energy, superpower]
Democrats believe that climate change poses a real and urgent threat to our economy, our national security, and our children’s health and futures,and that Americans deserve the jobs and security that come from becoming the clean energy superpower of the 21st century.
FOUND MATCH
[the, clean, energy, superpower]
We will promote collaborative stewardship of our natural resources,while developing clean fuels that will grow our economy, lower our energy bills, combat climate change, and make America the clean energy superpower of the 21st century.
FOUND MATCH
[a, clean, energy, economy]
We will take bold

# Republicans Economy 2016

In [34]:
term_dictionary = []

text = read_file('C:/Users/Place Holder/Downloads/Republican/61620_201611.txt')

term_dictionary.append("economy")

num_docs = len(text)

contexts = []

analyzed = nlp(text)

lemmas = [token.lemma_.lower() for token in analyzed]
intersect = set(lemmas).intersection(set(term_dictionary))
if len(intersect) > 0:
    context_sentences = [token.sent.as_doc() for token in analyzed if token.lemma_.lower() in term_dictionary ]
    contexts.extend(context_sentences)
        #print(context_sentences)

In [35]:
patterns = []

def pattern_creator(context):
    print("create pattern")
    pattern = [(token.pos_, token.lemma_.lower()) for token in context]
    print(pattern)

for sentence in contexts:
    for token in sentence:
        if token.lemma_.lower() in term_dictionary:
            #print(token.text, token.i, " - ", token.pos_)
            start_token = token.i - 3
            end_token = token.i + 3
            context = sentence[start_token:end_token]
            print(" -> ", context)
            try:
                new_pattern = pattern_creator(context.as_doc())
            except Exception as e:
                print(e)
            print("*" * 25)
        

 ->  
division by zero
*************************
 ->  strong and healthy economy.
create pattern
[('ADJ', 'strong'), ('CCONJ', 'and'), ('ADJ', 'healthy'), ('NOUN', 'economy'), ('PUNCT', '.')]
*************************
 ->  a free market economy that he
create pattern
[('DET', 'a'), ('ADJ', 'free'), ('NOUN', 'market'), ('NOUN', 'economy'), ('DET', 'that'), ('PRON', '-pron-')]
*************************
 ->  American DreamRebuilding the Economy and Creating
create pattern
[('PROPN', 'american'), ('PROPN', 'dreamrebuilding'), ('DET', 'the'), ('PROPN', 'economy'), ('CCONJ', 'and'), ('PROPN', 'creating')]
*************************
 ->  of a growing economy that gives
create pattern
[('ADP', 'of'), ('DET', 'a'), ('VERB', 'grow'), ('NOUN', 'economy'), ('DET', 'that'), ('VERB', 'give')]
*************************
 ->  American DreamRebuilding the Economy and Creating
create pattern
[('PROPN', 'american'), ('PROPN', 'dreamrebuilding'), ('DET', 'the'), ('PROPN', 'economy'), ('CCONJ', 'and'), ('PRO

In [36]:
patterns = [
[('DET'),('ADJ', 'global'), ('NOUN','economy'),('PUNCT','.')],
      [('DET'), ('ADJ', 'global'), ('NOUN', 'economy'), ('CCONJ', 'and')],
    [('DET'), ('ADJ','global'), ('NOUN', 'economy'), ('ADP',"***") ]
]
print(patterns)    


[['DET', ('ADJ', 'global'), ('NOUN', 'economy'), ('PUNCT', '.')], ['DET', ('ADJ', 'global'), ('NOUN', 'economy'), ('CCONJ', 'and')], ['DET', ('ADJ', 'global'), ('NOUN', 'economy'), ('ADP', '***')]]


In [37]:
new_contexts = []

analyzed = nlp(text)
lemmas = [token.lemma_.lower() for token in analyzed]
intersect = set(lemmas).intersection(set(term_dictionary))
if len(intersect) > 0:
    for pattern in patterns:
        in_machine = 0
        pattern_pos = 0
        pattern_find = []
        finds = []
        for token in analyzed:
            if in_machine == 0 and token.pos_ == pattern[pattern_pos] :
                in_machine = 1
                pattern_pos += 1
                pattern_find.append(token)
            elif in_machine == 1:
                if pattern_pos == len(pattern):
                    print("FOUND MATCH")
                    print(pattern_find)
                    print(pattern_find[0].sent)
                    finds.append(pattern_find)
                    in_machine = 0
                    pattern_pos = 0
                    pattern_find = []
                if pattern[pattern_pos][1] == "***":
                    if token.pos_ == pattern[pattern_pos][0]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
                elif pattern[pattern_pos][0] != "NOUN":
                    if token.pos_ == pattern[pattern_pos][0]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
                else:
                    if token.pos_ == pattern[pattern_pos][0] and token.lemma_ == pattern[pattern_pos][1]:
                            pattern_pos += 1
                            pattern_find.append(token)
                    else:
                            in_machine = 0
                            pattern_pos = 0
                            pattern_find = []
        
        
                    
        if len(finds) > 0:
            print(finds)

FOUND MATCH
[the, global, economy, .]
That equation governs our policies regarding U.S. corporations in the global economy.
FOUND MATCH
[the, global, economy, .]
We encourage public-private partnerships to provide predictable support for connecting rural areasso that every American can fully participate in the global economy.
FOUND MATCH
[our, new, economy, .]
Today they are the entrepreneurs, independent contractors, and small business men and women of our new economy.
FOUND MATCH
[the, new, economy, .]
They are attacking the franchise model of business development, which is essential to the flexibility and creativity of the new economy.
FOUND MATCH
[the, digital, economy, .]
These increased privacy protections have become crucial to the digital economy.
FOUND MATCH
[the, digital, economy, ,]
With the rise of the digital economy, it has become even more critical that we protect intellectual property rightsand preserve freedom of contract rather than create regulatory barriers to creat