## Text search with English example
### Change these to reflect your data folder and search terms

In [1]:
document_folder = "documents/sotu/*.txt"

search_terms = ["america", "free", "just", "equal", "war"]

#This must be an excel/xlsx filename
results_filename = "results_sotu.xlsx"

#To show the full text, put "True" (capitalize 'T')
show_all_data = False 

### Install and import dependencies

In [2]:
!pip install openpyxl



In [3]:
import glob
import pandas as pd

if show_all_data is True:
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', None)
    pd.set_option('display.max_colwidth', None)
    
else:
    pd.set_option('display.max_rows', 100)
    pd.set_option('display.max_columns', 20)
    pd.set_option('display.width', 50)
    pd.set_option('display.max_colwidth', 500)

### Scan and load all text files

In [4]:
glob.glob(document_folder)

['documents/sotu/1923_Coolidge.txt',
 'documents/sotu/2016_Obama.txt',
 'documents/sotu/1889_Harrison.txt',
 'documents/sotu/1795_Washington.txt',
 'documents/sotu/1840_Buren.txt',
 'documents/sotu/1856_Pierce.txt',
 'documents/sotu/1949_Truman.txt',
 'documents/sotu/1989_Bush.txt',
 'documents/sotu/1894_Cleveland.txt',
 'documents/sotu/1902_Roosevelt.txt',
 'documents/sotu/1829_Jackson.txt',
 'documents/sotu/2005_Bush.txt',
 'documents/sotu/2014_Obama.txt',
 'documents/sotu/1919_Wilson.txt',
 'documents/sotu/1792_Washington.txt',
 'documents/sotu/1877_Hayes.txt',
 'documents/sotu/1811_Madison.txt',
 'documents/sotu/1859_Buchanan.txt',
 'documents/sotu/1907_Roosevelt.txt',
 'documents/sotu/1932_Hoover.txt',
 'documents/sotu/1858_Buchanan.txt',
 'documents/sotu/2015_Obama.txt',
 'documents/sotu/1885_Cleveland.txt',
 'documents/sotu/1882_Arthur.txt',
 'documents/sotu/1820_Monroe.txt',
 'documents/sotu/1981_Carter.txt',
 'documents/sotu/1924_Coolidge.txt',
 'documents/sotu/1921_Harding.tx

In [5]:
all_texts = {}

for filename in glob.glob(document_folder):
    with open(filename) as file:
        all_texts[filename] = file.read().lower()

### Convert to pandas dataframe

In [6]:
all_texts_df = pd.DataFrame(all_texts,index=[0]).T
all_texts_df.columns = ["text"]
all_texts_df.sample(5)

Unnamed: 0,text
documents/sotu/1926_Coolidge.txt,"members of the congress:\n\nin reporting to the congress the state of the union, i find it impossible\nto characterize it other than one of general peace and prosperity. in some\nquarters our diplomacy is vexed with difficult and as yet unsolved\nproblems, but nowhere are we met with armed conflict. if some occupations\nand areas are not flourishing, in none does there remain any acute chronic\ndepression. what the country requires is not so much new policies as a\nsteady continuation of tho..."
documents/sotu/1923_Coolidge.txt,"since the close of the last congress the nation has lost president harding.\nthe world knew his kindness and his humanity, his greatness and his\ncharacter. he has left his mark upon history. he has made justice more\ncertain and peace more secure. the surpassing tribute paid to his memory as\nhe was borne across the continent to rest at last at home revealed the\nplace he held in the hearts of the american people. but this is not the\noccasion for extended reference to the man or his work. ..."
documents/sotu/1850_Fillmore.txt,"fellow-citizens of the senate and of the house of representatives:\n\nbeing suddenly called in the midst of the last session of congress by a\npainful dispensation of divine providence to the responsible station which\ni now hold, i contented myself with such communications to the legislature\nas the exigency of the moment seemed to require. the country was shrouded\nin mourning for the loss of its venerable chief magistrate and all hearts\nwere penetrated with grief. neither the time nor th..."
documents/sotu/1947_Truman.txt,"mr. president, mr. speaker, members of the congress of the united states:\n\nit looks like a good many of you have moved over to the left since i was\nhere last!\n\ni come before you today to report on the state of the union and, in the\nwords of the constitution, to recommend such measures as i judge necessary\nand expedient.\n\ni come also to welcome you as you take up your duties and to discuss with\nyou the manner in which you and i should fulfill our obligations to the\namerican people ..."
documents/sotu/1808_Jefferson.txt,"the senate and house of representatives of the united states:\n\nit would have been a source, fellow citizens, of much gratification if our\nlast communications from europe had enabled me to inform you that the\nbelligerent nations, whose disregard of neutral rights has been so\ndestructive to our commerce, had become awakened to the duty and true\npolicy of revoking their unrighteous edicts. that no means might be omitted\nto produce this salutary effect, i lost no time in availing myself o..."


### Calculate word counts

In [7]:
def word_count(text):
    return len(text.split(" "))

In [8]:
all_texts_df['text'].apply(word_count)[0:5]

documents/sotu/1923_Coolidge.txt       6077
documents/sotu/2016_Obama.txt          4959
documents/sotu/1889_Harrison.txt      11861
documents/sotu/1795_Washington.txt     1806
documents/sotu/1840_Buren.txt          8202
Name: text, dtype: int64

In [9]:
all_texts_df['word count'] = all_texts_df['text'].apply(word_count)
all_texts_df.sample(5)

Unnamed: 0,text,word count
documents/sotu/1808_Jefferson.txt,"the senate and house of representatives of the united states:\n\nit would have been a source, fellow citizens, of much gratification if our\nlast communications from europe had enabled me to inform you that the\nbelligerent nations, whose disregard of neutral rights has been so\ndestructive to our commerce, had become awakened to the duty and true\npolicy of revoking their unrighteous edicts. that no means might be omitted\nto produce this salutary effect, i lost no time in availing myself o...",2457
documents/sotu/1811_Madison.txt,"fellow-citizens of the senate and house of representatives:\n\nin calling you together sooner than a separation from your homes would\notherwise have been required i yielded to considerations drawn from the\nposture of our foreign affairs, and in fixing the present for the time of\nyour meeting regard was had to the probability of further developments of\nthe policy of the belligerent powers toward this country which might the\nmore unite the national councils in the measures to be pursued.\...",2072
documents/sotu/1874_Grant.txt,"to the senate and house of representatives:\n\nsince the convening of congress one year ago the nation has undergone a\nprostration in business and industries such as has not been witnessed with\nus for many years. speculation as to the causes for this prostration might\nbe indulged in without profit, because as many theories would be advanced\nas there would be independent writers--those who expressed their own views\nwithout borrowing--upon the subject. without indulging in theories as to\...",8411
documents/sotu/1936_Roosevelt.txt,"mr. president, mr. speaker, members of the senate and of the house of\nrepresentatives:\n\nwe are about to enter upon another year of the responsibility which the\nelectorate of the united states has placed in our hands. having come so\nfar, it is fitting that we should pause to survey the ground which we have\ncovered and the path which lies ahead.\n\non the fourth day of march, 1933, on the occasion of taking the oath of\noffice as president of the united states, i addressed the people of ...",3481
documents/sotu/1924_Coolidge.txt,"to the congress of the united states:\n\nthe present state of the union, upon which it is customary for the\npresident to report to the congress under the provisions of the\nconstitution, is such that it may be regarded with encouragement and\nsatisfaction by every american. our country is almost unique in its ability\nto discharge fully and promptly all its obligations at home and abroad, and\nprovide for all its inhabitants an increase in material resources, in\nintellectual vigor and in m...",6333


### Calculate counts for search terms

In [10]:
search_terms

['america', 'free', 'just', 'equal', 'war']

In [11]:
for term in search_terms:
    all_texts_df[term] = ""
    
all_texts_df.sample(5)

Unnamed: 0,text,word count,america,free,just,equal,war
documents/sotu/1849_Taylor.txt,"fellow-citizens of the senate and house of representatives:\n\nsixty years have elapsed since the establishment of this government, and\nthe congress of the united states again assembles to legislate for an\nempire of freemen. the predictions of evil prophets, who formerly pretended\nto foretell the downfall of our institutions, are now remembered only to be\nderided, and the united states of america at this moment present to the\nworld the most stable and permanent government on earth.\n\ns...",6959,,,,,
documents/sotu/1792_Washington.txt,"fellow-citizens of the senate and house of representatives:\n\nit is some abatement of the satisfaction with which i meet you on the\npresent occasion that, in felicitating you on a continuance of the national\nprosperity generally, i am not able to add to it information that the\nindian hostilities which have for some time past distressed our\nnorthwestern frontier have terminated.\n\nyou will, i am persuaded, learn with no less concern than i communicate it\nthat reiterated endeavors towar...",1912,,,,,
documents/sotu/2016_Obama.txt,"mr. speaker, mr. vice president, members of congress, my fellow\namericans:\n\ntonight marks the eighth year i've come here to report on the state of\nthe union. and for this final one, i'm going to try to make it shorter.\ni know some of you are antsy to get back to iowa.\n\ni also understand that because it's an election season, expectations for\nwhat we'll achieve this year are low. still, mr. speaker, i appreciate\nthe constructive approach you and the other leaders took at the end of\nl...",4959,,,,,
documents/sotu/2018_Trump.txt,"mr. speaker, mr. vice president, members of congress, the first lady of\nthe united states, and my fellow americans:\n\nless than 1 year has passed since i first stood at this podium, in this\nmajestic chamber, to speak on behalf of the american people -- and to\naddress their concerns, their hopes, and their dreams. that night, our\nnew administration had already taken swift action. a new tide of\noptimism was already sweeping across our land.\n\neach day since, we have gone forward with a ...",4690,,,,,
documents/sotu/1868_Johnson.txt,"fellow-citizens of the senate and house of representatives:\n\nupon the reassembling of congress it again becomes my duty to call your\nattention to the state of the union and to its continued disorganized\ncondition under the various laws which have been passed upon the\nsubject of reconstruction.\n\nit may be safely assumed as an axiom in the government of states that\nthe greatest wrongs inflicted upon a people are caused by unjust and\narbitrary legislation, or by the unrelenting decrees...",8902,,,,,


In [12]:
for filename, text in all_texts_df.iterrows():
    #print(filename)
    for term in search_terms:
        #print(term, text[0].count(term))
        all_texts_df.loc[filename,term] = text[0].count(term)

In [13]:
all_texts_df.sample(5)

Unnamed: 0,text,word count,america,free,just,equal,war
documents/sotu/1865_Johnson.txt,fellow-citizens of the senate and house of representatives:\n\nto express gratitude to god in the name of the people for the\npreservation of the united states is my first duty in addressing you.\nour thoughts next revert to the death of the late president by an act\nof parricidal treason. the grief of the nation is still fresh. it finds\nsome solace in the consideration that he lived to enjoy the highest\nproof of its confidence by entering on the renewed term of the chief\nmagistracy to wh...,8405,10,30,20,8,26
documents/sotu/1893_Cleveland.txt,"to the congress of the united states:\n\nthe constitutional duty which requires the president from time to time to give\nto the congress information of the state of the union and recommend to their\nconsideration such measures as he shall judge necessary and expedient is\nfittingly entered upon by commending to the congress a careful examination of\nthe detailed statements and well-supported recommendations contained in the\nreports of the heads of departments, who are chiefly charged with t...",11196,20,12,32,3,22
documents/sotu/1913_Wilson.txt,"gentlemen of the congress:\n\nin pursuance of my constitutional duty to ""give to the congress information\nof the state of the union,"" i take the liberty of addressing you on several\nmatters which ought, as it seems to me, particularly to engage the\nattention of your honorable bodies, as of all who study the welfare and\nprogress of the nation.\n\ni shall ask your indulgence if i venture to depart in some degree from the\nusual custom of setting before you in formal review the many matters...",3265,2,4,6,1,7
documents/sotu/1826_Adams.txt,"fellow citizens of the senate and of the house of representatives:\n\nthe assemblage of the representatives of our union in both houses of\nthe congress at this time occurs under circumstances calling for the\nrenewed homage of our grateful acknowledgments to the giver of all\ngood. with the exceptions incidental to the most felicitous condition\nof human existence, we continue to be highly favored in all the\nelements which contribute to individual comfort and to national\nprosperity. in th...",7048,7,4,12,8,30
documents/sotu/1793_Washington.txt,"fellow-citizens of the senate and house of representatives:\n\nsince the commencement of the term for which i have been again called into\noffice no fit occasion has arisen for expressing to my fellow citizens at\nlarge the deep and respectful sense which i feel of the renewed testimony\nof public approbation. while on the one hand it awakened my gratitude for\nall those instances of affectionate partiality with which i have been\nhonored by my country, on the other it could not prevent an e...",1794,0,1,4,2,9


### Counts of all keywords

In [14]:
all_texts_df.iloc[:,2:].sample(5)

Unnamed: 0,america,free,just,equal,war
documents/sotu/1898_McKinley.txt,35,8,39,4,93
documents/sotu/1915_Wilson.txt,30,18,11,1,17
documents/sotu/1895_Cleveland.txt,16,16,14,8,22
documents/sotu/1845_Polk.txt,30,22,43,14,35
documents/sotu/1796_Washington.txt,1,0,6,1,13


In [15]:
all_texts_df.iloc[:,2:].apply(sum,axis=1)[0:5]

documents/sotu/1923_Coolidge.txt       64
documents/sotu/2016_Obama.txt          93
documents/sotu/1889_Harrison.txt      103
documents/sotu/1795_Washington.txt     17
documents/sotu/1840_Buren.txt          43
dtype: int64

In [16]:
all_texts_df['all keyword count'] = all_texts_df.iloc[:,2:].apply(sum,axis=1)
all_texts_df.sample(5)

Unnamed: 0,text,word count,america,free,just,equal,war,all keyword count
documents/sotu/1886_Cleveland.txt,"to the congress of the united states:\n\nin discharge of a constitutional duty, and following a well-established\nprecedent in the executive office, i herewith transmit to the congress at\nits reassembling certain information concerning the state of the union,\ntogether with such recommendations for legislative consideration as appear\nnecessary and expedient.\n\nour government has consistently maintained its relations of friendship\ntoward all other powers and of neighborly interest toward ...",13756,22,8,40,11,26,107
documents/sotu/1909_Taft.txt,"the relations of the united states with all foreign governments have\ncontinued upon the normal basis of amity and good understanding, and are\nvery generally satisfactory. europe.\n\npursuant to the provisions of the general treaty of arbitration concluded\nbetween the united states and great britain, april 4, 1908, a special\nagreement was entered into between the two countries on january 27, 1909,\nfor the submission of questions relating to the fisheries on the north\natlantic coast to a...",12656,59,8,19,8,20,114
documents/sotu/2009_Obama.txt,"madame speaker, mr. vice president, members of congress, and the first lady of\nthe united states:\n\ni've come here tonight not only to address the distinguished men and women in\nthis great chamber, but to speak frankly and directly to the men and women who\nsent us here.\n\ni know that for many americans watching right now, the state of our economy is\na concern that rises above all others. and rightly so. if you haven't been\npersonally affected by this recession, you probably know som...",5637,56,3,11,1,15,86
documents/sotu/1945_Roosevelt.txt,"to the congress:\n\nin considering the state of the union, the war and the peace that is to\nfollow are naturally uppermost in the minds of all of us.\n\nthis war must be waged--it is being waged--with the greatest and most\npersistent intensity. everything we are and have is at stake. everything we\nare and have will be given. american men, fighting far from home, have\nalready won victories which the world will never forget.\n\nwe have no question of the ultimate victory. we have no questi...",7379,23,10,6,2,91,132
documents/sotu/1856_Pierce.txt,"fellow-citizens of the senate and of the house of representatives:\n\nthe constitution requires that the president shall from time to time not\nonly recommend to the consideration of congress such measures as he may\njudge necessary and expedient, but also that he shall give information to\nthem of the state of the union. to do this fully involves exposition of all\nmatters in the actual condition of the country, domestic or foreign, which\nessentially concern the general welfare. while perf...",9561,12,15,13,14,18,72


### Counts of any keywords (1 if any, 0 if none)

In [17]:
def not_zero(number):
    if number == 0:
        return 0
    else:
        return 1

In [18]:
all_texts_df['all keyword count'].apply(not_zero)

documents/sotu/1923_Coolidge.txt      1
documents/sotu/2016_Obama.txt         1
documents/sotu/1889_Harrison.txt      1
documents/sotu/1795_Washington.txt    1
documents/sotu/1840_Buren.txt         1
                                     ..
documents/sotu/1983_Reagan.txt        1
documents/sotu/1895_Cleveland.txt     1
documents/sotu/1903_Roosevelt.txt     1
documents/sotu/1844_Tyler.txt         1
documents/sotu/1918_Wilson.txt        1
Name: all keyword count, Length: 190, dtype: int64

In [19]:
all_texts_df['has any keyword'] = all_texts_df['all keyword count'].apply(not_zero)
all_texts_df

Unnamed: 0,text,word count,america,free,just,equal,war,all keyword count,has any keyword
documents/sotu/1923_Coolidge.txt,"since the close of the last congress the nation has lost president harding.\nthe world knew his kindness and his humanity, his greatness and his\ncharacter. he has left his mark upon history. he has made justice more\ncertain and peace more secure. the surpassing tribute paid to his memory as\nhe was borne across the continent to rest at last at home revealed the\nplace he held in the hearts of the american people. but this is not the\noccasion for extended reference to the man or his work. ...",6077,18,6,20,1,19,64,1
documents/sotu/2016_Obama.txt,"mr. speaker, mr. vice president, members of congress, my fellow\namericans:\n\ntonight marks the eighth year i've come here to report on the state of\nthe union. and for this final one, i'm going to try to make it shorter.\ni know some of you are antsy to get back to iowa.\n\ni also understand that because it's an election season, expectations for\nwhat we'll achieve this year are low. still, mr. speaker, i appreciate\nthe constructive approach you and the other leaders took at the end of\nl...",4959,57,2,19,4,11,93,1
documents/sotu/1889_Harrison.txt,"to the senate and house of representatives:\n\nthere are few transactions in the administration of the government that are\neven temporarily held in the confidence of those charged with the conduct\nof the public business. every step taken is under the observation of an\nintelligent and watchful people. the state of the union is known from day\nto day, and suggestions as to needed legislation find an earlier voice than\nthat which speaks in these annual communications of the president to\nco...",11861,26,6,35,12,24,103,1
documents/sotu/1795_Washington.txt,"fellow-citizens of the senate and house of representatives:\n\ni trust i do not deceive myself when i indulge the persuasion that i have\nnever met you at any period when more than at the present the situation of\nour public affairs has afforded just cause for mutual congratulation, and\nfor inviting you to join with me in profound gratitude to the author of all\ngood for the numerous and extraordinary blessings we enjoy.\n\nthe termination of the long, expensive, and distressing war in whic...",1806,1,0,8,2,6,17,1
documents/sotu/1840_Buren.txt,"fellow-citizens of the senate and house of representatives:\n\nour devout gratitude is due to the supreme being for having graciously\ncontinued to our beloved country through the vicissitudes of another year\nthe invaluable blessings of health, plenty, and peace. seldom has this\nfavored land been so generally exempted from the ravages of disease or the\nlabor of the husbandman more amply rewarded, and never before have our\nrelations with other countries been placed on a more favorable bas...",8202,6,2,20,7,8,43,1
...,...,...,...,...,...,...,...,...,...
documents/sotu/1983_Reagan.txt,"mr. speaker, mr. president, distinguished members of the congress, honored\nguests, and fellow citizens:\n\nthis solemn occasion marks the 196th time that a president of the united\nstates has reported on the state of the union since george washington first\ndid so in 1790. that's a lot of reports, but there's no shortage of new\nthings to say about the state of the union. the very key to our success has\nbeen our ability, foremost among nations, to preserve our lasting values by\nmaking cha...",5076,60,22,14,3,14,113,1
documents/sotu/1895_Cleveland.txt,to the congress of the united states:\n\nthe present assemblage of the legislative branch of our government occurs at a\ntime when the interests of our people and the needs of the country give\nespecial prominence to the condition of our foreign relations and the\nexigencies of our national finances. the reports of the heads of the several\nadministrative departments of the government fully and plainly exhibit what\nhas been accomplished within the scope of their respective duties and presen...,13424,16,16,14,8,22,76,1
documents/sotu/1903_Roosevelt.txt,"to the senate and house of representatives:\n\nthe country is to be congratulated on the amount of substantial\nachievement which has marked the past year both as regards our foreign\nand as regards our domestic policy.\n\nwith a nation as with a man the most important things are those of the\nhousehold, and therefore the country is especially to be congratulated\non what has been accomplished in the direction of providing for the\nexercise of supervision over the great corporations and comb...",13525,16,11,23,7,50,107,1
documents/sotu/1844_Tyler.txt,"to the senate and house of representatives of the united states:\n\nwe have continued cause for expressing our gratitude to the supreme ruler\nof the universe for the benefits and blessings which our country, under his\nkind providence, has enjoyed during the past year. notwithstanding the\nexciting scenes through which we have passed, nothing has occurred to\ndisturb the general peace or to derange the harmony of our political\nsystem. the great moral spectacle has been exhibited of a natio...",8527,7,11,20,2,27,67,1


### Export to Excel

In [20]:
all_texts_df.to_excel(results_filename)