**Analysis of Presidential speech and election data**

This notebook scrapes [The American Presidency Project](http://www.presidency.ucsb.edu) and downloads the campaign speeches of all 2016 presidential candidates. It then builds a Markov chain out of each candidate's data, capable of generating sentences in the style of their campaign speeches.

In [4]:
import pandas as pd
import numpy as np
import requests
from lxml import html
from bs4 import BeautifulSoup
import markovify
import os.path

In [5]:

def getCandidateSpeechLinks(url):
    """Map candidate names to the href of their campaign-speech listing.

    Fetches ``url``, parses the response with BeautifulSoup's ``lxml``
    parser and searches the first ``<table width=680>``. Inside every
    ``<td class="doctext">`` cell, each anchor whose text contains
    "campaign" (case-insensitive) is stored under the text of that cell's
    ``<span class="roman">``. When a cell holds several matching anchors,
    the last one wins.

    Returns:
        dict mapping the span text to the anchor's ``href`` value.
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'lxml')
    candidateTable = soup.find('table', width=680)

    return {
        cell.find('span', class_='roman').text: anchor['href']
        for cell in candidateTable.findAll('td', class_='doctext')
        for anchor in cell.findAll('a')
        if 'campaign' in anchor.text.lower()
    }

def scrapeCampaignSpeechesToFile(url, path):
    """Download the speeches linked from ``url`` and write them to ``path``.

    ``path`` doubles as a cache: when a file already exists there the
    function returns immediately without touching the network.

    Otherwise the listing page at ``url`` is fetched, every anchor inside
    its first ``<table width=700>`` whose text does not contain
    "interview" (case-insensitive) is followed, and the text of each
    page's ``<span class="displaytext">`` is written to ``path`` as UTF-8,
    one speech per write followed by a newline.

    Args:
        url: Address of a candidate's speech listing page.
        path: Destination text file.

    Returns:
        None.
    """
    # Check the cache first so an existing file costs no HTTP requests.
    if os.path.isfile(path):
        return

    listingPage = requests.get(url)
    listingSoup = BeautifulSoup(listingPage.text, 'lxml')
    root = 'http://www.presidency.ucsb.edu/'
    table = listingSoup.find('table', width=700)

    # The first three characters of each href are dropped before joining
    # with the site root (presumably a leading '../' -- confirm against the
    # live markup).
    links = [
        root + link['href'][3:]
        for link in table.findAll('a')
        if 'interview' not in link.text.lower()
    ]

    # Extract all speech text before opening the output file, so a failed
    # download or an unexpected page layout cannot leave behind a truncated
    # file that later runs would mistake for a complete cache.
    speeches = []
    for link in links:
        # 'lxml' is a BeautifulSoup parser name; it must not be handed to
        # requests.get, whose second positional argument is ``params``.
        speechPage = requests.get(link)
        speechSoup = BeautifulSoup(speechPage.text, 'lxml')
        speeches.append(speechSoup.find('span', class_='displaytext').text)

    with open(path, 'w', encoding='utf-8') as outFile:
        for speech in speeches:
            outFile.write(speech + '\n')

def trainMarkov(path):
    """Build a markovify text model from the UTF-8 file at ``path``.

    The whole file is read into memory and handed to ``markovify.Text``;
    the resulting model object is returned.
    """
    with open(path, encoding='utf-8') as corpusFile:
        corpus = corpusFile.read()

    return markovify.Text(corpus)

def campaignLinkToBots(url):
    """Scrape every candidate's campaign speeches and train one bot each.

    Args:
        url: Address of the election index page that links to each
            candidate's campaign-speech listing.

    Returns:
        dict mapping each candidate name (as returned by
        ``getCandidateSpeechLinks``) to the model returned by
        ``trainMarkov`` for that candidate's speech file.
    """
    dataFolder = './Campaign Speeches/'
    root = 'http://www.presidency.ucsb.edu/'

    # Make sure the output folder exists; opening a file for writing does
    # not create missing parent directories.
    os.makedirs(dataFolder, exist_ok=True)

    # Dictionary of each candidate's name and the link to their campaign
    # speech page.
    campaignSpeechLinkDict = getCandidateSpeechLinks(url)

    # For each candidate: put their campaign speeches into an individual
    # file (spaces in the name become dashes), then train a bot on it.
    bots = {}
    for name, speechUrl in campaignSpeechLinkDict.items():
        path = dataFolder + name.replace(' ', '-') + '.txt'
        scrapeCampaignSpeechesToFile(root + speechUrl, path)
        bots[name] = trainMarkov(path)

    return bots

In [None]:
bots = campaignLinkToBots('http://www.presidency.ucsb.edu/2016_election.php')

# Print up to ten generated sentences (at most 140 characters each) per
# candidate.
for name, bot in bots.items():
    print('\n' + name + ': ')
    for _ in range(10):
        sentence = bot.make_short_sentence(max_chars=140)
        # markovify returns None when it fails to build a sentence within
        # its retry budget; skip those instead of printing "None".
        if sentence is not None:
            print(sentence)