When it comes to political hot topics, people often cite public opinion polls as an argument for or against policy ideas. After all, shouldn't policy-makers consider that data in a representative democracy? Furthermore, polling offers some of the only evidence possible for judging whether a political candidate will win their race in a rapidly changing political climate.

# **But in a sea of information, choppy waters obscure underlying currents.**

Media organizations and political commentators report on individual polls and rarely put them in context. Aggregate, 'big picture' analyses are rare and hard to find, even if you know exactly what you're looking for. Rhetoric around "popularity" and "electability" is presented without quantitative backing or nuance, more often just the intuition of the speaker with the platform. 

Below we demonstrate a different approach, driven by data available across the web. We collect the results of the many polls conducted for two trending topics in American politics: <br>
Do Americans favor a single-payer healthcare system ("Medicare for All")? <br>
How would the Democratic primary candidates fare against Trump in the general election?

Keep in mind that pollsters generally report a margin of error of 2-3 percentage points, and make different assumptions about who counts as a 'likely voter'. Even averaging all the available polls cannot give a full story. Nonetheless, these trends are an important piece of it!

Click the play buttons on the left-hand side to see results! (Only click 'show code' if you're interested in the Python we used.) <br>

# Healthcare

Interact with aggregations of public opinion polls in the last year:

In [0]:
#@title
# import packages needed for interactive graph
%matplotlib inline
import pandas as pd
from ipywidgets import interactive
from IPython.display import display
import matplotlib.pyplot as plt
import numpy as np
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
import ipywidgets as widgets
# Load the healthcare polls: a CSV uploaded to GitHub that holds poll results
# from various organizations, retrieved from the Kaiser Family Foundation.
Mpolls = pd.read_csv(
    'https://raw.githubusercontent.com/kgbolton/Whose-Poll-Anyway/master/Trend_data.csv'
)
# Convert the field end dates to datetimes so they can sit on a time axis.
Mpolls['Field End Date'] = pd.to_datetime(Mpolls['Field End Date'])
# Add a percent-favorability column: "Favor" as a percentage of
# "Favor" plus "Oppose".
Mpolls['Pfavor'] = Mpolls['Favor'] / (Mpolls['Favor'] + Mpolls['Oppose']) * 100
# define a function to graph the subset of polls that the user selects
def f(Sample, Poll):
    """Plot percent favorability for the subset of healthcare polls selected.

    Draws a scatter plot of the 'Pfavor' column against 'Field End Date' for
    the chosen rows of ``Mpolls``, with a red horizontal line at the mean
    'Pfavor' of those rows.

    Args:
        Sample: 'Any' keeps every poll; 'All adults' keeps only polls whose
            'Sample' column equals 'All adults'; any other value keeps the
            polls whose 'Sample' column is not 'All adults'.
        Poll: 'Both' keeps every poll; any other value keeps only the polls
            whose 'Medicare-for-all' column equals that value.
    """
    # Narrow the full data set down to the rows matching both selections.
    pgraph = Mpolls
    if Poll != 'Both':
        pgraph = pgraph.loc[pgraph['Medicare-for-all'] == Poll, :]
    if Sample == 'All adults':
        pgraph = pgraph.loc[pgraph['Sample'] == 'All adults', :]
    elif Sample != 'Any':
        pgraph = pgraph.loc[pgraph['Sample'] != 'All adults', :]

    plt.figure(figsize=(16, 8))
    plt.scatter(pgraph['Field End Date'], pgraph['Pfavor'])
    # Pass the on/off flag positionally: the ``b`` keyword was renamed to
    # ``visible`` in Matplotlib 3.5 and later removed, so ``b=True`` fails on
    # current releases while the positional form works on old and new ones.
    plt.grid(True, axis='y')
    plt.axhline(y=pgraph['Pfavor'].mean(), color='red', label="Average")
    plt.ylim(0, 100)
    # Stop value of 101 so the tick at 100 (the top of the axis) is included.
    plt.yticks(np.arange(0, 101, 5))

    # Tick every 90 days across the date range of the FULL data set (not just
    # the selected subset), so tick positions do not move between selections.
    tick_step = np.timedelta64(90, 'D')
    first_date = np.datetime64(Mpolls['Field End Date'].min())
    last_date = np.datetime64(Mpolls['Field End Date'].max() + tick_step)
    plt.xticks(np.arange(first_date, last_date, tick_step), rotation=60)

    plt.xlabel("Date of Poll Taken")
    plt.ylabel("Percent that would favor single payer health care system")
    plt.legend()
    plt.show()
# Wrap f in an interactive widget with one list of choices per argument,
# then display it as the cell's output.
healthcare_plot = interactive(
    f,
    Sample=['All adults', 'Registered/likely voters', 'Any'],
    Poll=['Includes "Medicare-for-all"', 'Does not include "Medicare-for-all"', 'Both'],
)
healthcare_plot

interactive(children=(Dropdown(description='Sample', options=('All adults', 'Registered/likely voters', 'Any')…

Scrape graphs about public opinion on healthcare from Gallup:

In [0]:
# DO NOT RUN if chromedriver and selenium are already installed - this will reinstall them.
# Installs the chromium-chromedriver package (via apt) and the selenium Python
# package (via pip); the Gallup scraping cell drives a headless browser with them.
!apt install chromium-chromedriver
!pip install selenium

In [0]:
#@title
#import libraries
import os
import re
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from google.colab import files
import urllib.request

# Initialize the browser (these settings are specific to Google Colab - don't tweak).
from selenium.webdriver.common.by import By

options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# Do not pass the driver path positionally: recent Selenium releases no longer
# accept it there, and older releases already default to 'chromedriver'.
browser = webdriver.Chrome(options=options)

try:
    # Open the browser to Gallup's home page. The browser is headless (not visible).
    browser.get("https://www.gallup.com/home.aspx")

    # Navigate to the search bar, ask the user for a search term, then enter it
    # and submit. find_element(By.CLASS_NAME, ...) is used because the
    # find_element_by_* helpers were removed in Selenium 4.3; this form is
    # available in older releases as well.
    searchIcon = browser.find_element(By.CLASS_NAME, 'c-gmn-search-toggle')
    searchIcon.click()
    searchBar = browser.find_element(By.CLASS_NAME, 'c-search__input')
    searchTerm = input('Enter a search term: ')
    searchBar.send_keys(searchTerm)
    searchBar.send_keys(Keys.ENTER)
except Exception:
    # Do not leave a headless browser process running if the search fails.
    browser.quit()
    raise

# Scrape article titles from the search results page.
# NOTE(review): the page source is read with no explicit wait after the search
# was submitted - confirm the results are always loaded by this point.
html = browser.page_source
soup = BeautifulSoup(html, 'html.parser')
messyTitles = soup.find_all(class_="c-search-page__result-title")

# The title is whatever sits between <span> and </span> in the element's markup.
titlePattern = re.compile('<span>(.*)</span>')
articleList = []
for article in messyTitles:
    found = titlePattern.search(str(article))
    # When the markup has no matching <span>...</span>, fall back to the
    # element's text instead of crashing. One title is still recorded per
    # result element, so menu numbers keep lining up with the elements.
    title = found.group(1) if found else article.get_text(strip=True)
    articleList.append(title)

# Display article titles in a numbered menu and have the user input a selection.
if not articleList:
    # Nothing to choose from: close the browser and stop with a clear message
    # rather than failing later on an out-of-range index.
    browser.quit()
    raise RuntimeError('No articles were found for the search term "' + searchTerm + '"')

print()
for counter, article in enumerate(articleList, start=1):
    print(str(counter) + ': ' + article)

# Keep asking until the answer is a number shown in the menu. Without this
# check, 0 would silently select the last article (index -1), and text or
# out-of-range numbers would raise.
while True:
    answer = input('Which article\'s graphs would you like to view? ')
    try:
        choiceNum = int(answer) - 1
    except ValueError:
        choiceNum = -1
    if 0 <= choiceNum < len(articleList):
        break
    print('Please enter a number between 1 and ' + str(len(articleList)) + '.')
choice = articleList[choiceNum]

# Navigate to the selected article.
soup = BeautifulSoup(browser.page_source, 'html.parser')
articleLinks = soup.find_all(class_='c-search-page__result')

# Capture one href value, cut off right after ".aspx". [^"] keeps the match
# inside a single attribute; a greedy ".*" could run on to a later ".aspx" on
# the same line and produce a URL made of several attributes glued together.
urlMatch = re.search(r'href="([^"]*?\.aspx)', str(articleLinks[choiceNum]))
if urlMatch is None:
    # No usable link in the chosen result: close the browser and say so.
    browser.quit()
    raise RuntimeError('Could not find a link for the article "' + choice + '"')
selectionUrl = urlMatch.group(1)
browser.get(selectionUrl)


# Scrape the article for graphs.
# This part could be made more robust; Gallup is sometimes inconsistent
# with their naming conventions on their pages, so graphs can be missed.
html = browser.page_source
soup = BeautifulSoup(html, 'html.parser')
graphlinks = []

# Two methods of searching for graphs to avoid missing any. Again, due to
# inconsistent or changing naming conventions/html styles in different articles.

# Method 1: a .png src that follows the word "graph"/"Graph" on the same line.
# The lazy quantifiers and [^"] keep each match to a single src attribute, so
# several images on one line give several links instead of one merged string.
graphs = re.findall(r'[gG]raph.*?src="([^"]*?\.png)', str(soup))
graphlinks.extend(graphs)

# Method 2: elements with the "sggt-image" class.
pngSrc = re.compile(r'src="([^"]*?\.png)')
graphs2 = soup.find_all(class_="sggt-image")
for graph in graphs2:
    mo = pngSrc.search(str(graph))
    if mo is None:
        # This element has no .png src; skip it rather than crash.
        continue
    graphlinks.append(mo.group(1))

# Eliminate duplicates that were picked up by both methods. dict.fromkeys keeps
# first-seen order, so the numbering of downloaded files is the same every run.
graphlinks = list(dict.fromkeys(graphlinks))

# Report how many graph links were collected, choosing the wording by count,
# and show where the article lives.
if len(graphlinks) > 1:
    message = str(len(graphlinks)) + ' data visualizations were found in the article "' + choice + '"'
elif graphlinks:
    message = 'One data visualization was found in the article "' + choice + '"'
else:
    message = 'No data visualizations were found in the article "' + choice + '"'
print()
print(message)
print('(Article location: ' + str(browser.current_url) + ')')
print()

# Save graph images to the current directory and display links and updates for the user.
import urllib.parse

try:
    articleUrl = browser.current_url
    for nameCounter, graph in enumerate(graphlinks, start=1):
        name = 'downloadedGraph' + str(nameCounter)
        # Resolve the src against the article URL. This handles
        # protocol-relative ("//host/x.png"), relative and absolute values;
        # a fixed "http:" prefix is only right for the protocol-relative case.
        link = urllib.parse.urljoin(articleUrl, graph)
        urllib.request.urlretrieve(link, name)
        print()
        print('"' + name + '"' + ' was saved to the current directory, from ' + link)
finally:
    # Close the browser even when a download fails.
    browser.quit()

See sentiment analysis from a sample of U.S. Tweets about single-payer/Medicare for All:

In [0]:
#@title
# IMPORT PYTHON PACKAGES -------------------------------------------------------------------------------------------

import pandas as pd
import matplotlib.pyplot as plt

# LOAD THE TWEET DATA FROM CSV ---------------------------------------------------------------------------------------

twitter_data = pd.read_csv(
    'https://raw.githubusercontent.com/kgbolton/Whose-Poll-Anyway/master/twitter_data%20(1).csv'
)

# SENTIMENT ANALYSIS GRAPHICS ----------------------------------------------------------------------------------------

# 1. Build a summary table with the number of tweets per sentiment value:
#    one row per sentiment, with the tweet count in the 'text' column.

sentiment_analysis = twitter_data.groupby('sentiment')['text'].count().reset_index()

# 2. Pull out the bar labels (x) and the bar heights (y) for the chart.

x = sentiment_analysis['sentiment']
y = sentiment_analysis['text']

# 3. Draw one bar per sentiment, with height equal to its number of tweets.
#    Colors are assigned in bar order (red, yellow, green), intended for
#    negative, neutral and positive.

plt.figure(figsize=(16,8))
plt.bar(x, y, color=("red","yellow","green"))
plt.title('Sentiment Twitter Analysis for Medicare for All')
plt.ylabel('# of Tweets')
plt.show()

# Electability

Compare national and state-wide aggregate polling results for general election matchups:

In [0]:
#@title
# import packages needed for interactive graph
%matplotlib inline
import pandas as pd
from ipywidgets import interactive
from IPython.display import display
import matplotlib.pyplot as plt
import numpy as np
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
import ipywidgets as widgets
from matplotlib.lines import Line2D
# Load the general election polls: a CSV uploaded to GitHub with 363
# presidential choice polls pulled from 538.
Epolls = pd.read_csv(
    'https://raw.githubusercontent.com/kgbolton/Whose-Poll-Anyway/master/president_polls.csv'
)
# Rows with no state recorded are labelled 'National'.
Epolls['state'] = Epolls['state'].fillna('National')
# Legend icons used to explain the sample options: three circles that get
# lighter and smaller from the first to the last.
legend_elements = [
    Line2D([0], [0], marker='o', color='w', markerfacecolor=face, markersize=size)
    for face, size in (('black', 20), ('gray', 15), ('lightgray', 10))
]
# define a function to graph the subset of polls that the user selects
def e(State, Sample):
    """Plot average poll percentages for Biden vs. Trump and Sanders vs. Trump.

    Filters ``Epolls`` to the selected state and sample, then draws two bar
    charts side by side. Each bar is the mean of the 'pct' column over the
    rows whose 'answer' column names that candidate, rounded to two decimals
    and printed above the bar.

    Args:
        State: value the 'state' column must equal.
        Sample: 'any' keeps every poll; any other value keeps only rows whose
            'population' column equals it.
    """
    egraph = Epolls.loc[Epolls['state'] == State, :]
    if Sample != 'any':
        egraph = egraph.loc[egraph['population'] == Sample, :]

    def _mean_pct(candidate):
        # Mean 'pct' over the selected rows naming this candidate (NaN if none).
        rows = egraph.loc[egraph['answer'] == candidate, :]
        return round(rows['pct'].mean(), 2)

    # NOTE(review): Trump's average uses every selected row whose answer is
    # 'Trump', whichever opponent that poll question named - confirm intended.
    trump_pct = _mean_pct('Trump')
    matchups = [
        (['Biden', 'Trump'], [_mean_pct('Biden'), trump_pct]),
        (['Sanders', 'Trump'], [_mean_pct('Sanders'), trump_pct]),
    ]

    fig, axs = plt.subplots(1, 2, figsize=(8, 8))
    for ax, (names, values) in zip(axs, matchups):
        ax.bar(names, values, color=['blue', 'red'])
        ax.set_ylim([0, 100])
        # Stop value of 101 so the tick at 100 (the top of the axis) is included.
        ax.set_yticks(np.arange(0, 101, 5))
        ax.grid(axis="y", linestyle='--')
        for name, value in zip(names, values):
            if np.isnan(value):
                # No polls for this candidate in the selection: there is no
                # bar to label, and a NaN text position is not drawable.
                continue
            ax.text(name, value + 3, value, ha='center')

    plt.figlegend(handles=legend_elements, labels=['a: all adults', 'rv: registered voters', 'lv: likely voters'], loc='upper left', labelspacing=2)
    # plt.show() rather than fig.show(): Figure.show() warns on non-GUI
    # backends such as the inline one, and this matches the healthcare plot.
    plt.show()
# Wrap e in an interactive widget: State offers every distinct value of the
# 'state' column, Sample offers the sample filters. Then display the widget
# as the cell's output.
electability_plot = interactive(
    e,
    State=Epolls['state'].unique().tolist(),
    Sample=['any', 'a', 'rv', 'lv'],
)
electability_plot