# Get Delegate Counts from Super Tuesday States
Will return & save delegate counts for Joe Biden and Bernie Sanders in all Super Tuesday states.

In [2]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
import time
import re
import json

In [4]:
## Navigate to each url, & save article information
headers = {'user-agent':'Mozilla/5.0'}
url = 'https://www.nytimes.com/interactive/2020/us/elections/delegate-count-primary-results.html'
response = requests.get(url, headers)

# save the html file
with open('delegate_counts.htm', 'w') as file:
    file.write(response.text)
    file.close

In [164]:
#open the file and parse to soup object
with open('delegate_counts.htm','r') as file:
    soup = BeautifulSoup(file)

In [165]:
#get only the rows of the table, so you can extract data
table_rows = soup.find_all("tr", class_ ="g-event")

In [166]:
### this code block will result in an error, but it captures all the data we need so that's fine
states_list = []
biden_delegates_list = []
sanders_delegates_list = []
biden_wins_list = []
sanders_wins_list = []

for row in table_rows:
    soup = BeautifulSoup(str(row))
    
    #get the state and add to list
    state = soup.find("span", class_="g-full-name").string
    states_list.append(state)
    
    #set each value to null at the beginning of each run of the lopp
    biden_delegates_nonwinner = np.nan
    biden_delegates_winner = np.nan
    sanders_delegates_nonwinner = np.nan
    sanders_delegates_winner = np.nan
    
    #if Biden did not win, get delegate count from table row
    try:
        biden_delegates_nonwinner = int(soup.find("td", class_="g-cand-wide g-cand g-biden in").string)
    #if that table row is not present, it means Biden won and need to get value as sibling of checkmark image
    except AttributeError:
        biden_delegates_winner = int(soup.find("img", class_="g-checkmark").next_sibling)
    #if biden_delegates_winner is > 0 (aka not null), it means Biden won the state so use that delegate value
    if biden_delegates_winner > 0:
        biden_delegates = biden_delegates_winner
        biden_winner = 1
    #if Biden lost the state, use biden_delegates_nonwinner value
    else:
        biden_delegates = biden_delegates_nonwinner
        biden_winner = 0
    #add delegate count & whether Biden won to list
    biden_delegates_list.append(biden_delegates)
    biden_wins_list.append(biden_winner)
    
    #if Sanders did not win, get delegate count from table row
    try:
        sanders_delegates_nonwinner = int(soup.find("td", class_="g-cand-wide g-cand g-sanders in").string)
    #if that table row is not present, it means Sanders won and need to get value as sibling of checkmark image
    except AttributeError:
        sanders_delegates_winner = int(soup.find("img", class_="g-checkmark").next_sibling)
    #if sanders_delegates_winner is > 0 (aka not null), it means Sanders won the state so use that delegate value
    if sanders_delegates_winner > 0:
        sanders_delegates = sanders_delegates_winner
        sanders_winner = 1
    #if Sanders lost the state, use sanders_delegates_nonwinner value
    else:
        sanders_delegates = sanders_delegates_nonwinner
        sanders_winner = 0
    #add delegate count & whether Sanders won to list
    sanders_delegates_list.append(sanders_delegates)
    sanders_wins_list.append(sanders_winner)
    
    

AttributeError: 'NoneType' object has no attribute 'string'

In [167]:
states_list

['Iowa',
 'New Hampshire',
 'Nevada',
 'South Carolina',
 'Alabama',
 'American Samoa',
 'Arkansas',
 'California',
 'Colorado',
 'Maine',
 'Massachussetts',
 'Minnesota',
 'North Carolina',
 'Oklahoma',
 'Tennessee',
 'Texas',
 'Utah',
 'Vermont',
 'Virginia',
 'Idaho',
 'Michigan',
 'Mississippi',
 'Missouri',
 'North Dakota',
 'Washington',
 'Northern Marianas']

In [168]:
biden_delegates_list

[6,
 0,
 9,
 39,
 44,
 0,
 17,
 163,
 12,
 11,
 37,
 38,
 67,
 21,
 33,
 111,
 5,
 5,
 66,
 11,
 72,
 34,
 44,
 6,
 37,
 2]

In [169]:
biden_wins_list

[0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0]

In [170]:
sanders_delegates_list

[12,
 9,
 24,
 15,
 8,
 0,
 9,
 211,
 21,
 9,
 29,
 27,
 37,
 13,
 19,
 102,
 13,
 11,
 31,
 9,
 52,
 2,
 24,
 8,
 37,
 4]

In [171]:
sanders_wins_list

[0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1]

In [173]:
dictionary = {'state':states_list, \
              'biden_delegates':biden_delegates_list, \
              'sanders_delegates':sanders_delegates_list, \
              'biden_win':biden_wins_list, \
              'sanders_win':sanders_wins_list}

In [177]:
df = pd.DataFrame(dictionary)

In [178]:
df.to_dict('records')

[{'state': 'Iowa',
  'biden_delegates': 6,
  'sanders_delegates': 12,
  'biden_win': 0,
  'sanders_win': 0},
 {'state': 'New Hampshire',
  'biden_delegates': 0,
  'sanders_delegates': 9,
  'biden_win': 0,
  'sanders_win': 1},
 {'state': 'Nevada',
  'biden_delegates': 9,
  'sanders_delegates': 24,
  'biden_win': 0,
  'sanders_win': 1},
 {'state': 'South Carolina',
  'biden_delegates': 39,
  'sanders_delegates': 15,
  'biden_win': 1,
  'sanders_win': 0},
 {'state': 'Alabama',
  'biden_delegates': 44,
  'sanders_delegates': 8,
  'biden_win': 1,
  'sanders_win': 0},
 {'state': 'American Samoa',
  'biden_delegates': 0,
  'sanders_delegates': 0,
  'biden_win': 0,
  'sanders_win': 0},
 {'state': 'Arkansas',
  'biden_delegates': 17,
  'sanders_delegates': 9,
  'biden_win': 1,
  'sanders_win': 0},
 {'state': 'California',
  'biden_delegates': 163,
  'sanders_delegates': 211,
  'biden_win': 0,
  'sanders_win': 0},
 {'state': 'Colorado',
  'biden_delegates': 12,
  'sanders_delegates': 21,
  'bide