In this part, you will obtain as much data as you can on the campaign contributions received by each candidate. This data is available through the website https://www.opensecrets.org/.

**Part 1: Data Gathering**
1. Start by scraping the data from the summary page for Tennessee's 7th District, which is available at https://www.opensecrets.org/races/candidates?cycle=2020&id=TN07&spec=N.
- Make a DataFrame showing, for each candidate:
    - the candidate's name
    - the candidate's party
    - state
    - district number
    - whether the candidate was an incumbent
    - whether the candidate won the race
    - the percentage of the vote that candidate received
    - the total amount raised by that candidate (as a numeric variable)
    - the total amount spent by the candidate (as a numeric variable)

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

In [2]:
URL = 'https://www.opensecrets.org/races/candidates?cycle=2020&id=TN07&spec=N'
response = requests.get(URL)
response.text

'<!DOCTYPE html>\n<!--[if lte IE 8]><html class="no-js lte-ie9 lte-ie8" lang="en" dir="ltr"><![endif]-->\n<!--[if IE 9]><html class="no-js lte-ie9" lang="en" dir="ltr"><![endif]-->\n<!--[if gt IE 9]><!-->\n\n      <meta name="scrim_frequency" content="Daily" />\n\n<html class="no-js" lang="en" dir="ltr">\n<!--<![endif]-->\n<head>\n  <!-- Google Tag Manager -->\n  <script>\n    (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({\'gtm.start\':\n      new Date().getTime(),event:\'gtm.js\'});var f=d.getElementsByTagName(s)[0],\n      j=d.createElement(s),dl=l!=\'dataLayer\'?\'&l=\'+l:\'\';j.async=true;j.src=\n      \'https://www.googletagmanager.com/gtm.js?id=\'+i+dl;f.parentNode.insertBefore(j,f);\n      })(window,document,\'script\',\'dataLayer\',\'GTM-MKPRXNV\');\n  </script>\n  <!-- End Google Tag Manager -->\n  <meta charset="utf-8">\n<script type="text/javascript">window.NREUM||(NREUM={});NREUM.info={"beacon":"bam.nr-data.net","errorBeacon":"bam.nr-data.net","licenseKey":"NRJS-b7f93d6e9af

In [3]:
soup = BeautifulSoup(response.text)
print(soup.prettify())

<!DOCTYPE html>
<!--[if lte IE 8]><html class="no-js lte-ie9 lte-ie8" lang="en" dir="ltr"><![endif]-->
<!--[if IE 9]><html class="no-js lte-ie9" lang="en" dir="ltr"><![endif]-->
<!--[if gt IE 9]><!-->
<html>
 <head>
  <meta content="Daily" name="scrim_frequency"/>
  <!--<![endif]-->
  <!-- Google Tag Manager -->
  <script>
   (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
      new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
      j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
      'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);
      })(window,document,'script','dataLayer','GTM-MKPRXNV');
  </script>
  <!-- End Google Tag Manager -->
  <meta charset="utf-8"/>
  <script type="text/javascript">
   window.NREUM||(NREUM={});NREUM.info={"beacon":"bam.nr-data.net","errorBeacon":"bam.nr-data.net","licenseKey":"NRJS-b7f93d6e9afef1935a5","applicationID":"1109265918","transactionName":"dFpeQkdcWQoGR

In [4]:
# the percentage of the vote that candidate received
headings = soup.findAll(class_ = 'Members--vote-pct')
vote = [heading.get_text().strip() for heading in headings]
for i in range(len(vote)):
    vote[i] = vote[i].replace('(', '').replace(' of vote)', '')

print(vote)

['69.9%', '27.3%', '2.2%', '0.6%']


In [5]:
soup.findAll('div', 'Members--list-item')

[<div class="Members--list-item">
 <div class="Members--bio u-richtext">
 <h2><strong>
 <a href="/members-of-congress/mark-green/summary?cid=N00041873">Mark Green (R) • <i>Incumbent</i></a>
  • <span class="winner"><i>Winner</i></span> <br/>
 <span class="Members--vote-pct">(69.9% of vote)</span>
 </strong></h2>
 </div>
 <table border="0" cellpadding="1" cellspacing="0" class="Members--table">
 <tr>
 <td>Raised:</td>
 <td class="Members--number">$1,194,960</td>
 <td width="33%">
 <div class="Members--bar-wrapper">
 <div class="Members--bar Members--bar-indivs" style="width:100.0%;"></div>
 </div>
 </td>
 </tr>
 <tr>
 <td>Spent:</td>
 <td class="Members--number">$935,487</td>
 <td width="33%">
 <div class="Members--bar-wrapper">
 <div class="Members--bar Members--bar-indivs" style="width:78.28599300862228%;"></div>
 </div>
 </td>
 </tr>
 <tr>
 <td>Cash on Hand:</td>
 <td class="Members--number">$287,889</td>
 <td width="33%">
 <div class="Members--bar-wrapper">
 <div class="Members--bar

In [6]:
# Find all h2 and strong elements
h2_and_strong_elements = soup.find_all('h2', 'strong')

# Extract and print the text from these elements
for element in h2_and_strong_elements:
    text = element.get_text().strip()  # Get and strip any leading/trailing whitespace
    element.unwrap()  # Remove the element, preserving its inner text
    print(text)

In [7]:
# the total amount raised by that candidate (as a numeric variable)
headings = soup.findAll('td','Members--number') # tag, class
amount_raised = [heading.get_text().strip() for heading in headings]
amount_raised = amount_raised[::3] # retrieve every 4th value
amount_raised

['$1,194,960', '$206,644', '$1,750', '$655']

In [8]:
# the total amount spent by that candidate (as a numeric variable)
headings = soup.findAll('td','Members--number') # tag, class
amount_spent = [heading.get_text().strip() for heading in headings]
amount_spent = amount_spent[1::3] # start at 2nd object, retrieve every 4th value
amount_spent

['$935,487', '$207,191', '$0', '$1,049']

In [9]:
# whether the candidate won the race
soup.findAll('span', 'winner')

[<span class="winner"><i>Winner</i></span>]

In [10]:
headings = soup.findAll('h2')
name = [heading.get_text().strip() for heading in headings]
name

['We follow the money. You make it possible.',
 'We follow the money. You make it happen.',
 'Mark Green (R) • Incumbent\n • Winner \n(69.9% of vote)',
 'Kiran Sreepada (D)\n\t\t\t\t\t\t\t\t\t\t\n(27.3% of vote)',
 'Ronald Brown (I)\n\t\t\t\t\t\t\t\t\t\t\n(2.2% of vote)',
 'Scott Vieira Jr (I)\n\t\t\t\t\t\t\t\t\t\t\n(0.6% of vote)',
 'We follow the money. You make it possible.',
 'Count Cash & Make Change']

In [11]:
headings = soup.findAll('h2')
name = []

for heading in headings:
    # Remove the <span> element with class "Members--vote-pct"
    span_element = heading.find('span')
    
    if span_element:
        span_element.decompose()
    
    name.append(heading.get_text().strip())

# Print the names after removing the <span> elements and specific <h2> elements
name

['We follow the money.  make it possible.',
 'We follow the money. You make it happen.',
 'Mark Green (R) • Incumbent\n •  \n(69.9% of vote)',
 'Kiran Sreepada (D)',
 'Ronald Brown (I)',
 'Scott Vieira Jr (I)',
 'We follow the money.  make it possible.',
 'Count Cash & Make Change']

In [12]:
soup.findAll('h2')

[<h2 class="Donate-title f-strata-title c-navy" data-orphans='{"words":"2","wrap":"true"}'>
             We follow the money.  make it possible.
         </h2>,
 <h2 class="f-strata-title f-uppercase" style="color: white; fill: white; text-align: center; font-size: 260%;"> We follow the money. You make it happen.</h2>,
 <h2><strong>
 <a href="/members-of-congress/mark-green/summary?cid=N00041873">Mark Green (R) • <i>Incumbent</i></a>
  •  <br/>
 <span class="Members--vote-pct">(69.9% of vote)</span>
 </strong></h2>,
 <h2><strong>
 										Kiran Sreepada (D)
 										<br/>
 
 </strong></h2>,
 <h2><strong>
 										Ronald Brown (I)
 										<br/>
 
 </strong></h2>,
 <h2><strong>
 										Scott Vieira Jr (I)
 										<br/>
 
 </strong></h2>,
 <h2 class="Donate-title f-strata-title c-navy" data-orphans='{"words":"2","wrap":"true"}'>
             We follow the money.  make it possible.
         </h2>,
 <h2 class="StayConnected-title c-white" data-orphans="">Count Cash &amp; Make

In [13]:
soup.findAll('h2', class_="Donate-title f-strata-title c-navy")

[<h2 class="Donate-title f-strata-title c-navy" data-orphans='{"words":"2","wrap":"true"}'>
             We follow the money.  make it possible.
         </h2>,
 <h2 class="Donate-title f-strata-title c-navy" data-orphans='{"words":"2","wrap":"true"}'>
             We follow the money.  make it possible.
         </h2>]

In [14]:
soup.findAll('h2', "StayConnected-title c-white")

[<h2 class="StayConnected-title c-white" data-orphans="">Count Cash &amp; Make Change</h2>]

headings = soup.findAll('h2')
name = []

for heading in headings:
    # Remove the <span> element with class "Members--vote-pct"
    span_element = heading.find('span', 'Members--vote-pct')
    h2_element = heading.find('h2', 'Donate-title f-strata-title c-navy')
    h2_element2 = heading.find('h2', 'StayConnected-title c-white')
    
    if span_element:
        span_element.decompose()
    if h2_element:
        h2_element.decompose()    
    if h2_element2:
        h2_element2.decompose()
        
    name.append(heading.get_text().strip())

print(n)

def remove_elements(soup, tag_name, class_name):
    elements = soup.find_all(tag_name, class_=class_name)
    for element in elements:
        element.decompose()


remove_elements(soup, 'h2', 'Donate-title f-strata-title c-navy')
remove_elements(soup, 'h2', 'StayConnected-title c-white')
remove_elements(soup, 'h2', 'f-strata-title f-uppercase')
remove_elements(soup, 'i', None)
remove_elements(soup, 'span', 'Members--vote-pct')


name = [element.get_text().replace('•', '').replace(' (R) ', '').replace(' (D) ', '').replace(' (I) ', '').strip() for element in soup.find_all('h2')]
name

In [22]:
def remove_elements(soup, tag_name, class_name):
    """
    remove elements by tag and class name
    """
    elements = soup.find_all(tag_name, class_=class_name)
    for element in elements:
        element.decompose()

# Remove the specific elements by class name
remove_elements(soup, 'h2', 'Donate-title f-strata-title c-navy')
remove_elements(soup, 'h2', 'StayConnected-title c-white')
remove_elements(soup, 'h2', 'f-strata-title f-uppercase')
remove_elements(soup, 'i', None)
remove_elements(soup, 'span', 'Members--vote-pct')

# Get the cleaned text
name = [re.sub(r'\s\([RDI]\)\s', '', element.get_text()).replace('•', '').strip() for element in soup.find_all('h2')]
party = [re.search(r'\((.*?)\)', element.get_text()).group(1) for element in soup.find_all('h2')]

In [23]:
name

['Mark Green', 'Kiran Sreepada', 'Ronald Brown', 'Scott Vieira Jr']

In [24]:
party

['R', 'D', 'I', 'I']

In [19]:
def remove_elements(soup, tag_name, class_name):
    """
    remove elements by tag and class name
    """
    elements = soup.find_all(tag_name, class_=class_name)
    for element in elements:
        element.decompose()

# Remove the specific elements by class name
remove_elements(soup, 'h2', 'Donate-title f-strata-title c-navy')
remove_elements(soup, 'h2', 'StayConnected-title c-white')
remove_elements(soup, 'h2', 'f-strata-title f-uppercase')
remove_elements(soup, 'i', None)
remove_elements(soup, 'span', 'Members--vote-pct')

# Get the cleaned text
party = [re.search(r'\((.*?)\)', element.get_text()).group(1) for element in soup.find_all('h2')]
party

['R', 'D', 'I', 'I']