# C-SPAN Tracker
Grabbing candidate info from their C-SPAN pages
<hr>

## Importing modules

In [68]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import re
from selenium import webdriver
import datetime as dt  
import numpy as np
from dotenv import load_dotenv
load_dotenv()
import os
import json

api_key = os.getenv("GMAP_API")
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
matplotlib.rcParams['pdf.fonttype'] = 42
import seaborn as sns

%matplotlib inline
plt.style.use('fivethirtyeight')

## Booting up BeautifulSoup
Grabbing candidate information from the Road to the White House Candidates page

In [2]:
# Fetch the "Road to the White House" series page. The timeout keeps the cell
# from hanging forever, and raise_for_status() stops us from parsing an HTTP
# error page as if it were the candidate list.
response = requests.get(
    "https://www.c-span.org/series/?roadToTheWhiteHouse#candidates-tab",
    timeout=30,
)
response.raise_for_status()

# Name the parser explicitly so the parse tree does not depend on which
# optional parsers happen to be installed on the machine running the notebook.
doc = BeautifulSoup(response.text, "html.parser")

In [3]:
# Narrow the page down to the <li class="D"> entries inside the candidate list
dems = (
    doc.find("ul", class_="candidate-list")
    .find_all("li", class_="D")
)
len(dems)

23

In [4]:
# Collect the per-candidate fields available on the all-candidates page

rows = []

for dem in dems:
    # The same anchor carries both the display name and the bio-page href.
    name_link = dem.find(class_="candidate D")

    row = {
        'Name': name_link.text.strip(),
        'Announce Date': dem.find(class_="datetime").text.strip(),
        'Bio': name_link["href"],
    }

    try:
        row['Slug'] = dem.find(class_="candidate-links").find_all("a")[1]['href']
    except (AttributeError, IndexError, KeyError):
        # No candidate-links element, fewer than two links, or no href on the
        # second link: leave 'Slug' unset for this candidate. Any other error
        # is unexpected and should surface instead of being swallowed.
        pass

    rows.append(row)

In [5]:
# Assemble the scraped rows into a table with an explicit column order
df = pd.DataFrame(
    data=rows,
    columns=["Name", "Slug", "Announce Date", "Bio"],
)

In [6]:
# Cast the bio links to plain strings, then display the column
df["Bio"] = df["Bio"].astype(str)
df["Bio"]

0          //www.c-span.org/person/?michaelbennet
1            //www.c-span.org/person/?josephbiden
2             //www.c-span.org/person/?corybooker
3         //www.c-span.org/person/?stevebullock02
4          //www.c-span.org/person/?petebuttigieg
5           //www.c-span.org/person/?juliancastro
6          //www.c-span.org/person/?johndelaney02
7           //www.c-span.org/person/?tulsigabbard
8      //www.c-span.org/person/?kirstengillibrand
9           //www.c-span.org/person/?kamalaharris
10     //www.c-span.org/person/?johnwhickenlooper
11             //www.c-span.org/person/?jayinslee
12          //www.c-span.org/person/?amyklobuchar
13           //www.c-span.org/person/?sethmoulton
14           //www.c-span.org/person/?betoorourke
15             //www.c-span.org/person/?timryan03
16         //www.c-span.org/person/?berniesanders
17          //www.c-span.org/person/?josephsestak
18          //www.c-span.org/person/?ericswalwell
19       //www.c-span.org/person/?elizabethwarren


## Cleaning it up a bit

In [7]:
# Raw strings keep the regex escapes (\s, \?) from being read as invalid Python
# string escapes. expand=False returns a Series for the single capture group,
# which is what a column assignment expects.
df["Announce Date"] = df["Announce Date"].str.extract(r":\s(.*)", expand=False)  # text after the first colon + whitespace
df['Slug'] = df['Slug'].str.extract(r"(personid.*)", expand=False)  # from "personid" to the end
df['Bio'] = df['Bio'].str.extract(r"\?(.*)", expand=False)  # everything after the first "?"

In [8]:
# Show the cleaned bio identifiers as strings
df["Bio"].astype(str)

0          michaelbennet
1            josephbiden
2             corybooker
3         stevebullock02
4          petebuttigieg
5           juliancastro
6          johndelaney02
7           tulsigabbard
8      kirstengillibrand
9           kamalaharris
10     johnwhickenlooper
11             jayinslee
12          amyklobuchar
13           sethmoulton
14           betoorourke
15             timryan03
16         berniesanders
17          josephsestak
18          ericswalwell
19       elizabethwarren
20    mariannewilliamson
21            andrewyang
22          billdeblasio
Name: Bio, dtype: object

In [9]:
# Manually fill in the Slug for the "josephsestak" row. Matching on the cleaned
# Bio value instead of a hard-coded position (row 17, column 1) keeps the patch
# on the right candidate if the scraped list changes length or order, and makes
# it a no-op if that candidate is no longer listed.
df.loc[df["Bio"] == "josephsestak", "Slug"] = "personid[]=1020918"

## Getting the dates formatted correctly

In [10]:
# Parse "Month day, Year" strings (unparseable values become NaT) and
# re-render them as MM/DD/YYYY text in a single chained expression.
df['Announce Date'] = (
    pd.to_datetime(df['Announce Date'], format="%B %d, %Y", errors='coerce')
    .dt.strftime("%m/%d/%Y")
)

## Scraping the bio pages

In [11]:
def scrape_bio(row):
    """Scrape one candidate's C-SPAN person page into a flat record.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide ``row["Bio"]``, the identifier appended to
        ``https://www.c-span.org/person/?`` to build the page URL.

    Returns
    -------
    pandas.Series
        Keys ``Name``, ``IMG``, ``Official Bio``, ``Description`` and
        ``Titles``. ``Official Bio`` is the first element whose ``href``
        points at a bioguide URL (or ``None``). ``Description`` and ``Titles``
        are lists of stripped strings. Fields that cannot be found on the page
        are left as ``None`` or an empty list rather than raising.

    Raises
    ------
    requests.HTTPError
        If the page responds with an HTTP error status.
    """
    # Pre-fill every key so each row yields the same columns even when the
    # page (or the slug itself) is missing.
    bio = {
        'Name': None,
        'IMG': None,
        'Official Bio': None,
        'Description': [],
        'Titles': [],
    }

    slug = row["Bio"]
    if pd.isna(slug) or not str(slug).strip():
        # Without a slug the URL would be the bare "person/?" page, which is
        # not a candidate profile, so there is nothing to fetch.
        print("Skipping row with no bio slug")
        return pd.Series(bio)

    # Accept both http and https bioguide links.
    bioguide_link = re.compile(r'https?://bioguide')
    url = "https://www.c-span.org/person/?" + str(slug)

    # Print the URL before fetching so a failure can be tied to a page.
    print("Scraping", url)

    response = requests.get(url, timeout=30)
    response.raise_for_status()
    doc = BeautifulSoup(response.text, "html.parser")

    # Drop every list item and the footer before reading the profile.
    for li in doc.find_all("li"):
        li.decompose()
    footer = doc.find('footer')
    if footer is not None:
        footer.decompose()

    profile = doc.find(id="content")
    if profile is None:
        print("No profile content found at", url)
        return pd.Series(bio)

    heading = profile.find('h1')
    if heading is not None:
        bio['Name'] = heading.text.strip()

    image = profile.find('img')
    if image is not None:
        bio['IMG'] = image.get('src')

    bio['Official Bio'] = profile.find(href=bioguide_link)

    bio['Description'] = [
        desc.text.strip() for desc in profile.find_all(itemprop="description")
    ]

    # Titles are searched in the whole (pruned) document, not just the profile.
    bio['Titles'] = [
        title.text.strip() for title in doc.find_all('span', class_=['title'])
    ]

    return pd.Series(bio)

In [12]:
# Run the scraper once per candidate row; each returned Series becomes a row
bio_df = df.apply(scrape_bio, axis="columns")

Scraping https://www.c-span.org/person/?michaelbennet
Scraping https://www.c-span.org/person/?michaelbennet
Scraping https://www.c-span.org/person/?josephbiden
Scraping https://www.c-span.org/person/?corybooker
Scraping https://www.c-span.org/person/?stevebullock02
Scraping https://www.c-span.org/person/?petebuttigieg
Scraping https://www.c-span.org/person/?juliancastro
Scraping https://www.c-span.org/person/?johndelaney02
Scraping https://www.c-span.org/person/?tulsigabbard
Scraping https://www.c-span.org/person/?kirstengillibrand
Scraping https://www.c-span.org/person/?kamalaharris
Scraping https://www.c-span.org/person/?johnwhickenlooper
Scraping https://www.c-span.org/person/?jayinslee
Scraping https://www.c-span.org/person/?amyklobuchar
Scraping https://www.c-span.org/person/?sethmoulton
Scraping https://www.c-span.org/person/?betoorourke
Scraping https://www.c-span.org/person/?timryan03
Scraping https://www.c-span.org/person/?berniesanders
Scraping https://www.c-span.org/person/?

In [13]:
# Persist the scraped bios without the row index
bio_df.to_csv(path_or_buf="candidate_info.csv", index=False)

In [14]:
# Display the scraped candidate bios
bio_df

Unnamed: 0,Name,IMG,Official Bio,Description,Titles
0,Michael Bennet,https://images.c-span.org/Files/6de/2019062300...,"<a href=""http://bioguide.congress.gov/scripts/...",[Michael Farrand Bennet is an American busines...,"[U.S. Senator, D-CO View Map]"
1,Joe Biden Jr.,https://images.c-span.org/Files/993/2019062423...,"<a href=""http://bioguide.congress.gov/scripts/...",[Joseph Robinette Biden Jr. is an American pol...,"[U.S. Vice President, United StatesD, U.S. Sen..."
2,Cory Booker,https://images.c-span.org/Files/dbe/2019062423...,"<a href=""http://bioguide.congress.gov/scripts/...",[Cory Anthony Booker is an American politician...,"[U.S. Senator, D-NJ View Map, Mayor, Newark, N..."
3,Steve Bullock,https://images.c-span.org/Files/ff2/2019051721...,,[On the C-SPAN Networks:Steve Bullock is a Gov...,"[Governor, MontanaD-Montana, Chair, Democratic..."
4,Pete Buttigieg,https://images.c-span.org/Files/e28/2019062221...,,[On the C-SPAN Networks:Pete Buttigieg is a Ma...,"[Mayor, South Bend, IND]"
5,Julian Castro,https://images.c-span.org/Files/fc6/2019062222...,,[On the C-SPAN Networks:Julian Castro was a Se...,"[Secretary, Department of Housing and Urban De..."
6,John K. Delaney,https://images.c-span.org/Files/26c/2019062222...,"<a href=""http://bioguide.congress.gov/scripts/...",[John Kevin Delaney is an American politician ...,"[U.S. Representative, D-MD 6th View Map, U.S. ..."
7,Tulsi Gabbard,https://images.c-span.org/Files/793/2019062913...,"<a href=""http://bioguide.congress.gov/scripts/...",[Tulsi Gabbard is an American politician servi...,"[U.S. Representative, D-HI 2nd View Map, Veter..."
8,Kirsten E. Gillibrand,https://images.c-span.org/Files/991/2019062220...,"<a href=""http://bioguide.congress.gov/scripts/...",[Kirsten Elizabeth Gillibrand is an American a...,"[U.S. Senator, D-NY View Map, U.S. Representat..."
9,Kamala D. Harris,https://images.c-span.org/Files/e2a/2019062421...,"<a href=""http://bioguide.congress.gov/scripts/...",[Kamala Devi Harris is an American attorney an...,"[U.S. Senator, D-CA View Map, District Attorne..."
