In [1]:
import pandas as pd

In [2]:
url = 'https://www.nrcs.usda.gov/wps/portal/nrcs/detail/?cid=nrcs143_013696'

# Pandas

In [3]:
# read table data from webpage
table_data = pd.read_html(url)

In [4]:
# specify table
table_data[0]

Unnamed: 0,Name,Postal Code,FIPS
0,Alabama,AL,1.0
1,Alaska,AK,2.0
2,Arizona,AZ,4.0
3,Arkansas,AR,5.0
4,California,CA,6.0
5,Colorado,CO,8.0
6,Connecticut,CT,9.0
7,Delaware,DE,10.0
8,Florida,FL,12.0
9,Georgia,GA,13.0


In [5]:
# save to file
# table_data[0].to_csv('table_scrape.csv')

# BeautifulSoup

In [6]:
import requests
from bs4 import BeautifulSoup

In [7]:
# parse webpage
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

In [8]:
# specify table to scrape
table = soup.find('table', class_='data')

In [9]:
# select all table rows
table_data = table.tbody.find_all("tr")
table_data[0:3]

[<tr>
 <th scope="col">
 				Name</th>
 <th scope="col">
 				Postal Code</th>
 <th scope="col">
 				FIPS</th>
 </tr>, <tr>
 <td>
 				Alabama</td>
 <td>
 				AL</td>
 <td>
 				01</td>
 </tr>, <tr>
 <td>
 				Alaska</td>
 <td>
 				AK</td>
 <td>
 				02</td>
 </tr>]

In [10]:
# extract heading data
headings = []
for heading in table_data[0].find_all("th"):
    # add header to list; remove newlines and spaces
    headings.append(heading.text.replace('\n', ' ').strip())

In [11]:
headings

['Name', 'Postal Code', 'FIPS']

In [12]:
# extract table data
data = []
# iterate through each row (skip heading row)
for row in table_data[1:]:
    row_data = {}
    # iterate through each column in row
    for heading, column in zip(headings, row.find_all("td")):
        # save column values for each heading to dictionary
        row_data[heading] = column.text.replace('\n', '').strip()
    # add data for each row to data list
    data.append(row_data)

In [13]:
# create dataframe with extracted data list
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Postal Code,FIPS
0,Alabama,AL,1
1,Alaska,AK,2
2,Arizona,AZ,4
3,Arkansas,AR,5
4,California,CA,6
5,Colorado,CO,8
6,Connecticut,CT,9
7,Delaware,DE,10
8,Florida,FL,12
9,Georgia,GA,13


In [14]:
# save to file
# df.to_csv('table_scrape.csv')

# Selenium

In [15]:
from selenium import webdriver

In [16]:
# load url into selenium
drv_loc = '/usr/bin/chromedriver'
driver = webdriver.Chrome(drv_loc)
driver.get(url)

In [17]:
# extract heading data
headings = []
for heading in driver.find_elements_by_xpath('//table[@class="data"]/tbody/tr/th'):
    # add header to list; remove newlines and spaces
    headings.append(heading.text)

In [18]:
headings

['Name', 'Postal Code', 'FIPS']

In [19]:
# extract table data
data = []
# iterate through each row (skip heading row)
for row in driver.find_elements_by_xpath('//table[@class="data"]/tbody/tr')[1:]:
    row_data = {}
    # iterate through each column in row
    for heading, column in zip(headings, row.find_elements_by_tag_name('td')):
        # save column values for each heading to dictionary
        row_data[heading] = column.text
    # add data for each row to data list
    data.append(row_data)

In [20]:
driver.close()

In [21]:
# create dataframe with extracted data list
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Postal Code,FIPS
0,Alabama,AL,1
1,Alaska,AK,2
2,Arizona,AZ,4
3,Arkansas,AR,5
4,California,CA,6
5,Colorado,CO,8
6,Connecticut,CT,9
7,Delaware,DE,10
8,Florida,FL,12
9,Georgia,GA,13


In [22]:
# save to file
# df.to_csv('table_scrape.csv')