In [2]:
import geopandas as gpd
import numpy as np
from pathlib import Path
import os
import requests
import pandas as pd

In [4]:
ls tl_2019_us_primaryroads

[31mtl_2019_us_primaryroads.cpg[m[m*            [31mtl_2019_us_primaryroads.shp.ea.iso.xml[m[m*
[31mtl_2019_us_primaryroads.dbf[m[m*            tl_2019_us_primaryroads.shp.iso.gfs
[31mtl_2019_us_primaryroads.prj[m[m*            [31mtl_2019_us_primaryroads.shp.iso.xml[m[m*
[31mtl_2019_us_primaryroads.shp[m[m*            [31mtl_2019_us_primaryroads.shx[m[m*


In [5]:
gdf = gpd.read_file('tl_2019_us_primaryroads/tl_2019_us_primaryroads.shp')

# Download US Census Bureau TIGER Data

In [10]:
f'{2:02d}'

'02'

In [38]:
len(STATES), len(NUMS)

(51, 51)

In [42]:
FOLDERNAMES[20]

'24_MARYLAND'

In [6]:
STATES = ('alabama', 'alaska', 'arizona', 'arkansas', 'california', 'colorado', 'connecticut',
         'delaware', 'district of columbia', 'florida', 'georgia', 'hawaii', 'idaho', 'illinois',
         'indiana', 'iowa', 'kansas', 'kentucky', 'louisiana', 'maine', 'maryland', 'massachusetts', 'michigan',
         'minnesota', 'mississippi', 'missouri', 'montana', 'nebraska', 'nevada', 'new hampshire', 
         'new jersey', 'new mexico', 'new york', 'north carolina', 'north dakota', 'ohio', 'oklahoma',
         'oregon', 'pennsylvania', 'rhode island', 'south carolina', 'south dakota', 'tennessee', 
         'texas', 'utah', 'vermont', 'virginia', 'washington', 'west virginia', 'wisconsin', 'wyoming')

NUMS = (1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
       27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 
       49, 50, 51, 53, 54, 55, 56)

In [7]:
base_url = "https://www2.census.gov/geo/tiger/TIGER_RD18/STATE"


In [8]:
def gen_state_folder(state, num):
    return '_'.join([f'{num:02d}', state.upper().replace(' ', '_')]) 

In [None]:
states = []
nums = []
state_folders = []
idnums = []
fullpaths = []

for state, num in zip(STATES, NUMS):
    state_folder = gen_state_folder(state, num)
    state_url = '/'.join([base_url, state_folder])
    
    index = 1
    
    while True:
        idnum = f'{num:02d}{index:03d}'

        #print(f'{state_folder=} {idnum=}')

        zipname = f'tl_rd22_{idnum}_roads.zip'
        full_path = '/'.join([state_folder, idnum, zipname])

        full_url = '/'.join([state_url, idnum, zipname])

        #print(f'{full_url=}')
        
        r = requests.get(full_url)
        
        if r.ok:
            savepath = Path(state_folder, zipname)
            savepath.parent.mkdir(exist_ok=True)
            with open(savepath, 'wb') as f:
                f.write(r.content)
                
            states.append(state)
            nums.append(num)
            state_folders.append(state_folder)
            idnums.append(idnum)
            fullpaths.append(full_path)
        else:
            print(f'Failed to fetch {full_url}, moving to next state')
            break
        
        index += 2

Failed to fetch https://www2.census.gov/geo/tiger/TIGER_RD18/STATE/01_ALABAMA/01135/tl_rd22_01135_roads.zip, moving to next state
Failed to fetch https://www2.census.gov/geo/tiger/TIGER_RD18/STATE/02_ALASKA/02001/tl_rd22_02001_roads.zip, moving to next state
Failed to fetch https://www2.census.gov/geo/tiger/TIGER_RD18/STATE/04_ARIZONA/04029/tl_rd22_04029_roads.zip, moving to next state
Failed to fetch https://www2.census.gov/geo/tiger/TIGER_RD18/STATE/05_ARKANSAS/05151/tl_rd22_05151_roads.zip, moving to next state
Failed to fetch https://www2.census.gov/geo/tiger/TIGER_RD18/STATE/06_CALIFORNIA/06117/tl_rd22_06117_roads.zip, moving to next state
Failed to fetch https://www2.census.gov/geo/tiger/TIGER_RD18/STATE/08_COLORADO/08127/tl_rd22_08127_roads.zip, moving to next state
Failed to fetch https://www2.census.gov/geo/tiger/TIGER_RD18/STATE/09_CONNECTICUT/09017/tl_rd22_09017_roads.zip, moving to next state
Failed to fetch https://www2.census.gov/geo/tiger/TIGER_RD18/STATE/10_DELAWARE/100

In [12]:
df = pd.DataFrame({'state': states, 
                   'statenum': nums, 
                   'statefolder': state_folders,
                   'idnum': idnums,
                   'fullpath': fullpaths
                  })

In [14]:
df.to_csv('state-roads-data.csv', index=False)

In [45]:
r = requests.get('https://www2.census.gov/geo/tiger/TIGER_RD18/STATE/24_MARYLAND/')

In [46]:
r.content

b'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">\n<html>\n <head>\n  <title>Index of /geo/tiger/TIGER_RD18/STATE/24_MARYLAND</title>\n </head>\n <body>\n<!-- GOV BANNER AND SIMPLE HEADER LAST UPDATED 11/17/2021 -->\n<!DOCTYPE html>\n<html>\n  <head>\n    <meta charset="utf-8" />\n    <script src="https://www.census.gov/main/uswds/uswds-2.12.0/js/uswds-init.min.js"></script>\n   <!-- <link rel="stylesheet" href="https://www.census.gov/main/uswds/uswds-2.12.0/css/uswds.min.css" /> -->\n  <!--  <link rel="stylesheet" href="https://www.census.gov/main/.in/css/all_header_styles.css" /> -->\n  <link rel="stylesheet" href="/main/css/all_header_styles.css" />\n   \n   \n  </head>\n  <body>\n    <script src="https://www.census.gov/main/uswds/uswds-2.12.0/js/uswds.min.js"></script>\n    <section class="usa-banner" aria-label="Official government website">\n        <div class="usa-accordion">\n          <header class="usa-banner__header">\n            <div class="usa-banner__inner">\n   

# Load manifest

In [3]:
manifest = pd.read_csv('state-roads-data.csv')

In [4]:
manifest

Unnamed: 0,state,statenum,statefolder,idnum,fullpath
0,alabama,1,01_ALABAMA,1001,01_ALABAMA/01001/tl_rd22_01001_roads.zip
1,alabama,1,01_ALABAMA,1003,01_ALABAMA/01003/tl_rd22_01003_roads.zip
2,alabama,1,01_ALABAMA,1005,01_ALABAMA/01005/tl_rd22_01005_roads.zip
3,alabama,1,01_ALABAMA,1007,01_ALABAMA/01007/tl_rd22_01007_roads.zip
4,alabama,1,01_ALABAMA,1009,01_ALABAMA/01009/tl_rd22_01009_roads.zip
...,...,...,...,...,...
2683,wyoming,56,56_WYOMING,56037,56_WYOMING/56037/tl_rd22_56037_roads.zip
2684,wyoming,56,56_WYOMING,56039,56_WYOMING/56039/tl_rd22_56039_roads.zip
2685,wyoming,56,56_WYOMING,56041,56_WYOMING/56041/tl_rd22_56041_roads.zip
2686,wyoming,56,56_WYOMING,56043,56_WYOMING/56043/tl_rd22_56043_roads.zip


In [12]:
for state, num in zip(STATES, NUMS):
    state_folder = gen_state_folder(state, num)
    state_url = '/'.join([base_url, state_folder, f'{num:02d}'])
    zipname = f'tl_rd22_{num:02d}_prisecroads.zip'
    full_url = '/'.join([state_url, zipname])
    
    r = requests.get(full_url)
    
    if r.ok:
        savepath = Path('us_tiger_roads', state_folder, zipname)
        savepath.parent.mkdir(exist_ok=True)
        with open(savepath, 'wb') as f:
            f.write(r.content)
    else:
        print(f'Failed to fetch {full_url}...')