In [10]:
import shutil
import urllib.request as request
from contextlib import closing
import os

# Get National Survey of Family Growth Data

In [11]:
# Paths of the NSFG 2015-2017 files to fetch. Each entry is used both as the
# path appended to the server URL and as the local path the file is written to.
nsfg_files = [
    '2015_2017_FemPregData.dat',
    '2015_2017_FemRespData.dat',
    '2015_2017_MaleData.dat',
    'stata/2015_2017_FemPregSetup.dct', # Stata dictionaries
    'stata/2015_2017_FemRespSetup.dct',
    'stata/2015_2017_MaleSetup.dct',
]

# The Stata dictionaries are written under ./stata, so it must exist before
# downloading. exist_ok=True makes re-running this cell a no-op when the
# directory is already there, while still raising if the path is occupied by
# something that is not a directory (instead of silently ignoring it).
os.makedirs('./stata', exist_ok=True)

def get_nsfg_data(files, server_url='ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/'):
    """Download NSFG data files into the current working directory.

    The NSFG data is provided on an FTP server. Each entry of ``files`` is
    appended to ``server_url`` to form the remote URL and is also used,
    unchanged, as the local (relative) destination path.

    Args:
        files: Iterable of relative file paths, e.g. ``'stata/foo.dct'``.
        server_url: Base URL the paths are appended to; it should end with
            ``'/'``. Defaults to the CDC NSFG FTP directory.

    Raises:
        urllib.error.URLError: If a file cannot be fetched.
        OSError: If a local file or directory cannot be written.
    """
    for file in files:
        # Make sure the destination directory exists so the function does not
        # depend on some other cell having created it first.
        parent = os.path.dirname(file)
        if parent:
            os.makedirs(parent, exist_ok=True)

        # Download to a temporary sibling and rename only on success, so an
        # interrupted transfer never leaves a truncated file at the final path.
        partial = file + '.part'
        try:
            with closing(request.urlopen(server_url + file)) as r:
                with open(partial, 'wb') as f:
                    shutil.copyfileobj(r, f)
            os.replace(partial, file)
        finally:
            # After a successful os.replace the partial file is gone, so this
            # only removes leftovers from a failed download.
            if os.path.exists(partial):
                os.remove(partial)
    

In [12]:
# Fetch every file listed in nsfg_files from the CDC FTP server and write it
# to the same relative path locally (requires network access).
# Note: this sometimes takes a few minutes.
get_nsfg_data(nsfg_files)

# Save all data file names to a file for use by our data cleaner

In [13]:
# Write one data file path per line. Opening in 'w' mode truncates any
# existing file, so no explicit delete is needed, and the file is opened once
# rather than once per line.
with open('all_data_files.txt', 'w') as f:
    for files in [nsfg_files,]: # Can add more lists of data files here if desired
        for file in files:
            f.write(file+'\n')