In [24]:

# File:  get_fars_data_files_from_server.ipynb

from ftplib import FTP
import os
import re
import requests
from zipfile import ZipFile

# International Classification of Diseases, Ninth Revision, Clinical Modification (ICD-9-CM) codes.
# https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0092052
# https://www.cpsc.gov/s3fs-public/2018-NEISS-CPSC-only-CodingManual.pdf

# https://www.cdc.gov/mmwr/preview/mmwrhtml/00000915.htm

# https://www.nhtsa.gov/crash-data-systems/crash-report-sampling-system-crss
# https://www.nhtsa.gov/research-data
# https://www.nhtsa.gov/research-data/fatality-analysis-reporting-system-fars
    
def retrieveFilesFromFARS(local_path, first_year=1975, last_year=2017):
    """Download the national FARS CSV archives from the NHTSA FTP server.

    For every year in ``first_year..last_year`` (inclusive) the remote file
    ``fars/<year>/National/FARS<year>NationalCSV.zip`` is fetched and stored
    as ``<local_path>/FARS<year>NationalCSV.zip``.

    The download is best-effort: a year that cannot be retrieved is reported
    on stdout and the loop moves on to the next year.  Each archive is first
    written to a temporary ``.part`` file and only renamed to its final name
    after the transfer succeeded, so a failed transfer never leaves a
    truncated ``.zip`` behind and never clobbers a good copy from an earlier
    run.

    Args:
        local_path: Existing directory that receives the archives (a trailing
            path separator is optional).
        first_year: First year to download (inclusive).
        last_year: Last year to download (inclusive).
    """
    # https://www.nhtsa.gov/research-data/fatality-analysis-reporting-system-fars
    # https://stackoverflow.com/questions/4696413/ftp-retrbinary-help-python
    # https://docs.python.org/3/library/ftplib.html

    # Leaving the 'with' block sends QUIT and closes the connection, so no
    # explicit ftp.quit() is needed.
    with FTP('ftp.nhtsa.dot.gov') as ftp:    # connect to host, default port
        ftp.login()                          # user anonymous, passwd anonymous@
        ftp.cwd('fars')                      # all archives live below fars/

        for year in range(first_year, last_year + 1):
            fname = 'FARS{}NationalCSV.zip'.format(year)
            print('Beginning file download {}'.format(fname), end = '')

            remote_path = '{}/National/{}'.format(year, fname)
            target = os.path.join(local_path, fname)
            partial = target + '.part'

            try:
                # Stream the archive into the temporary file; retrbinary
                # hands every received chunk straight to f.write.
                with open(partial, 'wb') as f:
                    ftp.retrbinary('RETR {}'.format(remote_path), f.write)
                # Only a completed transfer is promoted to the final name.
                os.replace(partial, target)
                print("  complete!")
            except Exception as e:
                # Best-effort batch job: report the failure and keep going.
                print()
                print("ERROR: ", e)
                try:
                    os.remove(partial)
                except OSError:
                    # Nothing to clean up (e.g. the file was never created).
                    pass

    print('Done')


def unzipFilesInDir(local_path):
    """Extract every ``.zip`` file located directly inside *local_path*.

    An archive ``<name>.zip`` is extracted into
    ``<local_path>/unzipped/<name>``.  Sub-directories are not searched, and
    entries whose extension is not exactly ``.zip`` are ignored.

    The operation is best-effort: an archive that cannot be extracted is
    reported on stdout and the remaining archives are still processed.  A
    failure to list *local_path* itself is reported the same way.

    Args:
        local_path: Directory containing the ``.zip`` files.
    """
    # https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory
    try:
        with os.scandir(local_path) as dir_entries:
            for entry in dir_entries:
                if not entry.is_file():
                    continue

                archive_name, extension = os.path.splitext(entry.name)
                if extension != '.zip':
                    continue

                # Build the destination with path functions rather than a
                # regex substitution, so archive names containing regex
                # metacharacters ('+', '(', '[', ...) are handled correctly.
                output_dir = os.path.join(
                    os.path.dirname(entry.path), 'unzipped', archive_name)

                try:
                    with ZipFile(entry.path, 'r') as zip_obj:
                        # Extract all the contents into the per-archive directory
                        zip_obj.extractall(output_dir)
                except Exception as e:
                    # One corrupt archive must not stop the rest of the batch.
                    print()
                    print("ERROR: ", '{}: {}'.format(entry.name, e))
                    continue

                print('{} unzipped to {}'.format(entry.name, output_dir))
        print('Done')
    except Exception as e:
        print()
        print("ERROR: ", e)


# Root directory holding the downloaded FARS archives and their extracted contents.
local_path = '/Users/mcorbett/Boston University/MET CS677 - Data Science with Python/Project/data/FARS/'

# One-time setup steps; uncomment to download and extract the archives again.
# retrieveFilesFromFARS(local_path)
# unzipFilesInDir(local_path)

# Full path of every file found anywhere beneath local_path.
flist = [
    os.path.join(dirpath, name)
    for dirpath, _subdirs, names in os.walk(local_path)
    for name in names
]

# List the per-year accident tables.
for fname in flist:
    if not fname.endswith('ACCIDENT.CSV'):
        continue
    print(fname)

/Users/mcorbett/Boston University/MET CS677 - Data Science with Python/Project/FARS_data/unzipped/FARS1981NationalCSV/ACCIDENT.CSV
/Users/mcorbett/Boston University/MET CS677 - Data Science with Python/Project/FARS_data/unzipped/FARS2009NationalCSV/ACCIDENT.CSV
/Users/mcorbett/Boston University/MET CS677 - Data Science with Python/Project/FARS_data/unzipped/FARS1990NationalCSV/ACCIDENT.CSV
/Users/mcorbett/Boston University/MET CS677 - Data Science with Python/Project/FARS_data/unzipped/FARS2004NationalCSV/ACCIDENT.CSV
/Users/mcorbett/Boston University/MET CS677 - Data Science with Python/Project/FARS_data/unzipped/FARS1976NationalCSV/ACCIDENT.CSV
/Users/mcorbett/Boston University/MET CS677 - Data Science with Python/Project/FARS_data/unzipped/FARS1989NationalCSV/ACCIDENT.CSV
/Users/mcorbett/Boston University/MET CS677 - Data Science with Python/Project/FARS_data/unzipped/FARS2001NationalCSV/ACCIDENT.CSV
/Users/mcorbett/Boston University/MET CS677 - Data Science with Python/Project/FARS