# Download Election Dataset
 
Download our State Board of Elections dataset from http://nc-campaign-finance-storage.s3-website-us-east-1.amazonaws.com/

In [None]:
pip install requests python-dateutil

In [None]:
from datetime import datetime, timezone
from dateutil import parser
from os import utime
from pathlib import Path
import requests

# Base URL that each entry of raw_files is appended to (joined with "/")
# to form the download URL.
DATA_URL = "http://nc-campaign-finance-storage.s3.amazonaws.com/sboe-raw-files"
# Local directory under which the files are stored; each raw_files entry is
# used as the relative path below it, so the remote layout is mirrored.
DATA_DIR = "./data"

# Relative paths of the CSV files to fetch: contributions and expenses.
# NOTE(review): the date separator in the file names is not uniform (hyphen
# for contributions 2010-2019, underscore elsewhere) -- presumably this
# mirrors the actual object names on the server; verify before normalizing.
raw_files = [
    "raw_files/contributions/contributions_20100101-20101231.csv",
    "raw_files/contributions/contributions_20110101-20111231.csv",
    "raw_files/contributions/contributions_20120101-20121231.csv",
    "raw_files/contributions/contributions_20130101-20131231.csv",
    "raw_files/contributions/contributions_20140101-20141231.csv",
    "raw_files/contributions/contributions_20150101-20151231.csv",
    "raw_files/contributions/contributions_20160101-20161231.csv",
    "raw_files/contributions/contributions_20170101-20171231.csv",
    "raw_files/contributions/contributions_20180101-20181231.csv",
    "raw_files/contributions/contributions_20190101-20191231.csv",
    # 2020 contributions are split into two half-year files.
    "raw_files/contributions/contributions_20200101_20200630.csv",
    "raw_files/contributions/contributions_20200701_20201231.csv",
    "raw_files/expenses/expenses_20100101_20101231.csv",
    "raw_files/expenses/expenses_20110101_20111231.csv",
    "raw_files/expenses/expenses_20120101_20121231.csv",
    "raw_files/expenses/expenses_20130101_20131231.csv",
    "raw_files/expenses/expenses_20140101_20141231.csv",
    "raw_files/expenses/expenses_20150101_20151231.csv",
    "raw_files/expenses/expenses_20160101_20161231.csv",
    "raw_files/expenses/expenses_20170101_20171231.csv",
    "raw_files/expenses/expenses_20180101_20181231.csv",
    "raw_files/expenses/expenses_20190101_20191231.csv",
    "raw_files/expenses/expenses_20200101_20201231.csv"
]

def download_file(url, path):
    """Download ``url`` to ``path`` unless an identical copy already exists.

    The local file is considered up to date when both its size and its
    modification time match the ``Content-Length`` and ``Last-Modified``
    response headers. After a successful download the file's access and
    modification times are set to the ``Last-Modified`` value so that the
    next run can recognise it as already downloaded.

    Args:
        url: URL of the file to fetch.
        path: ``pathlib.Path`` of the local destination; missing parent
            directories are created.

    Returns:
        True if the file was downloaded or skipped as already present,
        False if the download failed or was interrupted (any partial file
        is removed).

    Raises:
        requests.HTTPError: if the server answers with an error status.
        KeyError: if the response lacks a ``Content-Length`` or
            ``Last-Modified`` header.
    """
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        remote_size = int(response.headers["Content-Length"])
        remote_mtime = parser.parse(response.headers["Last-Modified"])
        if path.exists():
            stats = path.stat()
            local_size = stats.st_size
            local_mtime = datetime.fromtimestamp(stats.st_mtime, timezone.utc)

            if local_size == remote_size and local_mtime == remote_mtime:
                print(f'{path} skipped (already downloaded)')
                return True
        else:
            path.parent.mkdir(parents=True, exist_ok=True)

        try:
            with open(path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            remote_ts = remote_mtime.timestamp()
            # Only ``utime`` is imported from ``os``; calling ``os.utime``
            # raised NameError, which made every download look failed and
            # deleted the freshly written file.
            utime(path, times=(remote_ts, remote_ts))
        except BaseException:
            # Deliberately broad: a KeyboardInterrupt (e.g. interrupting the
            # notebook kernel) must also remove the partial file and be
            # reported to the caller as an interrupted download.
            try:
                path.unlink()
            except FileNotFoundError:
                # open() failed before the file was created; nothing to remove.
                pass
            print(f'{path} incomplete, deleted')
            return False
        else:
            print(f'{path} downloaded')
            return True


# Fetch every listed file in order; stop at the first one that fails so a
# later run can resume from there (finished files are skipped).
interrupted = False

for file in raw_files:
    path = Path(DATA_DIR) / file
    url = DATA_URL + "/" + file
    if download_file(url, path):
        continue
    interrupted = True
    break

# Report the overall outcome once, after the loop has ended.
print('Downloads were interrupted' if interrupted else "Downloads complete")
