In [None]:
# Data handling libraries
import pandas as pd
#import fiona
#import geopandas as gpd

# Misc helpers
import json
from datetime import datetime, timedelta
import os

# Authentication
from configparser import ConfigParser

# Load credentials from an INI-style file. The path is relative, so it is
# resolved against the kernel's current working directory at the time this
# cell runs. ConfigParser.read() silently skips files it cannot find; in that
# case the config.get() call below is what raises.
config = ConfigParser()
config.read("../.env")
# FROM: https://resourcewatch.carto.com/u/wri-rw/your_apps
# Expects an [auth] section containing a carto_api_token entry.
carto_api_token = config.get("auth", "carto_api_token")

# URL interactions
import requests as req

# Libraries for downloading data from FTP
import shutil
import urllib.request as obj_req
from contextlib import closing

# Base URL of the FTP directory that is listed and downloaded from in the
# cells below; file names are appended directly, so keep the trailing slash.
remote_path = "ftp://satepsanone.nesdis.noaa.gov/FIRE/HMS/GIS/"

# data upload
import boto3
import sys
import threading

# S3 client; upload_file() is called on it in the upload cell.
s3_upload = boto3.client("s3")
# S3 resource handle.
# NOTE(review): not referenced in the visible cells - confirm it is still needed.
s3_download = boto3.resource("s3")
# Destination bucket and key prefix for the uploaded archive.
s3_bucket = "wri-public-data"
s3_folder = "resourcewatch/"
# Base name (without the ".zip" extension) of the archive that is built,
# uploaded, and referenced by the Carto sync URL.
zipped_file_name = "yesterday_smoke_shapefile"

class ProgressPercentage(object):
    """Callable progress reporter, passed as ``Callback`` to an upload.

    Each call adds the reported byte count to a running total and rewrites a
    single status line on stdout of the form
    ``<filename>  <bytes seen> / <file size>  (<percent>%)``.

    Parameters
    ----------
    filename : str
        Path of the local file being transferred. Its size is read once, at
        construction time, via ``os.path.getsize``.

    Raises
    ------
    OSError
        From ``os.path.getsize`` if ``filename`` does not exist or is not
        accessible.
    """

    def __init__(self, filename):
        self._filename = filename
        self._size = float(os.path.getsize(filename))
        self._seen_so_far = 0
        # The lock serialises updates in case the callback is invoked from
        # several threads at once.
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        """Record ``bytes_amount`` more transferred bytes and redraw the status line."""
        # To simplify we'll assume this is hooked up
        # to a single filename.
        with self._lock:
            self._seen_so_far += bytes_amount
            if self._size > 0:
                percentage = (self._seen_so_far / self._size) * 100
            else:
                # A zero-byte file has nothing left to transfer; report it as
                # complete instead of dividing by zero.
                percentage = 100.0
            # "\r" returns to the start of the line so the status is
            # overwritten in place rather than appended.
            sys.stdout.write("\r%s  %s / %s  (%.2f%%)" % (
                self._filename, self._seen_so_far, self._size,
                percentage))
            sys.stdout.flush()

In [None]:
# View smoke data files available on ftp.
# The response is wrapped in closing() so the FTP connection is released as
# soon as the listing has been read, matching the download cell below.
with closing(obj_req.urlopen(remote_path)) as listing_response:
    # Raw directory listing, one bytes entry per line.
    file = listing_response.read().splitlines()
file

In [6]:
# Download the most recent smoke data (open question: or the past 4 days?).
# If a history is wanted, the date formatting must also handle a
# 4-day interval that extends across two months.

def format_month(mon):
    """Return ``mon`` as a string, prefixed with "0" when it is below 10.

    Example: ``format_month(3)`` gives ``"03"`` and ``format_month(11)``
    gives ``"11"``.
    """
    padding = "0" if mon < 10 else ""
    return padding + str(mon)

def previous_day_parts(reference):
    """Return ``(year, month, day)`` strings for the day before ``reference``.

    The subtraction is done on the date itself, so month and year boundaries
    roll over correctly (the 1st of a month yields the last day of the
    previous month). Month and day are zero-padded to two digits, which is
    what the ``YYYYMMDD`` stamp in the remote file names requires.

    Parameters
    ----------
    reference : datetime.datetime or datetime.date
        The day to step back from.

    Returns
    -------
    tuple of (str, str, str)
        Four-digit year, two-digit month, two-digit day.
    """
    previous = reference - timedelta(days=1)
    return (
        str(previous.year),
        "{:02d}".format(previous.month),
        "{:02d}".format(previous.day),
    )


now = datetime.now()
year = str(now.year)
month = "{:02d}".format(now.month)
# Zero-padded so that single-digit days still produce an 8-digit date stamp.
today = "{:02d}".format(now.day)
# Yesterday may fall in a different month or year than today, so its year and
# month are tracked separately instead of reusing `year` / `month`.
yesterday_year, yesterday_month, yesterday = previous_day_parts(now)


def create_most_recent_file(year, month, day):
    """Build the preliminary smoke shapefile component names for one date.

    Parameters
    ----------
    year, month, day : str
        Date parts, already formatted as they should appear in the file name.

    Returns
    -------
    list of str
        The ``.shp``, ``.shx`` and ``.dbf`` file names, in that order.
    """
    return [
        "hms_smoke{}{}{}.prelim.{}".format(year, month, day, extension)
        for extension in ("shp", "shx", "dbf")
    ]


# recent_files = create_most_recent_file(year, month, today)
recent_files = create_most_recent_file(yesterday_year, yesterday_month, yesterday)

# Local staging folders: downloaded shapefile parts go into
# yesterday_smoke_folder; the zip archive is later written to smoke_folder.
# NOTE(review): these are machine-specific absolute paths - consider making
# them configurable.
smoke_folder = "/Users/nathansuberi/Desktop/RW_Data/Smoke/"
yesterday_smoke_folder = "/Users/nathansuberi/Desktop/RW_Data/Smoke/yesterday_smoke/"

# Create the staging folder (and any missing parents) up front; otherwise the
# open(..., 'wb') below fails when the folder does not exist yet.
os.makedirs(yesterday_smoke_folder, exist_ok=True)

for file in recent_files:
    ftp_loc = remote_path+file
    local_file = yesterday_smoke_folder + file
    print(local_file)
    # Stream the remote file straight to disk; closing() guarantees the FTP
    # response is released even if the copy fails part-way.
    with closing(obj_req.urlopen(ftp_loc)) as r:
        with open(local_file, 'wb') as f:
            shutil.copyfileobj(r, f)

/Users/nathansuberi/Desktop/RW_Data/Smoke/yesterday_smoke/hms_smoke20171019.prelim.shp
/Users/nathansuberi/Desktop/RW_Data/Smoke/yesterday_smoke/hms_smoke20171019.prelim.shx
/Users/nathansuberi/Desktop/RW_Data/Smoke/yesterday_smoke/hms_smoke20171019.prelim.dbf


In [7]:
# Prepare data, upload to S3

# Zip file: https://stackoverflow.com/questions/1855095/how-to-create-a-zip-archive-of-a-directory
# The archive is addressed by an absolute base name instead of calling
# os.chdir(): changing the kernel's working directory would silently break
# every relative path in the notebook (e.g. "../.env") on a later re-run.
archive_base = os.path.join(smoke_folder, zipped_file_name)
# make_archive appends the ".zip" extension and returns the archive's path.
archive_path = shutil.make_archive(archive_base, 'zip', yesterday_smoke_folder)

# Upload to S3; the progress callback prints the local archive path together
# with the bytes transferred so far.
s3_upload.upload_file(archive_path, s3_bucket, s3_folder + zipped_file_name + ".zip",
                      Callback=ProgressPercentage(archive_path))

# Delete original files and zipped files from local.
# Only regular files directly inside the two folders are removed;
# sub-directories are left in place.

# Unlink is the same as os.remove
for folder in [yesterday_smoke_folder, smoke_folder]:
    for file in os.listdir(folder):
        file_path = os.path.join(folder, file)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except OSError as e:
            # Best-effort cleanup: report the failure and keep going with the
            # remaining files rather than aborting the cell.
            print(e)

yesterday_smoke_shapefile.zip  24035 / 24035.0  (100.00%)

In [10]:
# Sync carto table with S3 url: register the uploaded archive's public URL
# with Carto so the table is re-imported on a fixed schedule.
data_url = "".join(["https://wri-public-data.s3.amazonaws.com/resourcewatch/", zipped_file_name, ".zip"])

# Sync interval in seconds, sent as a string.
# 3600 = sync every hour
# 3600 * 24 = sync every day
seconds_per_day = 3600*24
interval = str(seconds_per_day)

payload = dict(url=data_url, interval=interval)

# The API key travels as a query parameter on the synchronizations endpoint.
sync_url = "https://wri-rw.carto.com/api/v1/synchronizations/?api_key={}".format(carto_api_token)
headers = {'content-type': "application/json"}

# POST the JSON-encoded payload and show the raw response body; the "id"
# field of that response identifies the new synchronization.
request_body = json.dumps(payload)
res = req.post(sync_url, data=request_body, headers=headers)
print(res.text)

{"data_import":{"endpoint":"/api/v1/imports","item_queue_id":"d8d79dbe-dd5d-482b-9e17-b05007f5f3a5"},"id":"75148898-b5a7-11e7-9db9-0e831088b0bc","name":null,"interval":86400,"url":"https://wri-public-data.s3.amazonaws.com/resourcewatch/yesterday_smoke_shapefile.zip","state":"created","user_id":"c73d7d4a-5ff5-4d7c-bfb8-53dd1a6ce8f9","created_at":"2017-10-20T15:00:48+00:00","updated_at":"2017-10-20T15:00:48+00:00","run_at":"2017-10-21T15:00:48+00:00","ran_at":"2017-10-20T15:00:48+00:00","modified_at":null,"etag":null,"checksum":"","log_id":null,"error_code":null,"error_message":null,"retried_times":0,"service_name":null,"service_item_id":null,"type_guessing":true,"quoted_fields_guessing":true,"content_guessing":false,"visualization_id":null,"from_external_source":false}


In [19]:
# Force sync, will only go through if last sync was more than 15 minutes ago.
# The id below was copied by hand from the "id" field printed by the
# synchronization-creation cell; update it if that cell is run again.
import_id = "75148898-b5a7-11e7-9db9-0e831088b0bc"

sync_now_url = "https://wri-rw.carto.com/api/v1/synchronizations/{}/sync_now?api_key={}".format(import_id, carto_api_token)

# The PUT carries no body, so an explicit zero content-length is sent.
headers = {"content-length":"0"}

res = req.put(sync_now_url, headers=headers)
print(res.text)

{"enqueued":false,"synchronization_id":"75148898-b5a7-11e7-9db9-0e831088b0bc"}
