In [1]:
import requests
import json
from bs4 import BeautifulSoup
import re
import os
import subprocess
from modis_scraper import MODIS_Scraper

In [1]:
# Downloading and pre-processing NVDI data from USGS MODIS website 
# Converting HDF files into netCDF format (.nc)
# Crop and re-project data to the same grid as mask.nc

In [14]:
products = ['MOLT/MOD13C1.006/', 'MOLA/MYD13C1.006/']
f_path = '/Users/Chiao/google-drive/projects/Galvanize/fall-foliage-finder/data/nvdi/nc/'

for product in products:
    m = MODIS_Scraper(mask='/Users/Chiao/google-drive/projects/Galvanize/fall-foliage-finder/data/mask.nc')
    dates = m.get_dates(product, tiled=False, convert=True)
    m.get_data(f_path, format='hdf')


Downloading for 2000.02.18/
Downloading for 2000.03.05/
Downloading for 2000.03.21/
Downloading for 2000.04.06/
Downloading for 2000.04.22/
Downloading for 2000.05.08/
Downloading for 2000.05.24/
Downloading for 2000.06.09/
Downloading for 2000.06.25/
Downloading for 2000.07.11/
Downloading for 2000.07.27/
Downloading for 2000.08.12/
Downloading for 2000.08.28/
Downloading for 2000.09.13/
Downloading for 2000.09.29/
Downloading for 2000.10.15/
Downloading for 2000.10.31/
Downloading for 2000.11.16/
Downloading for 2000.12.02/
Downloading for 2000.12.18/
Downloading for 2001.01.01/
Downloading for 2001.01.17/
Downloading for 2001.02.02/
Downloading for 2001.02.18/
Downloading for 2001.03.06/
Downloading for 2001.03.22/
Downloading for 2001.04.07/
Downloading for 2001.04.23/
Downloading for 2001.05.09/
Downloading for 2001.05.25/
Downloading for 2001.06.10/
Downloading for 2001.06.26/
Downloading for 2001.07.12/
Downloading for 2001.07.28/
Downloading for 2001.08.13/
Downloading for 2001

In [6]:
f_path = '/Users/Chiao/google-drive/projects/Galvanize/fall-foliage-finder/data/nvdi/nc/'
files = os.listdir(f_path)

def cdo_merge(f1, f2):
    subprocess.call(['cdo', 'merge', f_path+f1, f_path+f2, f_path+f2+'.out.nc'])
    return f2+'.out.nc'

reduce(cdo_merge, files)

'2011.12.27.mask.nc.out.nc.out.nc'

In [None]:
# Getting gridded weather data from UW server
# Reference: http://www.hydro.washington.edu/SurfaceWaterGroup/Data/livneh/livneh.et.al.2013.page.html
# Citation: Livneh, B., E. A. Rosenberg, C. Lin, B. Nijssen, V. Mishra, K. M. Andreadis, E. P. Maurer, and 
#     D. P. Lettenmaier, 2013: A Long-Term Hydrologically Based Dataset of Land Surface Fluxes and States for the 
#    Conterminous United States: Update and Extensions. J . Climate, 26.

# Version: ftp://ftp.hydro.washington.edu/pub/blivneh/CONUS/Meteorology.nc.v.1.2.1915.2011.bz2/VERSION_ID
#Version 1.2
#March, 2014
#ben.livneh@colorado.edu

In [2]:
%%writefile get_livneh_weather.py
import numpy as np
import subprocess

base_url = 'ftp://ftp.hydro.washington.edu/pub/blivneh/CONUS/Meteorology.nc.v.1.2.1915.2011.bz2/'
base_fname = 'Meteorology_Livneh_CONUSExt_v.1.2_2013.{t}.nc.bz2'

f_path = '/Users/Chiao/google-drive/projects/Galvanize/fall-foliage-finder/data/weather/'
mask = '/Users/Chiao/google-drive/projects/Galvanize/fall-foliage-finder/data/mask.nc'

yrs = np.arange(2002, 2012)
months = np.arange(1, 13)
variables = ['Prec', 'Tmax', 'Tmin', 'Wind']

for yr in yrs:
    for month in months:
        time = '{yr}{month:02d}'.format(yr=yr, month=month)
        fname = f_path + time + '{mod}{ext}'
        print 'Downloading for', time
        # Obtaining compressed file from UW ftp server
        subprocess.call(['wget', '-O', fname.format(mod='', ext='.nc.bz2'), base_url + base_fname.format(t=time)])
        # Unzip the file
        subprocess.call(['bunzip2', fname.format(mod='', ext='.nc.bz2')])
        
        print 'Processing for', time
        # Convert float data into integer, change data type into short to reduce file size (by half!)
        subprocess.call(['cdo', 'int', fname.format(mod='', ext='.nc'), fname.format(mod='.int', ext='.nc')])
        subprocess.call(['ncap2', '-s', 'Prec=short(Prec);Tmax=short(Tmax);Tmin=short(Tmin);Wind=short(Wind)',\
                        fname.format(mod='.int', ext='.nc'), fname.format(mod='.short', ext='.nc')])
        
        # Subsetting out each variable since different remap functions are being used
        for var in variables:
            subprocess.call(['cdo', 'selname,{var}'.format(var=var), \
                            fname.format(mod='.short', ext='.nc'), fname.format(mod='.short.'+str(var), ext='.nc')])
        subprocess.call(['cdo', 'remapcon,{grid}'.format(grid=mask), \
                        fname.format(mod='.short.Prec', ext='.nc'), fname.format(mod='.Prec', ext='.nc')])
        subprocess.call(['cdo', 'remapbil,{grid}'.format(grid=mask), \
                        fname.format(mod='.short.Tmax', ext='.nc'), fname.format(mod='.Tmax', ext='.nc')])
        subprocess.call(['cdo', 'remapbil,{grid}'.format(grid=mask), \
                        fname.format(mod='.short.Tmin', ext='.nc'), fname.format(mod='.Tmin', ext='.nc')])
        subprocess.call(['cdo', 'remapbil,{grid}'.format(grid=mask), \
                        fname.format(mod='.short.Wind', ext='.nc'), fname.format(mod='.Wind', ext='.nc')])
        
        # Clean up the directories
        subprocess.call(['rm', fname.format(mod='', ext='.nc'), fname.format(mod='.int', ext='.nc'), \
                        fname.format(mod='.short', ext='.nc')])
        for var in variables:
            subprocess.call(['rm', fname.format(mod='.short.'+str(var), ext='.nc')])

Writing get_livneh_weather.py


In [None]:
%run get_liveneh_weather.py

In [3]:
# Getting historic climate normals from PRISM project
# http://www.prism.oregonstate.edu/documents/PRISM_datasets.pdf
# http://www.prism.oregonstate.edu/normals/

In [26]:
%%writefile process_prism.py
# Obtained historic climate normals from PRISM project
# http://www.prism.oregonstate.edu/documents/PRISM_datasets.pdf
# http://www.prism.oregonstate.edu/normals/

import subprocess
import numpy as np

variables = ['ppt', 'tmean']
folder = '/Users/Chiao/google-drive/projects/Galvanize/fall-foliage-finder/data/normals/'
base_fname = 'PRISM_{var}_30yr_normal_4kmM2_all_asc/PRISM_{var}_30yr_normal_4kmM2_{m:02d}_asc.asc'
grid = '/Users/Chiao/google-drive/projects/Galvanize/fall-foliage-finder/data/mask.nc'
base_nc_name = '{var}.{m:02d}.nc'
gdal = '/opt/local/bin/gdal_translate'

months = np.arange(1, 13)

for var in variables:
    files = []
    for month in months:
        subprocess.call([gdal, '-of', 'netCDF', 
                         folder+base_fname.format(var=var, m=month), 
                         folder+base_nc_name.format(var=var, m=month)])
        files.append(folder+base_nc_name.format(var=var, m=month))
    subprocess.call(['cdo', 'merge', ' '.join(files), folder+'{var}.monthly.nc'.format(var=var)])
    for f in files:
        subprocess.call(['rm', f])

subprocess.call(['cdo', 'remapcon,{grid}'.format(grid=grid), 
                    folder+'ppt.monthly.nc', folder+'ppt.monthly.mask.nc'])

subprocess.call(['cdo', 'remapbil,{grid}'.format(grid=grid), 
                    folder+'tmean.monthly.nc', folder+ 'tmean.monthly.mask.nc'])

Overwriting process_prism.py


In [27]:
%run process_prism.py