In [1]:
# Shell escape: print the path of the `python` executable found on the shell PATH.
! which python

/Users/j/miniconda/envs/nasa/bin/python


**Dependencies**

**Download**

In [8]:
#!/usr/bin/env python
 
# script supports either python2 or python3
#
# Attempts to do HTTP GETs with urllib2 (py2) or urllib.request (py3), falling back
# to curl via subprocess if tlsv1.1+ isn't supported by the python ssl module
#
# Will download csv or json depending on which python module is available
#
 
from __future__ import (division, print_function, absolute_import, unicode_literals)
 
import argparse
import os
import os.path
import shutil
import sys
 
try:
    from StringIO import StringIO   # python2
except ImportError:
    from io import StringIO         # python3

# Value sent as the 'user-agent' request header by geturl(): a fixed script
# identifier followed by the interpreter version string with CR/LF removed.
USERAGENT = 'tis/download.py_1.0--' + sys.version.replace('\n','').replace('\r','')
 
def geturl(url, token=None, out=None):
    '''Fetch `url` with an HTTP GET.

    Parameters:
        url: address to request.
        token: optional bearer token; when given it is sent in an
            `Authorization: Bearer <token>` header.
        out: optional writable binary file object. When given, the response
            body is copied into it instead of being returned.

    Returns:
        The response body when `out` is None and the request succeeds
        (UTF-8 decoded text on Python 3 and on the curl path, the raw
        urllib2 payload on Python 2). None when `out` is given or when the
        request fails. HTTP/URL errors and curl failures are reported on
        stderr instead of being raised.
    '''
    headers = { 'user-agent' : USERAGENT }
    if token is not None:
        headers['Authorization'] = 'Bearer ' + token

    # Only the TLS context construction is probed for AttributeError. Keeping
    # the whole request inside that handler would silently reroute unrelated
    # AttributeErrors (e.g. from error reporting) to the curl fallback.
    # NOTE(review): a context built with ssl.SSLContext(PROTOCOL_TLSv1_2) does
    # not verify the server certificate by default, so the bearer token is
    # sent without authenticating the peer. Consider
    # ssl.create_default_context() once certificate stores are confirmed.
    try:
        import ssl
        CTX = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
    except AttributeError:
        CTX = None

    if CTX is not None:
        if sys.version_info[0] == 2:
            import urllib2
            try:
                fh = urllib2.urlopen(urllib2.Request(url, headers=headers), context=CTX)
                try:
                    if out is None:
                        return fh.read()
                    shutil.copyfileobj(fh, out)
                finally:
                    fh.close()
            except urllib2.HTTPError as e:
                # `code` is an int attribute, not a method.
                print('HTTP GET error code: %d' % e.code, file=sys.stderr)
                print('HTTP GET error message: %s' % e.msg, file=sys.stderr)
            except urllib2.URLError as e:
                print('Failed to make request: %s' % e.reason, file=sys.stderr)
            return None

        else:
            from urllib.request import urlopen, Request
            from urllib.error import URLError, HTTPError
            try:
                fh = urlopen(Request(url, headers=headers), context=CTX)
                try:
                    if out is None:
                        return fh.read().decode('utf-8')
                    shutil.copyfileobj(fh, out)
                finally:
                    fh.close()
            except HTTPError as e:
                # HTTPError exposes `code` (int) and `reason`; it has no
                # `message` attribute on Python 3.
                print('HTTP GET error code: %d' % e.code, file=sys.stderr)
                print('HTTP GET error message: %s' % e.reason, file=sys.stderr)
            except URLError as e:
                print('Failed to make request: %s' % e.reason, file=sys.stderr)
            return None

    # The ssl module lacks TLSv1.2 support (e.g. old OS X Pythons): use curl.
    import subprocess
    try:
        args = ['curl', '--fail', '-sS', '-L', '--get', url]
        for (k,v) in headers.items():
            args.extend(['-H', ': '.join([k, v])])
        if out is None:
            # python3's subprocess.check_output returns stdout as a byte string
            result = subprocess.check_output(args)
            return result.decode('utf-8') if isinstance(result, bytes) else result
        else:
            # check_call (unlike call) raises CalledProcessError when curl
            # exits non-zero, so failed downloads are reported below.
            subprocess.check_call(args, stdout=out)
    except subprocess.CalledProcessError as e:
        # Report only the exit status: str(e) would echo the full command
        # line, including the Authorization header.
        print('curl GET error message: curl exited with status %d' % e.returncode, file=sys.stderr)
    return None

def sync(src, dest, tok):
    '''Synchronize the remote directory at URL `src` into local directory `dest`.

    The directory listing is fetched from `src + '.csv'` (or `src + '.json'`
    when the csv module cannot be imported). Each entry must provide 'name'
    and 'size'. Entries whose size is 0 are treated as sub-directories and
    synchronized recursively; all other entries are downloaded unless a file
    with that name already exists in `dest`.

    Parameters:
        src: URL of the remote directory.
        dest: existing local directory to download into.
        tok: bearer token passed through to geturl().

    Returns:
        0 after all entries have been processed. Local filesystem errors are
        reported on stderr and terminate the process via sys.exit(-1).
    '''
    try:
        import csv
        files = list(csv.DictReader(StringIO(geturl('%s.csv' % src, tok)), skipinitialspace=True))
    except ImportError:
        import json
        files = json.loads(geturl(src + '.json', tok))

    # use os.path since python 2/3 both support it while pathlib is 3.4+
    for f in files:
        # currently we use filesize of 0 to indicate directory
        filesize = int(f['size'])
        path = os.path.join(dest, f['name'])
        url = src + '/' + f['name']
        if filesize == 0:
            try:
                # Only create the directory when it is missing so that a
                # re-run can resume into a partially synchronized tree.
                if not os.path.isdir(path):
                    print('creating dir:', path)
                    os.mkdir(path)
            except EnvironmentError as e:
                # EnvironmentError covers both IOError and OSError on
                # Python 2; on Python 3 all three are the same class.
                print("mkdir `%s': %s" % (e.filename, e.strerror), file=sys.stderr)
                sys.exit(-1)
            # Recurse outside the mkdir handler so that errors raised while
            # syncing the sub-directory are not reported as mkdir failures.
            sync(url, path, tok)
        else:
            try:
                if not os.path.exists(path):
                    print('downloading: ' , path)
                    with open(path, 'w+b') as fh:
                        geturl(url, tok, fh)
                    # geturl() reports failures on stderr but returns None
                    # either way. The listing says this entry is non-empty,
                    # so a zero-byte result means the download failed; remove
                    # it so the next run retries instead of skipping it.
                    # (Truncated non-empty downloads are not detected here.)
                    if os.path.getsize(path) == 0:
                        os.remove(path)
                        print('download failed, removed empty file: %s' % path, file=sys.stderr)
                else:
                    print('skipping: ', path)
            except EnvironmentError as e:
                print("open `%s': %s" % (e.filename, e.strerror), file=sys.stderr)
                sys.exit(-1)
    return 0
 

**Run this line to download files**

In [22]:
import os

# Year of data to download. NOTE(review): the original cell had an unfinished
# `years=` assignment and an undefined `year`; 2015 is taken from the download
# cell in this notebook -- confirm the intended value(s).
year = 2015
destination = "data/{}".format(year)


In [23]:
# Read urls
# The download token is a credential, so it is read from the environment
# instead of being stored in the notebook. A token that was previously
# committed here should be revoked and regenerated.
token = os.environ.get("LAADS_TOKEN")
if not token:
    raise RuntimeError("Set the LAADS_TOKEN environment variable to your download token")
url = "https://ladsweb.modaps.eosdis.nasa.gov/archive/archive/allData/61/MOD04_3K/2015/001/"
destination = "data/2015/"

# sync() writes into `destination` but does not create it.
if not os.path.isdir(destination):
    os.makedirs(destination)

sync( url, destination, token)

downloading:  data/2015/MOD04_3K.A2015001.0040.061.2017318202804.hdf
downloading:  data/2015/MOD04_3K.A2015001.0045.061.2017318203003.hdf
downloading:  data/2015/MOD04_3K.A2015001.0050.061.2017318203116.hdf
downloading:  data/2015/MOD04_3K.A2015001.0055.061.2017318203034.hdf
downloading:  data/2015/MOD04_3K.A2015001.0100.061.2017318202902.hdf
downloading:  data/2015/MOD04_3K.A2015001.0105.061.2017318202918.hdf
downloading:  data/2015/MOD04_3K.A2015001.0110.061.2017318202919.hdf
downloading:  data/2015/MOD04_3K.A2015001.0115.061.2017318202934.hdf
downloading:  data/2015/MOD04_3K.A2015001.0120.061.2017318202837.hdf
downloading:  data/2015/MOD04_3K.A2015001.0125.061.2017318202847.hdf
downloading:  data/2015/MOD04_3K.A2015001.0130.061.2017318202720.hdf
downloading:  data/2015/MOD04_3K.A2015001.0220.061.2017318202906.hdf
downloading:  data/2015/MOD04_3K.A2015001.0225.061.2017318203151.hdf
downloading:  data/2015/MOD04_3K.A2015001.0230.061.2017318203024.hdf
downloading:  data/2015/MOD04_3K.A

downloading:  data/2015/MOD04_3K.A2015001.1855.061.2017318203703.hdf
downloading:  data/2015/MOD04_3K.A2015001.1900.061.2017318203559.hdf
downloading:  data/2015/MOD04_3K.A2015001.1905.061.2017318203654.hdf
downloading:  data/2015/MOD04_3K.A2015001.1910.061.2017318203817.hdf
downloading:  data/2015/MOD04_3K.A2015001.1915.061.2017318203543.hdf
downloading:  data/2015/MOD04_3K.A2015001.1920.061.2017318203505.hdf
downloading:  data/2015/MOD04_3K.A2015001.1925.061.2017318203529.hdf
downloading:  data/2015/MOD04_3K.A2015001.1930.061.2017318203450.hdf
downloading:  data/2015/MOD04_3K.A2015001.1935.061.2017318203421.hdf
downloading:  data/2015/MOD04_3K.A2015001.2025.061.2017318203430.hdf
downloading:  data/2015/MOD04_3K.A2015001.2030.061.2017318203609.hdf
downloading:  data/2015/MOD04_3K.A2015001.2035.061.2017318203608.hdf
downloading:  data/2015/MOD04_3K.A2015001.2040.061.2017318203639.hdf
downloading:  data/2015/MOD04_3K.A2015001.2045.061.2017318203804.hdf
downloading:  data/2015/MOD04_3K.A

0

***

**Transform**

In [None]:
import gdal

In [24]:
def load_data(FILEPATH):
    """Open FILEPATH with gdal.Open and return whatever that call produces."""
    return gdal.Open(FILEPATH)
    
def read_dataset(SUBDATASET_NAME, FILEPATH):
    """Open one subdataset of the file at FILEPATH and return it as a DataFrame.

    Parameters:
        SUBDATASET_NAME: either a suffix of the subdataset description as
            reported by GetSubDatasets(), or the bare subdataset name
            (assumed to be the second whitespace-separated field of the
            description, as in '[676x451] Solar_Zenith mod04 (16-bit integer)'
            -- TODO confirm for other products).
        FILEPATH: path of the file to open.

    Returns:
        A pandas DataFrame built from the subdataset's array, or None (after
        printing a message) when the file cannot be opened or no subdataset
        matches.

    Note:
        pd.DataFrame only accepts 2-D input, so subdatasets with an extra
        leading dimension (e.g. '[3x676x451]') will raise in the final step.
    """
    # pandas is not imported by any cell visible before this one; importing it
    # here keeps the function usable on a fresh kernel.
    import pandas as pd

    dataset = load_data(FILEPATH)
    if dataset is None:
        # gdal.Open signals failure by returning None unless exceptions are enabled.
        print(FILEPATH + ' could not be opened')
        return
    path = ''
    for sub, description in dataset.GetSubDatasets():
        fields = description.split()
        is_bare_name = len(fields) > 1 and fields[1] == SUBDATASET_NAME
        if description.endswith(SUBDATASET_NAME) or is_bare_name:
            path = sub
            break
    if path == '':
        print(SUBDATASET_NAME + ' not found')
        return
    subdataset = gdal.Open(path)
    subdataset = subdataset.ReadAsArray()
    subdataset = pd.DataFrame(subdataset)
    return subdataset

In [5]:
# Lists all subdatasets of any one file.
# GetSubDatasets() yields (path, description) pairs; only the description is printed.
# NOTE(review): this file name is relative to the working directory and is a
# MYD04_3K granule, whereas the download cell fetches MOD04_3K files into
# data/2015/ -- confirm the file is available before running on a fresh kernel.
file = gdal.Open('MYD04_3K.A2017245.1945.061.2018010142915.hdf')
for path, desc in file.GetSubDatasets():
    print(desc)

[676x451] Scan_Start_Time mod04 (64-bit floating-point)
[676x451] Solar_Zenith mod04 (16-bit integer)
[676x451] Solar_Azimuth mod04 (16-bit integer)
[676x451] Sensor_Zenith mod04 (16-bit integer)
[676x451] Sensor_Azimuth mod04 (16-bit integer)
[676x451] Scattering_Angle mod04 (16-bit integer)
[676x451] Glint_Angle mod04 (16-bit integer)
[676x451] Land_Ocean_Quality_Flag mod04 (16-bit integer)
[676x451] Land_sea_Flag mod04 (16-bit integer)
[676x451] Wind_Speed_Ncep_Ocean mod04 (16-bit integer)
[676x451] Optical_Depth_Land_And_Ocean mod04 (16-bit integer)
[676x451] Image_Optical_Depth_Land_And_Ocean mod04 (16-bit integer)
[676x451] Aerosol_Type_Land mod04 (16-bit integer)
[676x451] Fitting_Error_Land mod04 (16-bit integer)
[3x676x451] Surface_Reflectance_Land mod04 (16-bit integer)
[3x676x451] Corrected_Optical_Depth_Land mod04 (16-bit integer)
[676x451] Corrected_Optical_Depth_Land_wav2p1 mod04 (16-bit integer)
[676x451] Optical_Depth_Ratio_Small_Land mod04 (16-bit integer)
[2x676x451] 