**Dependencies**

In [1]:
# standard libraries
import glob, os
import datetime

# third party
import gdal
import pandas as pd
import numpy as np

# local
from data_proccesing_utils import (
    load_data,
    read_dataset,
    find_latitude_position,
    find_longitude_position,
    create_list,
    find_product_value,
    get_julian_date,
    get_year,
    get_month,
    get_day,
    jdtodatestd   
)

**Change to the raw data directory**

In [2]:
# Change to the raw data directory.
# The starting directory is recorded only the first time this cell runs, so
# re-running the cell in the same kernel resolves "../data/raw" against the
# original location instead of against the directory we already moved into.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, "..", "data", "raw"))

**Process HDF files**

In [3]:
# Per-file results; these four lists are appended to together and stay index-aligned.
values_land_and_ocean = []
file_names = []
latitude = []
longitude = []
# Names of the files that could not be processed, and why each one failed.
errors = []
error_details = {}

# dataset to extract
SUBDATASET_NAME = "Optical_Depth_Land_And_Ocean (16-bit integer)"

# Target coordinates handed to create_list and echoed in the progress message.
TARGET_LATITUDE = 20.5874
TARGET_LONGITUDE = -100.3949

for hdf_path in glob.glob('*.hdf'):
    try:
        land_and_ocean, lat, lon = create_list(
            SUBDATASET_NAME, TARGET_LATITUDE, TARGET_LONGITUDE, hdf_path
        )
    except Exception as exc:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt/SystemExit still stop the loop; keep the reason
        # so failures can be inspected afterwards.
        errors.append(hdf_path)
        error_details[hdf_path] = repr(exc)
        continue

    print(
        "Data collected for: " + str(lat)
        + " N (" + str(TARGET_LATITUDE) + " N) and " + str(lon)
        + " E.(" + str(TARGET_LONGITUDE) + ")"
    )
    file_names.append(hdf_path)
    values_land_and_ocean.append(land_and_ocean)
    latitude.append(lat)
    longitude.append(lon)

Data collected for: 20.586946 N (20.5874 N) and -100.38291 E.(-100.3949)
Data collected for: 20.586254 N (20.5874 N) and -100.40908 E.(-100.3949)
Data collected for: 20.587759 N (20.5874 N) and -100.39133 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.586784 N (20.5874 N) and -100.368225 E.(-100.3949)
Data collected for: 20.587906 N (20.5874 N) and -100.398636 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.587168 N (20.5874 N) and -100.40928 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.586937 N (20.5874 N) and -100.38783 E.(-100.3949)
Data collected for: 20.588358 N (20.5874 N) and -100.375534 E.(-100.3949)
Data collected for: 20.586843 N (20.5874 N) and -100.384094 E.(-100.3949)
Data collected for: 20.587482 N (20.5874 N) and -100.366104 E.(-100.3949)
Latitude (32-bit floating-point) not found
Latitude (32-bit floating-point) not found
Data collected for: 20.586973 N (20.5874 N) and -1

Data collected for: 20.587482 N (20.5874 N) and -100.361084 E.(-100.3949)
Data collected for: 20.586687 N (20.5874 N) and -100.40814 E.(-100.3949)
Latitude (32-bit floating-point) not found
Latitude (32-bit floating-point) not found
Data collected for: 20.588776 N (20.5874 N) and -100.39331 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.588005 N (20.5874 N) and -100.37845 E.(-100.3949)
Data collected for: 20.588755 N (20.5874 N) and -100.38742 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.586874 N (20.5874 N) and -100.38204 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.590185 N (20.5874 N) and -100.39591 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.587502 N (20.5874 N) and -100.38077 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.587193 N (20.5874 N) and -100.351166 E.(-100.3949)
Data collected for: 20.587755 N (20.5874

Data collected for: 20.588371 N (20.5874 N) and -100.39047 E.(-100.3949)
Data collected for: 20.587244 N (20.5874 N) and -100.38614 E.(-100.3949)
Data collected for: 20.587978 N (20.5874 N) and -100.37954 E.(-100.3949)
Data collected for: 20.587599 N (20.5874 N) and -100.38172 E.(-100.3949)
Data collected for: 20.587563 N (20.5874 N) and -100.42077 E.(-100.3949)
Data collected for: 20.587969 N (20.5874 N) and -100.389404 E.(-100.3949)
Data collected for: 20.587336 N (20.5874 N) and -100.38546 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.587994 N (20.5874 N) and -100.3721 E.(-100.3949)
Data collected for: 20.58784 N (20.5874 N) and -100.40896 E.(-100.3949)
Data collected for: 20.586916 N (20.5874 N) and -100.412544 E.(-100.3949)
Data collected for: 20.58713 N (20.5874 N) and -100.374 E.(-100.3949)
Data collected for: 20.587044 N (20.5874 N) and -100.42823 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.588718 N (20.5874 

Data collected for: 20.58761 N (20.5874 N) and -100.385925 E.(-100.3949)
Data collected for: 20.586739 N (20.5874 N) and -100.389206 E.(-100.3949)
Data collected for: 20.587389 N (20.5874 N) and -100.40142 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.588337 N (20.5874 N) and -100.388565 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.587088 N (20.5874 N) and -100.39907 E.(-100.3949)
Data collected for: 20.587198 N (20.5874 N) and -100.40236 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.587053 N (20.5874 N) and -100.40354 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.587696 N (20.5874 N) and -100.426605 E.(-100.3949)
Latitude (32-bit floating-point) not found
Latitude (32-bit floating-point) not found
Data collected for: 20.586527 N (20.5874 N) and -100.38202 E.(-100.3949)
Data collected for: 20.58858 N (20.5874 N) and -100.38424 E.(-100.3949)
Data colle

Data collected for: 20.587326 N (20.5874 N) and -100.41584 E.(-100.3949)
Data collected for: 20.58687 N (20.5874 N) and -100.390045 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.587538 N (20.5874 N) and -100.42635 E.(-100.3949)
Data collected for: 20.58802 N (20.5874 N) and -100.40105 E.(-100.3949)
Data collected for: 20.587555 N (20.5874 N) and -100.40111 E.(-100.3949)
Data collected for: 20.585892 N (20.5874 N) and -100.34046 E.(-100.3949)
Data collected for: 20.587782 N (20.5874 N) and -100.40897 E.(-100.3949)
Data collected for: 20.587135 N (20.5874 N) and -100.39755 E.(-100.3949)
Data collected for: 20.590643 N (20.5874 N) and -100.41819 E.(-100.3949)
Data collected for: 20.586933 N (20.5874 N) and -100.390755 E.(-100.3949)
Data collected for: 20.586168 N (20.5874 N) and -100.400795 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.586966 N (20.5874 N) and -100.367966 E.(-100.3949)
Data collected for: 20.586617 N (20.

Data collected for: 20.587852 N (20.5874 N) and -100.374275 E.(-100.3949)
Data collected for: 20.587505 N (20.5874 N) and -100.40611 E.(-100.3949)
Data collected for: 20.587822 N (20.5874 N) and -100.38873 E.(-100.3949)
Data collected for: 20.590174 N (20.5874 N) and -100.41012 E.(-100.3949)
Data collected for: 20.58696 N (20.5874 N) and -100.4073 E.(-100.3949)
Data collected for: 20.587292 N (20.5874 N) and -100.41177 E.(-100.3949)
Data collected for: 20.587343 N (20.5874 N) and -100.38674 E.(-100.3949)
Data collected for: 20.587652 N (20.5874 N) and -100.427025 E.(-100.3949)
Data collected for: 20.58774 N (20.5874 N) and -100.393486 E.(-100.3949)
Data collected for: 20.587976 N (20.5874 N) and -100.385155 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.585636 N (20.5874 N) and -100.44375 E.(-100.3949)
Data collected for: 20.58665 N (20.5874 N) and -100.393036 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.587744 N (20.5

Data collected for: 20.587984 N (20.5874 N) and -100.408516 E.(-100.3949)
Data collected for: 20.587292 N (20.5874 N) and -100.39212 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.589087 N (20.5874 N) and -100.36769 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.587236 N (20.5874 N) and -100.411545 E.(-100.3949)
Latitude (32-bit floating-point) not found
Latitude (32-bit floating-point) not found
Latitude (32-bit floating-point) not found
Latitude (32-bit floating-point) not found
Latitude (32-bit floating-point) not found
Latitude (32-bit floating-point) not found
Data collected for: 20.58697 N (20.5874 N) and -100.38036 E.(-100.3949)
Latitude (32-bit floating-point) not found
Latitude (32-bit floating-point) not found
Data collected for: 20.585955 N (20.5874 N) and -100.4047 E.(-100.3949)
Data collected for: 20.589846 N (20.5874 N) and -100.38051 E.(-100.3949)
Data collected for: 20.586065 N (20.5874 N) and -100.42097 

Data collected for: 20.58774 N (20.5874 N) and -100.387924 E.(-100.3949)
Data collected for: 20.587605 N (20.5874 N) and -100.4072 E.(-100.3949)
Data collected for: 20.587738 N (20.5874 N) and -100.40449 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.586973 N (20.5874 N) and -100.40811 E.(-100.3949)
Data collected for: 20.587212 N (20.5874 N) and -100.38617 E.(-100.3949)
Data collected for: 20.589178 N (20.5874 N) and -100.39349 E.(-100.3949)
Data collected for: 20.587893 N (20.5874 N) and -100.38321 E.(-100.3949)
Data collected for: 20.587416 N (20.5874 N) and -100.384766 E.(-100.3949)
Data collected for: 20.586477 N (20.5874 N) and -100.38671 E.(-100.3949)
Data collected for: 20.587303 N (20.5874 N) and -100.38865 E.(-100.3949)
Data collected for: 20.58705 N (20.5874 N) and -100.408005 E.(-100.3949)
Data collected for: 20.587425 N (20.5874 N) and -100.38805 E.(-100.3949)
Data collected for: 20.58739 N (20.5874 N) and -100.37808 E.(-100.3949)
Data coll

Data collected for: 20.589071 N (20.5874 N) and -100.429375 E.(-100.3949)
Data collected for: 20.586912 N (20.5874 N) and -100.36765 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.587393 N (20.5874 N) and -100.37639 E.(-100.3949)
Data collected for: 20.58816 N (20.5874 N) and -100.39982 E.(-100.3949)
Latitude (32-bit floating-point) not found
Latitude (32-bit floating-point) not found
Latitude (32-bit floating-point) not found
Data collected for: 20.58725 N (20.5874 N) and -100.39414 E.(-100.3949)
Data collected for: 20.585415 N (20.5874 N) and -100.40261 E.(-100.3949)
Data collected for: 20.588552 N (20.5874 N) and -100.41523 E.(-100.3949)
Data collected for: 20.587528 N (20.5874 N) and -100.4018 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.58741 N (20.5874 N) and -100.38059 E.(-100.3949)
Data collected for: 20.587358 N (20.5874 N) and -100.40736 E.(-100.3949)
Data collected for: 20.58598 N (20.5874 N) and -100.40764 

Data collected for: 20.586958 N (20.5874 N) and -100.409134 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.584932 N (20.5874 N) and -100.41418 E.(-100.3949)
Data collected for: 20.587393 N (20.5874 N) and -100.40846 E.(-100.3949)
Data collected for: 20.587555 N (20.5874 N) and -100.40142 E.(-100.3949)
Data collected for: 20.587667 N (20.5874 N) and -100.3983 E.(-100.3949)
Data collected for: 20.587719 N (20.5874 N) and -100.36489 E.(-100.3949)
Latitude (32-bit floating-point) not found
Latitude (32-bit floating-point) not found
Latitude (32-bit floating-point) not found
Data collected for: 20.58782 N (20.5874 N) and -100.37958 E.(-100.3949)
Data collected for: 20.587399 N (20.5874 N) and -100.39427 E.(-100.3949)
Data collected for: 20.587284 N (20.5874 N) and -100.3964 E.(-100.3949)
Data collected for: 20.588297 N (20.5874 N) and -100.37902 E.(-100.3949)
Data collected for: 20.588587 N (20.5874 N) and -100.37988 E.(-100.3949)
Data collected for: 20.5873

Data collected for: 20.586506 N (20.5874 N) and -100.41165 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.587337 N (20.5874 N) and -100.38498 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.587975 N (20.5874 N) and -100.39654 E.(-100.3949)
Data collected for: 20.58964 N (20.5874 N) and -100.357956 E.(-100.3949)
Data collected for: 20.58708 N (20.5874 N) and -100.38054 E.(-100.3949)
Data collected for: 20.588879 N (20.5874 N) and -100.42665 E.(-100.3949)
Latitude (32-bit floating-point) not found
Data collected for: 20.58744 N (20.5874 N) and -100.395355 E.(-100.3949)
Data collected for: 20.587654 N (20.5874 N) and -100.390205 E.(-100.3949)
Data collected for: 20.586657 N (20.5874 N) and -100.36299 E.(-100.3949)
Data collected for: 20.5871 N (20.5874 N) and -100.38531 E.(-100.3949)
Data collected for: 20.586557 N (20.5874 N) and -100.39607 E.(-100.3949)
Data collected for: 20.588749 N (20.5874 N) and -100.42694 E.(-100.394

**Build data frame**

In [5]:
# Assemble the per-file results into a data frame, one column at a time.
final_list = pd.DataFrame()
for column_name, column_values in (
    ("File Name", file_names),
    ("land_and_ocean", values_land_and_ocean),
    ("Latitude", latitude),
    ("Longitude", longitude),
):
    final_list[column_name] = pd.Series(column_values)

# Derive the julian date from the file name, then year, month and day from it.
final_list["julian_date"] = final_list["File Name"].apply(get_julian_date)
for part_name, extract_part in (
    ("year", get_year),
    ("month", get_month),
    ("day", get_day),
):
    final_list[part_name] = final_list["julian_date"].apply(extract_part)

# Keep only the rows whose land_and_ocean value is not 0, then save them.
final_list = final_list.query("land_and_ocean != 0")
final_list.to_csv("../processed/queretaro.csv")

In [None]:
# Report how many files failed. Written as a single-argument print(...) call so
# the cell runs on both Python 2 and Python 3, like the other cells.
print("Number of files with errors: " + str(len(errors)))

In [None]:
# Show the assembled data frame as this cell's output.
final_list

***

If you need to check which datasets each file contains, run the code block below.

**List the datasets inside an .hdf file**

In [None]:
# read hdf files from current directory
all_files = glob.glob('*.hdf')

if not all_files:
    # Nothing to inspect; avoid an IndexError on all_files[0].
    print("No .hdf files found in " + os.getcwd())
else:
    hdf_dataset = gdal.Open(all_files[0])
    if hdf_dataset is None:
        # gdal.Open returns None instead of raising when GDAL exceptions
        # are not enabled and the file cannot be opened.
        print("GDAL could not open " + all_files[0])
    else:
        # Lists all subdatasets of the first file
        for path, desc in hdf_dataset.GetSubDatasets():
            print(desc)