
**Module:** viirs_export_csv.ipynb

**Disclaimer**: The code is for demonstration purposes only. Users are responsible for checking its accuracy and revising it to fit their objectives.

**Organization**: NASA ARSET

**Author**: Justin Roberts-Pierel and Pawan Gupta, 2015.

**Modified to work with netCDF** : Vikalp Mishra, 2019 

**Modified to work with VIIRS data**: Aavash Thapa, 2020

**Modified**: Pawan Gupta, September 28 2021 to work with VIIRS DT data

**Purpose**: To save data from a VIIRS Dark Target (DT) netCDF4 file into a CSV file


In [1]:
#Mount drive to save files there
#clone the repository to access files from there
#pull the latest
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
! git clone https://github.com/pawanpgupta/DTAerosols.git
! git -C DTAerosols/ pull

Mounted at /content/drive
Cloning into 'DTAerosols'...
remote: Enumerating objects: 18, done.[K
remote: Counting objects: 100% (18/18), done.[K
remote: Compressing objects: 100% (16/16), done.[K
remote: Total 18 (delta 4), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (18/18), done.
Already up to date.


In [2]:
! pip install netCDF4
from netCDF4 import Dataset
import numpy as np
import sys
import time
import calendar
import datetime as dt
import pandas as pd



In [37]:

# Export selected science datasets (SDS) from VIIRS Dark Target netCDF4 files
# to CSV, writing one CSV file per input file.
#
# Inputs : DTAerosols/fileList.txt (one file name per line) and the files it
#          names, read from the DTAerosols/ directory.
# Outputs: "drive/My Drive/Colab Notebooks/<file name without .nc>.csv"
# The user is asked (Y/N) before each file is processed.


def _filled_vector(values, fill_value=None):
    """Flatten `values` to a 1-D float array with missing entries set to NaN.

    Parameters
    ----------
    values : array-like or numpy masked array
        Data as returned by indexing a netCDF4 variable.
    fill_value : number, optional
        If given, entries exactly equal to it are also replaced with NaN.

    Returns
    -------
    numpy.ndarray
        1-D float array; masked entries and fill values are NaN.
    """
    # np.ma.filled handles both masked arrays and plain arrays, so the result
    # does not depend on whether netCDF4 applied a mask.
    flat = np.ma.filled(np.ma.asarray(values).astype(float), np.nan).ravel()
    if fill_value is not None:
        flat[flat == float(fill_value)] = np.nan
    return flat


# Read the list of file names up front so the list file is closed right away.
# Blank lines are ignored.
try:
    with open('DTAerosols/fileList.txt', 'r') as fileList:
        file_names = [line.strip() for line in fileList if line.strip()]
except OSError:
    print('Did not find a text file containing file names (perhaps name does not match)')
    sys.exit()

# Variables from the /geophysical_data/ group that are written to the CSV.
sds_lst = ['Image_Optical_Depth_Land_And_Ocean',
           'Optical_Depth_Land_And_Ocean',
           'Land_Ocean_Quality_Flag',
           'Land_Sea_Flag', 'Angstrom_Exponent_1_Ocean']

saved_count = 0

# Loop through all files listed in the text file.
for FILE_NAME in file_names:
    user_input = input('\nWould you like to process\n' + FILE_NAME + '\n\n(Y/N)')
    if user_input.strip().lower().startswith('n'):
        print('Skipping...')
        continue

    if 'AERDT' in FILE_NAME:
        print('This is a VIIRS Dark Target file.')
    else:
        # The group and variable names used below are the ones this notebook
        # reads from AERDT files; other files may not contain them.
        print('Warning: file name does not contain AERDT; the Dark Target layout is assumed.')

    # The date and time come from the file name, e.g.
    # AERDT_L2_VIIRS_SNPP.A2021269.2042.011.2021270073049.nc
    #   fileparts[1] = 'A' + year (4 digits) + day of year (3 digits)
    #   fileparts[2] = hour (2 digits) + minute (2 digits)
    fileparts = FILE_NAME.split('.')
    date = (fileparts[1][1:5] + ',' + fileparts[1][5:8] + ','
            + fileparts[2][0:2] + ',' + fileparts[2][2:4])
    t2 = dt.datetime.strptime(date, '%Y,%j,%H,%M')

    # Open the file in a `with` block so it is closed even if reading fails.
    with Dataset('DTAerosols/' + FILE_NAME, 'r') as ds:
        geo = ds['/geolocation_data/']
        lat = geo.variables['latitude'][:]
        lon = geo.variables['longitude'][:]

        geods = ds['/geophysical_data/']
        # Names of every variable in the geophysical group.
        vlist = [var for var in geods.variables]

        # Walk the whole variable list (not just a fixed number of entries) so
        # that short groups do not raise IndexError and no requested variable
        # is missed because of its position.
        sds_columns = {}
        for SDS_NAME in vlist:
            if SDS_NAME not in sds_lst:
                continue
            print('SDS_NAME', SDS_NAME)
            sds = geods.variables[SDS_NAME]
            # Not every variable is guaranteed to carry a _FillValue attribute.
            fv = getattr(sds, '_FillValue', None)
            sds_columns[SDS_NAME] = _filled_vector(sds[:], fv)

    # The time stamp is the same for every row, so each time column is a
    # constant array with the same shape as the latitude grid.
    year = np.full(lat.shape, float(t2.year))
    mth = np.full(lat.shape, float(t2.month))
    doy = np.full(lat.shape, float(t2.day))   # day of month, despite the name
    hr = np.full(lat.shape, float(t2.hour))
    mn = np.full(lat.shape, float(t2.minute))

    # Create the dataframe: time columns first, then the SDS columns in file
    # order, then the coordinates of each row.
    columns = {
        'Year': year.ravel(),
        'Month': mth.ravel(),
        'Day': doy.ravel(),
        'Hour': hr.ravel(),
        'Minute': mn.ravel(),
    }
    columns.update(sds_columns)
    # Coordinates are appended last so the existing columns keep their
    # positions.
    columns['Latitude'] = _filled_vector(lat)
    columns['Longitude'] = _filled_vector(lon)
    df = pd.DataFrame(columns)

    outfilename = FILE_NAME[:-3] + '.csv'
    df.to_csv("drive/My Drive/Colab Notebooks/" + outfilename, index=False)
    saved_count += 1

# Report success once, and only if at least one file was actually written.
if saved_count:
    print('\nAll files have been saved successfully.')


Would you like to process
AERDT_L2_VIIRS_SNPP.A2021269.2042.011.2021270073049.nc

(Y/N)y
This is a VIIRS Dark Target file.
SDS_NAME Angstrom_Exponent_1_Ocean
SDS_NAME Image_Optical_Depth_Land_And_Ocean
SDS_NAME Land_Ocean_Quality_Flag
SDS_NAME Land_Sea_Flag
SDS_NAME Optical_Depth_Land_And_Ocean

All files have been saved successfully.


In [29]:
vlist

['Aerosol_Cldmask_Land_Ocean',
 'Aerosol_Cloud_Fraction_Land',
 'Aerosol_Cloud_Fraction_Ocean',
 'Aerosol_Type_Land',
 'Angstrom_Exponent_1_Ocean',
 'Angstrom_Exponent_2_Ocean',
 'Asymmetry_Factor_Average_Ocean',
 'Average_Cloud_Pixel_Distance_Land_Ocean',
 'Backscattering_Ratio_Average_Ocean',
 'Cloud_Pixel_Distance_Land_Ocean',
 'Corrected_Optical_Depth_Land',
 'Effective_Optical_Depth_Average_Ocean',
 'Effective_Radius_Ocean',
 'Error_Flag_Land_And_Ocean',
 'Fitting_Error_Land',
 'Image_Optical_Depth_Land_And_Ocean',
 'Land_Ocean_Quality_Flag',
 'Land_Sea_Flag',
 'Least_Squares_Error_Ocean',
 'Mass_Concentration_Land',
 'Mass_Concentration_Ocean',
 'Mean_Reflectance_Land',
 'Mean_Reflectance_Ocean',
 'Number_Pixels_Used_Land',
 'Number_Pixels_Used_Ocean',
 'Optical_Depth_By_Models_Ocean',
 'Optical_Depth_Land_And_Ocean',
 'Optical_Depth_Large_Average_Ocean',
 'Optical_Depth_Ratio_Small_Land',
 'Optical_Depth_Ratio_Small_Ocean_0p55micron',
 'Optical_Depth_Small_Average_Ocean',
 'PSML