In [1]:
import dask
import numpy as np
import xarray as xr
import glob
import matplotlib.pyplot as plt
import time
import datetime
from dask_jobqueue import SLURMCluster
from dask.distributed import Client
import warnings

In [2]:
# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Timestamp of this run (date, weekday name, HHMMSS); it is reused further
# down in the figure title and in the output file names.
now = datetime.datetime.now()
now_string = format(now, "%Y-%m-%d_%A_%H%M%S")
now_string

'2019-07-06_Saturday_170020'

In [3]:
# Template for the SLURM jobs that will host the Dask workers: 16 cores and
# 20 GB of memory per job, a 20-hour walltime, the `high_mem` partition and
# the `pi_jianwu` account. The `job_extra` entries are additional sbatch
# options (QOS `medium+`, exclusive node access).
cluster = SLURMCluster(cores=16, memory='20GB', project='pi_jianwu', walltime='20:00:00', queue='high_mem', job_extra=['--qos=medium+','--exclusive'])

In [4]:
# Ask for 10 workers; the queue listing below shows them submitted as
# 10 separate `dask-wor...` SLURM jobs.
cluster.scale(10)

In [5]:
# List this user's SLURM jobs to see which worker jobs are still pending (PD)
# and which are running (R).
!squeue -u savio1

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
           1075473  high_mem dask-wor   savio1 PD       0:00      1 (None)
           1075474  high_mem dask-wor   savio1 PD       0:00      1 (None)
           1075475  high_mem dask-wor   savio1 PD       0:00      1 (None)
           1075476  high_mem dask-wor   savio1 PD       0:00      1 (None)
           1075477  high_mem dask-wor   savio1 PD       0:00      1 (None)
           1075478  high_mem dask-wor   savio1 PD       0:00      1 (None)
           1075479  high_mem dask-wor   savio1 PD       0:00      1 (None)
           1075480  high_mem dask-wor   savio1 PD       0:00      1 (None)
           1075481  high_mem dask-wor   savio1 PD       0:00      1 (None)
           1075482  high_mem dask-wor   savio1 PD       0:00      1 (None)
           1075472  high_mem   tunnel   savio1  R      20:10     16 cnode[009-024]


In [6]:
# Attach the client to the SLURMCluster created above. A bare `Client()`
# would instead start a separate local cluster inside the notebook process,
# and the SLURM workers would never receive any work.
client = Client(cluster)

In [22]:
# URL of the scheduler's diagnostic dashboard (open it in a browser to watch
# workers and task progress).
cluster.dashboard_link

'http://10.2.1.9:43786/status'

In [17]:
# Worker jobs that have started, keyed by SLURM job id, each with the worker
# it hosts.
cluster.running_jobs

OrderedDict([('1075475',
              {'dask-worker--1075475--': <Worker 'tcp://10.2.1.25:34940', memory: 0, processing: 0>}),
             ('1075473',
              {'dask-worker--1075473--': <Worker 'tcp://10.2.1.6:39489', memory: 0, processing: 0>}),
             ('1075474',
              {'dask-worker--1075474--': <Worker 'tcp://10.2.1.7:36198', memory: 0, processing: 0>}),
             ('1075480',
              {'dask-worker--1075480--': <Worker 'tcp://10.2.1.31:43873', memory: 0, processing: 0>}),
             ('1075479',
              {'dask-worker--1075479--': <Worker 'tcp://10.2.1.29:44901', memory: 0, processing: 0>}),
             ('1075476',
              {'dask-worker--1075476--': <Worker 'tcp://10.2.1.26:43100', memory: 0, processing: 0>}),
             ('1075478',
              {'dask-worker--1075478--': <Worker 'tcp://10.2.1.28:33539', memory: 0, processing: 0>}),
             ('1075477',
              {'dask-worker--1075477--': <Worker 'tcp://10.2.1.27:39142', memory:

In [18]:
# Worker jobs that were submitted to SLURM but have not started yet.
cluster.pending_jobs

OrderedDict([('1075482', {})])

In [19]:
# Address of the scheduler owned by this cluster; clients connect to it.
cluster.scheduler_address

'tcp://10.2.1.9:42294'

In [20]:
# Connect to the scheduler owned by `cluster`. Passing the cluster object
# (rather than pasting an address such as '10.2.1.9:42294') keeps this cell
# valid after a kernel restart, when the scheduler gets a new host/port.
client = Client(cluster)

In [21]:
# Summary of the connection: scheduler address plus the number of worker
# processes and cores currently attached.
print(client)

<Client: scheduler='tcp://10.2.1.9:42294' processes=9 cores=144>


In [10]:
# Start of the wall-clock timer.
t0 = time.time()

# Two independent, zero-initialised 180 x 360 count grids: one for all
# sampled pixels and one for the cloudy ones.
total_pix, cloud_pix = np.zeros((180, 360)), np.zeros((180, 360))

In [11]:
def _aggregate_granule(M03, M06):
    """Count sampled and cloudy pixels of one MYD03/MYD06_L2 file pair.

    Parameters
    ----------
    M03 : str
        Path of the file providing the ``Latitude`` and ``Longitude`` arrays.
    M06 : str
        Path of the file providing the ``Cloud_Mask_1km`` array.

    Returns
    -------
    (cloud, total) : tuple of numpy.ndarray
        Two float arrays of shape (180, 360) holding, per grid cell, the
        number of cloudy sampled pixels and the number of all sampled pixels.

    Raises
    ------
    ValueError
        If the sampled cloud mask and the sampled geolocation arrays do not
        contain the same number of pixels.
    """
    # Read everything that is needed inside `with` blocks so the underlying
    # files are closed again; `.values` loads the data into memory first.
    with xr.open_mfdataset(M06, parallel=True) as ds06:
        d06 = ds06['Cloud_Mask_1km'][:, :, :].values
    with xr.open_mfdataset(M03, drop_variables="Scan Type", parallel=True) as ds03:
        d03_lat = ds03['Latitude'][:, :].values
        d03_lon = ds03['Longitude'][:, :].values

    # Keep every third pixel along both image axes and only the first byte
    # of the cloud mask.
    d06CM = d06[::3, ::3, 0]
    # Bits 1-2 of that byte; a decoded value of 0 is what gets counted as
    # cloudy below (presumably the "confident cloudy" flag - confirm against
    # the MYD06 product documentation).
    ds06_decoded = (np.array(d06CM, dtype="byte") & 0b00000110) >> 1

    lat = d03_lat[::3, ::3]
    lon = d03_lon[::3, ::3]

    # Shift the coordinates to non-negative grid indices and truncate.
    # NOTE(review): negative indices (e.g. from geolocation fill values) are
    # clamped to 0, so such pixels are counted in cell (0, 0) - confirm this
    # is intended.
    lat_index = np.maximum((lat + 89.5).astype(int), 0).ravel()
    lon_index = np.maximum((lon + 179.5).astype(int), 0).ravel()

    if ds06_decoded.size != lat_index.size:
        raise ValueError(
            "cloud mask and geolocation arrays differ in size for %s / %s"
            % (M06, M03)
        )

    total = np.zeros((180, 360))
    cloud = np.zeros((180, 360))

    # np.add.at performs an unbuffered "+= 1", so a cell that appears several
    # times in the index arrays is incremented once per occurrence, exactly
    # like a per-pixel Python loop.
    np.add.at(total, (lat_index, lon_index), 1)

    cloudy = (ds06_decoded <= 0).ravel()
    np.add.at(cloud, (lat_index[cloudy], lon_index[cloudy]), 1)

    return cloud, total


def ingest_data(M03_dir, M06_dir):
    """Aggregate cloudy and total pixel counts on a 180 x 360 grid.

    Files matching ``MYD03.A2008*.hdf`` in ``M03_dir`` and
    ``MYD06_L2.A2008*.hdf`` in ``M06_dir`` are sorted by name and paired
    positionally; each pair is reduced to per-cell counts and the counts are
    summed over all pairs.

    The counts are accumulated in arrays created by this call, so calling the
    function again (or running it on a remote worker) always starts from
    zero instead of adding to earlier results.

    Parameters
    ----------
    M03_dir : str
        Directory prefix of the MYD03 files; the file pattern is appended
        directly, so it must end with a path separator.
    M06_dir : str
        Directory prefix of the MYD06_L2 files, same convention.

    Returns
    -------
    (cloud_pix, total_pix) : tuple of numpy.ndarray
        Float arrays of shape (180, 360): cloudy sampled pixels per cell and
        all sampled pixels per cell. Both are all zero when no files match.
    """
    M03_files = sorted(glob.glob(M03_dir + "MYD03.A2008*.hdf"))
    M06_files = sorted(glob.glob(M06_dir + "MYD06_L2.A2008*.hdf"))

    total_pix = np.zeros((180, 360))
    cloud_pix = np.zeros((180, 360))

    # NOTE(review): zip() stops at the shorter list and assumes that the
    # i-th MYD03 file belongs to the i-th MYD06_L2 file - confirm both
    # directories hold the same granules.
    for M03, M06 in zip(M03_files, M06_files):
        granule_cloud, granule_total = _aggregate_granule(M03, M06)
        cloud_pix += granule_cloud
        total_pix += granule_total

    return cloud_pix, total_pix

In [None]:
# Restart the timer so the elapsed time reported later covers only the
# aggregation run and the cells after it.
t0 = time.time()

# Input directories; the trailing slash matters because ingest_data appends
# the file pattern directly to these strings.
M03_dir = "/home/savio1/cybertrn_common/Data/Satellite_Observations/MODIS/MYD03/"
M06_dir = "/home/savio1/cybertrn_common/Data/Satellite_Observations/MODIS/MYD06_L2/"

# NOTE(review): this setting is a Dask configuration value; whether it has
# any effect on work submitted through the distributed `client` should be
# confirmed.
import dask.multiprocessing
dask.config.set(num_workers=5)

# NOTE(review): the whole aggregation is submitted as ONE task, so the file
# loop inside ingest_data runs sequentially inside that task - confirm this
# is the intended use of the cluster.
future1 = client.submit(
    ingest_data,
    M03_dir,
    M06_dir,
)

# Block until the task has finished and fetch its return value.
result1 = client.gather(future1)

In [None]:
# Waits for the task if necessary and displays the returned
# (cloud_pix, total_pix) tuple.
future1.result()

In [None]:
# Cloud fraction per grid cell = cloudy pixels / all pixels. Cells without
# any pixel yield 0/0 = NaN.
cloud_counts, total_counts = future1.result()
cf1 = cloud_counts / total_counts

In [None]:
# Filled-contour map of the cloud fraction (100 levels, "jet" colormap),
# with longitude on the x axis and latitude on the y axis.
fig, ax = plt.subplots(figsize=(14, 7))
filled = ax.contourf(range(-180, 180), range(-90, 90), cf1, 100, cmap="jet")

ax.set_xlabel("Longitude", fontsize=14)
ax.set_ylabel("Latitude", fontsize=14)
ax.set_title("Level 3 Cloud Fraction Aggregation For One Month %s" %now_string, fontsize=16)
fig.colorbar(filled, ax=ax)

# The run timestamp is the file name, so every run writes a new image.
fig.savefig("/umbc/xfs1/jianwu/common/MODIS_Aggregation/savioexe/test/8/%s.png" %now_string)

In [None]:
# Wrap the cloud-fraction grid in a DataArray (no dimension names or
# coordinates are given) and write it to disk, named after the run timestamp.
# NOTE(review): to_netcdf writes a netCDF file although the path ends in
# ".hdf" - confirm the extension is intended.
cf2 = xr.DataArray(cf1)
cf2.to_netcdf("/umbc/xfs1/jianwu/common/MODIS_Aggregation/savioexe/test/8/%s.hdf" %now_string)

In [None]:
# Elapsed wall-clock time in seconds since `t0` was last set.
t1 = time.time()
total = t1-t0
print(total)

In [None]:
# The same duration converted from seconds to hours.
print(total/3600,"hours")

In [None]:
# Check for worker jobs that are still waiting in the SLURM queue before
# shutting the cluster down.
cluster.pending_jobs

In [None]:
# Disconnect the client first, then stop the scheduler and the worker jobs.
# Closing the cluster while a client is still connected leaves that client
# talking to a scheduler that no longer exists. Closing an already closed
# client later is a no-op.
client.close()
cluster.close()

In [None]:
# Disconnect this notebook's client from the scheduler.
client.close()

In [None]:
# List this user's SLURM jobs again to verify that the worker jobs are gone
# after the shutdown.
!squeue -u savio1