# Pre-process the raw MODIS data

- I am using the conda environment version 21.10 http://climate-cms.wikis.unsw.edu.au/Conda#21.10
- MODIS data has been downloaded from: https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MCD06COSP_D3_MODIS/

### In this notebook: 
- Pull out fields of interest and regrid onto ACCESS grid, and offset time by 12 hrs to match ACCESS too.  
- For the histograms, sum the cloudy (`Cloud_Optical_Thickness_Total`) and partly cloudy (`Cloud_Optical_Thickness_PCL_Total`) joint-histogram fields 
- Rename dimensions and variables to the ACCESS names so that the ACCESS and MODIS datasets match 

In [1]:
import sys
import os
import numpy as np
import pandas as pd
import xarray as xr
import dask.array as da
sys.path.append('/home/563/slf563/code/gadi/jk72/ACCESS-CM2_analysis/COSP_analysis/Clustering_paper')
from functions import create_time
from calendar import monthrange, isleap

In [2]:
# ACCESS grid: the lat/lon coordinates of this ACCESS-CM2 fx file define the
# target grid that the MODIS fields are interpolated onto.
access_grid = xr.open_mfdataset(
    '/g/data/jk72/slf563/ACCESS/fx/sftlf_fx_ACCESS-CM2_amip_r1i1p1f1_gn.nc'
)

In [3]:
fout = '/g/data/p66/slf563/OBS/MCD06COSP_D3_MODIS/'

# Name of the joint-histogram variable read from the cloudy and the
# partly-cloudy (PCL) optical-thickness groups.
_HISTO_VAR = 'JHisto_vs_Cloud_Top_Pressure'

# (netCDF group, output variable prefix). The list order is the order in which
# the per-group datasets are merged into the daily cloud dataset.
_CLOUD_GROUPS = [
    ('Cloud_Top_Pressure', 'CTP'),
    ('Cloud_Water_Path_Liquid', 'LWP'),
    ('Cloud_Water_Path_Ice', 'IWP'),
    ('Cloud_Optical_Thickness_Liquid', 'TauL'),
    ('Cloud_Optical_Thickness_Ice', 'TauI'),
    ('Cloud_Retrieval_Fraction_Liquid', 'CFL'),
    ('Cloud_Retrieval_Fraction_Ice', 'CFI'),
    ('Cloud_Retrieval_Fraction_Total', 'CF'),
]


def _open_group(pattern, group, stamp):
    """Open one netCDF group of a day's files and add a length-1 time axis.

    pattern : glob matching the day's file(s).
    group   : name of the netCDF group to read.
    stamp   : timestamp assigned as the ``time`` coordinate.
    """
    ds = xr.open_mfdataset(pattern, group=group)
    return ds.assign_coords({'time': stamp}).expand_dims('time')


def _open_mean_std(pattern, group, prefix, stamp):
    """Return the group's Mean / Standard_Deviation as ``prefix`` / ``prefix_StD``."""
    ds = _open_group(pattern, group, stamp)[['Mean', 'Standard_Deviation']]
    return ds.rename({'Mean': prefix, 'Standard_Deviation': prefix + '_StD'})


def _to_access_grid(ds, dims):
    """Attach lat/lon from ``dims``, regrid onto the ACCESS grid, shift time by 12 h.

    ``dims`` supplies the ``latitude`` / ``longitude`` coordinate values;
    longitudes are wrapped into [0, 360) and sorted before interpolating onto
    ``access_grid.lat`` / ``access_grid.lon``.
    """
    ds = ds.assign_coords({'latitude': dims.latitude})
    ds = ds.assign_coords({'longitude': (dims.longitude) % 360}).sortby('longitude')
    ds = ds.rename({'latitude': 'lat', 'longitude': 'lon'})
    ds = ds.interp(lat=access_grid.lat, lon=access_grid.lon)
    # Offset time by 12 hrs to match ACCESS.
    ds['time'] = ds.time + np.timedelta64(12, 'h')
    return ds


for yr in range(2015, 2020):
    time = create_time('{}-01-01'.format(yr), '{}-12-31'.format(yr), 'D')
    fdir = '/g/data/p66/slf563/OBS/MCD06COSP_D3_MODIS/{}/'.format(yr)
    # Root group of the first day's file(s): source of the latitude/longitude values.
    dims = xr.open_mfdataset(fdir + '001/*.nc')

    ndays = 366 if isleap(yr) else 365
    print(yr)
    for day in range(0, ndays):
        stamp = time[day]
        # Day-of-year directories are 1-based and zero-padded to three digits.
        pattern = fdir + '{:03d}/*.nc'.format(day + 1)

        # Sum cloudy and partly cloudy histograms. The addition is done on the
        # raw NumPy arrays (no xarray alignment), exactly as a positional sum.
        h = _open_group(pattern, 'Cloud_Optical_Thickness_Total', stamp)
        h2 = _open_group(pattern, 'Cloud_Optical_Thickness_PCL_Total', stamp)
        total = h[_HISTO_VAR]
        # copy(data=...) keeps the cloudy field's coords and attrs while
        # swapping in the summed values.
        h = total.copy(data=h2[_HISTO_VAR].values + total.values).to_dataset()
        del h2, total

        # Merge all the cloud fields into one dataset.
        c = xr.merge([
            _open_mean_std(pattern, group, prefix, stamp)
            for group, prefix in _CLOUD_GROUPS
        ])

        if stamp.day == 1:
            # First day of a month: start fresh monthly accumulators.
            histo = h.copy()
            clouds = c.copy()
        else:
            histo = histo.merge(h)
            clouds = clouds.merge(c)

        if stamp.day == monthrange(yr, stamp.month)[1]:
            # Last day of the month: regrid and write the monthly files.
            histo = _to_access_grid(histo, dims)
            histo = histo.rename({'jhisto_cloud_optical_thickness_total_7': 'tau',
                                  'jhisto_cloud_top_pressure_7': 'pressure',
                                  })
            histo = histo.rename({_HISTO_VAR: 'histo'})
            histo.attrs = h[_HISTO_VAR].attrs
            histo = histo.transpose('tau', 'pressure', 'time', 'lat', 'lon',)
            histo.to_netcdf(fout + 'MCD06COSP_D3_MODIS.{}{:02d}_histos.nc'.format(yr, stamp.month))

            clouds = _to_access_grid(clouds, dims)
            clouds.to_netcdf(fout + 'MCD06COSP_D3_MODIS.{}{:02d}_cloud.nc'.format(yr, stamp.month))


2015
2016
2017
2018
2019
