## Implementing cloud-top phase retrievals
The new data from Olimpia (received via Trude) is organized differently. I need to write it to NetCDF (.nc) format so that I can make quick comparisons.

In [217]:
import sys
# Add common resources folder to path
sys.path.append("/mnt/mcc-ns9600k/jonahks/git_repos/netcdf_analysis/Common/")
sys.path.append("/mnt/mcc-ns9600k/jonahks/git_repos/netcdf_analysis/")
sys.path.append("/home/jonahks/git_repos/netcdf_analysis/")
sys.path.append("/home/jonahks/git_repos/netcdf_analysis/Common/")

from imports import (
    pd, np, xr, mpl, plt, sns, os, 
    datetime, sys, crt, gridspec,
    polyfit, ccrs, LinearRegression, metrics,
    datetime
    )

from functions import (
    masked_average, plot_slf_isotherms,
    season_mean, add_weights
    )

%matplotlib inline

In [218]:
# Decide where the notebook is running and move into the project root so that
# the relative data paths defined further down resolve on either machine.
host = os.uname()[1]
if 'jupyter' not in host.split('-'):
    # Not on the Jupyter Hub: assume a local machine and mount NIRD over sshfs.
    print('Running on %s, attempting to mount ns9600k/jonahks/ from NIRD' % str(host))
    os.system('fusermount -zu ~/drivemount/')  # unmount first
    # Calling mountnird from .bashrc doesn't work, so the sshfs command is issued directly.
    os.system('sshfs jonahks@login.nird.sigma2.no:"p/jonahks/" ~/drivemount/')
    os.chdir('/home/jonahks/drivemount/')
    # NOTE: save_dir / save_to are only defined on this (local) branch.
    save_dir = '~/DATAOUT/'
    save_to = os.path.expanduser(save_dir)
else:
    # Running on NIRD through the Jupyter Hub: the project root is mounted directly.
    print('Running through MC2 Jupyter Hub')
    os.chdir('/mnt/mcc-ns9600k/jonahks/')

# Paths below are relative to the working directory selected above.
output_dir = 'caliop_olimpia/seasonal_data/'
model_dir = 'mnth15runs/'   # inconsistent label compared to jupy_test

bulk_obs_dir = "caliop_olimpia/4_Jonah/bulk3/season/global/"
top_obs_dir = 'caliop_olimpia/4_Jonah/top03/season/global/'

# Check that each important directory can be accessed:
access_paths = all(os.path.exists(path) for path in (bulk_obs_dir, top_obs_dir, output_dir))
print('Can access all directory paths:', access_paths)

Running through MC2 Jupyter Hub
Can access all directory paths: True


## Load in-cloud/bulk cloud values.


In [216]:
# Show what is inside the bulk-observation directory before loading anything from it.
os.listdir(bulk_obs_dir)

['read_me.txt', 'SON', 'JJA', 'MAM', 'DJF']

In [224]:
# Assemble the seasonal in-cloud SLF fields from the per-season .npz files into
# one (lat, lon, isotherm, time) array and wrap it in an xarray Dataset.

# Coordinates: 1-degree spacing in lat/lon, isotherms from 0 down to -40 in steps of 5.
lat = np.linspace(-89.5, 89.5, 180)
lon = np.linspace(-180, 179, 360)
iso = np.linspace(0, -40, 9)

# Season folder names and their time stamps, paired by position. The order is
# alphabetical rather than chronological, so the resulting time coordinate is
# not monotonic (Jan, Jul, Apr, Oct).
season = ['DJF', 'JJA', 'MAM', 'SON']
time = [datetime(2010, 1, 1), datetime(2010, 7, 1), datetime(2010, 4, 1), datetime(2010, 10, 1)]
# Mid-season dates; defined for reference but not used for the time coordinate here.
season_dict = {'DJF':datetime(2010, 1, 15), 'JJA':datetime(2010, 7, 15), 
               'MAM':datetime(2010, 4, 15), 'SON':datetime(2010, 10, 15)} # by middle month

# Size the array from the coordinates so the two cannot drift apart.
df = np.zeros((len(lat), len(lon), len(iso), len(time))) # lat, lon, iso, season

# Iterate over the known season names instead of the raw directory listing: the
# slot index then never depends on how stray files (e.g. the read-me) happen to
# sort, and a missing season folder fails loudly in os.listdir instead of
# silently shifting the remaining seasons onto the wrong time stamps.
for i, (season_name, season_time) in enumerate(zip(season, time)):
    print(season_name, season_time)
    season_path = os.path.join(bulk_obs_dir, season_name)
    npz_files = [x for x in os.listdir(season_path) if x.endswith('.npz')]
    if not npz_files:
        raise FileNotFoundError("No .npz file found in %s" % season_path)
    data_file = npz_files[0]
    print(data_file)

    # NpzFile holds an open file handle; the context manager closes it once
    # the array has been read into memory.
    with np.load(os.path.join(season_path, data_file)) as _npz:
        _slf_in = _npz["slf_geo"]
    # Move the leading axis to the end so the layout matches df[:, :, :, i].
    _slf_out = np.moveaxis(_slf_in, 0, -1)
    if _slf_out.shape != df.shape[:3]:
        # Guard against a mismatched array being silently broadcast into df.
        raise ValueError("Unexpected 'slf_geo' shape %s in %s (expected %s after reordering)"
                         % (_slf_out.shape, data_file, df.shape[:3]))

    df[:,:,:,i] = _slf_out

da = xr.DataArray(data=df,
                  coords={"lat": lat,
                          "lon": lon,
                          "isotherm": iso,
                          "time": time},
                  dims=["lat","lon", "isotherm","time"])
# Sort so the isotherm coordinate is ascending (-40 ... 0).
da = da.sortby('isotherm')

da.attrs['long_name'] = "SLF retrieved by CALIOP from within cloud (TAU: 0.0-3.0) "
da.attrs['units'] = "Fraction [0-1]"

ds = xr.Dataset()
ds['SLF'] = da
# add_weights is a project helper from `functions`; presumably it attaches
# averaging weights to the dataset -- TODO confirm against its definition.
ds = add_weights(ds)

DJF 2010-01-01 00:00:00
slf_count_geo_bulk_DJF.npz
JJA 2010-07-01 00:00:00
slf_count_geo_bulk_JJA.npz
MAM 2010-04-01 00:00:00
slf_count_geo_bulk_MAM.npz
SON 2010-10-01 00:00:00
slf_count_geo_bulk_SON.npz


In [225]:
# Write the in-cloud dataset to NetCDF. os.path.join avoids the doubled
# separator that '%s/...' produced, since output_dir already ends in '/'.
ds.to_netcdf(path=os.path.join(output_dir, 'incloud_slfs_seasonal.nc'))

## Load cloud-top SLF values.

In [226]:
# Assemble the seasonal cloud-top SLF fields from the per-season .npz files into
# one (lat, lon, isotherm, time) array and wrap it in an xarray Dataset.

# Coordinates: 1-degree spacing in lat/lon, isotherms from 0 down to -40 in steps of 5.
lat = np.linspace(-89.5, 89.5, 180)
lon = np.linspace(-180, 179, 360)
iso = np.linspace(0, -40, 9)

# Season folder names and their time stamps, paired by position. The order is
# alphabetical rather than chronological, so the resulting time coordinate is
# not monotonic (Jan, Jul, Apr, Oct).
season = ['DJF', 'JJA', 'MAM', 'SON']
time = [datetime(2010, 1, 1), datetime(2010, 7, 1), datetime(2010, 4, 1), datetime(2010, 10, 1)]
# Mid-season dates; defined for reference but not used for the time coordinate here.
season_dict = {'DJF':datetime(2010, 1, 15), 'JJA':datetime(2010, 7, 15), 
               'MAM':datetime(2010, 4, 15), 'SON':datetime(2010, 10, 15)} # by middle month

# Size the array from the coordinates so the two cannot drift apart.
df = np.zeros((len(lat), len(lon), len(iso), len(time))) # lat, lon, iso, season

# Iterate over the known season names instead of the raw directory listing: the
# slot index then never depends on how stray files (e.g. the read-me) happen to
# sort, and a missing season folder fails loudly in os.listdir instead of
# silently shifting the remaining seasons onto the wrong time stamps.
for i, (season_name, season_time) in enumerate(zip(season, time)):
    print(season_name, season_time)
    season_path = os.path.join(top_obs_dir, season_name)
    npz_files = [x for x in os.listdir(season_path) if x.endswith('.npz')]
    if not npz_files:
        raise FileNotFoundError("No .npz file found in %s" % season_path)
    data_file = npz_files[0]
    print(data_file)

    # NpzFile holds an open file handle; the context manager closes it once
    # the array has been read into memory.
    with np.load(os.path.join(season_path, data_file)) as _npz:
        _slf_in = _npz["slf_geo"]
    # Move the leading axis to the end so the layout matches df[:, :, :, i].
    _slf_out = np.moveaxis(_slf_in, 0, -1)
    if _slf_out.shape != df.shape[:3]:
        # Guard against a mismatched array being silently broadcast into df.
        raise ValueError("Unexpected 'slf_geo' shape %s in %s (expected %s after reordering)"
                         % (_slf_out.shape, data_file, df.shape[:3]))

    df[:,:,:,i] = _slf_out

da = xr.DataArray(data=df,
                  coords={"lat": lat,
                          "lon": lon,
                          "isotherm": iso,
                          "time": time},
                  dims=["lat","lon", "isotherm","time"])
# Sort so the isotherm coordinate is ascending (-40 ... 0).
da = da.sortby('isotherm')

# NOTE(review): this long_name reads as garbled (unbalanced parenthesis, "top lower");
# it is left unchanged because the intended wording cannot be determined here.
da.attrs['long_name'] = "SLF retrieved by CALIOP from the top lower discarding TAU < 0.3) "
da.attrs['units'] = "Fraction [0-1]"

ds = xr.Dataset()
ds['SLF'] = da
# add_weights is a project helper from `functions`; presumably it attaches
# averaging weights to the dataset -- TODO confirm against its definition.
ds = add_weights(ds)

DJF 2010-01-01 00:00:00
slf_count_geo_DJF.npz
JJA 2010-07-01 00:00:00
slf_count_geo_JJA.npz
MAM 2010-04-01 00:00:00
slf_count_geo_MAM.npz
SON 2010-10-01 00:00:00
slf_count_geo_SON.npz


In [227]:
# Write the cloud-top dataset to NetCDF. os.path.join avoids the doubled
# separator that '%s/...' produced, since output_dir already ends in '/'.
ds.to_netcdf(path=os.path.join(output_dir, 'cloudtop_slfs_seasonal.nc'))

Interesting that the SLF at the -10°C isotherm is latitude-dependent.