In [2]:
%load_ext autoreload
%autoreload 2

import xarray as xr
import numpy as np
from dask.distributed import Client
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from scipy import stats
import xesmf as xe
from scipy.stats import linregress
import os
import requests
# Lets import our functions. There are no tests here. Stored separately so as to not make a mess.
# Make sure in right dir first

os.chdir('/g/data/xv83/np1383/src_CAFE60_eqpac_analysis_code/')
from a_carbon_math import carbon_flux
from a_model_cutting_functions import CAFE60_eqpac_cutter, cut_regrid_reynolds_sst,cut_process_sst_obs_trends,proc_landschutzer,process_co2_land_trends

Downloads TPCA chl Data from Research Data Portal. 

In [3]:
# Set up the remote dask cluster
from dask.distributed import Client,Scheduler
from dask_jobqueue import SLURMCluster
cluster = SLURMCluster(cores=2,memory="16GB")
client = Client(cluster)
#cluster.scale(cores=4)
cluster.scale(cores=4)#adapt(minimum=2, maximum=10)

##

Perhaps you already have a cluster running?
Hosting the HTTP server on port 33557 instead


In [5]:
cluster

Tab(children=(HTML(value='<div class="jp-RenderedHTMLCommon jp-RenderedHTML jp-mod-trusted jp-OutputArea-outpu…

In [2]:
def Download_TPCA():
    '''
    Function to download TPCA MODIS and SeaWIFS from nci.org.
    
    Possible to also use DAPPS through xarray and save the files
    rather than using requests.
    '''
    path_sw='/g/data/xv83/np1383/external_data/chl/TPCA/seawifs/'
    if not os.path.isdir(path_sw):
        print('Creating directory: ',path_sw)
        os.makedirs(path_sw)
       
    path_mod='/g/data/xv83/np1383/external_data/chl/TPCA/modis/'
    if not os.path.isdir(path_mod):
        print('Creating directory: ',path_mod)
        os.makedirs(path_mod)
    
    
    tpca_link=['http://dapds00.nci.org.au/thredds/fileServer/ks32/CLEX_Data/TPCA_reprocessing/v2019_01/']
    sensors=['SeaWiFS/tpca_seawifs_','MODIS-Aqua/tpca_modis_aqua_'] #and then year
    sens=['sw','mod']           
    #Download SeaWiFS files from the above array, spaced by each year.
    
    for i in range(0,2): #To do SeaWiFS and then MODIS
        for yr in np.arange(1997,2020):
            if i==0: #SW
                sensor=tpca_link[0]+sensors[0]+str(yr)+'.nc'
                path=path_sw
            elif i==1: #MODIS
                sensor=tpca_link[0]+sensors[1]+str(yr)+'.nc'
                path=path_mod
        
            #Start the download
            try:
                r = requests.get(sensor)#,timeout=s20)
                fileloc=path+sensors[0].split('/')[1]+str(yr)+'.nc'
                if r.status_code!=404:
                    with open(fileloc, 'wb') as f:
                        f.write(r.content)
                    print('Downloaded: ' + sens[i] + str(yr))
                else:
                    print(i,str(r.status_code))
            except KeyboardInterrupt:
                import sys
                sys.exit()
            except:
                print(str(yr)+ sens[i]+'  Unavailable')
            pass

In [3]:
Download_TPCA()

Downloaded: sw1997
Downloaded: sw1998
Downloaded: sw1999
Downloaded: sw2000
Downloaded: sw2001
Downloaded: sw2002
Downloaded: sw2003
Downloaded: sw2004
Downloaded: sw2005
Downloaded: sw2006
Downloaded: sw2007
Downloaded: sw2008
Downloaded: sw2009
Downloaded: sw2010
0 404
0 404
0 404
0 404
0 404
0 404
0 404
0 404
0 404
1 404
1 404
1 404
1 404
1 404
Downloaded: mod2002
Downloaded: mod2003
Downloaded: mod2004
Downloaded: mod2005
Downloaded: mod2006
Downloaded: mod2007
Downloaded: mod2008
Downloaded: mod2009
Downloaded: mod2010
Downloaded: mod2011
Downloaded: mod2012
Downloaded: mod2013
Downloaded: mod2014
Downloaded: mod2015
Downloaded: mod2016
Downloaded: mod2017
Downloaded: mod2018
Downloaded: mod2019


In [None]:
# Need to Get Phy Working in step 1.
def regrid 

In [51]:
#Load TPCA data
seawifs_tpca=xr.open_mfdataset('/g/data/xv83/np1383/external_data/chl/TPCA/seawifs/*nc').rename({'chl_tpca':'seawifs_tpca'})
modis_tpca=xr.open_mfdataset('/g/data/xv83/np1383/external_data/chl/TPCA/modis/*nc').rename({'chl_tpca':'modis_tpca'})
phy=xr.open_mfdataset('/g/data/xv83/np1383/processed_data/cafe/eqpac/phy_ensmean_1982_15m_.nc')

#Make TPCA mean data
tpca=xr.merge([seawifs_tpca,modis_tpca])
mean = tpca.to_array(dim='new').mean('new')
tpca_m=tpca.assign(tpca=mean)

#Calculate monthly average
tpca_m=tpca_m.resample({'time':'M'}).mean(dim='time')
tpca_m['time']=tpca_m.time.astype('datetime64[M]')

#Regrid TPCA to Model grid
regridder = xe.Regridder(tpca_m, phy, 'bilinear',reuse_weights=False)
#tpca_m_regrid=regridder(tpca_m) 
# FOR SOME REASON THIS BROKE AND NOW WE NEED TO USE THE BELOW SOLUTION

# Hack solution because xESMF v 5.1 or something doesnt accept more than 1 variable???
regrid=[]
for k in tpca_m.data_vars:
    regridded=regridder(tpca_m[k])
    regridded.name=k
    regrid.append(regridded)
tpca_m_regrid=xr.merge(regrid)

#Fill 0 to NAN
tpca_m_regrid = tpca_m_regrid.where(tpca_m_regrid > 0)
tpca_m_regrid = tpca_m_regrid.bfill('time')
tpca_m_regrid.to_netcdf('/g/data/xv83/np1383/processed_data/obs/TPCA_month_regrid.nc')

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
  tmp = blockwise(
  tmp = blockwise(
  tmp = blockwise(


KilledWorker: ("('block-info-_trim-217b11223458c93fc1def66cf2eb808e', 42, 0, 0)", <WorkerState 'tcp://10.0.128.4:46447', name: SLURMCluster-1-0, memory: 0, processing: 530>)