In [3]:
%matplotlib notebook 
import scipy.stats as stats
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.cm as cm
from scipy import integrate
import glob,sys,os

from mpl_toolkits.basemap import Basemap, shiftgrid

import MV2 as MV
import cdms2 as cdms
import genutil
import cdutil
import cdtime

sys.path.append("/Users/kmarvel/Google Drive/python-utils")
import Plotting

import requests
import pandas as pd

### Set classic Netcdf (ver 3)
# Switch off the shuffle filter and deflate compression (level 0) on the cdms
# module before any file is written. Per the note above, the intent is for
# cdms to produce classic NetCDF-3 output -- TODO confirm against the cdms2
# documentation for the installed version.
cdms.setNetcdfShuffleFlag(0)
cdms.setNetcdfDeflateFlag(0)
cdms.setNetcdfDeflateLevelFlag(0)

In [4]:
def check_availability(curr_mod,curr_var,experiment_id):
    """Return the catalogue rows for one model / variable / experiment.

    The catalogue is re-read from 'mary_cmip6_101419.csv' (relative to the
    current working directory) on every call.

    Parameters
    ----------
    curr_mod : value compared against the ``source_id`` column.
    curr_var : value compared against the ``variable_id`` column.
    experiment_id : value compared against the ``experiment_id`` column.

    Returns
    -------
    pandas.DataFrame
        The subset of catalogue rows where all three columns match; empty
        when nothing matches.
    """
    catalog = pd.read_csv('mary_cmip6_101419.csv')

    # Build one boolean mask per criterion, then keep rows passing all three.
    is_model = catalog.source_id == curr_mod
    is_variable = catalog.variable_id == curr_var
    is_experiment = catalog.experiment_id == experiment_id

    return catalog[is_model & is_variable & is_experiment]
def get_members(curr_mod,curr_var,experiment_id):
    """List the distinct ``member_id`` values available for a selection.

    Delegates the catalogue lookup to ``check_availability`` and returns the
    result of ``np.unique`` on the ``member_id`` column of the matching rows
    (a sorted array of unique values; empty when nothing matches).
    """
    matching_rows = check_availability(curr_mod, curr_var, experiment_id)
    member_ids = matching_rows.member_id
    return np.unique(member_ids)

In [7]:
def pull_data(curr_mod,curr_var,experiment_id,member_id,overwrite=False,
              base_dir='http://mary.ldeo.columbia.edu:81/CMIP6/.',
              base_write_dir='/Users/kmarvel/Documents/DATA/dester/piControlRaw/',
              catalog_csv='mary_cmip6_101419.csv'):
    """Fetch one ensemble member over OPeNDAP and save it as one file per year.

    For every catalogue row matching the model, variable, experiment and
    member, the remote dataset is opened with cdms, cut into consecutive
    12-step chunks along the leading (time) axis, and each chunk is written to
    ``<base_write_dir>/<curr_var>/<curr_mod>/`` as
    ``<curr_var>.<experiment_id>.<curr_mod>.<member_id>.<YYYY>.nc`` where
    ``YYYY`` is the year of the chunk's first time step.

    Parameters
    ----------
    curr_mod : str
        Model name; matched against the catalogue ``source_id`` column.
    curr_var : str
        Variable name; matched against ``variable_id``.
    experiment_id : str
        Experiment name; matched against ``experiment_id``.
    member_id : str
        Ensemble member; matched against ``member_id``.
    overwrite : bool, optional
        When False (default) the function returns immediately if at least one
        output file for this member already exists.
    base_dir : str, optional
        URL prefix of the remote archive.
    base_write_dir : str, optional
        Root directory for the yearly output files.
    catalog_csv : str, optional
        Path of the catalogue CSV listing the remote files.

    Returns
    -------
    None

    Notes
    -----
    Links that do not answer with HTTP 200 are skipped with a printed notice.
    Errors raised by pandas, requests or cdms are propagated to the caller.
    """
    write_dir = os.path.join(base_write_dir, curr_var, curr_mod)
    file_prefix = ".".join([curr_var, experiment_id, curr_mod, member_id])

    #If the directory doesn't exist already, make it
    os.makedirs(write_dir, exist_ok=True)

    if not overwrite:
        # NOTE: a single existing yearly file is enough to treat the member as
        # done, so an interrupted pull is not resumed unless overwrite=True.
        if glob.glob(os.path.join(write_dir, file_prefix + ".*.nc")):
            return

    # Read the catalogue only once we know there is work to do.
    ingrid_cmip6 = pd.read_csv(catalog_csv)
    df1 = ingrid_cmip6[(ingrid_cmip6.source_id == curr_mod)
                       & (ingrid_cmip6.variable_id == curr_var)
                       & (ingrid_cmip6.experiment_id == experiment_id)
                       & (ingrid_cmip6.member_id == member_id)]

    if df1.empty:
        # Nothing in the catalogue for this combination: report and stop
        # instead of failing on an empty index lookup.
        print("no catalogue entries for "+file_prefix)
        return

    # One row per distinct time range (first occurrence wins), processed in
    # sorted order so each remote file is downloaded at most once.
    file_rows = df1.drop_duplicates(subset="time_range").sort_values("time_range")

    for _, row in file_rows.iterrows():
        # Construct Remote OpenDAP Link
        nc_link = base_dir + "/.".join([row["activity_id"],
                                        row["institution_id"],
                                        curr_mod,
                                        experiment_id,
                                        row["member_id"],
                                        row["table_id"],
                                        curr_var,
                                        row["grid_label"],
                                        row["version"],
                                        row["file_basename"],
                                        curr_var]) + "/dods"
        request = requests.get(nc_link)
        if request.status_code != 200:
            print("skipping "+nc_link+" (HTTP status "+str(request.status_code)+")")
            continue

        #Get the data
        f = cdms.open(nc_link)
        try:
            data = f(curr_var)
            tax = data.getTime()

            #reshape it to years and months
            # (the reshape is expected to fail if the leading axis is not a
            # whole multiple of 12 -- no partial years are written)
            nyears = data.shape[0] // 12
            rdata = data.reshape((nyears, 12) + data.shape[1:])
            for i in range(nyears):
                yeardata = rdata[i]
                #Make the time axis
                tax_trunc = cdms.createAxis(tax[12*i:12*(i+1)])
                tax_trunc.designateTime()
                # Copy every attribute of the source time axis onto the chunk axis.
                for key, value in tax.attributes.items():
                    setattr(tax_trunc, key, value)
                yeardata.setAxis(0, tax_trunc)
                #get the start year for labeling purposes
                year = str(tax_trunc.asComponentTime()[0].year)
                writename = file_prefix + "." + year.zfill(4) + ".nc"
                fw = cdms.open(os.path.join(write_dir, writename), "w")
                try:
                    fw.write(yeardata)
                finally:
                    # Close the output file even if the write fails.
                    fw.close()
        finally:
            # Always release the remote dataset handle.
            f.close()


In [5]:
# One-off pull of a single model / variable / experiment / member combination.
# The four names stay module-level globals of the notebook.
curr_mod, curr_var = "BCC-CSM2-MR", "pr"
experiment_id, member_id = "piControl", "r1i1p1f1"
pull_data(curr_mod, curr_var, experiment_id, member_id)

In [8]:
# Load the catalogue CSV into a notebook-level DataFrame; the bulk-download
# loop in the following cell reads its `source_id` column to list the models.
ingrid_cmip6 = pd.read_csv('mary_cmip6_101419.csv')

In [9]:
# Bulk download: for every variable and every model in the catalogue, pull each
# available control-run member except the one named in `skip_member`.
#variables=["pr","evspsbl","mrro","mrros","mrso","mrsos","pr","prsn","tas"]
variables=["mrsos","pr","prsn","tas"]
# Use one experiment name for both the member lookup and the download, rather
# than relying on an `experiment_id` global left behind by another cell.
control_experiment="piControl"
# Member excluded from this pass -- presumably already pulled separately; confirm.
skip_member="r1i1p1f1"
models=np.unique(ingrid_cmip6.source_id)
for variable in variables:
    for model in models:
        rips=get_members(model,variable,control_experiment)
        for rip in rips:
            if rip==skip_member:
                continue
            print("trying "+variable+" and "+ model +" "+rip)
            try:
                pull_data(model,variable,control_experiment,rip)
            except Exception as err:
                # Best-effort: report which member failed and why, then move on.
                # KeyboardInterrupt is deliberately not caught so the loop can
                # still be stopped by hand.
                print(variable+" and "+ model +" are bad ("+rip+": "+repr(err)+")")

trying mrsos and CNRM-CM6-1 r1i1p1f2
trying mrsos and CNRM-ESM2-1 r1i1p1f2
trying mrsos and CanESM5 r1i1p2f1
trying mrsos and GISS-E2-1-G r102i1p1f1
trying mrsos and IPSL-CM6A-LR r1i2p1f1
trying mrsos and MIROC-ES2L r1i1p1f2
trying mrsos and UKESM1-0-LL r1i1p1f2


  dout = self.data[indx]
  mout = _mask[indx]


trying pr and CNRM-CM6-1 r1i1p1f2
trying pr and CNRM-ESM2-1 r1i1p1f2
trying pr and CanESM5 r1i1p2f1
trying pr and IPSL-CM6A-LR r1i2p1f1
trying pr and UKESM1-0-LL r1i1p1f2
trying prsn and CNRM-CM6-1 r1i1p1f2
trying prsn and CNRM-ESM2-1 r1i1p1f2
trying prsn and CanESM5 r1i1p2f1
trying prsn and IPSL-CM6A-LR r1i2p1f1
trying prsn and UKESM1-0-LL r1i1p1f2
trying tas and CNRM-CM6-1 r1i1p1f2
trying tas and CNRM-ESM2-1 r1i1p1f2
trying tas and CanESM5 r1i1p2f1
trying tas and IPSL-CM6A-LR r1i2p1f1
trying tas and UKESM1-0-LL r1i1p1f2
