In [32]:
# import cdsapi
# cds = cdsapi.Client(timeout=600, quiet=False, debug=True)
import cftime
import numpy as np
import os
# import requests
import zipfile
import tarfile
import subprocess
from datetime import date
import xarray as xr
import pandas as pd
import rioxarray
import geopandas as gpd
import matplotlib.pyplot as plt
from scipy import stats
from scipy.stats.mstats import theilslopes as sens_slope
from datetime import datetime

from itertools import product
from dask.distributed import wait, progress, Client, LocalCluster
from pathlib import Path
from urllib.request import urlopen
from glob import glob
import urllib

import warnings
warnings.filterwarnings('ignore', category=RuntimeWarning)

import logging
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)

# Define directories
# Everything lives under one project root; the era5 and shapefile trees branch off it.
_project_root = Path('/data/climate_migration')
_data_root = _project_root / 'data'

shpdir = _project_root / 'shp'
ddir = _data_root / 'era5'
disdir = _data_root / 'disasters'
griddir = shpdir / 'global_sociodem_grids'
shptmpdir = shpdir / 'tmp'

# Global Flood Database: raw product plus the gridded flood-duration derivatives
_gfd_root = shpdir / 'global_flood_database'
gfdrawdir = _gfd_root / 'gfd_v1_4'
gfd_fdd_dir = _gfd_root / 'flood_duration_025deg'
gfd_fdd_popwtd_dir = _gfd_root / 'flood_duration_popwtd_025deg'
gfd_fdd_agwtd_dir = _gfd_root / 'flood_duration_agwtd_025deg'

# ERA5 sub-directories, one per temporal aggregation, plus scratch and zonal-stats output
tmpdir = ddir / 'tmp'
zsdir = ddir / 'adm1_zonestats'
hrdir = ddir / 'hourly'
daydir = ddir / 'daily'
wkdir = ddir / 'weekly'
mondir = ddir / 'monthly'
yrdir = ddir / 'yearly'

# Define file paths
# Vector layers
ADM1 = shpdir / 'gadm36_1.shp'
ERA5GRID = shpdir / 'ERA5_025deg_globalgrid.shp'
EMDAT_GC_ADM = shpdir / 'emdat_nat_complex_disasters_adm1_int_2000_2022.gpkg'

# Gridded rasters
LANDSCAN = griddir / 'landscan-global-2020.tif'
LANDSCAN_ERA5res = griddir / 'landscan-global-2020_ERA5grid.tif'
WORLDCOVER = griddir / 'worldcover_2020_cropland_resampled_LSres_cultivated_km2.tif'
WORLDCOVER_ERA5res = griddir / 'worldcover_2020_cropland_resampled_025deg_cultivated_km2.tif'

# Monthly SPEI files (names end in _06 / _12 / _24)
_spei_dir = mondir / 'spei'
SPI6 = _spei_dir / 'ERA5_monthly_1980_2018_calib_1980_2018_spei_pearson_06.nc'
SPI12 = _spei_dir / 'ERA5_monthly_1980_2018_calib_1980_2018_spei_pearson_12.nc'
SPI24 = _spei_dir / 'ERA5_monthly_1980_2018_calib_1980_2018_spei_pearson_24.nc'

# NOTE: including current year can create some weirdness at the tail end of the time-series
SPI_SCALES = [6, 12, 24]

# Zero-padded string labels for years, months, days of month and hours of day.
allyears = [str(year) for year in range(2016, 2022)]
allmonths = [f'{month:02d}' for month in range(1, 13)]
alldays = [f'{day:02d}' for day in range(1, 32)]
alltimes = [f'{hour:02d}:00' for hour in range(24)]

# Cartesian products of the labels above. `all_ymd` pairs every month with all
# 31 day labels, so it also contains combinations that are not calendar dates.
all_ym = list(product(allyears, allmonths))
all_ymd = list(product(allyears, allmonths, alldays))

# Same as `all_ym` minus its final six entries (2021-07 through 2021-12).
all_yrmon = all_ym[:-6]
n_yrmon = len(all_yrmon)

# Baseline period (inclusive on both ends), as strings like the lists above.
AVGCLIMATE_Y0 = 1980
AVGCLIMATE_Y1 = 2018
AVGCLIMATE_YRS = list(map(str, np.arange(AVGCLIMATE_Y0, AVGCLIMATE_Y1 + 1)))
AVGCLIMATE_YMD = list(product(AVGCLIMATE_YRS, allmonths, alldays))

# Integer years 2000..2018 as a NumPy array.
GFDYRS = np.arange(2000, 2019)

In [2]:
# The dask dashboard is served on the remote host at the address given by
# `dashboard_address` below. To view it, forward that remote port to a local
# port (e.g. 9000) and open localhost:9000 in a browser.
cluster = LocalCluster(
    n_workers=60,
    local_directory="/data/tmp/snair/tmp/dask-worker-space",
    dashboard_address="localhost:45285",
)
# Client bound to the local cluster; later cells submit work through it.
client = Client(cluster)

In [None]:
### Load list of all files in AWS bucket
## (https://nex-gddp-cmip6.s3.us-west-2.amazonaws.com/index.html)

In [49]:
filelist = pd.read_csv("gddp-cmip6-files.csv")


# Some cleaning of file urls
# The CSV header has a leading space, and the URLs echoed later in this notebook
# show the same leading space in the values, so normalise both.
filelist = filelist.rename(columns={' fileURL': "fileURL"})
filelist['fileURL'] = filelist['fileURL'].str.strip()

# Keep only the daily near-surface air temperature files. `.copy()` makes the
# filtered frame independent, so the column assignments below are unambiguous
# (no SettingWithCopyWarning).
filelist['tas'] = filelist['fileURL'].str.contains('tas_day', regex=False, na=False)
filelist = filelist[filelist['tas']].copy()

historical = filelist[filelist['fileURL'].str.contains('historical', regex=False)].copy()

# To drop the historical runs from `filelist` itself, filter here with:
# filelist = filelist[~filelist['fileURL'].str.contains('historical', regex=False)]

# Split each URL once and label its path segments.
# NOTE(review): in the URLs printed further down, segment 4 is the model name
# (e.g. CMCC-CM2-SR5) and segment 5 the scenario (e.g. ssp585), so the
# 'experiment' and 'model' labels look swapped. The names are kept unchanged
# because the on-disk directory layout and later cells key on them.
url_parts = filelist['fileURL'].str.split("/")
filelist['experiment'] = url_parts.str[4]
filelist['model'] = url_parts.str[5]
filelist['type'] = url_parts.str[6]
filelist['filename'] = url_parts.str[-1]
# Second-to-last underscore-separated token of the file name
filelist['filestub'] = filelist['filename'].str.split('_').str[-2]

# One row per unique (experiment, model, filestub, type) combination
master = (
    filelist[['experiment', 'model', 'filestub', 'type']]
    .drop_duplicates()
    .sort_values(['experiment', 'model'])
)
                                                    

In [140]:
### Create directories

# One output directory per value of the `experiment` column. `makedirs` also
# creates missing parent directories, and `exist_ok=True` tolerates a directory
# that appears between the check and the call.
for experiment in filelist['experiment'].unique():
    experiment_dir = f"/data/climate_migration/data/cmip6/nex_gddp_cmip6/{experiment}"
    if not os.path.isdir(experiment_dir):
        print(experiment)  # report only the directories that are newly created
        os.makedirs(experiment_dir, exist_ok=True)

In [157]:
### Check file numbers
# For each experiment label, print: label, number of .nc files on disk,
# number of files expected according to `filelist`.
for experiment in filelist.experiment.unique():
    # `glob` is the function imported by `from glob import glob`, so it is called
    # directly; `glob.glob(...)` would raise AttributeError on a fresh kernel.
    flist = glob(f"/data/climate_migration/data/cmip6/nex_gddp_cmip6/{experiment}/*.nc")
    # Count matching rows directly rather than positionally indexing a
    # label-indexed groupby result.
    n_expected = int((filelist.experiment == experiment).sum())
    print(experiment, len(flist), n_expected)

ACCESS-CM2 409 409
ACCESS-ESM1-5 409 409
BCC-CSM2-MR 409 409
CanESM5 409 409
CESM2 409 409
CESM2-WACCM 237 237
CMCC-CM2-SR5 409 409
CMCC-ESM2 408 409
CNRM-CM6-1 409 409
CNRM-ESM2-1 409 409
EC-Earth3 409 409
EC-Earth3-Veg-LR 409 409
FGOALS-g3 409 409
GFDL-CM4_gr2 237 237
GFDL-CM4 229 237
GFDL-ESM4 400 409
GISS-E2-1-G 403 409
HadGEM3-GC31-LL 314 323
HadGEM3-GC31-MM 231 237
IITM-ESM 398 404
INM-CM4-8 403 409
INM-CM5-0 404 409
IPSL-CM6A-LR 403 409
KACE-1-0-G 401 409
KIOST-ESM 285 323
MIROC6 366 409
MIROC-ES2L 371 409
MPI-ESM1-2-HR 378 409
MPI-ESM1-2-LR 374 409
MRI-ESM2-0 368 409
NESM3 276 323
NorESM2-LM 366 409
NorESM2-MM 370 409
TaiESM1 364 409
UKESM1-0-LL 364 409


In [85]:
def get_files2(url, experiment, base_dir="/data/climate_migration/data/cmip6/nex_gddp_cmip6"):
    """Download `url` into `<base_dir>/<experiment>/` unless the file is already there.

    The payload is first written to `<filename>.part` and only renamed to its
    final name once the transfer has finished. An interrupted download therefore
    never leaves a truncated file that a later run would mistake for a complete one.

    Parameters
    ----------
    url : str
        Source URL; surrounding whitespace is ignored. The text after the last
        "/" is used as the local file name.
    experiment : str
        Name of the sub-directory of `base_dir` that receives the file.
    base_dir : str, optional
        Root output directory. Defaults to the project's NEX-GDDP-CMIP6 folder.

    Returns
    -------
    None

    Raises
    ------
    Whatever `urllib.request.urlretrieve` raises when the transfer fails; the
    partial file is removed before the exception propagates.
    """
    # Local import so the submodule is loaded wherever the function runs
    # (e.g. on a dask worker), not only in the notebook kernel.
    import urllib.request

    url = url.strip()
    filename = url.split("/")[-1]
    outpath = f"{base_dir}/{experiment}"
    target = f"{outpath}/{filename}"
    if os.path.isfile(target):
        return

    os.makedirs(outpath, exist_ok=True)
    # The ".part" suffix keeps in-flight files out of "*.nc" globs.
    partial = f"{target}.part"
    try:
        urllib.request.urlretrieve(url, partial)
        os.replace(partial, target)  # atomic rename within the same directory
    finally:
        # Only present if the download or rename failed part-way.
        if os.path.exists(partial):
            os.remove(partial)

                            

In [145]:
### Downloads

# experiment label -> result of `client.map` for that experiment's downloads
future_dict = {}

for experiment in filelist.experiment.unique():
    print(experiment)
    # `glob` is the function imported by `from glob import glob`, so it is called
    # directly; `glob.glob(...)` would raise AttributeError on a fresh kernel.
    flist = glob(f"/data/climate_migration/data/cmip6/nex_gddp_cmip6/{experiment}/*.nc")
    urllist = filelist[filelist.experiment == experiment].fileURL.values

    if len(flist) != len(urllist):
        print('Missing files!') ## Wrote it this way so that we can use the same code for missing files later

        # Every URL is submitted; get_files2 itself skips files already on disk.
        future_dict[experiment] = client.map(get_files2, urllist, experiment = experiment)

ACCESS-CM2
Missing files!
ACCESS-ESM1-5
Missing files!
BCC-CSM2-MR
Missing files!
CanESM5
Missing files!
CESM2
Missing files!
CESM2-WACCM
Missing files!
CMCC-CM2-SR5
Missing files!
CMCC-ESM2
Missing files!
CNRM-CM6-1
Missing files!
CNRM-ESM2-1
Missing files!
EC-Earth3
Missing files!
EC-Earth3-Veg-LR
Missing files!
FGOALS-g3
Missing files!
GFDL-CM4_gr2
Missing files!
GFDL-CM4
Missing files!
GFDL-ESM4
Missing files!
GISS-E2-1-G
Missing files!
HadGEM3-GC31-LL
Missing files!
HadGEM3-GC31-MM
Missing files!
IITM-ESM
Missing files!
INM-CM4-8
Missing files!
INM-CM5-0
Missing files!
IPSL-CM6A-LR
Missing files!
KACE-1-0-G
Missing files!
KIOST-ESM
Missing files!
MIROC6
Missing files!
MIROC-ES2L
Missing files!
MPI-ESM1-2-HR
Missing files!
MPI-ESM1-2-LR
Missing files!
MRI-ESM2-0
Missing files!
NESM3
Missing files!
NorESM2-LM
Missing files!
NorESM2-MM
Missing files!
TaiESM1
Missing files!
UKESM1-0-LL
Missing files!


In [89]:
# NOTE(review): `check_files` is not assigned in any cell visible in this
# notebook, so this cell fails on a fresh kernel. From its use below it appears
# to map each experiment label to a list of file names that must be
# re-downloaded -- TODO restore the cell that builds it.
check_files

{'ACCESS-CM2': [],
 'ACCESS-ESM1-5': [],
 'BCC-CSM2-MR': [],
 'CanESM5': [],
 'CESM2': [],
 'CESM2-WACCM': [],
 'CMCC-CM2-SR5': [],
 'CMCC-ESM2': [],
 'CNRM-CM6-1': [],
 'CNRM-ESM2-1': [],
 'EC-Earth3': [],
 'EC-Earth3-Veg-LR': [],
 'FGOALS-g3': [],
 'GFDL-CM4_gr2': [],
 'GFDL-CM4': [],
 'GFDL-ESM4': [],
 'GISS-E2-1-G': [],
 'HadGEM3-GC31-LL': [],
 'HadGEM3-GC31-MM': [],
 'IITM-ESM': [],
 'INM-CM4-8': [],
 'INM-CM5-0': [],
 'IPSL-CM6A-LR': [],
 'KACE-1-0-G': [],
 'KIOST-ESM': [],
 'MIROC6': [],
 'MIROC-ES2L': [],
 'MPI-ESM1-2-HR': [],
 'MPI-ESM1-2-LR': [],
 'MRI-ESM2-0': [],
 'NESM3': [],
 'NorESM2-LM': [],
 'NorESM2-MM': [],
 'TaiESM1': [],
 'UKESM1-0-LL': []}

In [87]:
# Re-download every file listed in `check_files` (experiment label -> file names).
# Each replacement is fetched to a ".part" file and then swapped in atomically,
# so the existing file is only overwritten once a complete copy is on disk. A
# failed download no longer leaves the dataset with a missing file.
for experiment, flist in check_files.items():
    if len(flist) > 0:
        print(experiment, flist)
        # Look up the source URL of each flagged file by its file name.
        urls = [filelist[filelist.filename == f].fileURL.values[0] for f in flist]
        print(urls)

        experiment_dir = f"/data/climate_migration/data/cmip6/nex_gddp_cmip6/{experiment}"
        for url in urls:
            url = url.strip()  # URLs in the file list may carry a leading space
            filename = url.split("/")[-1]
            target = f"{experiment_dir}/{filename}"
            partial = f"{target}.part"
            try:
                urllib.request.urlretrieve(url, partial)
                os.replace(partial, target)  # overwrites the flagged file in one step
            finally:
                # Only present if the download or rename failed part-way.
                if os.path.exists(partial):
                    os.remove(partial)
        
        

CMCC-CM2-SR5 ['tas_day_CMCC-CM2-SR5_ssp585_r1i1p1f1_gn_2015.nc']
[' https://nex-gddp-cmip6.s3.us-west-2.amazonaws.com/NEX-GDDP-CMIP6/CMCC-CM2-SR5/ssp585/r1i1p1f1/tas/tas_day_CMCC-CM2-SR5_ssp585_r1i1p1f1_gn_2015.nc']
MPI-ESM1-2-LR ['tas_day_MPI-ESM1-2-LR_ssp126_r1i1p1f1_gn_2092.nc']
[' https://nex-gddp-cmip6.s3.us-west-2.amazonaws.com/NEX-GDDP-CMIP6/MPI-ESM1-2-LR/ssp126/r1i1p1f1/tas/tas_day_MPI-ESM1-2-LR_ssp126_r1i1p1f1_gn_2092.nc']
