# cut file along dimension
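Cuts a single NetCDF file into many files along one dimension (here, one file per ensemble member), then processes the per-member files and copies them into place.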

In [2]:
import os
import shutil
import numpy as np
import xarray as xr
import pandas as pd

# Split files

In [28]:
# load data
data_dir='/local/data/artemis/workspace/gloege/data/MPI_GE/wind10'

# Open the combined file in a context manager so the handle is released
# once every member has been written out.
with xr.open_dataset(f'{data_dir}/hcp85_woa13cp_1850_2099_ens_1-100.wind10.nc') as ds:

    # Cut data: write one NetCDF file per value of the `ens` coordinate
    for ens in ds.ens.values:
        # output file (the file name uses the member value shifted by +1)
        fl=f'hcp850{ens+1:03d}_wind10_woa13cp_2d_mm_1850_2099.nc'
        print(fl)

        # Save to netcdf: select the member being iterated, so each output
        # file holds its own member rather than a copy of member 0
        ds.sel(ens=ens).to_netcdf(f'{data_dir}/{fl}')

hcp850001_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850002_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850003_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850004_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850005_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850006_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850007_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850008_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850009_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850010_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850011_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850012_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850013_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850014_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850015_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850016_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850017_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850018_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850019_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850020_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850021_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850022_wind10_woa13cp_2d_mm_1850_2099.nc
hcp850023_wind10_woa13cp_2d_mm_1

# Process Files
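To process a different variable, change the variable-specific names in the cells below: the input directory, the input and output file names, and the variable names (e.g. `aps` and `pATM`).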

In [108]:
# Convert the raw `aps` files (one per ensemble member) into `pATM` files:
# replace the time axis, convert Pa -> atm, rename variables, and keep
# only 1982-01 through 2017-01.

# define directories (identical for every member, so set once)
in_dir = '/local/data/artemis/workspace/gloege/data/MPI_GE/aps'

# Define date range: one stamp per month for 1850-2099, each shifted
# 14 days past the month start. Built once and reused for every member.
dates = pd.date_range(start='1850-01-01T00:00:00.000000000',
                      end='2099-12-01T00:00:00.000000000',freq='MS')+ np.timedelta64(14, 'D')

# divisor used to convert pascal to atmosphere
Pa_in_atm = 101325

# define ensemble length
for ens in np.arange(1,101):
    # define files
    ifile = f'{in_dir}/raw/hcp850{ens:03d}_aps_woa13cp_2d_mm_1850_2099.nc'
    ofile = f'{in_dir}/processed/pATM_2D_mon_MPI{ens:03d}_1x1_198201-201701.nc'
    print(f'{ifile}')

    # Load file; the context manager closes it after the output is written,
    # so file handles do not pile up across the 100 iterations
    with xr.open_dataset(ifile) as ds_raw:

        # make dates be datetime64
        ds_raw['time'] = dates

        # convert pascal to atmosphere
        ds_raw['aps'] = ds_raw['aps']/Pa_in_atm
        ds_raw.aps.attrs = {'long_name':'surface pressure',
                            'units':'atmospheres'}

        # rename variables, then subset in time
        ds = ds_raw.rename({'lon':'xlon',
                            'lat':'ylat',
                            'aps':'pATM'}).sel(time=slice('1982-01','2017-01'))

        # Save netcdf (inside the `with` so lazily-loaded data is still readable)
        ds.to_netcdf(f'{ofile}')

/local/data/artemis/workspace/gloege/data/MPI_GE/aps/raw/hcp850001_aps_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/workspace/gloege/data/MPI_GE/aps/raw/hcp850002_aps_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/workspace/gloege/data/MPI_GE/aps/raw/hcp850003_aps_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/workspace/gloege/data/MPI_GE/aps/raw/hcp850004_aps_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/workspace/gloege/data/MPI_GE/aps/raw/hcp850005_aps_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/workspace/gloege/data/MPI_GE/aps/raw/hcp850006_aps_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/workspace/gloege/data/MPI_GE/aps/raw/hcp850007_aps_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/workspace/gloege/data/MPI_GE/aps/raw/hcp850008_aps_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/workspace/gloege/data/MPI_GE/aps/raw/hcp850009_aps_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/workspace/gloege/data/MPI_GE/aps/raw/hcp850010_aps_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/

## Process siconc

In [11]:
# Convert the raw `siconc` files (one per ensemble member) into `iceFrac`
# files: drop the depth variable, replace the time axis, rename variables,
# and keep only 1982-01 through 2017-01.

# define directories (identical for every member, so set once)
in_dir = '/local/data/artemis/workspace/gloege/data/MPI_GE/siconc'

# Define date range: one stamp per month for 1850-2099, each shifted
# 14 days past the month start. Built once and reused for every member.
dates = pd.date_range(start='1850-01-01T00:00:00.000000000',
                      end='2099-12-01T00:00:00.000000000',freq='MS')+ np.timedelta64(14, 'D')

# define ensemble length
for ens in np.arange(1,101):
    # define files
    ifile = f'{in_dir}/raw/hcp850{ens:03d}_siconc_woa13cp_2d_mm_1850_2099.nc'
    ofile = f'{in_dir}/processed/iceFrac_2D_mon_MPI{ens:03d}_1x1_198201-201701.nc'
    print(f'{ifile}')

    # Load file; the context manager closes it after the output is written,
    # so file handles do not pile up across the 100 iterations
    with xr.open_dataset(ifile) as ds_raw:

        # Drop un-needed variables (squeeze removes length-1 dimensions first)
        ds = ds_raw.squeeze().drop('depth')

        # make dates be datetime64
        ds['time'] = dates

        # rename variables, then subset in time
        ds = ds.rename({'lon':'xlon',
                        'lat':'ylat',
                        'siconc':'iceFrac'}).sel(time=slice('1982-01','2017-01'))

        # Save netcdf (inside the `with` so lazily-loaded data is still readable)
        ds.to_netcdf(f'{ofile}')

/local/data/artemis/workspace/gloege/data/MPI_GE/siconc/raw/hcp850001_siconc_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/workspace/gloege/data/MPI_GE/siconc/raw/hcp850002_siconc_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/workspace/gloege/data/MPI_GE/siconc/raw/hcp850003_siconc_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/workspace/gloege/data/MPI_GE/siconc/raw/hcp850004_siconc_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/workspace/gloege/data/MPI_GE/siconc/raw/hcp850005_siconc_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/workspace/gloege/data/MPI_GE/siconc/raw/hcp850006_siconc_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/workspace/gloege/data/MPI_GE/siconc/raw/hcp850007_siconc_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/workspace/gloege/data/MPI_GE/siconc/raw/hcp850008_siconc_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/workspace/gloege/data/MPI_GE/siconc/raw/hcp850009_siconc_woa13cp_2d_mm_1850_2099.nc
/local/data/artemis/workspace/gloege/data/MPI_GE/siconc/raw/hcp8

# Copy files

In [109]:
# Members list: member numbers rendered as zero-padded three-digit strings
mpi_members = [f'{number:03d}' for number in (
     6,  9, 14, 20, 22,
    24, 25, 27, 38, 43,
    45, 46, 57, 60, 64,
    70, 75, 77, 78, 80,
    81, 83, 91, 95, 98,
)]

In [106]:
# Copy each selected member's processed iceFrac file into that member's directory.
for member in mpi_members:
    print(member)

    # source tree and per-member destination directory
    in_dir = '/local/data/artemis/workspace/gloege/data/MPI_GE/siconc'
    out_dir = f'/local/data/artemis/workspace/gloege/SOCAT-LE/data/raw/large_ensembles/MPI/member_{member}'

    # the file keeps the same name at the destination
    fname = f'iceFrac_2D_mon_MPI{member}_1x1_198201-201701.nc'
    src = f'{in_dir}/processed/{fname}'
    dst = f'{out_dir}/{fname}'

    # copy (the source file is left in place)
    shutil.copy(src, dst)

006
009
014
020
022
024
025
027
038
043
045
046
057
060
064
070
075
077
078
080
081
083
091
095
098


In [110]:
# Copy each selected member's processed pATM file into that member's directory.
for member in mpi_members:
    print(member)

    # source tree and per-member destination directory
    in_dir = '/local/data/artemis/workspace/gloege/data/MPI_GE/aps'
    out_dir = f'/local/data/artemis/workspace/gloege/SOCAT-LE/data/raw/large_ensembles/MPI/member_{member}'

    # the file keeps the same name at the destination
    fname = f'pATM_2D_mon_MPI{member}_1x1_198201-201701.nc'
    src = f'{in_dir}/processed/{fname}'
    dst = f'{out_dir}/{fname}'

    # copy (the source file is left in place)
    shutil.copy(src, dst)

006
009
014
020
022
024
025
027
038
043
045
046
057
060
064
070
075
077
078
080
081
083
091
095
098


In [111]:
# Copy each selected member's processed U10 file into that member's directory.
for member in mpi_members:
    print(member)

    # source tree and per-member destination directory
    in_dir = '/local/data/artemis/workspace/gloege/data/MPI_GE/wind10'
    out_dir = f'/local/data/artemis/workspace/gloege/SOCAT-LE/data/raw/large_ensembles/MPI/member_{member}'

    # the file keeps the same name at the destination
    fname = f'U10_2D_mon_MPI{member}_1x1_198201-201701.nc'
    src = f'{in_dir}/processed/{fname}'
    dst = f'{out_dir}/{fname}'

    # copy (the source file is left in place)
    shutil.copy(src, dst)

006
009
014
020
022
024
025
027
038
043
045
046
057
060
064
070
075
077
078
080
081
083
091
095
098
