# Save files from bucket to bucket
<div class="alert alert-danger alert-info">
    <b>It is important to save your results in a place that can last longer than a few days/weeks!</b>
</div>
- Use this notebook when you have saved data locally on your JupyterLab instance and you want to make a backup on https://forces2021.uiogeo-apps.sigma2.no/

In [None]:
import os
import pathlib
import s3fs
import xarray as xr

## Connect to bucket (anonymous login for public data only)

In [None]:
# Anonymous client (no credentials supplied) pointed at the endpoint
# that hosts the public data.
fs = s3fs.S3FileSystem(
    client_kwargs=dict(endpoint_url="https://climate.uiogeo-apps.sigma2.no/"),
    anon=True,
)

## Get data into xarray

In [None]:
# Glob pattern for the daily `tasmin` netCDF files of the G6sulfur experiment;
# the `*` wildcards leave several directory levels and the file names open.
s3path = "s3://ESGF/CMIP6/GeoMIP/MPI-M/*/G6sulfur/*/day/tasmin/gn/*/*.nc"

In [None]:
# Expand the glob pattern into the list of matching paths in the bucket.
remote_files = fs.glob(s3path)

In [None]:
# Display the matched paths to check that the pattern found the expected files.
remote_files

In [None]:
# Open each matched object as a file-like handle that xarray can read from.
fileset = [fs.open(remote_path) for remote_path in remote_files]

# Combine all the files into a single dataset, aligned on their coordinates;
# use_cftime=True decodes the time axis into cftime objects.
dset = xr.open_mfdataset(
    fileset,
    combine="by_coords",
    use_cftime=True,
)

In [None]:
# Display the dataset summary (dimensions, coordinates, variables, attributes).
dset

## Check the size (MB) of our dataset

In [None]:
# `nbytes` is the dataset size in bytes; dividing by 1e6 expresses it in megabytes.
dset.nbytes / 1e6

Our dataset is a bit more than 2.4 GB

## Save file from memory to bucket

In [None]:
%%time
# Read all the data from the remote files into local memory, so that the
# dataset can be written out afterwards. `%%time` reports how long this takes.
dset.load()

## Save your results to Remote private object storage
- your credentials are in `$HOME/.aws/credentials` 
- check with your instructor to get the secret access key (replace `XXXXXXXXXXXX` with the right key)

```
[default]
aws_access_key_id=forces2021-work
aws_secret_access_key=XXXXXXXXXXXX
aws_endpoint_url=https://forces2021.uiogeo-apps.sigma2.no/
```

In [None]:
# Authenticated client for the private object storage: anon=False means
# credentials are required (see the `$HOME/.aws/credentials` note above).
target = s3fs.S3FileSystem(
    client_kwargs=dict(endpoint_url="https://forces2021.uiogeo-apps.sigma2.no/"),
    anon=False,
)

## Save as netCDF
- netCDF is not a cloud-optimized format so it may be slow

In [None]:
# Destination object for the netCDF copy of the dataset.
# NOTE(review): `annefou` looks like a user-specific prefix — presumably each
# user should replace it with their own folder; confirm.
s3_path = "s3://work/annefou/tasmin_day_MPI-ESM1-2-LR_G6sulfur_r1i1p1f1_gn.nc"
print(s3_path)

In [None]:
# With no target path, `to_netcdf` returns the serialized file content in
# memory instead of writing to disk; that payload is uploaded in one write.
with target.open(s3_path, mode="wb") as remote_handle:
    netcdf_payload = dset.to_netcdf(path=None)
    remote_handle.write(netcdf_payload)

## Then you can use the remote file

In [None]:
# Same object as the netCDF file written above, given without the `s3://`
# scheme and wrapped in a list so it can be opened like a multi-file set.
remote_file = ["work/annefou/tasmin_day_MPI-ESM1-2-LR_G6sulfur_r1i1p1f1_gn.nc"]

In [None]:
# Open the uploaded object(s) through the authenticated client.
fileset = [target.open(object_key) for object_key in remote_file]

In [None]:
%%time
# Re-open the netCDF object from the private bucket to check that it can be
# read back, then display its summary.
ds_check = xr.open_mfdataset(fileset, combine="by_coords", use_cftime=True)
ds_check

In [None]:
%%time
# Seasonal mean: group the time steps by season and average over time within
# each group, skipping missing values and keeping the variables' attributes.
# NOTE(review): ds_check comes from open_mfdataset, so this probably only
# builds a lazy computation; the timing may exclude the actual work — confirm.
ds_seas = ds_check.groupby("time.season").mean("time", keep_attrs=True, skipna=True)

## Save as Zarr
- it usually takes longer to save but it is much faster to read

In [None]:
# Make sure the data is in memory before writing it out (it is already loaded
# by the earlier `dset.load()` cell if the notebook was run from top to bottom).
dset.load()

In [None]:
# Destination for the Zarr copy: same location and base name as the netCDF
# file, with a `.zarr` suffix. This reassigns `s3_path` used for netCDF above.
s3_path = "s3://work/annefou/tasmin_day_MPI-ESM1-2-LR_G6sulfur_r1i1p1f1_gn.zarr"
print(s3_path)

In [None]:
# Expose the bucket location as a key/value mapping, used below as the Zarr
# store for both writing and reading.
# NOTE(review): check=False presumably skips the initial access check — confirm.
store = s3fs.S3Map(root=s3_path, s3=target, check=False)

In [None]:
%%time
# mode="w" overwrites anything already stored at this location;
# consolidated=True also writes consolidated metadata for the store;
# compute=True writes the array data immediately rather than deferring it.
dset.to_zarr(store=store, mode="w", consolidated=True, compute=True)

## Then you can use the remote file
- loading Zarr is usually faster, especially with large datasets

In [None]:
%%time
# Re-open the Zarr store written above, using its consolidated metadata,
# then display the dataset summary.
ds_check = xr.open_zarr(store=store, consolidated=True)
ds_check

In [None]:
%%time
# Same seasonal mean as computed for the netCDF copy, now on the Zarr-backed
# dataset, so that the two timings can be compared.
ds_seas = ds_check.groupby("time.season").mean("time", keep_attrs=True, skipna=True)