# Combine results into single datasets

_Dataset:_ Supplementary data for Megill and Grewe (2024): "Investigating the limiting aircraft design-dependent and environmental factors of persistent contrail formation".

_Authors:_

- Liam Megill (1, 2), https://orcid.org/0000-0002-4199-6962   
- Volker Grewe (1, 2), https://orcid.org/0000-0002-8012-6783  

_Affiliation (1)_: Deutsches Zentrum für Luft- und Raumfahrt (DLR), Institut für Physik der Atmosphäre, Oberpfaffenhofen, Germany

_Affiliation (2)_: Delft University of Technology (TU Delft), Faculty of Aerospace Engineering, Section Aircraft Noise and Climate Effects (ANCE), Delft, The Netherlands

_Corresponding author_: Liam Megill, liam.megill@dlr.de

_doi_: https://doi.org/10.5194/egusphere-2024-3398

---


### Summary


### Inputs


### Outputs


---

### Copyright

Copyright © 2024 Liam Megill

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

## Non-border limiting factors

The first combination is for the non-border limiting factors. To run, set `project_dir` to the top-level directory. Set `save_to_file` to True if you want the results to be saved - make sure to also modify `savename` accordingly.

In [None]:
import xarray as xr
import numpy as np
import datetime
from helper import generate_season_years

# ---- settings ---------------------------------------------------------------
# write the combined dataset to disk when True
save_to_file = False

# locations; project_dir is the top-level directory of the project
project_dir = ""
processed_data_dir = project_dir + "data/processed/limfac/"

# RHi corrections: `corrs` holds the tags used in the input file names,
# `corrs_str` the labels stored on the "corr" coordinate of the result
corrs = ["uncor", "cor-98p", "cor-95p", "cor-90p"]
corrs_str = ["uncor", "RHi-cor-98p", "RHi-cor-95p", "RHi-cor-90p"]

# season labels to combine
# NOTE(review): presumably "2010DJF" ... "2019SON" - verify against helper.generate_season_years
season_years = generate_season_years(2010, 2019)

# ---- load and combine -------------------------------------------------------
# one entry per correction: the n_time-weighted mean over all seasons
ds_arr_full = []
for corr in corrs:
    # one entry per season: the n_time-weighted mean over the season's three files
    ds_arr = []
    for sy in season_years:
        # the three files of a season (index 0-2) are stacked along a new "mon" dimension
        files = [
            f"{processed_data_dir}all/nonborder_limfac_r1M_{sy}-{i_mon}_ERA5_GRIB_{corr}.nc"
            for i_mon in range(3)
        ]
        ds_i = xr.open_mfdataset(files, concat_dim=["mon"], combine="nested")
        n_time = ds_i.n_time
        n_time_season = n_time.sum(dim="mon")
        weighted_i = ds_i.drop_vars("n_time") * n_time
        ds_i_mean = weighted_i.sum(dim="mon") / n_time_season
        # keep the season's total n_time: it is the weight used in the next step
        ds_i_mean["n_time"] = n_time_season
        ds_arr.append(ds_i_mean)

    # weight every season by its n_time when averaging over the "sy" dimension
    ds = xr.combine_nested(ds_arr, concat_dim=["sy"])
    n_time = ds.n_time
    weighted = ds.drop_vars("n_time") * n_time
    ds_tot = weighted.sum(dim="sy") / n_time.sum(dim="sy")
    ds_arr_full.append(ds_tot)

# stack the per-correction results and label the new dimension
ds_full = xr.combine_nested(ds_arr_full, concat_dim=["corr"])
ds_full["corr"] = corrs_str

# ---- metadata ---------------------------------------------------------------
created = datetime.datetime.today().strftime("%Y-%m-%d %H:%M:%S")
global_attrs = {
    "author": "Liam Megill",
    "institution": "Deutsches Zentrum für Luft- und Raumfahrt, Institute of Atmospheric Physics",
    "description": "Total non-border contrail limiting factors, calculated using 2160 random hours (numpy random seed) within the 2010 decade of ERA5 GRIB data stored on DKRZ Levante",
    "seed": 42,
    "timespan": "2010DJF - 2019SON",
    "created": "{} CET".format(created),
}
ds_full.attrs.update(global_attrs)
ds_full["corr"].attrs.update({"description": "Correction applied to ERA5 RHi"})

# ---- output -----------------------------------------------------------------
if save_to_file:
    savename = f"{processed_data_dir}nonborder_limfac_allAC_rmS_ERA5_GRIB_allcorr.nc"
    ds_full.to_netcdf(savename)

# display the combined dataset as the cell output
ds_full

## Horizontal limiting factors

Next, we combine the horizontal limiting factors (borders). To run, set `project_dir` to the top-level directory. Set `save_to_file` to True if you want the results to be saved - make sure to also modify `savename` accordingly.

In [None]:
import xarray as xr
import numpy as np
import datetime
from helper import generate_season_years

# ---- settings ---------------------------------------------------------------
# write the combined dataset to disk when True
save_to_file = False

# locations
project_dir = ""  # set top-level directory path
processed_data_dir = project_dir + "data/processed/limfac/"

# aircraft IDs and RHi corrections to combine; both are used in the input
# file names and as coordinate labels of the result
ac_ids = ["AC0", "AC1", "AC2", "AC3", "AC4", "AC7", "AC8"]
corrs = ["uncor", "RHi-cor-98p", "RHi-cor-95p", "RHi-cor-90p"]

# season labels to combine
# NOTE(review): presumably "2010DJF" ... "2019SON" - verify against helper.generate_season_years
season_years = generate_season_years(2010, 2019)

# ---- load and combine -------------------------------------------------------
# nested list [aircraft][correction] of n_time-weighted means over all seasons
ds_arr = []
for ac_id in ac_ids:
    ds_row = []
    for corr in corrs:
        # one file per season, stacked along a new "sy" dimension
        files = [
            f"{processed_data_dir}{ac_id}/limfac_{ac_id}_r1S_{sy}_ERA5_GRIB_{corr}.nc"
            for sy in season_years
        ]
        ds_i = xr.open_mfdataset(files, concat_dim=["sy"], combine="nested")
        n_time = ds_i.n_time
        # weight every season by its n_time
        weighted_i = ds_i.drop_vars("n_time") * n_time
        ds_i_mean = weighted_i.sum(dim="sy") / n_time.sum(dim="sy")
        ds_row.append(ds_i_mean)
    ds_arr.append(ds_row)

# outer list level becomes the "AC" dimension, inner level the "corr" dimension
ds = xr.combine_nested(ds_arr, concat_dim=["AC", "corr"])
ds["AC"] = ac_ids
ds["corr"] = corrs

# ---- metadata ---------------------------------------------------------------
created = datetime.datetime.today().strftime("%Y-%m-%d %H:%M:%S")
global_attrs = {
    "author": "Liam Megill",
    "institution": "Deutsches Zentrum für Luft- und Raumfahrt, Institute of Atmospheric Physics",
    "description": "Contrail limiting factors, calculated using 2160 random hours (numpy random seed) within the 2010 decade of ERA5 GRIB data stored on DKRZ Levante",
    "seed": 42,
    "timespan": "2010DJF - 2019SON",
    "created": "{} CET".format(created),
}
ds.attrs.update(global_attrs)
ds["corr"].attrs.update({"description": "Correction applied to ERA5 RHi"})
ds["AC"].attrs.update({"description": "Aircraft ID"})

# ---- output -----------------------------------------------------------------
if save_to_file:
    savename = f"{processed_data_dir}limfac_allAC_rmS_ERA5_GRIB_allcorr_v2.nc"
    ds.to_netcdf(savename)

# display the combined dataset as the cell output
ds

## Vertical limiting factors

Next, we combine the vertical limiting factors (borders). To run, set `project_dir` to the top-level directory. Set `save_to_file` to True if you want the results to be saved - make sure to also modify `savename` accordingly.

In [None]:
import xarray as xr
import numpy as np
import datetime
from helper import generate_season_years

# ---- settings ---------------------------------------------------------------
# write the combined dataset to disk when True
save_to_file = False

# locations
project_dir = ""  # set top-level directory path
processed_data_dir = project_dir + "data/processed/limfac/"

# aircraft IDs and RHi corrections to combine; both are used in the input
# file names and as coordinate labels of the result
ac_ids = ["AC0", "AC1", "AC3", "AC4", "AC7", "AC8"]
corrs = ["uncor", "RHi-cor-98p", "RHi-cor-95p", "RHi-cor-90p"]

# season labels to combine
# NOTE(review): presumably "2010DJF" ... "2019SON" - verify against helper.generate_season_years
season_years = generate_season_years(2010, 2019)

# ---- load and combine -------------------------------------------------------
# nested list [aircraft][correction] of n_time-weighted means over all seasons
ds_arr = []
for ac_id in ac_ids:
    ds_row = []
    for corr in corrs:
        # one file per season, stacked along a new "sy" dimension
        files = [
            f"{processed_data_dir}{ac_id}/vert_limfac_{ac_id}_r1S_{sy}_ERA5_GRIB_{corr}.nc"
            for sy in season_years
        ]
        ds_i = xr.open_mfdataset(files, concat_dim=["sy"], combine="nested")
        n_time = ds_i.n_time
        # weight every season by its n_time
        weighted_i = ds_i.drop_vars("n_time") * n_time
        ds_i_mean = weighted_i.sum(dim="sy") / n_time.sum(dim="sy")
        ds_row.append(ds_i_mean)
    ds_arr.append(ds_row)

# outer list level becomes the "AC" dimension, inner level the "corr" dimension
ds = xr.combine_nested(ds_arr, concat_dim=["AC", "corr"])
ds["AC"] = ac_ids
ds["corr"] = corrs

# ---- metadata ---------------------------------------------------------------
created = datetime.datetime.today().strftime("%Y-%m-%d %H:%M:%S")
global_attrs = {
    "author": "Liam Megill",
    "institution": "Deutsches Zentrum für Luft- und Raumfahrt, Institute of Atmospheric Physics",
    "description": "Vertical contrail limiting factors, calculated using 2160 random hours (numpy random seed) within the 2010 decade of ERA5 GRIB data stored on DKRZ Levante",
    "seed": 42,
    "timespan": "2010DJF - 2019SON",
    "created": "{} CET".format(created),
}
ds.attrs.update(global_attrs)
ds["corr"].attrs.update({"description": "Correction applied to ERA5 RHi"})
ds["AC"].attrs.update({"description": "Aircraft ID"})

# Write the combined dataset to disk only when requested. The progress
# message is printed inside the branch so that "Saving..." is reported
# only when a file is actually being written.
if save_to_file:
    savename = f"{processed_data_dir}vert_limfac_allAC_rmS_ERA5_GRIB_allcorr_v2.nc"
    print("File created. Saving...")
    ds.to_netcdf(savename)

# display the combined dataset as the cell output
ds

## Gmax

Finally, we combine the histograms of the mixing line slope $G_{max}$, as calculated with `15-lm-Gmax_grib.ipynb`, into monthly and seasonal datasets. To run, set `project_dir` to the top-level directory. Set `save` to True if you want the results to be saved - make sure to also modify `savename_allS` and `savename_allM` accordingly.

In [None]:
import xarray as xr
import numpy as np
import datetime

# ---- what to produce --------------------------------------------------------
monthly = True    # build the monthly dataset
seasonal = True   # build the seasonal dataset
save = False      # write the datasets built above to disk

# ---- where to read from / write to ------------------------------------------
project_dir = ""  # set top-level directory path
processed_data_dir = project_dir + "data/processed/ppcf/"
savename_allM = "ppcfhist_M_2010s_ERA5_GRIB_v2.nc"
savename_allS = "ppcfhist_S_2010s_ERA5_GRIB_v2.nc"

# ---- period to combine ------------------------------------------------------
# both dates are passed to find_starts_of_months
start_date = datetime.date(2009, 12, 1)
end_date = datetime.date(2019, 12, 1)

# ---- RHi corrections --------------------------------------------------------
# `corrs` holds the suffixes used in the input file names, `corrs_str` the
# labels stored on the "corr" coordinate of the result (same order)
corrs = ["", "_RHi_cor_98p", "_RHi_cor_95p", "_RHi_cor_90p"]
corrs_str = ["uncor", "RHi-cor-98p", "RHi-cor-95p", "RHi-cor-90p"]

def season_from_date(date):
    """Return the season label of `date` in the form "<year><season>".

    The season is one of DJF, MAM, JJA or SON. December is counted towards
    the DJF season of the following year, e.g. 2009-12 gives "2010DJF".

    Args:
        date: object with `year` and `month` attributes, e.g. `datetime.date`.

    Returns:
        str: year followed by the three-letter season code.
    """
    season_by_month = {
        12: "DJF", 1: "DJF", 2: "DJF",
        3: "MAM", 4: "MAM", 5: "MAM",
        6: "JJA", 7: "JJA", 8: "JJA",
    }
    # every month not listed above is labelled SON
    label = season_by_month.get(date.month, "SON")
    # December belongs to the DJF season of the next year
    label_year = date.year + 1 if date.month == 12 else date.year
    return f"{label_year}{label}"


def find_starts_of_months(start_date, end_date):
    """Return the first day of each month from `start_date` towards `end_date`.

    Starting at `start_date`, the cursor is snapped to the first of its month,
    recorded, and then advanced by 32 days, which always lands on day 2-5 of
    the following month. The loop stops once the cursor is later than
    `end_date`.

    Because the comparison uses the un-snapped cursor, a month after the
    first one is only included if `end_date` is on or after that landing day.
    In particular, an `end_date` on the 1st of a month excludes that month
    (unless it is also the month of `start_date`).

    Args:
        start_date (datetime.date): date within the first month to include.
        end_date (datetime.date): upper bound for the cursor.

    Returns:
        list[datetime.date]: first-of-month dates in chronological order;
        empty if `start_date` is later than `end_date`.
    """
    month_starts = []
    cursor = start_date
    while not cursor > end_date:
        first_of_month = cursor.replace(day=1)
        month_starts.append(first_of_month)
        # 32 days from the 1st always reaches the next month
        cursor = first_of_month + datetime.timedelta(days=32)
    return month_starts


# find months and seasons
# first day of every month in the period, its "YYYY-MM" label (used in the
# file names and as the "month" coordinate) and its season label
starts_of_months = find_starts_of_months(start_date, end_date)
formatted_starts = [date.strftime("%Y-%m") for date in starts_of_months]
season_strings = [season_from_date(date) for date in starts_of_months]

# get data
# one dataset per correction, each holding all months along a "month" dimension
ds_arr = np.empty(len(corrs), dtype="object")
for j, corr in enumerate(corrs):
    files = [processed_data_dir+f"ppcfhist_1M_{monstart}_ERA5_GRIB{corr}.nc" for monstart in formatted_starts]
    datasets = []
    num_vals_arr = np.empty(len(files))
    for i, file in enumerate(files):
        ds_i = xr.open_dataset(file)
        datasets.append(ds_i)
        # the per-file attribute "num_vals" is collected here and stored as a
        # variable along "month" below
        num_vals_arr[i] = int(ds_i.attrs["num_vals"])
    ds_j = xr.concat(datasets, dim="month")
    # label the new dimension; "season" is a second coordinate on the same
    # dimension and is what the seasonal grouping below uses
    ds_j = ds_j.assign_coords(month=("month", formatted_starts))
    ds_j = ds_j.assign_coords(season=("month", season_strings))
    ds_j["num_vals"] = ("month", num_vals_arr)

    ds_arr[j] = ds_j


# combine and save to file
ds = xr.combine_nested(ds_arr.tolist(), concat_dim=["corr"])
ds["corr"] = corrs_str

# monthly
if monthly:
    # ds_allM is the same object as ds, so these attributes are set on ds itself
    # NOTE(review): unlike the seasonal dataset, no "description" attribute is set here
    ds_allM = ds
    ds_allM.attrs.update({"author": "Liam Megill",
                          "institution": "Deutsches Zentrum für Luft- und Raumfahrt, Institute of Atmospheric Physics",
                          "timespan": f"{formatted_starts[0]} - {formatted_starts[-1]}",
                          "created": "{} CET".format(datetime.datetime.today().strftime("%Y-%m-%d %H:%M:%S"))})
    ds_allM.corr.attrs.update({"description": "Correction applied to ERA5 RHi"})
    if save:
        ds_allM.to_netcdf(processed_data_dir+savename_allM)

# seasonal
if seasonal:
    # sum all months that share a season label (this includes num_vals)
    ds_allS = ds.groupby("season").sum(dim="month")
    ds_allS.attrs.update({"author": "Liam Megill",
                          "institution": "Deutsches Zentrum für Luft- und Raumfahrt, Institute of Atmospheric Physics",
                          "description": "Seasonal non-density histogram of G_max, calculated using ERA5 GRIB data stored on DKRZ Levante. The attribute num_vals is per season.",
                          "timespan": f"{formatted_starts[0]} - {formatted_starts[-1]}",
                          "created": "{} CET".format(datetime.datetime.today().strftime("%Y-%m-%d %H:%M:%S"))})
    ds_allS.corr.attrs.update({"description": "Correction applied to ERA5 RHi"})
    if save:
        ds_allS.to_netcdf(processed_data_dir+savename_allS)