# Combine S2S-Reforecasts with Different Initializations into One File
Version 19 January 2024, Selina Kiefer

### Input: grib-files
S2S reforecasts of one meteorological predictor field at a specific timestep in grib format (e.g. from https://apps.ecmwf.int/datasets/data/s2s-reforecasts-daily-averaged-ecmf/levtype=sfc/type=cf/)
### Output: csv-file
continuous timeseries of S2S reforecasts of one meteorological predictor field in csv-format

#### Define the path and file names

In [None]:
# Locations of the helper modules, of the grib input and of the csv output.
PATH_defined_functions = './Defined_Functions/'
PATH_input_data = './Data_in_grib_Format/'
PATH_output_file = './Data_in_csv_Format/'
file_name_output_file = 's2s_reforecasts_u10_60W_60E_20N_80N_2000_2020_lead_time_28d.csv'

# Day and month of every reforecast initialization that is combined, in the
# order in which the files are processed. The control and the perturbed file
# lists are both derived from this single list, so they always line up
# entry by entry.
_initialization_dates = [
    '05_Oct', '08_Oct', '12_Oct', '15_Oct', '19_Oct', '22_Oct', '26_Oct', '29_Oct',
    '02_Nov', '05_Nov', '09_Nov', '12_Nov', '16_Nov', '19_Nov', '23_Nov', '26_Nov', '30_Nov',
    '03_Dec', '07_Dec', '10_Dec', '14_Dec', '17_Dec', '21_Dec', '24_Dec', '28_Dec', '31_Dec',
    '04_Jan', '07_Jan', '11_Jan', '14_Jan', '18_Jan', '21_Jan', '25_Jan', '28_Jan',
    '01_Feb', '04_Feb', '08_Feb', '11_Feb', '15_Feb', '18_Feb', '22_Feb', '25_Feb',
    '01_Mar', '04_Mar', '08_Mar', '11_Mar', '15_Mar', '18_Mar', '22_Mar', '25_Mar', '29_Mar',
    '01_Apr',
    # Currently not included: '05_Apr', '08_Apr', '12_Apr', '15_Apr'.
]

# Names of the grib files with the control runs, one per initialization.
ifiles_input_data_control = [
    's2s_control_u10_' + date + '_2000_2020' for date in _initialization_dates
]

# Names of the grib files with the perturbed runs, one per initialization.
ifiles_input_data_perturbed = [
    's2s_perturbed_u10_' + date + '_2000_2020' for date in _initialization_dates
]

#### Import the necessary packages and functions

In [None]:
# Import the necessary python packages.
import cfgrib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import yaml
from tqdm.notebook import tqdm

In [None]:
# Make the project's helper modules importable and load everything that
# read_in_csv_data exports.
import sys
# Register the helper directory at position 1 of the module search path.
sys.path.insert(1, PATH_defined_functions)
# NOTE(review): the wildcard import hides which names this notebook takes from
# read_in_csv_data; presumably later cells rely on some of them -- confirm and
# consider importing the needed names explicitly.
from read_in_csv_data import *

#### Read in the data

In [None]:
# Preview the first control-run file: open it with the cfgrib engine, flatten
# it into a table in which the former index levels are ordinary columns, and
# display that table.
input_data_control = xr.open_dataset(
    PATH_input_data + ifiles_input_data_control[0],
    engine='cfgrib',
)
df_input_data_control = input_data_control.to_dataframe().reset_index()
df_input_data_control

In [None]:
# Preview the first perturbed-run file: open it with the cfgrib engine, flatten
# it into a table in which the former index levels are ordinary columns, and
# display that table.
input_data_perturbed = xr.open_dataset(
    PATH_input_data + ifiles_input_data_perturbed[0],
    engine='cfgrib',
)
df_input_data_perturbed = input_data_perturbed.to_dataframe().reset_index()
df_input_data_perturbed

#### Combine Control and Perturbed Run of S2S-Reforecasts

In [None]:
# Create an ensemble of S2S reforecasts.
def _read_region_of_interest(grib_file):
    """Read one grib file and return its grid points inside the target region.

    Parameters
    ----------
    grib_file : str
        Name of the grib file, relative to ``PATH_input_data``.

    Returns
    -------
    pandas.DataFrame
        One row per remaining grid point and time step, without the columns
        'time' and 'step', restricted to longitudes > 299 or < 61 and to
        latitudes > 19 and < 81 (all bounds exclusive), free of rows with
        missing values and sorted by longitude and latitude.
    """
    # Close the file handle again as soon as the values are in memory.
    with xr.open_dataset(PATH_input_data + grib_file, engine='cfgrib') as dataset:
        df = dataset.to_dataframe().reset_index()

    # 'time' and 'step' are not needed in the output; 'valid_time' is kept
    # and used for the final sorting.
    df = df.drop(columns=['time', 'step'])

    # The longitude range wraps around 0 degrees, hence the "or" between the
    # two longitude conditions.
    # NOTE(review): the output file name suggests the box 60W-60E, 20N-80N;
    # the exclusive bounds 299/61/19/81 presumably exist to keep the boundary
    # grid points -- confirm against the grid of the input files.
    in_region = (
        ((df['longitude'] > 299) | (df['longitude'] < 61))
        & (df['latitude'] > 19)
        & (df['latitude'] < 81)
    )

    # where() + dropna() is kept on purpose instead of boolean indexing: it
    # blanks the rows outside the region and then removes every row that
    # contains a missing value, which yields the same rows and the same
    # column dtypes as the previous step-by-step filtering.
    df = df.where(in_region).dropna()
    return df.sort_values(by=['longitude', 'latitude'])


# Both lists are walked in lockstep, so a length mismatch would silently
# leave out files of the longer list.
if len(ifiles_input_data_control) != len(ifiles_input_data_perturbed):
    raise ValueError(
        'The control and perturbed file lists must have the same length, got '
        + str(len(ifiles_input_data_control)) + ' and '
        + str(len(ifiles_input_data_perturbed)) + '.'
    )

# Collect the per-file dataframes and concatenate them once at the end;
# growing a dataframe inside the loop copies all previous rows every time.
frames = []
file_pairs = list(zip(ifiles_input_data_control, ifiles_input_data_perturbed))
for control_file, perturbed_file in tqdm(file_pairs):
    # Keep the order "perturbed runs, then control run" per initialization.
    frames.append(_read_region_of_interest(perturbed_file))
    frames.append(_read_region_of_interest(control_file))

# Combine the control and the perturbed runs and sort the S2S reforecasts by
# valid time and ensemble member, with a fresh consecutive index.
df_s2s_reforecasts = (
    pd.concat(frames, ignore_index=True, sort=False)
    .sort_values(by=['valid_time', 'number'])
    .reset_index(drop=True)
)
     

#### Double-check the representation of the data

In [None]:
# Inspect the first five rows to verify the reshaping and the sorting.
df_s2s_reforecasts.head(5)

In [None]:
# Inspect the last five rows as well to verify that the end of the dataframe
# is sorted and cleaned up correctly, too.
df_s2s_reforecasts.tail(5)

#### Save the data in csv format

In [None]:
# Save the pandas dataframe in csv-format.
import os

# Create the output directory first if it is missing; otherwise to_csv would
# fail only after the whole computation has already been done.
if PATH_output_file:
    os.makedirs(PATH_output_file, exist_ok=True)

# The row index is written as well (to_csv default), exactly as before.
df_s2s_reforecasts.to_csv(PATH_output_file + file_name_output_file)

In [None]:
# End of Program