### Notebook for producing calc_time features
#### Inspired by Windgrid paper by Andrade and Bessa

In [1]:
import __fix_relative_imports  # noqa: F401
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from mscEidalVesetrudUnofficial.global_constants import DATA_FOLDER

In [2]:
df = pd.read_csv(f"{DATA_FOLDER}/calc_time_all_data_combined.csv", index_col=0)
df.index = pd.to_datetime(df.index)
df.head()

Unnamed: 0_level_0,wind_direction_10m_6400_1007_13,wind_direction_10m_6400_1007_14,wind_direction_10m_6400_1007_15,wind_direction_10m_6400_1007_16,wind_direction_10m_6400_1007_17,wind_direction_10m_6400_1007_18,wind_direction_10m_6400_1007_19,wind_direction_10m_6400_1007_20,wind_direction_10m_6400_1007_21,wind_direction_10m_6400_1007_22,...,wind_speed_80m_roan_37,wind_speed_80m_roan_38,wind_speed_80m_roan_39,wind_speed_80m_roan_40,wind_speed_80m_roan_41,wind_speed_80m_roan_42,wind_speed_80m_roan_43,wind_speed_80m_roan_44,wind_speed_80m_roan_45,wind_speed_80m_roan_46
calc_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-10 06:00:00,276.58356,271.50293,243.53305,234.43805,247.2864,246.11003,231.32303,258.11102,201.2633,193.80275,...,8.321431,7.664421,8.125598,9.180001,9.720137,8.311477,7.242864,9.079603,9.288702,9.282009
2020-02-11 06:00:00,175.6626,162.00728,150.32025,163.98663,181.51323,206.12262,236.61069,267.6601,171.34752,172.94994,...,6.437271,6.382809,5.789454,9.527142,13.787766,14.945532,15.283508,14.14822,12.612029,13.564857
2020-02-12 06:00:00,237.51917,230.39326,222.67162,225.42088,219.5847,54.59539,12.903846,318.4591,333.06488,11.151851,...,5.31379,6.197147,5.007715,5.174086,5.867104,5.74056,5.013686,5.363545,4.327097,4.483484
2020-02-13 06:00:00,266.99283,264.14426,262.1433,256.4905,246.14258,196.33336,206.75117,195.92134,187.96024,173.27232,...,12.342651,12.594443,12.530774,13.659336,11.66494,13.99156,12.970082,13.27108,13.452687,12.894286
2020-02-14 06:00:00,115.861786,115.111115,114.88602,115.944046,121.85888,125.48886,135.22278,136.62468,147.59204,176.11656,...,14.544831,15.619975,17.22895,18.983667,19.669666,20.763447,20.384132,19.141619,18.673313,20.5186


#### Define constants

In [None]:
power_plant: str = "roan"
default_variables = [
    "wind_speed",
    "wind_direction",
    "u",
    "v",
]
default_height_levels = ["10m", "80m", "120m"]
temporal_variance_window_sizes = [1, 3, 5]  # really [3, 7, 11], but these are *2+1
path_prefix = "/"  # path_prefix
cos_sin_transform = True  # cos_sin_transform

default_pred_hours = list(range(18, 42))  # 24 hours covering day-ahead forecast
extended_pred_hours = list(range(13, 47))  # 34 hours covering day-ahead forecast and 1
# 10 hours of buffer (5 hours before and after)
default_pred_hours = [str(x) for x in default_pred_hours]
extended_pred_hours = [str(x) for x in extended_pred_hours]

#### Must produce wind components u and v first

In [None]:
def make_wind_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Helper method.
    This method calculates the Azimuthal and Meridional wind components
    from the wind speed and direction (u and v components).
    :param df: The data frame containing the wind speed and direction
    :return: The data frame with the added columns for
    the Azimuthal and Meridional wind components
    """
    cols = df.columns

    # Get all unique "rest of the name" parts
    names = {
        col.split("_", 2)[-1]
        for col in cols
        if col.startswith("wind_speed_") or col.startswith("wind_direction_")
    }

    # Create an empty dictionary for the new columns
    new_cols = {}
    for name in names:
        speed_col = f"wind_speed_{name}"
        dir_col = f"wind_direction_{name}"

        # Convert wind direction from degrees to radians and store in a new column
        rad_dir_col = df[dir_col].apply(np.deg2rad)
        new_cols[f"u_{name}"] = -df[speed_col] * np.sin(rad_dir_col)
        new_cols[f"v_{name}"] = -df[speed_col] * np.cos(rad_dir_col)

    # Convert the dictionary to a DataFrame
    df_new = pd.DataFrame(new_cols)
    df = pd.concat([df, df_new], axis=1)

    return df


new_df = make_wind_features(df.copy())
new_df.head()

Unnamed: 0_level_0,wind_direction_10m_6400_1007_13,wind_direction_10m_6400_1007_14,wind_direction_10m_6400_1007_15,wind_direction_10m_6400_1007_16,wind_direction_10m_6400_1007_17,wind_direction_10m_6400_1007_18,wind_direction_10m_6400_1007_19,wind_direction_10m_6400_1007_20,wind_direction_10m_6400_1007_21,wind_direction_10m_6400_1007_22,...,u_10m_6407_1069_29,v_10m_6407_1069_29,u_10m_6412_1007_18,v_10m_6412_1007_18,u_120m_6427_1059_25,v_120m_6427_1059_25,u_10m_6405_1038_16,v_10m_6405_1038_16,u_10m_6423_1043_27,v_10m_6423_1043_27
calc_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-08 06:00:00,,,,,,,,,,,...,,,,,,,,,,
2020-02-09 06:00:00,,,,,,117.91883,131.67363,186.89098,170.4278,147.97461,...,12.312557,0.323007,-3.321704,6.410182,0.408735,8.855601,,,10.53685,1.470185
2020-02-10 06:00:00,276.58356,271.50293,243.53305,234.43805,247.2864,246.11003,231.32303,258.11102,201.2633,193.80275,...,2.434502,3.901889,8.216866,5.719173,3.518824,8.056312,6.116025,2.023056,0.486557,5.943236
2020-02-11 06:00:00,175.6626,162.00728,150.32025,163.98663,181.51323,206.12262,236.61069,267.6601,171.34752,172.94994,...,4.211381,4.152889,2.775822,8.217536,6.919986,5.316765,0.434312,1.110843,2.161223,5.921262
2020-02-12 06:00:00,237.51917,230.39326,222.67162,225.42088,219.5847,54.59539,12.903846,318.4591,333.06488,11.151851,...,4.865835,-1.348582,-3.787609,-9.02888,3.037116,-3.898297,5.1773,2.934522,-0.975233,3.251184


#### Spatial features

In [None]:
def spatial_standard_deviation(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate the spatial standard deviation for the weather data.
    :param df: The data frame containing the weather data
    :return: The data frame with added columns for the spatial standard deviation
    """
    new_columns = []

    # Handle wind_direction variable first by translating it into linear features
    wind_direction_columns = [col for col in df.columns if "wind_direction" in col]

    sin_cos_columns = {}
    for col in wind_direction_columns:
        sin_cos_columns[f'{col.replace("wind_direction", "wind_direction_sin")}'] = (
            np.sin(np.radians(df[col]))
        )
        sin_cos_columns[f'{col.replace("wind_direction", "wind_direction_cos")}'] = (
            np.cos(np.radians(df[col]))
        )

    df = df.drop(columns=wind_direction_columns)
    df = pd.concat([df, pd.DataFrame(sin_cos_columns)], axis=1)

    alt_variables = default_variables.copy()
    alt_variables.remove("wind_direction")
    alt_variables.extend(["wind_direction_sin", "wind_direction_cos"])

    # Iterate over each feature type, height, and time
    # for variable in default_variables:
    for variable in alt_variables:
        for height in default_height_levels:
            for time in default_pred_hours:
                # Create a list to store the column names that match
                # the current feature, height, and time
                # time_str = f"_{time}"
                matching_columns = [
                    col
                    for col in df.columns
                    if all(x in col for x in [variable, height, f"_{time}"])
                ]
                std = df[matching_columns].apply(
                    lambda row: np.sqrt(
                        ((row - row.mean()) ** 2).sum() / (row.count() - 1)
                    ),
                    axis=1,
                )
                std.name = f"{variable}_{height}_spatial_std_{time}"
                new_columns.append(std)

    # Concatenate all new columns to the original DataFrame
    new_df = pd.concat(new_columns, axis=1)
    new_df.index = df.index
    return new_df


new_df2 = spatial_standard_deviation(new_df.copy())
# new_df2 = new_df2.dropna()
new_df2.head()

Unnamed: 0_level_0,wind_speed_10m_spatial_std_18,wind_speed_10m_spatial_std_19,wind_speed_10m_spatial_std_20,wind_speed_10m_spatial_std_21,wind_speed_10m_spatial_std_22,wind_speed_10m_spatial_std_23,wind_speed_10m_spatial_std_24,wind_speed_10m_spatial_std_25,wind_speed_10m_spatial_std_26,wind_speed_10m_spatial_std_27,...,wind_direction_cos_120m_spatial_std_32,wind_direction_cos_120m_spatial_std_33,wind_direction_cos_120m_spatial_std_34,wind_direction_cos_120m_spatial_std_35,wind_direction_cos_120m_spatial_std_36,wind_direction_cos_120m_spatial_std_37,wind_direction_cos_120m_spatial_std_38,wind_direction_cos_120m_spatial_std_39,wind_direction_cos_120m_spatial_std_40,wind_direction_cos_120m_spatial_std_41
calc_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-08 06:00:00,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
2020-02-09 06:00:00,1.658738,2.013945,2.25751,2.035337,1.754992,2.240134,2.237275,1.706325,1.790319,1.763589,...,0.058566,0.062474,0.06755,0.03252,0.042104,0.034615,0.039124,0.061228,0.197952,0.136166
2020-02-10 06:00:00,1.896502,1.801747,1.977907,1.92936,1.500807,2.028742,1.8462,1.756214,1.936213,1.303864,...,0.029969,0.035773,0.04725,0.056612,0.063503,0.071409,0.050898,0.028797,0.010549,0.006993
2020-02-11 06:00:00,1.963084,1.166733,1.711913,1.406433,1.30506,1.454023,1.876501,1.835714,1.807673,1.811012,...,0.175268,0.150299,0.094955,0.088301,0.164177,0.170923,0.18144,0.253549,0.338999,0.210279
2020-02-12 06:00:00,2.318661,1.735049,1.520963,1.220902,1.666443,1.524142,1.254068,1.039289,0.971016,1.765823,...,0.062056,0.060686,0.060418,0.065611,0.109172,0.134428,0.11393,0.107946,0.069609,0.025914


In [None]:
new_df2.to_csv("../../data/calc_time_spatial_std.csv")

In [None]:
def spatial_smoothing(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate the spatial smoothing for the weather data.
    NOTE: This feature is not calculated for wind direction.
    The authors of the paper do the same.
    :param df: The data frame containing the weather data
    :return: The data frame with added columns for the spatial smoothing
    """
    new_columns = []

    variables = [var for var in default_variables if var != "wind_direction"]

    # Iterate over each feature type, height, and time
    for variable in variables:
        for height in default_height_levels:
            for time in default_pred_hours:
                # Create a list to store the column names that match
                # the current feature, height, and time
                matching_columns = [
                    col
                    for col in df.columns
                    if all(x in col for x in [variable, height, f"_{time}"])
                    and "spatial_std" not in col
                ]

                # Calculate the spatial smoothing of the matching columns
                if matching_columns:  # Only calculate if there are matching columns
                    smoothing = df[matching_columns].apply(
                        lambda row: row.mean(),
                        axis=1,
                    )
                    smoothing.name = f"{variable}_{height}_spatial_smoothing_{time}"
                    new_columns.append(smoothing)

    # Concatenate all new columns to the original DataFrame
    new_df = pd.concat(new_columns, axis=1)
    new_df.index = df.index
    return new_df


new_df3 = spatial_smoothing(new_df.copy())
new_df3.head()

Unnamed: 0_level_0,wind_speed_10m_spatial_smoothing_18,wind_speed_10m_spatial_smoothing_19,wind_speed_10m_spatial_smoothing_20,wind_speed_10m_spatial_smoothing_21,wind_speed_10m_spatial_smoothing_22,wind_speed_10m_spatial_smoothing_23,wind_speed_10m_spatial_smoothing_24,wind_speed_10m_spatial_smoothing_25,wind_speed_10m_spatial_smoothing_26,wind_speed_10m_spatial_smoothing_27,...,v_120m_spatial_smoothing_32,v_120m_spatial_smoothing_33,v_120m_spatial_smoothing_34,v_120m_spatial_smoothing_35,v_120m_spatial_smoothing_36,v_120m_spatial_smoothing_37,v_120m_spatial_smoothing_38,v_120m_spatial_smoothing_39,v_120m_spatial_smoothing_40,v_120m_spatial_smoothing_41
calc_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-08 06:00:00,,,,,,,,,,,...,,,,,,,,,,
2020-02-09 06:00:00,6.167969,5.873656,4.958964,4.529929,4.597856,7.600244,8.591042,9.064639,9.073807,10.587472,...,-2.536059,-0.67816,0.145993,0.766653,0.437713,0.549035,0.461351,0.098456,1.439288,1.06678
2020-02-10 06:00:00,7.438261,6.595885,6.965009,5.103023,4.455348,4.901342,5.861078,5.882543,5.825489,5.210095,...,7.983211,8.644612,8.813993,8.01871,7.312026,7.205393,6.925136,7.366172,8.18533,8.451153
2020-02-11 06:00:00,6.567341,5.444202,5.195945,4.321394,3.579321,3.935695,4.929239,5.305981,5.249035,5.803367,...,0.182554,3.822217,6.441943,6.131918,4.661366,3.05385,2.073472,1.482652,2.34506,-8.646026
2020-02-12 06:00:00,3.958607,4.674505,3.943657,2.451878,2.301535,2.215149,2.161627,1.9871,2.384545,3.539361,...,-1.092865,-0.801328,-0.069159,0.997667,2.186795,2.939412,4.417769,4.157869,4.396684,4.784958


In [None]:
new_df3.to_csv("../../data/calc_time_spatial_smooth.csv")

#### Temporal features

In [None]:
def temporal_variance(df: pd.DataFrame) -> pd.DataFrame:
    """
    This method calculates the temporal variance for each variable and height level.
    Temporal variance is a centered rolling window variance. The window size is
    based on values from the paper. The temporal variance is calculated for only
    the newest calculation time for each forecast time.
    """
    new_columns = []

    # Handle wind_direction variable first by translating it into linear features
    wind_direction_columns = [col for col in df.columns if "wind_direction" in col]

    sin_cos_columns = {}
    for col in wind_direction_columns:
        sin_cos_columns[f'{col.replace("wind_direction", "wind_direction_sin")}'] = (
            np.sin(np.radians(df[col]))
        )
        sin_cos_columns[f'{col.replace("wind_direction", "wind_direction_cos")}'] = (
            np.cos(np.radians(df[col]))
        )

    df = df.drop(columns=wind_direction_columns)
    df = pd.concat([df, pd.DataFrame(sin_cos_columns)], axis=1)

    alt_variables = default_variables.copy()
    alt_variables.remove("wind_direction")
    alt_variables.extend(["wind_direction_sin", "wind_direction_cos"])

    for variable in alt_variables:
        for height in default_height_levels:
            for window_size in temporal_variance_window_sizes:
                for time in default_pred_hours:
                    # Create the list of column names for the current time window
                    current_interval = [
                        f"{variable}_{height}_{power_plant}_{t}"
                        for t in range(
                            int(time) - window_size, int(time) + window_size + 1
                        )
                    ]

                    # Check if all columns in the current interval
                    # exist in the dataframe
                    if all(col in df.columns for col in current_interval):
                        # Transpose the dataframe, apply rolling, and transpose back
                        rolling_variance = (
                            df[current_interval]
                            .T.rolling(window=2 * window_size + 1, center=True)
                            .var()
                            .T
                        )

                        # Select the column corresponding to the current time
                        variance_column = rolling_variance.iloc[:, window_size]

                        # Rename the column to include the window size
                        variance_column.name = (
                            f"{variable}_{height}_{power_plant}_temporal_variance_"
                            f"{window_size*2+1}_{time}"
                        )
                        new_columns.append(variance_column)

    new_df = pd.concat(new_columns, axis=1)
    new_df.index = df.index
    return new_df


# Example usage
new_df5 = temporal_variance(new_df.copy())
new_df5.head()

Unnamed: 0_level_0,wind_speed_10m_roan_temporal_variance_3_18,wind_speed_10m_roan_temporal_variance_3_19,wind_speed_10m_roan_temporal_variance_3_20,wind_speed_10m_roan_temporal_variance_3_21,wind_speed_10m_roan_temporal_variance_3_22,wind_speed_10m_roan_temporal_variance_3_23,wind_speed_10m_roan_temporal_variance_3_24,wind_speed_10m_roan_temporal_variance_3_25,wind_speed_10m_roan_temporal_variance_3_26,wind_speed_10m_roan_temporal_variance_3_27,...,wind_direction_cos_120m_roan_temporal_variance_11_32,wind_direction_cos_120m_roan_temporal_variance_11_33,wind_direction_cos_120m_roan_temporal_variance_11_34,wind_direction_cos_120m_roan_temporal_variance_11_35,wind_direction_cos_120m_roan_temporal_variance_11_36,wind_direction_cos_120m_roan_temporal_variance_11_37,wind_direction_cos_120m_roan_temporal_variance_11_38,wind_direction_cos_120m_roan_temporal_variance_11_39,wind_direction_cos_120m_roan_temporal_variance_11_40,wind_direction_cos_120m_roan_temporal_variance_11_41
calc_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-08 06:00:00,,,,,,,,,,,...,,,,,,,,,,
2020-02-09 06:00:00,,1.198344,0.852037,0.110217,3.351153,3.797343,1.293865,0.507163,0.73768,1.825533,...,0.012884,0.011372,0.009648,0.015928,0.011606,0.010262,0.0081,0.017524,0.018217,0.028848
2020-02-10 06:00:00,0.756103,0.268312,0.920528,0.205302,0.107636,0.408639,0.878468,0.129989,0.885146,0.370249,...,0.00418,0.004084,0.004704,0.003846,0.004397,0.004568,0.004554,0.004533,0.004411,0.004763
2020-02-11 06:00:00,0.26946,3.241542,1.788676,0.340268,0.046855,0.363258,0.754781,0.297273,0.265765,1.344674,...,0.120451,0.123128,0.124643,0.104262,0.171164,0.273419,0.324358,0.370991,0.361913,0.301572
2020-02-12 06:00:00,2.636746,1.610415,0.995034,1.783262,0.290902,0.027988,0.028344,0.232891,0.930404,2.159683,...,0.128737,0.192107,0.213785,0.229361,0.22828,0.214613,0.181428,0.122733,0.103067,0.098837


In [None]:
# save csv
new_df5.to_csv("../../data/calc_time_temporal_variance.csv")

In [None]:
def lags_and_leads(df: pd.DataFrame, num_lags_leads: int) -> pd.DataFrame:
    """
    This method calculates the lags and leads for each variable and height level.
    The lags and leads are calculated for all combinations of variables and height
    levels. The number of lags and leads are determined by the input parameter.

    NOTE: The exact number of lags and leads are not specified in the paper.
    Should experiment with different numbers of lags and leads.
    Values between 3 and 8 could be reasonable.

    :param df: The data frame containing the variables for each height level
    :param num_lags_leads: The number of lags and leads to calculate

    :return: The data frame containing the lags and leads for each variable and
    height level
    """
    new_columns = []

    # Handle wind_direction variable first by translating it into linear features
    wind_direction_columns = [col for col in df.columns if "wind_direction" in col]

    sin_cos_columns = {}
    for col in wind_direction_columns:
        sin_cos_columns[f'{col.replace("wind_direction", "wind_direction_sin")}'] = (
            np.sin(np.radians(df[col]))
        )
        sin_cos_columns[f'{col.replace("wind_direction", "wind_direction_cos")}'] = (
            np.cos(np.radians(df[col]))
        )

    df = df.drop(columns=wind_direction_columns)
    df = pd.concat([df, pd.DataFrame(sin_cos_columns)], axis=1)

    alt_variables = default_variables.copy()
    alt_variables.remove("wind_direction")
    alt_variables.extend(["wind_direction_sin", "wind_direction_cos"])

    for variable in alt_variables:
        for height in default_height_levels:
            for time in default_pred_hours:
                time = int(time)

                lag_column_name = f"{variable}_{height}_{power_plant}_lag_{0}_{time}"
                lag_column = df[f"{variable}_{height}_{power_plant}_{time}"].rename(
                    lag_column_name
                )
                new_columns.append(lag_column)

                for i in range(1, num_lags_leads + 1):
                    lag_hour = time - i
                    lead_hour = time + i

                    lag_column_name = (
                        f"{variable}_{height}_{power_plant}_lag_{i}_{time}"
                    )
                    lag_column = df[
                        f"{variable}_{height}_{power_plant}_{lag_hour}"
                    ].rename(lag_column_name)
                    new_columns.append(lag_column)

                    lead_column_name = (
                        f"{variable}_{height}_{power_plant}_lead_{i}_{time}"
                    )
                    lead_column = df[
                        f"{variable}_{height}_{power_plant}_{lead_hour}"
                    ].rename(lead_column_name)
                    new_columns.append(lead_column)

    # Concatenate all new columns at once
    lags_leads_df = pd.concat(new_columns, axis=1)
    lags_leads_df.index = df.index
    return lags_leads_df


# Example usage
new_df6 = lags_and_leads(new_df.copy().dropna(), num_lags_leads=3)
new_df6.head()

Unnamed: 0_level_0,wind_speed_10m_roan_lag_0_18,wind_speed_10m_roan_lag_1_18,wind_speed_10m_roan_lead_1_18,wind_speed_10m_roan_lag_2_18,wind_speed_10m_roan_lead_2_18,wind_speed_10m_roan_lag_3_18,wind_speed_10m_roan_lead_3_18,wind_speed_10m_roan_lag_0_19,wind_speed_10m_roan_lag_1_19,wind_speed_10m_roan_lead_1_19,...,wind_direction_cos_120m_roan_lead_2_40,wind_direction_cos_120m_roan_lag_3_40,wind_direction_cos_120m_roan_lead_3_40,wind_direction_cos_120m_roan_lag_0_41,wind_direction_cos_120m_roan_lag_1_41,wind_direction_cos_120m_roan_lead_1_41,wind_direction_cos_120m_roan_lag_2_41,wind_direction_cos_120m_roan_lead_2_41,wind_direction_cos_120m_roan_lag_3_41,wind_direction_cos_120m_roan_lead_3_41
calc_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-10 06:00:00,7.002601,5.75744,7.431442,7.450503,6.400315,7.623015,5.514391,7.431442,7.002601,6.400315,...,-0.966902,-0.848574,-0.924184,-0.999782,-0.997769,-0.966902,-0.965082,-0.924184,-0.892778,-0.95055
2020-02-11 06:00:00,6.8856,7.115335,6.123658,5.818812,3.456809,5.504304,4.61145,6.123658,6.8856,3.456809,...,0.726756,-0.384106,0.59335,0.569556,-0.375115,0.726756,-0.223563,0.59335,-0.328979,0.648591
2020-02-12 06:00:00,2.532124,5.666822,4.834655,5.74375,4.608056,7.101002,3.004792,4.834655,2.532124,4.608056,...,-0.988205,-0.534675,-0.920093,-0.997729,-0.955701,-0.988205,-0.861161,-0.920093,-0.787519,-0.755259
2020-02-13 06:00:00,3.480158,3.416225,3.066148,3.047227,3.067296,2.873545,3.216281,3.066148,3.480158,3.067296,...,-0.853724,-0.773291,-0.898326,-0.872864,-0.901631,-0.853724,-0.856088,-0.898326,-0.831815,-0.965191
2020-02-14 06:00:00,9.144572,8.974858,9.305416,6.993844,8.485807,6.839881,8.382877,9.305416,9.144572,8.485807,...,-0.845136,-0.806269,-0.830414,-0.878697,-0.902104,-0.845136,-0.827869,-0.830414,-0.788271,-0.845596


In [None]:
new_df6.to_csv("../../data/calc_time_lags_and_leads.csv")