In [3]:
## utils 
def load_config(file_path):
    """Parse the YAML file at ``file_path`` and return the resulting object.

    The file is opened in text mode and handed to ``yaml.safe_load``; whatever
    that call produces is returned unchanged.
    """
    with open(file_path, 'r') as handle:
        return yaml.safe_load(handle)

from typing import Dict

def load_dataset(config: Dict, typ: str = "train", number_file: str = "001") -> "pd.DataFrame":
    """Load one whitespace-separated data file described by ``config``.

    The file name is built by plain string concatenation:
    ``path_to_data + <type prefix> + number_file + '.txt'``, where all parts
    except ``number_file`` come from ``config["dataloading"]``.

    Parameters
    ----------
    config : Dict
        Must contain a ``"dataloading"`` mapping with ``"path_to_data"`` and the
        prefix key for the requested type: ``"train_path"``, ``"test_path"`` or
        ``"RUL_path"``.
        NOTE(review): ``"test_path"`` is assumed by analogy with
        ``"train_path"`` -- confirm it matches the key in config.yaml.
    typ : str
        ``"train"``, ``"test"``, or any string containing ``"rul"``
        (case-insensitive).
    number_file : str
        Suffix appended to the type prefix, e.g. ``"001"``.

    Returns
    -------
    pd.DataFrame
        For train/test: 26 columns (2 index columns, 3 operational settings,
        21 sensor measures). For RUL: a single column named ``"RUL"``.
        NOTE(review): assumes the RUL file holds exactly one value per line --
        confirm against the data; a different layout raises on column assignment.

    Raises
    ------
    ValueError
        If ``typ`` is not one of the supported types.
    KeyError
        If a required key is missing from ``config``.
    """
    # The return annotation is quoted so that defining this function does not
    # require pandas to be imported already (the import cell comes later).
    dataloading = config["dataloading"]

    if typ == "train":
        type_path = dataloading["train_path"]
        is_rul = False
    elif typ == "test":
        type_path = dataloading["test_path"]
        is_rul = False
    elif "rul" in typ.lower():
        type_path = dataloading["RUL_path"]
        is_rul = True
    else:
        raise ValueError(
            f"Unknown dataset type {typ!r}: expected 'train', 'test' or a name containing 'rul'."
        )

    if is_rul:
        column_names = ["RUL"]
    else:
        # Engine number, time step
        index_columns_names = ["UnitNumber", "Cycle"]
        # 3 operational settings
        operational_settings_columns_names = ["Operation Setting "+str(i) for i in range(1,4)]
        # 21 sensor measurements (names intentionally kept without a space before the number)
        sensor_measure_columns_names = ["Sensor Measure"+str(i) for i in range(1,22)]
        column_names = index_columns_names + operational_settings_columns_names + sensor_measure_columns_names

    print(f"In total {len(column_names)} columns defined as {column_names}")

    file_path = dataloading["path_to_data"] + type_path + number_file + '.txt'
    # Raw string: '\s' is not a valid escape sequence in a normal string literal.
    data = pd.read_csv(file_path, sep=r'\s+', header=None)
    data.columns = column_names
    return data


In [2]:
###############################
## imports
###############################
import yaml
import os
import sys



import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Set path
# The config is addressed relative to the working directory. Step up one level
# only while the config file is not reachable yet, so that re-running this cell
# does not climb one directory higher on every execution.
config_file_path = './configs/config.yaml'
if not os.path.isfile(config_file_path):
    os.chdir("../")
pd.set_option('display.max_columns', 500)
print("Current working directory:", os.getcwd())

###############################
## load config
###############################
config = load_config(config_file_path)

In [9]:
###############################
## load data
###############################
# Read the "train" file numbered "001"; the trailing expression previews its first rows.
train_data = load_dataset(config, typ="train", number_file="001")
train_data.head()

In [80]:
def min_max(values):
    """Return the spread of ``values``: its maximum minus its minimum.

    ``values`` only needs to provide ``.max()`` and ``.min()``.
    """
    highest = values.max()
    lowest = values.min()
    return highest - lowest


In [81]:
# Rolling-window settings: the window length and the aggregations applied to
# every non-excluded column (two built-ins by name plus the custom `min_max`).
window_size = 3
functs = ["mean", "sum", min_max]

# NOTE: despite its name, `rolling_mean` holds all aggregations listed in
# `functs`, not only the mean.
rolling_mean = apply_rolling_window(train_data, window_size, functs=functs)

rolling_mean.head()

In [72]:
def apply_rolling_window(df, window_size, functs, exclude=("UnitNumber", "Cycle")):
    """Compute rolling-window aggregations per ``"UnitNumber"`` group.

    Every column of ``df`` that is not listed in ``exclude`` is aggregated with
    ``functs`` over a rolling window of ``window_size`` rows, separately for
    each ``"UnitNumber"``. Rows containing NaN in the aggregated result (for
    example the incomplete leading windows of each unit) are dropped.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns ``"UnitNumber"`` and ``"Cycle"``.
    window_size : int
        Number of consecutive rows per window.
    functs : list or single aggregation
        Aggregations passed to ``Rolling.agg`` for each feature column
        (names such as ``"mean"`` and/or callables).
    exclude : sequence of str
        Columns that are not aggregated.

    Returns
    -------
    pd.DataFrame
        Columns ``"UnitNumber"``, then one ``"<column>_<function>"`` column per
        feature/aggregation pair (plain feature names when ``functs`` is a
        single aggregation), then ``"Cycle"``. ``"UnitNumber"`` and ``"Cycle"``
        are taken from the source row on which each window ends.
    """
    feature_columns = [col for col in df.columns if col not in exclude]
    agg_dict = {col: functs for col in feature_columns}

    # Use a fresh 0..n-1 index so the innermost index level of the grouped
    # result is the row position in `df`, whatever index `df` came with.
    positional = df.reset_index(drop=True)

    # Apply rolling window and aggregation grouped by "UnitNumber"
    rolled = (
        positional.groupby("UnitNumber", group_keys=True)[feature_columns]
        .apply(lambda unit: unit.rolling(window=window_size).agg(agg_dict))
        .dropna()
    )

    # Position of the source row each surviving window ends on. Assigning
    # df["Cycle"] directly would align on the new 0..k-1 index and attach the
    # wrong cycle to every row after the first dropped one.
    source_rows = rolled.index.get_level_values(-1).to_numpy()

    # Flatten the two-level (column, function) names to "<column>_<function>";
    # a single aggregation yields one-level names that are kept as they are.
    if rolled.columns.nlevels > 1:
        flat_names = ['_'.join(col).strip() for col in rolled.columns.values]
    else:
        flat_names = list(rolled.columns)

    rolling_data = rolled.reset_index(drop=True)
    rolling_data.columns = flat_names
    rolling_data.insert(0, "UnitNumber", positional["UnitNumber"].to_numpy()[source_rows])
    rolling_data["Cycle"] = positional["Cycle"].to_numpy()[source_rows]

    return rolling_data