In [1]:
import pandas as pd
import os

# Paths
# The original machine-specific locations remain the defaults, so behaviour is
# unchanged out of the box. Set TURBOFAN_DATA_DIR / TURBOFAN_OUTPUT_DIR to run
# the notebook elsewhere without editing the code.
data_folder = os.environ.get(
    "TURBOFAN_DATA_DIR",
    r"C:\Users\aligi\OneDrive\Documents\Desktop\Turbo Fan Engines",
)
output_path = os.environ.get(
    "TURBOFAN_OUTPUT_DIR",
    r"C:\Users\aligi\OneDrive\Documents\Desktop\CombinedData",
)
# Create the output folder up front; exist_ok avoids an error on re-runs.
os.makedirs(output_path, exist_ok=True)

# Column names applied to each headerless input file: two identifier columns,
# then three operational settings, then 21 sensor measurements (26 in total).
columns = (
    ['unit_number', 'time_in_cycles']
    + [f'operational_setting_{idx}' for idx in range(1, 4)]
    + [f'sensor_measurement_{idx}' for idx in range(1, 22)]
)

# For each subset: the training file to read and the constant added to its
# unit numbers when the subsets are combined.
files_info = {
    subset: {"file": f"train_{subset}.txt", "offset": unit_offset}
    for subset, unit_offset in (
        ("FD001", 0),
        ("FD002", 100),
        ("FD003", 360),
        ("FD004", 460),
    )
}

# Load every subset listed in files_info, label it with RUL and a binary
# failure flag, then concatenate the subsets and write a single CSV.

# Loop-invariant settings, defined once instead of on every iteration.
float_cols = columns[2:]  # all columns except first two (unit_number, time_in_cycles)
failure_threshold = 30    # Failure = 1 when RUL <= this many cycles

df_list = []

for dataset, info in files_info.items():
    file_path = os.path.join(data_folder, info["file"])
    df = pd.read_csv(file_path, sep=r'\s+', header=None)
    # Assigning the names raises if the file's column count differs from len(columns).
    df.columns = columns

    # Drop incomplete rows BEFORE the dtype casts: astype(int) raises on NaN,
    # so dropping afterwards could never remove a bad identifier/cycle row.
    rows_before = len(df)
    df = df.dropna()
    if len(df) < rows_before:
        print(f"{dataset}: dropped {rows_before - len(df)} row(s) with missing values")

    # Operational/sensor columns as float; unit_number and time_in_cycles as int.
    df = df.astype({
        **{col: float for col in float_cols},
        'unit_number': int,
        'time_in_cycles': int,
    })

    # Shift unit numbers by the subset's offset and tag rows with their subset.
    df["unit_number"] += info["offset"]
    df["dataset"] = dataset

    # Calculate RUL per unit_number: cycles left until that unit's last recorded cycle.
    max_cycles = df.groupby('unit_number')['time_in_cycles'].transform('max')
    df['RUL'] = max_cycles - df['time_in_cycles']

    # Create binary failure label (1 if RUL <= failure_threshold, else 0)
    df['Failure'] = (df['RUL'] <= failure_threshold).astype(int)

    df_list.append(df)

combined_df = pd.concat(df_list, ignore_index=True)

combined_csv_path = os.path.join(output_path, "combined_turbo_fan_data_with_RUL_and_Failure.csv")
combined_df.to_csv(combined_csv_path, index=False)
print(f"Combined CSV with binary failure label saved at: {combined_csv_path}")


Combined CSV with binary failure label saved at: C:\Users\aligi\OneDrive\Documents\Desktop\CombinedData\combined_turbo_fan_data_with_RUL_and_Failure.csv
