In [3]:
import pandas as pd
import numpy as np

# -------------------------------
# 1. Read the cleaned CMAPSS training set
# -------------------------------
df = pd.read_csv('cmapss_cleaned_train_FD001.csv')
print("Cleaned dataset shape:", df.shape)

# -------------------------------
# 2. Select feature columns
# -------------------------------
# Every column other than the engine identifier and the cycle counter
# is treated as a feature; original column order is kept.
exclude_cols = ['engine_id', 'cycle']
feature_cols = df.columns[~df.columns.isin(exclude_cols)].tolist()

# Order rows by engine and then by cycle so that each engine's records
# are contiguous and chronological, then renumber the index from 0.
df = df.sort_values(by=['engine_id', 'cycle'])
df = df.reset_index(drop=True)

# -------------------------------
# 3. Generate Rolling Window Sequences
# -------------------------------
def generate_rolling_windows(data, engine_col, features, window_size=30, cycle_col='cycle'):
    """Build fixed-length sliding-window sequences separately for each engine.

    For every engine, each run of ``window_size`` consecutive rows (in the
    row order of ``data``) becomes one sequence. Windows never span two
    engines. Engines with fewer than ``window_size`` rows contribute nothing.

    Args:
        data: DataFrame holding one row per (engine, cycle). Rows are used in
            their existing order, so the caller is expected to have sorted
            them chronologically within each engine.
        engine_col: Name of the column identifying the engine.
        features: List of column names to include in every window.
        window_size: Number of consecutive rows per window (must be >= 1).
        cycle_col: Name of the column holding the cycle counter. Defaults to
            ``'cycle'``.

    Returns:
        A tuple ``(sequences, engine_ids, cycle_ids)`` where ``sequences`` is
        an array of shape ``(n_windows, window_size, n_features)`` and the two
        lists give, for each window, its engine and the cycle value of the
        window's last row. When no window can be formed, ``sequences`` is an
        empty array that still has the ``(0, window_size, n_features)`` shape.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be a positive integer, got {window_size}")

    sequences = []
    engine_ids = []
    cycle_ids = []

    for engine in data[engine_col].unique():
        engine_data = data[data[engine_col] == engine]
        engine_features = engine_data[features].to_numpy()
        # Read the cycle column once per engine; this keeps its own dtype
        # (row-wise .iloc access would upcast ints to float in mixed frames).
        engine_cycles = engine_data[cycle_col].tolist()

        # Zero when the engine has too few rows for even one window.
        n_windows = max(len(engine_data) - window_size + 1, 0)

        sequences.extend(
            engine_features[start:start + window_size] for start in range(n_windows)
        )
        engine_ids.extend([engine] * n_windows)
        # Each window is labelled with the cycle of its final row.
        cycle_ids.extend(engine_cycles[window_size - 1:])

    if not sequences:
        # Keep a well-formed shape so callers can still inspect shape[1:].
        empty_shape = (0, window_size) + tuple(data[features].shape[1:])
        return np.empty(empty_shape), engine_ids, cycle_ids

    return np.array(sequences), engine_ids, cycle_ids

window_size = 30
sequences, engine_ids, cycle_ids = generate_rolling_windows(df, 'engine_id', feature_cols, window_size)

print("Shape of rolling window sequences:", sequences.shape)
# Fail with a clear message instead of an IndexError on sequences[0] when
# no engine has at least `window_size` rows.
if len(sequences) == 0:
    raise ValueError("No rolling window sequences were generated")
print("Example sequence shape:", sequences[0].shape)

# -------------------------------
# 4. Validation
# -------------------------------
# Explicit raises are used instead of `assert` so the checks still run when
# Python is started with -O (which strips assert statements).
if sequences.shape[1] != window_size:
    raise ValueError("Sequence window length mismatch")

# Within one engine, the cycle of each window must be strictly greater than
# that of the previous window; a change of engine resets the comparison.
cycles_in_order = all(
    cur_cycle > prev_cycle or cur_engine != prev_engine
    for prev_engine, cur_engine, prev_cycle, cur_cycle in zip(
        engine_ids, engine_ids[1:], cycle_ids, cycle_ids[1:]
    )
)
if not cycles_in_order:
    raise ValueError("Cycle order violation")
print("Validation checks passed.")

# -------------------------------
# 5. Save sequences & metadata
# -------------------------------
# The metadata CSV has one row per sequence, in the same order as the array,
# giving the engine and the cycle recorded for that sequence.
np.save('rolling_window_sequences.npy', sequences)
pd.DataFrame({'engine_id': engine_ids, 'cycle': cycle_ids}).to_csv('sequence_metadata.csv', index=False)
print("Sequences and metadata saved successfully.")


Cleaned dataset shape: (20631, 26)
Shape of rolling window sequences: (17731, 30, 24)
Example sequence shape: (30, 24)
Validation checks passed.
Sequences and metadata saved successfully.
