In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
# === CONFIGURATION ===
input_csv = "batch_dataset_v1.csv"  # Your original dataset
window_seconds = 2.5  # Length of input buffer (in seconds)
step_seconds = 0.25   # Slide forward every N seconds
timestep_interval = 0.025  # How far apart each original timestep is (s)

# === LOAD DATA ===
# Read the dataset and fill NaNs with 0 (post-apogee padding).
# Chained instead of `inplace=True` so re-running the cell always starts
# from the file contents.
df_full = pd.read_csv(input_csv).fillna(0)

# Count initial number of columns
initial_cols = df_full.shape[1]

# Drop all-zero columns: keep a column only if at least one value is non-zero.
df = df_full.loc[:, (df_full != 0).any(axis=0)]

# Count and report how many were removed.
# The comparison must be against the filtered frame `df`; comparing against
# `df_full` (the unfiltered frame) always yields 0.
removed_cols = initial_cols - df.shape[1]
print(f"Removed {removed_cols} all-zero columns.")

Removed 0 all-zero columns.


In [3]:
# Compute sliding window size and stride (both in number of timesteps).
# round() is used instead of int(): the quotient of two decimal floats can land
# just below the intended integer (e.g. 0.3 / 0.1 == 2.9999999999999996), and
# int() would then silently truncate to one step too few.
window_size = round(window_seconds / timestep_interval)
stride = round(step_seconds / timestep_interval)
if window_size < 1 or stride < 1:
    raise ValueError(
        f"window_size ({window_size}) and stride ({stride}) must both be >= 1; "
        "check window_seconds, step_seconds and timestep_interval"
    )

# Define feature groups: for each per-timestep feature, the columns whose name
# starts with that feature label.
# NOTE(review): sorted() orders column names lexicographically; this matches
# time order only if the time/index part of the names sorts that way (e.g.
# zero-padded) -- confirm against the CSV header.
# NOTE(review): columns are taken from df_full, so any all-zero columns dropped
# into `df` are still included here -- confirm this is intended.
features_per_timestep = ["Vertical velocity", "Vertical acceleration", "Total velocity", "Altitude"]
feature_groups = {
    label: sorted(col for col in df_full.columns if col.startswith(label))
    for label in features_per_timestep
}

# Split entire flights (rows) into train/test so that windows cut from one
# flight never end up in both sets. Fixed random_state keeps the split
# reproducible.
train_df, test_df = train_test_split(df_full, test_size=0.2, random_state=42)

In [4]:
# Sliding window function
def generate_windows(df, groups=None, size=None, step=None,
                     target_col="Apogee altitude (m)"):
    """Cut every row of ``df`` into fixed-length sliding windows.

    For each row, the four per-timestep feature series (vertical velocity,
    vertical acceleration, total velocity, altitude) are stacked into a
    ``(4, num_timesteps)`` array. Windows of ``size`` timesteps are taken every
    ``step`` timesteps, and each window is flattened feature-major (all
    timesteps of the first feature, then the second, ...). Every window is
    labeled with the row's value in ``target_col``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per flight; must contain the columns listed in ``groups`` and
        ``target_col``.
    groups : dict[str, list[str]], optional
        Maps each feature name to its ordered list of columns. Defaults to the
        notebook-level ``feature_groups``.
    size : int, optional
        Window length in timesteps. Defaults to the notebook-level
        ``window_size``.
    step : int, optional
        Number of timesteps between window starts. Defaults to the
        notebook-level ``stride``.
    target_col : str, optional
        Column holding the per-row target value.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        Samples of shape ``(n_rows * n_windows, 4 * size)`` ordered by row and
        then by window start, and the matching targets. If no window fits
        (``size`` larger than the series) both are empty, but the sample frame
        still has ``4 * size`` columns.

    Raises
    ------
    ValueError
        If ``size`` or ``step`` is smaller than 1, or the feature groups do not
        all have the same number of columns.
    """
    # Fall back to the notebook-level configuration when not overridden.
    groups = feature_groups if groups is None else groups
    size = window_size if size is None else size
    step = stride if step is None else step
    if size < 1 or step < 1:
        raise ValueError(f"size ({size}) and step ({step}) must both be >= 1")

    feature_order = (
        "Vertical velocity",
        "Vertical acceleration",
        "Total velocity",
        "Altitude",
    )
    lengths = {len(groups[name]) for name in feature_order}
    if len(lengths) != 1:
        raise ValueError(
            "all feature groups must have the same number of timestep columns"
        )
    n_steps = lengths.pop()

    # values[r, f, t]: feature f of row r at timestep t.
    values = np.stack(
        [df[list(groups[name])].to_numpy(dtype=float) for name in feature_order],
        axis=1,
    )

    # Window start positions; empty when the series is shorter than one window.
    starts = np.arange(0, n_steps - size + 1, step)
    # offsets[w, k]: timestep index of the k-th element of window w.
    offsets = starts[:, None] + np.arange(size)[None, :]

    # Gather to (row, feature, window, k), then reorder to (row, window,
    # feature, k) so flattening the last two axes gives the feature-major layout.
    windows = values[:, :, offsets].transpose(0, 2, 1, 3)
    samples = windows.reshape(len(df) * len(starts), len(feature_order) * size)

    # Each row's target is repeated once per window cut from that row.
    targets = np.repeat(df[target_col].to_numpy(), len(starts))

    return pd.DataFrame(samples), pd.Series(targets)


In [5]:
# Build the windowed feature matrices and their apogee targets for each split
X_train, y_train = generate_windows(train_df)
X_test, y_test = generate_windows(test_df)

# Attach the targets as an "Apogee" column; assign() returns a new frame, so
# the feature matrices above stay untouched
train_set = X_train.assign(Apogee=y_train)
test_set = X_test.assign(Apogee=y_test)

In [6]:
# Persist both labeled datasets as CSV (row index omitted) for later use
for out_path, labeled in (
    ("sliding_train_by_flight.csv", train_set),
    ("sliding_test_by_flight.csv", test_set),
):
    labeled.to_csv(out_path, index=False)