In [2]:
import numpy as np
import pandas as pd
import json
import random

In [3]:
# --- Step 1: Read the raw CSV and decode the JSON held in `items` ---
df = pd.read_csv("thermal_data.csv")
# Every `items` cell is a JSON string; decode each one and keep the decoded
# objects on the frame under `parsed_items`.
decoded_items = df['items'].map(json.loads)
df['parsed_items'] = decoded_items
# Build the sensor table from the decoded records.
# NOTE(review): presumably each record is a dict of sensor name -> reading,
# giving one column per sensor — confirm against the CSV.
sensor_df = pd.DataFrame(df['parsed_items'].tolist())

In [4]:
# --- Step 2: Resample the sensor readings to 1000 rows ---
# Label the original rows 0..n-1 and lay out 1000 evenly spaced (fractional)
# positions running from the first row to the last.
sensor_df.index = pd.Index(range(len(sensor_df)))
new_index = pd.Index(np.linspace(0, len(sensor_df) - 1, 1000))

# Interpolate every column against the original row positions with np.interp.
# This replaces reindex(new_index).interpolate(method='linear'): reindexing onto
# the float grid keeps only labels that exactly equal an original integer label
# (usually just the first and last row), and 'linear' ignores index values and
# fills positionally, so the result degenerated into a straight line between the
# surviving rows and the interior samples were thrown away.
# NOTE(review): assumes every sensor column is numeric — confirm against the data.
original_positions = sensor_df.index.to_numpy(dtype=float)
target_positions = new_index.to_numpy(dtype=float)
source_values = sensor_df.to_numpy(dtype=float)

resampled_values = np.empty((len(target_positions), source_values.shape[1]))
for column_pos in range(source_values.shape[1]):
    resampled_values[:, column_pos] = np.interp(
        target_positions, original_positions, source_values[:, column_pos]
    )

# A fresh frame gets a default 0..999 index, matching the old reset_index(drop=True).
sensor_df_1000 = pd.DataFrame(resampled_values, columns=sensor_df.columns).round(2)


In [5]:
# Per-sensor (column-wise) standard deviation of the original, un-resampled data.
std_per_sensor = sensor_df.std()

# Seeded generator so that Restart & Run All reproduces the same noisy dataset.
NOISE_SEED = 42
noise_rng = np.random.default_rng(NOISE_SEED)

# Zero-mean Gaussian noise with sigma = std / 25 for each sensor. The 1-D scale
# vector broadcasts across the rows of the (rows, sensors) output.
noise = noise_rng.normal(
    loc=0,
    scale=std_per_sensor.to_numpy() / 25,
    size=sensor_df_1000.shape,
)
sensor_df_1000_noisy = (sensor_df_1000 + noise).round(2)

In [6]:
# Overwrite 50 randomly chosen rows with anomalies (5% of a 1000-row frame).
num_anomalies = 50

# Seeded generators so the same rows and spike values come out on every run.
ANOMALY_SEED = 42
anomaly_rows = random.Random(ANOMALY_SEED).sample(
    range(len(sensor_df_1000_noisy)), num_anomalies
)

# One random spike value per sensor for every anomalous row, drawn uniformly
# from [20, 62).
# NOTE(review): the earlier comment described these as °C; units are not
# verifiable from this notebook.
spike_values = np.random.default_rng(ANOMALY_SEED).uniform(
    20, 62, size=(num_anomalies, sensor_df_1000_noisy.shape[1])
)

# Replace all anomalous rows in one positional assignment instead of a Python loop.
sensor_df_1000_noisy.iloc[anomaly_rows] = np.round(spike_values, 2)

In [7]:
# Show the row positions that were overwritten with injected anomalies.
anomaly_rows

[329,
 371,
 970,
 823,
 189,
 136,
 372,
 545,
 663,
 466,
 338,
 183,
 392,
 616,
 134,
 486,
 910,
 697,
 103,
 728,
 262,
 245,
 759,
 331,
 496,
 480,
 928,
 235,
 859,
 390,
 695,
 82,
 843,
 963,
 206,
 867,
 401,
 866,
 683,
 802,
 829,
 565,
 658,
 636,
 446,
 258,
 544,
 101,
 35,
 459]

In [266]:
# --- Step 3: Determine the time span the 1000 timestamps will cover ---
# Parse packet_time into datetimes and store the parsed column back on df.
df['packet_time'] = pd.to_datetime(df['packet_time'])
packet_time_col = df['packet_time']
# The first and last rows bound the span.
# NOTE(review): the notebook treats the first row as the latest time and the
# last row as the oldest (i.e. the CSV is presumably sorted newest-first) — confirm.
start_time, end_time = packet_time_col.iloc[0], packet_time_col.iloc[-1]

In [267]:
# One evenly spaced timestamp per resampled sensor row, running from start_time
# to end_time. The count is taken from the sensor frame instead of repeating the
# literal 1000, so the timestamp and sensor columns of the final frame can never
# disagree in length.
packet_times = pd.date_range(
    start=start_time,
    end=end_time,
    periods=len(sensor_df_1000_noisy),
)

In [268]:
# --- Step 4: Assemble the output frame ---
def _row_to_json(row):
    """Serialise one sensor row as a JSON object string keyed by column name."""
    return json.dumps(row.to_dict())


# Re-encode every row of sensor readings into the JSON-string form used by `items`.
items_json = sensor_df_1000_noisy.apply(_row_to_json, axis=1)

df_expanded = pd.DataFrame({'packet_time': packet_times, 'items': items_json})

In [269]:
# Constant packet metadata, written into every row of the expanded frame.
constant_columns = {
    'boot_count': 6450,
    'spacecraft': "DEFAULT",
    'target': "THERMAL",
    'packet': "THERMAL_HK",
}
for column_name, constant_value in constant_columns.items():
    df_expanded[column_name] = constant_value


In [274]:
# Write the expanded dataset to disk, leaving out the positional row index.
output_csv_path = '1000_thermal_data.csv'
df_expanded.to_csv(output_csv_path, index=False)