In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import os

# Preprocess data

In [3]:
import pandas as pd
import numpy as np
from pathlib import Path
import os

# --- PATH CONFIGURATION ---
# DATA_DIR is a relative path, so it is resolved against the current working
# directory of the running kernel/process.
DATA_DIR = Path('../data')
# Preprocessed dataset written by (and re-read by) the main logic below.
OUTPUT_FILE = DATA_DIR / 'final_data.csv'

# Fail fast when the data directory is missing. Merely printing a message
# would let execution continue until the first CSV read fails with a less
# helpful error, because neither the inputs nor OUTPUT_FILE can exist then.
if not DATA_DIR.exists():
    raise FileNotFoundError(f"ERROR: Directory {DATA_DIR.resolve()} does not exist!")
    
# --- MAIN LOGIC ---
# Build the dataset only when no previously saved copy is present; otherwise
# reuse the saved file so that `merged` is defined in either case.
if not OUTPUT_FILE.exists():
    print(f"File {OUTPUT_FILE} does not exist. Starting processing...")

    # --- 1. LOADING DATA ---
    # Setpoint data (PC -> Inv)
    data = pd.read_csv(DATA_DIR / 'setpoints.csv').rename(
        columns={
            'value': 'setpoint_fr',
            'value2': 'setpoint_rl',
            'value3': 'setpoint_rr',
        }
    )

    # Electrical data
    data2 = pd.read_csv(DATA_DIR / 'fsp_endu_current.csv').rename(
        columns={'value': 'current', 'value2': 'voltage'}
    )

    def load_wheel_data(filename, col_name):
        """Read one wheel CSV from DATA_DIR and return its cleaned frame.

        The 'topic' and 'timestamp' columns are dropped, the 'value' column
        is renamed to ``col_name``, and that column is replaced by its
        absolute value.
        """
        wheel = pd.read_csv(DATA_DIR / filename)
        wheel = wheel.drop(columns=['topic', 'timestamp'])
        wheel = wheel.rename(columns={'value': col_name})
        wheel[col_name] = wheel[col_name].abs()
        return wheel

    data_fr = load_wheel_data('front_right_data.csv', 'speed_fr')
    data_rl = load_wheel_data('rear_left_data.csv', 'speed_rl')
    data_rr = load_wheel_data('rear_right_data.csv', 'speed_rr')

    # --- 2. MERGING AND SYNCHRONIZATION ---
    # Every as-of join below uses the same key, direction and tolerance.
    asof_kwargs = dict(on='elapsed time', direction='nearest', tolerance=0.1)

    # Align the remaining wheels onto the front-right rows, one at a time.
    merged_wheels = data_fr.sort_values('elapsed time')
    for other_wheel in (data_rl, data_rr):
        merged_wheels = pd.merge_asof(
            merged_wheels,
            other_wheel.sort_values('elapsed time'),
            **asof_kwargs,
        )
    # Rows that found no partner within the tolerance hold NaN; fill linearly.
    merged_wheels = merged_wheels.interpolate(method='linear')

    # Merge everything: setpoints <- electrical data <- wheel speeds
    merged = pd.merge_asof(
        data.sort_values('elapsed time'),
        data2.sort_values('elapsed time'),
        **asof_kwargs,
    )
    merged = pd.merge_asof(merged, merged_wheels, **asof_kwargs)

    # --- 3. CLEANING AND FEATURE ENGINEERING ---
    # Add power = voltage * current, drop the columns no longer needed, then
    # discard rows that are incomplete or have negative power.
    merged = (
        merged
        .assign(power=merged['voltage'] * merged['current'])
        .drop(columns=['elapsed time', 'voltage', 'current'])
        .dropna()
    )
    merged = merged.loc[merged['power'] >= 0]

    # --- 4. SAVING RESULTS ---
    merged.to_csv(OUTPUT_FILE, index=False)
    print(f"Success! New file saved to: {OUTPUT_FILE}")

else:
    print(f"File {OUTPUT_FILE} already exists. Skipping data generation.")

    # Load the ready file so the 'merged' variable is available
    merged = pd.read_csv(OUTPUT_FILE)
    print("Data loaded from existing file.")

File ../data/final_data.csv does not exist. Starting processing...
Success! New file saved to: ../data/final_data.csv
