# UCR-01 Data Processing, Revisited
Some data was collected on the UCR-01, and although most of it is unusable, one day appears to have decent data. As the UCR-02 is unlikely to have significant data collected, we revisit the UCR-01 data to see if any conclusions can be drawn.

### Imports

In [55]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

### Opening the data .csvs as a dictionary of dataframes

In [56]:
# Read every file ending in '.csv' from the data folder into a dataframe,
# keyed by its file name (extension included).
directory = 'Decoded Data Revised'
csv_names = [entry for entry in os.listdir(directory) if entry.endswith('.csv')]
df_dict = {
    entry: pd.read_csv(os.path.join(directory, entry))
    for entry in csv_names
}

### Keeping the columns useful for cooling system evaluation and renaming accordingly

In [57]:
# Maps the column names found in the decoded CSVs to the names used in the
# rest of this notebook. Columns that are not listed here are discarded.
keep_cols = {
    'Timestamp': 'time',
    'BamocarCurrent': 'controller_current',
    'ControllerTempRaw': 'controller_temp',
    'FrontLeft': 'fl_wheelspeed',
    'FrontRight': 'fr_wheelspeed',
    'MotorTempRaw': 'motor_temp',
    'PumpsCurrent': 'pumps_current',
    'RearLeft': 'rl_wheelspeed',
    'RearRight': 'rr_wheelspeed',
    'SteeringAngle': 'steering_angle',
    'TotalCurrent': 'total_current',
    'TractiveVoltage': 'total_voltage',
    'east_vel': 'east_vel',
    'gnss_height': 'gnss_height',
    'gnss_lat': 'gnss_lat',
    'gnss_long': 'gnss_long',
    'north_vel': 'north_vel',
    'up_vel': 'up_vel',
}

cleaned_dfs = {}

for name, df in df_dict.items():
    # Only frames with more than 25000 rows are kept
    # (presumably to skip short or aborted logs -- TODO confirm the threshold).
    if len(df) <= 25000:
        continue

    # A file may lack some of the listed columns, so select only those present.
    present = [col for col in df.columns if col in keep_cols]
    cleaned_dfs[name] = df[present].rename(columns=keep_cols)

### Dropping empty rows

In [58]:
# Columns that are not considered when deciding whether a row is empty.
ignore_cols = ['time', 'steering_angle']

# A row counts as empty when every remaining column is NaN or exactly 0.
# The filtered frame must be written back under its key: rebinding the loop
# variable alone would leave cleaned_dfs unchanged.
for key, df in list(cleaned_dfs.items()):
    data_cols = [c for c in df.columns if c not in ignore_cols]
    is_empty = (df[data_cols].fillna(0) == 0).all(axis=1)
    # .copy() makes the result an independent frame, so later column
    # assignments do not act on a slice of the original.
    cleaned_dfs[key] = df.loc[~is_empty].copy()

### Interpolating empty values

In [59]:
# For each frame: make 'time' numeric, fill missing values by interpolating
# against time, then shift time so that the first row is at 0.
for key, df in cleaned_dfs.items():
    # Work on a copy so the frame currently stored in cleaned_dfs is not
    # modified in place while it is being processed.
    df = df.copy()

    df['time'] = pd.to_numeric(df['time'], errors='coerce')
    # errors='coerce' turns unparseable timestamps into NaN. Those rows are
    # dropped because interpolate(method='values') rejects an index that
    # contains NaN, and a NaN first timestamp would make the offset NaN.
    df = df.dropna(subset=['time'])

    # method='values' interpolates using the numeric index (time) as x.
    df = df.set_index('time')
    df = df.interpolate(method='values')
    df = df.reset_index()

    # An earlier revision also discarded the first 5000 rows here; that is no
    # longer believed to be necessary.
    if len(df) > 0:
        timestamp_offset = df.loc[0, 'time']
        df['time'] = df['time'] - timestamp_offset

    cleaned_dfs[key] = df

### Converting thermistor raw values

In [60]:
# Calibration tables; the columns used below are 'Value' (raw reading) and
# 'Temperature'. os.path.join keeps the paths usable on any OS and yields the
# same string as the previous backslash-separated literal on Windows.
motor_calibration = pd.read_csv(os.path.join('Thermistor Plots', 'motor_thermistors.csv'))
controller_calibration = pd.read_csv(os.path.join('Thermistor Plots', 'bamocar_thermistors.csv'))


def _interp_table(calibration):
    """Return (raw_values, temperatures) arrays sorted by ascending raw value.

    np.interp requires increasing x-coordinates and does not check for it;
    an unsorted or descending table would silently give wrong temperatures.
    Rows with a missing 'Value' or 'Temperature' are left out.
    """
    table = calibration.dropna(subset=['Value', 'Temperature']).sort_values('Value')
    return table['Value'].to_numpy(), table['Temperature'].to_numpy()


# Column name -> lookup table, built once outside the loop.
_thermistor_tables = {
    'motor_temp': _interp_table(motor_calibration),
    'controller_temp': _interp_table(controller_calibration),
}

# Replace the raw readings with temperatures, in place. Readings outside the
# calibrated range take the temperature of the nearest end of the table
# (np.interp default behaviour).
# NOTE(review): re-running this cell converts already-converted values again.
for df in cleaned_dfs.values():
    for column, (raw_values, temperatures) in _thermistor_tables.items():
        if column in df.columns:
            df[column] = np.interp(df[column], raw_values, temperatures)

### Saving the processed data

In [61]:
# Average each frame into 0.1-wide time bins and write one sheet per file.
# (presumably 'time' is in seconds, making this 10 Hz -- TODO confirm units)
output_path = '10HZ-Revision/decoded_10Hz_data.xlsx'
# Create the output folder if it is missing instead of failing on open.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

with pd.ExcelWriter(output_path) as writer:
    for name, df in cleaned_dfs.items():
        df = df.set_index('time')

        # Index of the nearest 0.1 bin. Dividing the integer bin index by 10
        # gives clean tenths (0.3), whereas multiplying by 0.1 writes float
        # noise such as 0.30000000000000004 into the exported time column.
        ticks = (df.index / 0.1).round().astype(int)
        df['timestep'] = ticks / 10

        df_10hz = (
            df.groupby('timestep')
            .mean()
            .reset_index()
            .rename(columns={'timestep': 'time'})
        )

        # Sheet name: the text after "Logfile" up to the first dot. If the
        # file name has no "Logfile", use the name up to the first dot
        # instead of raising IndexError.
        parts = name.split("Logfile")
        tail = parts[1] if len(parts) > 1 else name
        temp_name = tail.split(".")[0]
        df_10hz.to_excel(writer, sheet_name=temp_name, index=False)