In [1]:
# Import necessary libraries
import pandas as pd
import ast
from datetime import datetime
import numpy as np
import plotly.graph_objects as go
import plotly.io as pio

In [2]:
# Make 'iframe_connected' the default Plotly renderer, so every fig.show()
# call later in the notebook is drawn with it.
pio.renderers.default = 'iframe_connected'

In [3]:
# Read the metadata CSV (path is relative to the notebook's working directory)
# into a DataFrame; the later cells filter it by its 'type' column.
metadata_path = "cleaned_dataset/metadata.csv"
metadata_df = pd.read_csv(metadata_path)

In [4]:
# Show the column labels of the metadata table as a quick schema check
print(metadata_df.columns)


In [5]:
# Keep only the impedance rows and the columns needed for the Re/Rct plots.
# A single .loc selection replaces the chained df[mask][cols] lookup, and the
# explicit .copy() makes the result an independent DataFrame, so assigning a
# column to it later does not trigger pandas' SettingWithCopyWarning.
impedance_data = metadata_df.loc[
    metadata_df['type'] == 'impedance',
    ['start_time', 'Re', 'Rct', 'battery_id'],
].copy()

In [6]:
# Keep only the impedance rows and the columns needed to locate each data file.
# A single .loc selection replaces the chained df[mask][cols] lookup, and the
# explicit .copy() makes the result an independent DataFrame, so assigning a
# column to it later does not trigger pandas' SettingWithCopyWarning.
file_details = metadata_df.loc[
    metadata_df['type'] == 'impedance',
    ['start_time', 'filename', 'battery_id'],
].copy()


In [7]:
# Function to parse 'start_time' column values into proper datetime objects
def convert_start_time(value):
    """Convert a bracketed six-number timestamp string into a ``datetime``.

    The accepted form is six numbers in the order year, month, day, hour,
    minute, second, optionally wrapped in square brackets and separated by
    whitespace and/or commas, e.g. ``"[2010. 7. 21. 15. 0. 35.093]"`` or
    ``"[2010,7,21,15,0,35]"``.

    Parameters
    ----------
    value : object
        Raw cell value. Only ``str`` inputs are parsed.

    Returns
    -------
    datetime.datetime or pandas.NaT
        The parsed timestamp (fractional seconds are kept as microseconds),
        or ``pd.NaT`` when ``value`` is not a string, does not contain exactly
        six numbers, or describes an impossible date/time.
    """
    if not isinstance(value, str):
        return pd.NaT

    try:
        # Commas become spaces (not empty strings) so that comma-only
        # separated numbers are not glued together into a single token.
        tokens = value.strip("[]").replace(",", " ").split()
        components = [float(token) for token in tokens]
        if len(components) != 6:
            return pd.NaT

        year, month, day, hour, minute = map(int, components[:5])

        # Split the seconds into whole and fractional parts instead of
        # truncating; the clamp guards against rounding up to a full second.
        whole_second = int(components[5])
        microsecond = min(
            round((components[5] - whole_second) * 1_000_000), 999_999
        )
        return datetime(year, month, day, hour, minute, whole_second, microsecond)
    except (ValueError, SyntaxError, TypeError, OverflowError):
        # OverflowError covers int(inf) and fields too large for datetime.
        return pd.NaT

In [8]:
# Parse the raw 'start_time' strings of both DataFrames into datetime values.
# .assign() builds a new frame rather than writing a column into a filtered
# selection of metadata_df, which avoids pandas' SettingWithCopyWarning.
file_details = file_details.assign(
    start_time=file_details['start_time'].apply(convert_start_time)
)
impedance_data = impedance_data.assign(
    start_time=impedance_data['start_time'].apply(convert_start_time)
)


In [9]:
# Keep only rows whose start_time was parsed successfully, then order both
# tables chronologically.
impedance_data = impedance_data[impedance_data['start_time'].notna()].sort_values('start_time')
file_details = file_details[file_details['start_time'].notna()].sort_values('start_time')


In [10]:
# List the distinct battery_id values present in the impedance rows; the
# plotting loop that follows draws one figure per value.
print(impedance_data['battery_id'].unique())

In [11]:
# Draw one figure per battery showing its 'Re' and 'Rct' series against time
for battery in impedance_data['battery_id'].unique():
    # Rows belonging to the battery currently being plotted
    battery_subset = impedance_data.loc[impedance_data['battery_id'] == battery]

    # Both traces share the same x-axis; only column, label and colour differ
    fig = go.Figure(
        data=[
            go.Scatter(
                x=battery_subset['start_time'],
                y=battery_subset[column],
                mode='lines',
                name=label,
                line=dict(color=color),
            )
            for column, label, color in (
                ('Re', 'Re (Ohms)', 'green'),
                ('Rct', 'Rct (Ohms)', 'blue'),
            )
        ]
    )

    # Title, axis labels and legend heading
    fig.update_layout(
        title=f"Resistance Over Time (Battery ID: {battery})",
        xaxis_title="Start Time",
        yaxis_title="Resistance (Ohms)",
        xaxis=dict(tickangle=45),
        legend_title="Legend",
        template="plotly",
    )
    fig.show()


In [12]:
# Empty mapping that the processing loop below fills in: one entry per
# battery ID, each holding parallel 'start_time' and 'battery_impedance' lists.
battery_impedance_info = {}


In [13]:
# For every impedance file, compute the mean real part of its
# 'Battery_impedance' column and store it per battery together with the
# measurement start time.
#
# The result dictionary is rebuilt from scratch here so that re-running this
# cell does not append duplicate points to the results of a previous run.
battery_impedance_info = {}

for idx, row in file_details.iterrows():
    battery_id = row['battery_id']  # Battery the measurement belongs to
    filename = row['filename']      # Name of the per-measurement CSV file
    start_time = row['start_time']  # Parsed measurement start time

    # Data files live in a fixed directory relative to the notebook
    file_path = f"cleaned_dataset/data/{filename}"

    try:
        data = pd.read_csv(file_path)

        if 'Battery_impedance' not in data.columns:
            print(f"Column 'Battery_impedance' not found in file: {filename}")
            continue

        # Each cell is converted with complex(); only the real part is averaged
        impedance_values = data['Battery_impedance'].apply(lambda x: complex(x).real)
        avg_impedance = impedance_values.mean()

        # Create the per-battery entry on first sight, then append to it
        battery_entry = battery_impedance_info.setdefault(
            battery_id, {'start_time': [], 'battery_impedance': []}
        )
        battery_entry['start_time'].append(start_time)
        battery_entry['battery_impedance'].append(avg_impedance)
    except Exception as e:
        # Best effort: a missing or unparsable file is reported and skipped
        # so that the remaining files are still processed.
        print(f"Error occurred while processing {filename}: {e}")


In [14]:
# Plot average battery impedance over time, one figure per battery.
#
# The loop variable is deliberately not called `impedance_data`: that name
# holds the impedance DataFrame built earlier in the notebook, and rebinding
# it to a dict here would break re-running the cells that use the DataFrame.
for battery, battery_info in battery_impedance_info.items():
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=battery_info['start_time'],
        y=battery_info['battery_impedance'],
        mode='lines',
        name=f'Battery {battery}'
    ))

    # Update layout with proper titles and labels
    fig.update_layout(
        title=f"Average Impedance Over Time (Battery ID: {battery})",
        xaxis_title="Start Time",
        yaxis_title="Battery Impedance (Ohms)",
        xaxis=dict(tickangle=45),
        template='plotly'
    )
    fig.show()