Load and Explore the Dataset

In [1]:
import pandas as pd

# Step 1: Load the dataset
file_path = "metadata.csv"  # Replace with the path to your CSV file
battery_data = pd.read_csv(file_path)

# Step 2: Display the first few rows
print("First 5 rows of the dataset:")
print(battery_data.head())

# Step 3: Check column names and data types
print("\nColumn names and their data types:")
print(battery_data.dtypes)

# Step 4: Check for missing values
print("\nMissing values in each column:")
print(battery_data.isnull().sum())

# Step 5: Get a summary of numerical columns
print("\nStatistical summary of numerical columns:")
print(battery_data.describe())

# Step 6: Identify unique values in relevant columns
print("\nUnique values for key columns:")
print("Battery IDs:", battery_data['battery_id'].unique())
print("Test IDs:", battery_data['test_id'].unique())
print("Ambient Temperatures:", battery_data['ambient_temperature'].unique())


First 5 rows of the dataset:
        type                                         start_time  \
0  discharge  [2010.       7.      21.      15.       0.    ...   
1  impedance  [2010.       7.      21.      16.      53.    ...   
2     charge  [2010.       7.      21.      17.      25.    ...   
3  impedance                    [2010    7   21   20   31    5]   
4  discharge  [2.0100e+03 7.0000e+00 2.1000e+01 2.1000e+01 2...   

   ambient_temperature battery_id  test_id  uid   filename  \
0                    4      B0047        0    1  00001.csv   
1                   24      B0047        1    2  00002.csv   
2                    4      B0047        2    3  00003.csv   
3                   24      B0047        3    4  00004.csv   
4                    4      B0047        4    5  00005.csv   

             Capacity                   Re                  Rct  
0  1.6743047446975208                  NaN                  NaN  
1                 NaN  0.05605783343888099  0.20097016584458333

data clensing

In [2]:
# Step 1: Drop irrelevant columns (if necessary)
# For example, if 'uid' and 'filename' are not needed:
battery_data = battery_data.drop(columns=['uid', 'filename'])

# Step 2: Handle missing values
# Drop rows with missing values (simplest method)
battery_data = battery_data.dropna()

# Alternatively, fill missing values with a strategy (e.g., mean for numerical data)
# battery_data['Re'] = battery_data['Re'].fillna(battery_data['Re'].mean())
# battery_data['Rct'] = battery_data['Rct'].fillna(battery_data['Rct'].mean())

# Step 3: Confirm that missing values are handled
print("\nMissing values after cleaning:")
print(battery_data.isnull().sum())



Missing values after cleaning:
type                   0.0
start_time             0.0
ambient_temperature    0.0
battery_id             0.0
test_id                0.0
Capacity               0.0
Re                     0.0
Rct                    0.0
dtype: float64


To analyze how Re and Rct evolve over time or cycles, ensure your data is sorted properly by start_time or an equivalent aging metric.

In [3]:
# Step 1: Convert 'start_time' to datetime (if not already in datetime format)
battery_data['start_time'] = pd.to_datetime(battery_data['start_time'])

# Step 2: Sort the data by 'start_time' or 'Capacity' (depending on aging measure)
battery_data = battery_data.sort_values(by=['start_time'])

# Step 3: Reset index after sorting
battery_data = battery_data.reset_index(drop=True)


In [4]:
pip install plotly


Note: you may need to restart the kernel to use updated packages.


In [5]:
import plotly.express as px

fig_re = px.line(
    battery_data, 
    x='Capacity', 
    y='Re', 
    title='Electrolyte Resistance (Re) vs Battery Capacity',
    labels={'Capacity': 'Battery Capacity (Ah)', 'Re': 'Electrolyte Resistance (Ohms)'},
    color='battery_id'  # Optional: Differentiate batteries by ID
)
fig_re.show()


In [6]:
fig_time = px.line(
    battery_data, 
    x='start_time', 
    y=['Re', 'Rct'], 
    title='Re and Rct Over Time',
    labels={'value': 'Resistance (Ohms)', 'start_time': 'Time'},
    color_discrete_map={'Re': 'blue', 'Rct': 'red'}
)
fig_time.show()


In [7]:
battery_data.to_csv("cleaned_battery_data.csv", index=False)
