<a href="https://colab.research.google.com/github/nilnida/DSA210-Term-Project/blob/main/data_process.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Processing

In [44]:
# Mount Google Drive so the files under /content/drive/MyDrive can be read and written.
from google.colab import drive
drive.mount('/content/drive')

# scipy.io loads the MATLAB battery file; pandas builds and saves the tables.
import scipy.io
import numpy as np
import pandas as pd

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Charging, Discharging and Impedance Data over Cycles

This data was extracted from a randomly selected battery in the NASA dataset, originally stored in a MATLAB file accessed from my Drive folder. The data was categorized based on cycle type: "charge", "discharge", and "impedance". For each type, time-series measurements such as voltage, current, temperature, and impedance parameters were collected into Python dictionaries and converted into separate data frames.

In [45]:
file_path1 = "/content/drive/MyDrive/dsa210/battery_dataset/B0005.mat"


def _flat(cycle_data, field):
  """Return one named field of a cycle's data struct as a flat 1-D array."""
  return cycle_data[field][0, 0].flatten()


# Load the MATLAB file and drill down to the array of per-cycle records.
mat_data = scipy.io.loadmat(file_path1)
b0005 = mat_data['B0005']
cycle_struct = b0005[0, 0]['cycle']

# One list of row dictionaries per cycle type; each list becomes a DataFrame below.
charge_records_B0005 = []
discharge_records_B0005 = []
impedance_records_B0005 = []

# Cycle numbers are 1-based positions in the file, shared across all cycle types.
for cycle_number, cycle in enumerate(cycle_struct[0], start=1):
  cycle_type = cycle['type'][0]
  amb_temp = cycle['ambient_temperature'].flatten()[0]
  data = cycle['data']

  if cycle_type == "charge":
    voltage = _flat(data, 'Voltage_measured')
    current = _flat(data, 'Current_measured')
    temperature = _flat(data, 'Temperature_measured')
    current_charge = _flat(data, 'Current_charge')
    voltage_charge = _flat(data, 'Voltage_charge')
    elapsed = _flat(data, 'Time')

    # One row per sample; the voltage trace sets the number of rows.
    charge_records_B0005.extend(
      {
        "Cycle index": cycle_number,
        "Ambient temperature (°C)": amb_temp,
        "Time (secs)": elapsed[j],
        "Battery terminal voltage (Volts)": voltage[j],
        "Battery output current (Amps)": current[j],
        "Battery temperature (°C)": temperature[j],
        "Current measured at charger (Amps)": current_charge[j],
        "Voltage measured at charger (Volts)": voltage_charge[j]
      }
      for j in range(len(voltage))
    )

  elif cycle_type == "discharge":
    voltage = _flat(data, 'Voltage_measured')
    current = _flat(data, 'Current_measured')
    temperature = _flat(data, 'Temperature_measured')
    current_load = _flat(data, 'Current_load')
    voltage_load = _flat(data, 'Voltage_load')
    elapsed = _flat(data, 'Time')
    # Capacity is a single value per cycle, repeated on every row of that cycle.
    capacity = _flat(data, 'Capacity')[0]

    discharge_records_B0005.extend(
      {
        "Cycle index": cycle_number,
        "Ambient temperature (°C)": amb_temp,
        "Battery terminal voltage (Volts)": voltage[j],
        "Battery output current (Amps)": current[j],
        "Battery temperature (°C)": temperature[j],
        "Current measured at load (Amps)": current_load[j],
        "Voltage measured at load (Volts)": voltage_load[j],
        "Time (secs)": elapsed[j],
        "Battery capacity (Ahr) for discharge till 2.7V": capacity
      }
      for j in range(len(voltage))
    )

  elif cycle_type == "impedance":
    sense_current = _flat(data, 'Sense_current')
    battery_current = _flat(data, 'Battery_current')
    current_ratio = _flat(data, 'Current_ratio')
    battery_impedance = _flat(data, 'Battery_impedance')
    rectified_impedance = _flat(data, 'Rectified_Impedance')
    # Re and Rct are single values per cycle, repeated on every row.
    Re = _flat(data, 'Re')[0]
    Rct = _flat(data, 'Rct')[0]
    n_rectified = len(rectified_impedance)

    # The rectified series may be shorter than the raw one; pad with None.
    impedance_records_B0005.extend(
      {
        "Cycle index": cycle_number,
        "Ambient temperature (°C)": amb_temp,
        "Current in sense branch (Amps)": sense_current[j],
        "Current in battery branch (Amps)": battery_current[j],
        "Ratio of the above currents": current_ratio[j],
        "Battery impedance (Ohms) computed from raw data": battery_impedance[j],
        "Calibrated and smoothed battery impedance (Ohms)": rectified_impedance[j] if j < n_rectified else None,
        "Estimated electrolyte resistance (Ohms)": Re,
        "Estimated charge transfer resistance (Ohms)": Rct
      }
      for j in range(len(sense_current))
    )

df_charge_B0005 = pd.DataFrame(charge_records_B0005)
df_discharge_B0005 = pd.DataFrame(discharge_records_B0005)
df_impedance_B0005 = pd.DataFrame(impedance_records_B0005)

charge_output_file = "/content/drive/MyDrive/dsa210/df_charge_B0005.csv"
discharge_output_file = "/content/drive/MyDrive/dsa210/df_discharge_B0005.csv"
impedance_output_file = "/content/drive/MyDrive/dsa210/df_impedance_B0005.csv"

# Write each table to its CSV and report the destination, in charge/discharge/impedance order.
for frame, destination in (
  (df_charge_B0005, charge_output_file),
  (df_discharge_B0005, discharge_output_file),
  (df_impedance_B0005, impedance_output_file),
):
  frame.to_csv(destination, index=False)
  print(f"Data has been saved to {destination}")

Data has been saved to /content/drive/MyDrive/dsa210/df_charge_B0005.csv
Data has been saved to /content/drive/MyDrive/dsa210/df_discharge_B0005.csv
Data has been saved to /content/drive/MyDrive/dsa210/df_impedance_B0005.csv


In [46]:
# Preview the first and last ten rows of the charge table.
print("Charge data sample:")
display(df_charge_B0005.head(10), df_charge_B0005.tail(10))

Charge data sample:


Unnamed: 0,Cycle index,Ambient temperature (°C),Time (secs),Battery terminal voltage (Volts),Battery output current (Amps),Battery temperature (°C),Current measured at charger (Amps),Voltage measured at charger (Volts)
0,1,24,0.0,3.873017,-0.001201,24.655358,0.0,0.003
1,1,24,2.532,3.479394,-4.030268,24.66648,-4.036,1.57
2,1,24,5.5,4.000588,1.512731,24.675394,1.5,4.726
3,1,24,8.344,4.012395,1.509063,24.693865,1.5,4.742
4,1,24,11.125,4.019708,1.511318,24.705069,1.5,4.753
5,1,24,13.891,4.025409,1.512779,24.71814,1.498,4.758
6,1,24,16.672,4.030636,1.511838,24.731144,1.5,4.764
7,1,24,19.5,4.035349,1.510245,24.74129,1.5,4.769
8,1,24,22.282,4.039716,1.507796,24.759011,1.5,4.775
9,1,24,25.063,4.043541,1.507322,24.766891,1.5,4.775


Unnamed: 0,Cycle index,Ambient temperature (°C),Time (secs),Battery terminal voltage (Volts),Battery output current (Amps),Battery temperature (°C),Current measured at charger (Amps),Voltage measured at charger (Volts)
541163,613,24,10200.812,4.20702,0.035803,25.049182,0.031,4.24
541164,613,24,10203.687,4.207059,0.030939,25.026737,0.031,4.24
541165,613,24,10206.578,4.207501,0.027088,25.0445,0.031,4.24
541166,613,24,10209.406,4.206971,0.019631,25.035351,0.031,4.24
541167,613,24,10212.234,4.200056,-0.003214,25.046022,-0.002,0.003
541168,616,24,0.0,0.236356,-0.003484,23.372048,0.0,0.003
541169,616,24,2.547,0.003365,-0.001496,23.369434,0.0,0.003
541170,616,24,5.5,4.985137,0.000506,23.386535,0.0,5.002
541171,616,24,8.312,4.98472,0.000442,23.386983,-0.002,5.002
541172,616,24,12.656,4.21344,-0.000734,23.385061,-0.002,4.229


In [47]:
# Preview the first and last ten rows of the discharge table.
print("Discharge data sample:")
display(df_discharge_B0005.head(10), df_discharge_B0005.tail(10))

Discharge data sample:


Unnamed: 0,Cycle index,Ambient temperature (°C),Battery terminal voltage (Volts),Battery output current (Amps),Battery temperature (°C),Current measured at load (Amps),Voltage measured at load (Volts),Time (secs),Battery capacity (Ahr) for discharge till 2.7V
0,2,24,4.191492,-0.004902,24.330034,-0.0006,0.0,0.0,1.856487
1,2,24,4.190749,-0.001478,24.325993,-0.0006,4.206,16.781,1.856487
2,2,24,3.974871,-2.012528,24.389085,-1.9982,3.062,35.703,1.856487
3,2,24,3.951717,-2.013979,24.544752,-1.9982,3.03,53.781,1.856487
4,2,24,3.934352,-2.011144,24.731385,-1.9982,3.011,71.922,1.856487
5,2,24,3.920058,-2.013007,24.909816,-1.9982,2.991,90.094,1.856487
6,2,24,3.907904,-2.0144,25.105884,-1.9982,2.977,108.281,1.856487
7,2,24,3.897036,-2.011603,25.317019,-1.9982,2.967,126.453,1.856487
8,2,24,3.887477,-2.018015,25.509423,-1.9982,2.959,144.641,1.856487
9,2,24,3.878959,-2.013135,25.703603,-1.9982,2.951,162.844,1.856487


Unnamed: 0,Cycle index,Ambient temperature (°C),Battery terminal voltage (Volts),Battery output current (Amps),Battery temperature (°C),Current measured at load (Amps),Voltage measured at load (Volts),Time (secs),Battery capacity (Ahr) for discharge till 2.7V
50275,614,24,3.56335,-0.000948,35.623242,0.0006,0.0,2732.359,1.325079
50276,614,24,3.566589,0.000416,35.479866,0.0006,0.0,2742.093,1.325079
50277,614,24,3.570132,-0.000338,35.345455,0.0006,0.0,2751.843,1.325079
50278,614,24,3.573139,0.001471,35.171253,0.0006,0.0,2761.687,1.325079
50279,614,24,3.576159,0.001138,34.966434,0.0006,0.0,2771.5,1.325079
50280,614,24,3.579262,-0.001569,34.864823,0.0006,0.0,2781.312,1.325079
50281,614,24,3.581964,-0.003067,34.81477,0.0006,0.0,2791.062,1.325079
50282,614,24,3.584484,-0.003079,34.676258,0.0006,0.0,2800.828,1.325079
50283,614,24,3.587336,0.001219,34.56558,0.0006,0.0,2810.64,1.325079
50284,614,24,3.589937,-0.000583,34.40592,0.0006,0.0,2820.39,1.325079


In [48]:
# Preview the first and last ten rows of the impedance table.
print("Impedance data sample:")
display(df_impedance_B0005.head(10), df_impedance_B0005.tail(10))

Impedance data sample:


Unnamed: 0,Cycle index,Ambient temperature (°C),Current in sense branch (Amps),Current in battery branch (Amps),Ratio of the above currents,Battery impedance (Ohms) computed from raw data,Calibrated and smoothed battery impedance (Ohms),Estimated electrolyte resistance (Ohms),Estimated charge transfer resistance (Ohms)
0,41,24,-1.000000+ 1.000000j,-1.000000+ 1.000000j,1.000000+0.000000j,-0.438926-0.107298j,0.070069-0.000480j,0.044669,0.069456
1,41,24,820.609497- 36.234550j,337.091461- 82.920776j,2.320415+0.463305j,0.130088-0.197115j,0.068179-0.001190j,0.044669,0.069456
2,41,24,827.242188- 48.231228j,330.631561- 70.013718j,2.424193+0.367465j,0.058771+0.033307j,0.067933-0.000057j,0.044669,0.069456
3,41,24,827.193481- 56.195717j,330.808624- 61.734425j,2.447002+0.286778j,0.005814-0.060547j,0.066918-0.000879j,0.044669,0.069456
4,41,24,824.929504- 53.241478j,332.682678- 57.629013j,2.434305+0.261646j,0.126081-0.090444j,0.068071-0.000197j,0.044669,0.069456
5,41,24,824.959167- 62.825409j,333.561005- 49.793083j,2.446781+0.176901j,0.221055-0.004126j,0.066571-0.000584j,0.044669,0.069456
6,41,24,824.055908- 61.669991j,335.273010- 50.344555j,2.430679+0.181051j,0.244874+0.097775j,0.068418-0.000957j,0.044669,0.069456
7,41,24,822.727295- 66.035019j,334.791901- 44.279137j,2.440820+0.125578j,0.235114+0.165348j,0.067724-0.000477j,0.044669,0.069456
8,41,24,822.407410- 65.799271j,338.487885- 44.111298j,2.413987+0.120196j,0.223703+0.194456j,0.065793-0.000645j,0.044669,0.069456
9,41,24,821.634644- 65.822723j,337.377594- 43.389069j,2.420415+0.116181j,0.221354+0.193694j,0.066825-0.000803j,0.044669,0.069456


Unnamed: 0,Cycle index,Ambient temperature (°C),Current in sense branch (Amps),Current in battery branch (Amps),Ratio of the above currents,Battery impedance (Ohms) computed from raw data,Calibrated and smoothed battery impedance (Ohms),Estimated electrolyte resistance (Ohms),Estimated charge transfer resistance (Ohms)
13334,615,24,827.509094- 88.683716j,340.324738+ 31.897240j,2.386143-0.484229j,0.192459-0.019412j,0.050301+0.000447j,0.050036,0.074792
13335,615,24,842.819824- 96.874344j,320.907104+ 44.960667j,2.534325-0.656948j,0.198469-0.024918j,NaN+0.000000j,0.050036,0.074792
13336,615,24,862.067139-112.855103j,300.386597+ 62.504025j,2.675829-0.932482j,0.206349-0.030874j,NaN+0.000000j,0.050036,0.074792
13337,615,24,884.963745-116.482780j,275.235901+ 74.332909j,2.890191-1.203764j,0.216446-0.037173j,NaN+0.000000j,0.050036,0.074792
13338,615,24,902.337280-104.781624j,251.568756+ 84.970802j,3.093266-1.461306j,0.229172-0.043614j,NaN+0.000000j,0.050036,0.074792
13339,615,24,915.489014- 64.045120j,230.149506+ 91.909889j,3.334835-1.610038j,0.245024-0.049836j,NaN+0.000000j,0.050036,0.074792
13340,615,24,916.725525+ 2.986217j,212.188858+107.745811j,3.440393-1.732898j,0.264594-0.055235j,NaN+0.000000j,0.050036,0.074792
13341,615,24,914.619629+126.111481j,176.598038+131.682785j,3.670656-2.022960j,0.288571-0.058837j,NaN+0.000000j,0.050036,0.074792
13342,615,24,880.340820+293.825287j,136.847626+146.881027j,4.060164-2.210749j,0.317700-0.059127j,NaN+0.000000j,0.050036,0.074792
13343,615,24,801.361816+450.083099j,97.058853+150.046997j,4.550338-2.397324j,0.352680-0.053818j,NaN+0.000000j,0.050036,0.074792


## Remaining Useful Life Data over Cycles

The RUL dataset was loaded and cleaned by renaming the "Cycle_Index" column to "Cycle index" for consistency across datasets. The values in this column were also converted from float to integer to ensure proper merging and indexing.

In [49]:
file_path2 = '/content/drive/MyDrive/dsa210/battery_RUL.csv'

# Load the RUL table, give the key column the same name as in the other
# tables, and store it as an integer.
df2 = (
    pd.read_csv(file_path2)
    .rename(columns={"Cycle_Index": "Cycle index"})
    .astype({"Cycle index": int})
)

output_path = "/content/drive/MyDrive/dsa210/df_RUL.csv"
df2.to_csv(output_path, index=False)
print(f"Data has been saved to: {output_path}")

display(df2.head(10), df2.tail(10))

Data has been saved to: /content/drive/MyDrive/dsa210/df_RUL.csv


Unnamed: 0,Cycle index,Discharge Time (s),Decrement 3.6-3.4V (s),Max. Voltage Dischar. (V),Min. Voltage Charg. (V),Time at 4.15V (s),Time constant current (s),Charging time (s),RUL
0,1,2595.3,1151.4885,3.67,3.211,5460.001,6755.01,10777.82,1112
1,2,7408.64,1172.5125,4.246,3.22,5508.992,6762.02,10500.35,1111
2,3,7393.76,1112.992,4.249,3.224,5508.993,6762.02,10420.38,1110
3,4,7385.5,1080.320667,4.25,3.225,5502.016,6762.02,10322.81,1109
4,6,65022.75,29813.487,4.29,3.398,5480.992,53213.54,56699.65,1107
5,7,3301.18,1194.235077,3.674,3.504,5023.633636,5977.38,5977.38,1106
6,8,5955.3,1220.135329,4.013,3.501,5017.495,5967.55,5967.55,1105
7,9,5951.2,1220.135329,4.014,3.501,5017.496,5962.21,5962.21,1104
8,10,5945.44,1216.920914,4.014,3.501,5009.993667,5954.91,5954.91,1103
9,11,435251.49,263086.078,4.267,3.086,269.984,443700.02,443700.02,1102


Unnamed: 0,Cycle index,Discharge Time (s),Decrement 3.6-3.4V (s),Max. Voltage Dischar. (V),Min. Voltage Charg. (V),Time at 4.15V (s),Time constant current (s),Charging time (s),RUL
15054,1103,777.44,182.666667,3.775,3.741,929.913,1412.31,6685.56,9
15055,1104,777.12,182.666667,3.774,3.742,929.975,1412.38,6700.5,8
15056,1105,775.69,181.813333,3.773,3.742,926.312,1412.31,6668.31,7
15057,1106,774.44,181.888444,3.774,3.742,926.313,1412.31,6727.25,6
15058,1107,773.0,180.48,3.774,3.742,922.712,1412.31,6659.62,5
15059,1108,770.44,179.52381,3.773,3.742,922.775,1412.38,6678.88,4
15060,1109,771.12,179.52381,3.773,3.744,915.512,1412.31,6670.38,3
15061,1110,769.12,179.357143,3.773,3.742,915.513,1412.31,6637.12,2
15062,1111,773.88,162.374667,3.763,3.839,539.375,1148.0,7660.62,1
15063,1112,677537.27,142740.64,4.206,3.305,49680.004,599830.14,599830.14,0


## Impedance Data over State of Charge of the Battery

In the impedance processing code, the column names were updated for clarity: for example, "SOC" was renamed to "State of Charge (%)", and "FREQUENCY_ID" was renamed to "Frequency (Hz)", with its frequency indices mapped to their corresponding values in Hertz. The complex impedance values were converted from string format to Python complex numbers, and their real, imaginary, and magnitude components were extracted into separate columns. The original "Complex Impedance (Ohms)" column was then removed to avoid redundancy. Finally, the positions of "Battery ID" and "State of Charge (%)" were swapped to improve logical organization, and the cleaned dataset was saved for further use.

In [50]:
file_path3 = '/content/drive/MyDrive/dsa210/impedance.csv'

# Load the table and switch to descriptive column names in one step.
df3 = pd.read_csv(file_path3).rename(columns={
    "MEASURE_ID": "Measurement ID",
    "SOC": "State of Charge (%)",
    "BATTERY_ID": "Battery ID",
    "FREQUENCY_ID": "Frequency (Hz)",
    "IMPEDANCE_VALUE": "Complex Impedance (Ohms)"
})

# Frequency IDs are looked up by their position in this list (ID 0 -> 0.05 Hz).
frequencies_hz = [0.05, 0.1, 0.2, 0.4, 1, 2, 4, 10, 20, 40, 100, 200, 400, 1000]
frequency_lookup = {position: hertz for position, hertz in enumerate(frequencies_hz)}
df3["Frequency (Hz)"] = df3["Frequency (Hz)"].map(frequency_lookup)

# Parse the impedance strings once, derive the rounded real-valued columns,
# then drop the source column.
impedance = df3["Complex Impedance (Ohms)"].apply(complex)
df3["Re(Z) (Ohms)"] = impedance.map(lambda z: round(z.real, 6))
df3["Im(Z) (Ohms)"] = impedance.map(lambda z: round(z.imag, 6))
df3["|Z| (Ohms)"] = impedance.map(lambda z: round(abs(z), 6))
df3 = df3.drop(columns=["Complex Impedance (Ohms)"])

# Exchange the positions of "Battery ID" and "State of Charge (%)".
column_order = df3.columns.tolist()
battery_pos = column_order.index("Battery ID")
soc_pos = column_order.index("State of Charge (%)")
column_order[battery_pos], column_order[soc_pos] = "State of Charge (%)", "Battery ID"
df3 = df3[column_order]

output_path = "/content/drive/MyDrive/dsa210/df_complex_impedances.csv"
df3.to_csv(output_path, index=False)
print(f"Data has been saved to {output_path}")

display(df3.head(10), df3.tail(10))

Data has been saved to /content/drive/MyDrive/dsa210/df_complex_impedances.csv


Unnamed: 0,Measurement ID,Battery ID,State of Charge (%),Frequency (Hz),Re(Z) (Ohms),Im(Z) (Ohms),|Z| (Ohms)
0,02_4,2,100,0.05,0.110974,-0.005473,0.111108
1,02_4,2,90,0.05,0.107767,-0.005977,0.107932
2,02_4,2,80,0.05,0.107165,-0.006332,0.107352
3,02_4,2,70,0.05,0.106559,-0.006489,0.106756
4,02_4,2,60,0.05,0.106086,-0.00559,0.106234
5,02_4,2,50,0.05,0.103768,-0.004256,0.103855
6,02_4,2,40,0.05,0.104545,-0.00387,0.104616
7,02_4,2,30,0.05,0.105084,-0.003881,0.105156
8,02_4,2,20,0.05,0.108914,-0.004577,0.10901
9,02_4,2,10,0.05,0.117933,-0.006453,0.11811


Unnamed: 0,Measurement ID,Battery ID,State of Charge (%),Frequency (Hz),Re(Z) (Ohms),Im(Z) (Ohms),|Z| (Ohms)
3350,06_8,6,100,1000.0,0.082788,-0.003166,0.082849
3351,06_8,6,90,1000.0,0.082948,-0.003066,0.083005
3352,06_8,6,80,1000.0,0.082873,-0.002947,0.082925
3353,06_8,6,70,1000.0,0.082766,-0.003054,0.082823
3354,06_8,6,60,1000.0,0.082964,-0.003023,0.083019
3355,06_8,6,50,1000.0,0.083183,-0.003074,0.08324
3356,06_8,6,40,1000.0,0.083685,-0.003092,0.083742
3357,06_8,6,30,1000.0,0.083959,-0.003115,0.084016
3358,06_8,6,20,1000.0,0.084703,-0.003329,0.084768
3359,06_8,6,10,1000.0,0.085719,-0.003444,0.085788


# Merged Battery Charge and RUL Dataset


This code merges the averaged charge cycle data with the corresponding Remaining Useful Life (RUL) values. First, the charge dataset and the RUL dataset were each grouped by "Cycle index" and averaged to reduce the data to a single representative entry per cycle. These summaries were then merged using a left join on "Cycle index". After merging, relevant columns such as "Battery temperature (°C)", "Voltage measured at charger (Volts)", "Time (secs)", and "RUL" were selected and renamed where necessary to ensure consistency.

In [51]:
# Reduce both tables to one averaged row per cycle.
df_charge_avg = df_charge_B0005.groupby("Cycle index", as_index=False).mean()
df2_avg = df2.groupby("Cycle index", as_index=False).mean()

charge_columns = [
    "Cycle index",
    "Battery temperature (°C)",
    "Voltage measured at charger (Volts)",
    "Time (secs)",
    "RUL"
]

# Left join on the cycle index, normalise any suffixed names, keep the chosen columns.
df_merged = (
    pd.merge(df_charge_avg, df2_avg, how="left", on="Cycle index")
    .rename(columns={
        "Battery temperature (°C)_charge": "Battery temperature (°C)",
        "Time (secs)_charge": "Time (secs)"
    })
    [charge_columns]
)

merged_output_path = "/content/drive/MyDrive/dsa210/df_merged_charge_and_RUL.csv"
df_merged.to_csv(merged_output_path, index=False)
print(f"Data has been saved to {merged_output_path}")

display(df_merged.head(10), df_merged.tail(10))

Data has been saved to /content/drive/MyDrive/dsa210/df_merged_charge_and_RUL.csv


Unnamed: 0,Cycle index,Battery temperature (°C),Voltage measured at charger (Volts),Time (secs),RUL
0,1,25.324079,4.359487,2725.952433,1109.357143
1,3,26.635623,4.430904,3685.339534,1107.357143
2,5,26.778176,4.402619,3665.636787,
3,7,26.703204,4.418979,3636.451544,1103.357143
4,9,26.617004,4.364055,3666.773646,1101.357143
5,11,26.518495,4.326279,3774.646442,1099.357143
6,13,26.620706,4.287875,3776.138971,1097.357143
7,15,26.737919,4.456064,3542.7558,1095.357143
8,17,26.741457,4.41053,3547.597802,1093.357143
9,19,26.70217,4.418719,3553.500567,1091.357143


Unnamed: 0,Cycle index,Battery temperature (°C),Voltage measured at charger (Volts),Time (secs),RUL
160,582,25.474169,4.30313,5040.454734,528.357143
161,586,25.517578,4.324241,5050.898851,524.357143
162,590,25.507755,4.241817,5179.534544,520.357143
163,594,25.501143,4.30548,5119.658392,516.357143
164,598,25.47614,4.429645,4985.484129,512.357143
165,602,25.506487,4.333942,5060.090055,508.357143
166,606,25.517453,4.252485,5195.061774,504.357143
167,610,25.664855,4.423386,4845.798688,500.357143
168,613,25.433647,4.431494,4999.656694,497.357143
169,616,23.380012,2.8478,5.803,494.357143


# Merged Battery Discharge and RUL Dataset

This code integrates the discharge cycle data with the Remaining Useful Life (RUL) values by first aggregating both datasets using the "Cycle index" as the key. The discharge data was averaged to summarize measurements like temperature, terminal voltage, capacity, and discharge duration for each cycle. These summaries were then merged with the averaged RUL data via a left join. After merging, column names were cleaned up for clarity, and only the most relevant fields were retained.

In [52]:
# Reduce both tables to one averaged row per cycle so they can be joined on "Cycle index".
df_discharge_avg = df_discharge_B0005.groupby("Cycle index", as_index=False).mean()
# NOTE(review): the merged RUL values shown below are fractional (e.g. 1108.357143),
# so df2 appears to hold several rows per cycle index — confirm averaging them is intended.
df2_avg = df2.groupby("Cycle index", as_index=False).mean()

# Left join keeps every discharge cycle; cycles absent from the RUL table get NaN.
df_merged = df_discharge_avg.merge(df2_avg, on="Cycle index", how="left")

# These renames only take effect if "_discharge"-suffixed columns exist after the merge.
df_merged = df_merged.rename(columns={
    "Battery temperature (°C)_discharge": "Battery temperature (°C)",
    "Time (secs)_discharge": "Time (secs)"
})

df_merged = df_merged[[
    "Cycle index",
    "Battery temperature (°C)",
    "Battery terminal voltage (Volts)",
    "Battery capacity (Ahr) for discharge till 2.7V",
    "Time (secs)",
    "RUL"
]]

merged_output_path = "/content/drive/MyDrive/dsa210/df_merged_discharge_and_RUL.csv"
# Write to merged_output_path (the path reported below), not the stale `output_path`
# left over from an earlier cell, which would overwrite that cell's CSV instead.
df_merged.to_csv(merged_output_path, index=False)
print(f"Data has been saved to {merged_output_path}")

display(df_merged.head(10))
display(df_merged.tail(10))

Data has been saved to /content/drive/MyDrive/dsa210/df_merged_discharge_and_RUL.csv


Unnamed: 0,Cycle index,Battery temperature (°C),Battery terminal voltage (Volts),Battery capacity (Ahr) for discharge till 2.7V,Time (secs),RUL
0,2,32.572328,3.529829,1.856487,1812.087497,1108.357143
1,4,32.725235,3.53732,1.846327,1803.143372,1106.357143
2,6,32.642862,3.543737,1.835349,1793.809379,1104.357143
3,8,32.514876,3.543666,1.835263,1784.359428,1102.357143
4,10,32.382349,3.542343,1.834646,1783.501351,1100.357143
5,12,32.434182,3.541335,1.835662,1793.497759,1098.357143
6,14,32.480416,3.541025,1.835146,1792.842246,1096.357143
7,16,32.410462,3.554133,1.825757,1756.570764,1094.357143
8,18,32.346141,3.552936,1.824774,1745.895568,1092.357143
9,20,32.276798,3.551206,1.824613,1746.023932,1090.357143


Unnamed: 0,Cycle index,Battery temperature (°C),Battery terminal voltage (Volts),Battery capacity (Ahr) for discharge till 2.7V,Time (secs),RUL
158,580,33.174058,3.470763,1.303033,1398.291227,530.357143
159,584,33.253985,3.468111,1.303357,1398.676913,526.357143
160,588,33.319809,3.464863,1.30341,1393.635275,522.357143
161,592,33.337658,3.467864,1.297887,1393.503728,518.357143
162,596,33.290767,3.464021,1.298074,1393.605275,514.357143
163,600,33.275688,3.466462,1.293464,1388.981387,510.357143
164,604,33.320678,3.468509,1.288003,1389.304451,506.357143
165,608,33.37315,3.466806,1.287453,1388.685185,502.357143
166,612,33.713519,3.471071,1.309015,1393.789745,498.357143
167,614,33.865318,3.475472,1.325079,1403.070957,496.357143
