In [None]:
from scipy import interpolate
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pprint
import os
from pathlib import Path
from matplotlib.colors import LinearSegmentedColormap
class Battery:
    """Load one battery cycling CSV and derive per-cycle feature rows.

    Metadata is parsed from the file name, which is expected to look like
    ``CY25-1_1-#1.csv`` (the default path): characters 2-3 are read as the
    temperature, the two ``_``-separated fields between the dashes as the
    charge and discharge rate codes, and the text between ``#`` and the
    first following ``.`` as the battery id.

    The CSV must contain the columns this class reads: ``'cycle number'``,
    ``'control/mA'``, ``'control/V/mA'``, ``'Ecell/V'``,
    ``'Q charge/mA.h'`` and ``'Q discharge/mA.h'``.
    """

    def __init__(self, path='../Dataset_1_NCA_battery/CY25-1_1-#1.csv'):
        """Read the CSV at *path* and parse metadata from its file name.

        Args:
            path: Location of the CSV file; its base name supplies the
                metadata described in the class docstring.

        Raises:
            ValueError, IndexError: If the file name does not follow the
                expected pattern (non-numeric fields or missing separators).
        """
        self.path = path
        self.df = pd.read_csv(path)
        file_name = os.path.basename(path)
        # e.g. 'CY25-...' -> 25
        self.temperature = int(file_name[2:4])
        rate_codes = file_name.split('-')[1].split('_')
        # NOTE(review): dividing by 10 maps the code '05' to 0.5, but '1' to
        # 0.1 and '025' to 2.5 -- confirm this matches the naming scheme of
        # the data files before relying on this value.
        self.charge_c_rate = float(rate_codes[0]) / 10
        # Kept as the raw string taken from the file name (not converted).
        self.discharge_c_rate = rate_codes[1]
        self.battery_id = file_name.split('#')[-1].split('.')[0]
        # Sorted unique cycle numbers present in the file, and their count.
        self.cycle_index = self._get_cycle_index()
        self.cycle_life = len(self.cycle_index)

    def _get_cycle_index(self) -> np.ndarray:
        """Return the sorted unique values of the ``'cycle number'`` column."""
        return np.unique(self.df['cycle number'].values)

    def _check(self, cycle=None, variable=None) -> bool:
        """Validate a cycle number and/or a column name.

        Args:
            cycle: Cycle number that must be present in ``self.cycle_index``;
                skipped when ``None``.
            variable: Column name that must exist in ``self.df``; skipped
                when ``None``.

        Returns:
            ``True`` when every supplied argument is valid.

        Raises:
            ValueError: If *cycle* or *variable* is not found.
        """
        if cycle is not None and cycle not in self.cycle_index:
            raise ValueError('cycle should be in [{},{}]'.format(
                int(self.cycle_index.min()), int(self.cycle_index.max())))
        if variable is not None and variable not in self.df.columns:
            raise ValueError(
                'variable should be in {}'.format(list(self.df.columns)))
        return True

    def get_cycle(self, cycle) -> pd.DataFrame:
        """Return the rows of ``self.df`` belonging to *cycle*.

        Raises:
            ValueError: If *cycle* is not present in the data.
        """
        self._check(cycle=cycle)
        return self.df[self.df['cycle number'] == cycle]

    def get_CC_stage(self, cycle, voltage_range=None) -> pd.DataFrame:
        """Return the rows of *cycle* whose ``'control/mA'`` is positive.

        Judging by the method name these rows are presumably the
        constant-current charging stage -- confirm against the data format.

        Args:
            cycle: Cycle number to select.
            voltage_range: Optional ``(low, high)`` pair; when given, only
                rows with ``low <= 'Ecell/V' <= high`` are kept.

        Raises:
            ValueError: If *cycle* is not present in the data.
        """
        # get_cycle() already validates the cycle number.
        cycle_df = self.get_cycle(cycle)
        CC_df = cycle_df[cycle_df['control/mA'] > 0]
        if voltage_range is not None:
            CC_df = CC_df[
                CC_df['Ecell/V'].between(voltage_range[0], voltage_range[1])]
        return CC_df

    def extract_voltage_current(self, cycle, control_column='control/V/mA',
                                voltage_column='Ecell/V',
                                current_column='<I>/mA') -> list:
        """Return the voltages of *cycle* recorded while the control is zero.

        Only rows where *control_column* equals 0 and *voltage_column* is
        above 4 are kept. Despite the method name, only voltage values are
        returned.

        Args:
            cycle: Cycle number to select.
            control_column: Column compared against 0.
            voltage_column: Column that is filtered (> 4) and returned.
            current_column: Accepted for interface compatibility; not used.

        Returns:
            The selected voltages as a plain list, in file order.

        Raises:
            ValueError: If *cycle* is not present in the data.
        """
        cycle_data = self.get_cycle(cycle)
        at_rest = cycle_data[control_column] == 0
        above_threshold = cycle_data[voltage_column] > 4
        return cycle_data.loc[at_rest & above_threshold, voltage_column].tolist()

    def _cycles_to_process(self) -> list:
        """Return the cycle numbers >= 2 that are actually present in the data."""
        # Integral values are converted to plain ints so the 'Cycle' column
        # keeps the integer form regardless of the CSV column's dtype.
        return [int(c) if float(c).is_integer() else c
                for c in self.cycle_index if c >= 2]

    def generate_data(self, voltage_range=(4.0, 4.1), num_points=10,
                      output_path=None,
                      capacity_jump_threshold=100) -> pd.DataFrame:
        """Build one feature row per cycle and optionally save them as CSV.

        For every cycle numbered 2 or higher, the charge capacity of the
        positive-``'control/mA'`` rows is interpolated onto *num_points*
        evenly spaced voltages spanning *voltage_range* and expressed
        relative to its first point. Cycles with fewer than two such rows
        inside the voltage window are skipped. Afterwards, rows whose
        discharge capacity differs from the preceding row's by at least
        *capacity_jump_threshold* are dropped.

        Args:
            voltage_range: ``(start, end)`` voltage window.
            num_points: Number of interpolation points.
            output_path: If truthy, the result is written there with
                ``DataFrame.to_csv(index=False)``.
            capacity_jump_threshold: Minimum absolute difference in
                ``'Discharge_Capacity'`` between consecutive rows that
                marks the later row as an outlier.

        Returns:
            DataFrame with columns ``Cycle``, ``Charge_Current``,
            ``Discharge_Current``, ``Temperature``, ``Capacity_Increment``
            (list), ``Relaxation_Voltage`` (list), ``Discharge_Capacity``,
            ``Start_Voltage`` and ``End_Voltage``. The index keeps gaps where
            outlier rows were removed.
        """
        start_voltage, end_voltage = voltage_range[0], voltage_range[1]
        # The interpolation grid does not depend on the cycle.
        f_voltage = np.linspace(start_voltage, end_voltage, num_points)

        rows = []
        # Iterate over the cycle numbers that exist rather than
        # range(2, cycle_life + 1): a gap or a different starting number in
        # the file would otherwise make get_cycle() raise ValueError.
        for cycle in self._cycles_to_process():
            cycle_data = self.get_cycle(cycle)
            CC_df = self.get_CC_stage(cycle=cycle, voltage_range=voltage_range)
            voltage = CC_df['Ecell/V'].values
            capacity = CC_df['Q charge/mA.h'].values
            if len(voltage) < 2:
                continue

            # np.interp requires increasing x-coordinates and does not check
            # them; reorder only when needed so monotonic data is untouched.
            if np.any(np.diff(voltage) < 0):
                order = np.argsort(voltage, kind='stable')
                voltage, capacity = voltage[order], capacity[order]

            f_capacity = np.interp(f_voltage, voltage, capacity)
            f_capacity_increment = f_capacity - f_capacity[0]

            rows.append([
                cycle,
                self.charge_c_rate,
                self.discharge_c_rate,
                self.temperature,
                f_capacity_increment.tolist(),
                self.extract_voltage_current(cycle),
                cycle_data['Q discharge/mA.h'].max(),
                start_voltage,
                end_voltage,
            ])

        df = pd.DataFrame(rows, columns=[
            'Cycle', 'Charge_Current', 'Discharge_Current', 'Temperature',
            'Capacity_Increment', 'Relaxation_Voltage', 'Discharge_Capacity',
            'Start_Voltage', 'End_Voltage',
        ])

        if len(df) > 1:
            # Each row is compared with the row directly before it in the
            # unfiltered table (even if that row is itself dropped).
            jump = df['Discharge_Capacity'].diff().abs() >= capacity_jump_threshold
            df = df.drop(index=df.index[jump.to_numpy()])

        if output_path:
            df.to_csv(output_path, index=False)
        return df

In [None]:
import os
from pathlib import Path

# Convert every raw CSV in `input_folder` into a cleaned feature table of the
# same name inside `output_folder`.
input_folder = 'raw-dataset'
# Build the path from its components: the former literal 'dataset\UL-NCA'
# contains "\U", which Python parses as the start of a \UXXXXXXXX unicode
# escape and rejects with a SyntaxError. Joining the parts also yields the
# correct separator on every platform.
output_folder = os.path.join('dataset', 'UL-NCA')

Path(output_folder).mkdir(parents=True, exist_ok=True)

# Sorted so that files are processed (and logged) in a reproducible order.
for file_name in sorted(os.listdir(input_folder)):
    if not file_name.endswith('.csv'):
        continue
    input_file_path = os.path.join(input_folder, file_name)
    output_file_path = os.path.join(output_folder, file_name)
    battery = Battery(path=input_file_path)
    # generate_data() writes the CSV itself when given an output path.
    df_cleaned = battery.generate_data(output_path=output_file_path)

    print(f'Processed file: {file_name} -> Saved to: {output_file_path}')

print("All files processed and saved successfully!")