In [None]:
from scipy import interpolate
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pprint
import os
from pathlib import Path
from matplotlib.colors import LinearSegmentedColormap
class Battery:
    """Cycle-level access to a battery cycling log stored in a CSV file.

    The methods below read the columns 'Cycle_Index', 'Current(A)',
    'Voltage(V)', 'Charge_Capacity(Ah)' and 'Discharge_Capacity(Ah)'.

    Attributes:
        path: Path of the CSV file that was loaded.
        df: The full log as a pandas DataFrame.
        cycle_index: Sorted unique values of the 'Cycle_Index' column.
        cycle_life: Number of distinct cycles in the log.
    """

    # Column layout shared by both feature-table generators.
    _FEATURE_COLUMNS = [
        'Cycle', 'Charge_Current', 'Discharge_Current', 'Temperature',
        'Capacity_Increment', 'Relaxation_Voltage', 'Discharge_Capacity',
        'Start_Voltage', 'End_Voltage',
    ]

    # Reference cycles that must not be used as interpolation anchors, keyed
    # by the battery number parsed from the file name.
    # NOTE(review): presumably cycles with unreliable capacity readings -
    # confirm with the dataset owner.
    _SKIP_ANCHOR_CYCLES = {
        3:   [221, 226],
        5:   [161, 166, 171],
        10:  [116],
        13:  [261, 266],
        14:  [266, 271, 276, 281],
        15:  [256, 261, 266, 271],
        16:  [266],
        17:  [81, 86],
        18:  [101, 106, 111, 116, 216],
        19:  [76, 81, 86, 236, 241],
        20:  [86, 91, 176, 196, 241, 246],
        21:  [821, 826, 831, 836, 1221, 1511],
        22:  [736, 741, 746, 751, 756, 761, 766, 811, 816, 1086],
        23:  [116, 721, 726, 731, 736, 741, 1106, 1111],
        24:  [946, 951, 956],
        27:  [316],
        29:  [156, 161, 166, 171, 176],
        30:  [216, 221, 226],
        31:  [281],
        32:  [231, 236],
        37:  [36, 41, 46],
        40:  [166, 171],
        41:  [206],
        42:  [196, 201],
        43:  [16, 266],
        44:  [16],
        47:  [56],
        48:  [56, 61],
        51:  [216, 221],
        52:  [216, 221],
        53:  [201],
        54:  [201],
        55:  [136],
        56:  [201],
        59:  [151, 201],
        60:  [241],
        63:  [106, 111],
        64:  [96, 101],
        68:  [276],
        69:  [346, 601],
        70:  [316, 561],
        73:  [106],
        74:  [226, 336, 491, 496],
        77:  [11, 511, 516],
        79:  [56, 61, 66, 446],
        80:  [56, 61, 66, 446],
        83:  [396],
        84:  [41],
        85:  [261],
        86:  [276],
        87:  [146, 151],
        88:  [191],
    }

    def __init__(self, path=r''):
        """Load the CSV at *path* and index the cycles it contains."""
        self.path = path
        self.df = pd.read_csv(path)

        self.cycle_index = self._get_cycle_index()
        self.cycle_life = len(self.cycle_index)

    def _get_cycle_index(self):
        """Return the sorted unique values of the 'Cycle_Index' column."""
        return np.unique(self.df['Cycle_Index'].values)

    def _check(self, cycle=None, variable=None):
        """Validate a cycle number and/or a column name.

        Args:
            cycle: Cycle number to validate; skipped when None.
            variable: Column name to validate; skipped when None.

        Returns:
            True when every supplied argument is valid.

        Raises:
            ValueError: If *cycle* is not one of the loaded cycle numbers or
                *variable* is not a column of the log.
        """
        if cycle is not None and cycle not in self.cycle_index:
            lowest = int(self.cycle_index.min())
            highest = int(self.cycle_index.max())
            raise ValueError(f'cycle should be in [{lowest},{highest}]')
        if variable is not None and variable not in self.df.columns:
            raise ValueError(f'variable should be in {list(self.df.columns)}')
        return True

    def get_cycle(self, cycle):
        """Return the rows of the log that belong to *cycle*.

        Raises:
            ValueError: If *cycle* is not present in the log.
        """
        self._check(cycle=cycle)
        return self.df[self.df['Cycle_Index'] == cycle]

    def get_degradation_trajectory(self):
        """Return the maximum 'Discharge_Capacity(Ah)' of every cycle, in cycle order."""
        return [
            self.get_cycle(cycle)['Discharge_Capacity(Ah)'].max()
            for cycle in self.cycle_index
        ]

    def get_value(self, cycle, variable):
        """Return the values of column *variable* within *cycle* as an array.

        Raises:
            ValueError: If *cycle* or *variable* is unknown.
        """
        self._check(cycle=cycle, variable=variable)
        return self.get_cycle(cycle)[variable].values

    def get_CC_stage(self, cycle, voltage_range=None):
        """Return the rows of *cycle* whose current is positive.

        Args:
            cycle: Cycle number to select.
            voltage_range: Optional (low, high) pair; when given, only rows
                whose 'Voltage(V)' lies inside it (bounds included) are kept.

        Raises:
            ValueError: If *cycle* is not present in the log.
        """
        cycle_df = self.get_cycle(cycle)
        cc_df = cycle_df[cycle_df['Current(A)'] > 0]

        if voltage_range is not None:
            low, high = voltage_range[0], voltage_range[1]
            cc_df = cc_df[cc_df['Voltage(V)'].between(low, high)]

        return cc_df

    def extract_voltage_current(self, cycle, control_column='Current(A)', voltage_column='Voltage(V)', current_column='<I>/mA'):
        """Return the voltages recorded at zero current above 4 V in *cycle*.

        Args:
            cycle: Cycle number to select.
            control_column: Column that must equal 0 for a row to be kept.
            voltage_column: Column that must exceed 4 and whose values are
                returned.
            current_column: Unused; kept so existing callers keep working.

        Returns:
            A list of the selected voltage values.
        """
        cycle_data = self.get_cycle(cycle)
        at_rest = cycle_data[control_column] == 0
        above_threshold = cycle_data[voltage_column] > 4
        return cycle_data[at_rest & above_threshold][voltage_column].tolist()

    def _capacity_increment(self, cycle, voltage_range, num_points):
        """Resample charge capacity over a uniform voltage grid for one cycle.

        Returns the resampled capacity relative to its first grid point as a
        list, or None when the cycle has fewer than two usable samples.
        """
        cc_df = self.get_CC_stage(cycle=cycle, voltage_range=voltage_range)
        voltage = cc_df['Voltage(V)'].values
        capacity = cc_df['Charge_Capacity(Ah)'].values
        if len(voltage) < 2:
            return None

        grid = np.linspace(voltage_range[0], voltage_range[1], num_points)
        # NOTE(review): np.interp expects increasing x-coordinates; the
        # voltage samples are passed in log order, as before - confirm they
        # are monotonic for this dataset.
        resampled = np.interp(grid, voltage, capacity)
        return (resampled - resampled[0]).tolist()

    def generate_capacity_increment_data(self, voltage_range=(3.8, 4.1), num_points=50, output_path=None):
        """Build a per-cycle feature table, starting at cycle 2.

        Cycles are taken from the cycle numbers actually present in the log,
        so gaps in 'Cycle_Index' no longer raise. Cycles with fewer than two
        samples inside *voltage_range* are left out.

        Args:
            voltage_range: (low, high) voltage window to resample over.
            num_points: Number of points in the uniform voltage grid.
            output_path: If truthy, the table is also written there as CSV.

        Returns:
            A DataFrame with the columns listed in ``_FEATURE_COLUMNS``.
        """
        start_voltage = voltage_range[0]
        end_voltage = voltage_range[1]
        charge_current = 1.2
        discharge_current = 0.5
        temperature = 25

        rows = []
        for cycle in self.cycle_index:
            if cycle < 2:
                continue
            increment = self._capacity_increment(cycle, voltage_range, num_points)
            if increment is None:
                continue
            # NOTE(review): the 'Discharge_Capacity' field is filled from the
            # 'Charge_Capacity(Ah)' column - confirm this is intended.
            discharge_capacity = self.get_cycle(cycle)['Charge_Capacity(Ah)'].max()
            rows.append([
                int(cycle),
                charge_current,
                discharge_current,
                temperature,
                increment,
                self.extract_voltage_current(cycle),
                discharge_capacity,
                start_voltage,
                end_voltage,
            ])

        df = pd.DataFrame(rows, columns=self._FEATURE_COLUMNS)

        # Drop rows whose capacity jumps by 100 or more relative to the row
        # directly before them in the unfiltered table.
        # NOTE(review): the threshold of 100 is kept from the original code -
        # confirm its unit.
        if len(df) > 1:
            jump = df['Discharge_Capacity'].diff().abs() >= 100
            df = df[~jump]

        if output_path:
            df.to_csv(output_path, index=False)

        return df

    def generate_capacity_increment_data3(self, voltage_range=(3.8, 4.1), num_points=50, output_path=None):
        """Build a per-cycle feature table with interpolated discharge capacity.

        Cycles whose number satisfies ``cycle % 5 == 1`` act as anchors: their
        maximum 'Discharge_Capacity(Ah)' is taken as measured, except for the
        anchors listed in ``_SKIP_ANCHOR_CYCLES`` for this battery. The
        battery number is the first run of digits in the file name. The
        capacity of every cycle is then linearly interpolated between the
        anchors over the cycle number, and held constant outside the first
        and last anchor. Cycles are taken from the cycle numbers actually
        present in the log, so gaps in 'Cycle_Index' no longer raise.

        Args:
            voltage_range: (low, high) voltage window to resample over.
            num_points: Number of points in the uniform voltage grid.
            output_path: If truthy, the table is also written there as CSV.

        Returns:
            A DataFrame with the columns listed in ``_FEATURE_COLUMNS``. When
            no usable anchor exists an empty table is returned and nothing is
            written to *output_path*.
        """
        import re  # local import: `re` is not among the file-level imports

        start_voltage, end_voltage = voltage_range

        match = re.search(r'(\d+)', os.path.basename(self.path))
        battery_number = int(match.group(1)) if match else None
        skipped = set(self._SKIP_ANCHOR_CYCLES.get(battery_number, ()))

        anchor_cycles = []
        anchor_values = []
        for cycle in self.cycle_index:
            if cycle % 5 != 1 or int(cycle) in skipped:
                continue
            anchor_cycles.append(cycle)
            anchor_values.append(self.get_cycle(cycle)['Discharge_Capacity(Ah)'].max())

        if not anchor_cycles:
            return pd.DataFrame(columns=self._FEATURE_COLUMNS)

        if len(anchor_cycles) == 1:
            capacity_series = np.full(len(self.cycle_index), float(anchor_values[0]), dtype=float)
        else:
            capacity_series = np.interp(
                np.asarray(self.cycle_index, dtype=float),
                np.asarray(anchor_cycles, dtype=float),
                np.asarray(anchor_values, dtype=float),
            )

        charge_current = 1.2
        temperature = 25

        rows = []
        for cycle, discharge_capacity in zip(self.cycle_index, capacity_series):
            increment = self._capacity_increment(cycle, voltage_range, num_points)
            if increment is None:
                continue
            # Anchor cycles are recorded with 0.5, all other cycles with 2.4.
            discharge_current = 0.5 if cycle % 5 == 1 else 2.4
            rows.append([
                int(cycle), charge_current, discharge_current, temperature,
                increment, self.extract_voltage_current(cycle), float(discharge_capacity),
                start_voltage, end_voltage,
            ])

        df = pd.DataFrame(rows, columns=self._FEATURE_COLUMNS)

        if output_path:
            df.to_csv(output_path, index=False)

        return df


In [None]:
import os
from pathlib import Path

# Convert every CSV log in `input_folder` into a feature table saved under
# `output_folder` with the same file name.
input_folder = r'D:\Gitload\Battery-dataset-preprocessing-code-library\Cell report\Second_life_phase'
# Built with os.path.join: a plain 'dataset\LSD' literal contains the invalid
# escape sequence '\L' and only denotes a nested directory on Windows.
output_folder = os.path.join('dataset', 'LSD')

Path(output_folder).mkdir(parents=True, exist_ok=True)

# Sorted so that the processing order does not depend on the file system.
for file_name in sorted(os.listdir(input_folder)):
    if not file_name.endswith('.csv'):
        continue

    input_file_path = os.path.join(input_folder, file_name)
    output_file_path = os.path.join(output_folder, file_name)

    battery = Battery(path=input_file_path)
    df_cleaned = battery.generate_capacity_increment_data3(output_path=output_file_path)

    print(f'Processed file: {file_name} -> Saved to: {output_file_path}')

print("All files processed and saved successfully!")