In [1]:
import random
import scipy.io
import pandas as pd
import numpy as np

import matplotlib.pylab as plt 
from sklearn.preprocessing import MinMaxScaler
from pycaret.regression import RegressionExperiment

In [2]:
def to_df(mat_db):
    """Convert a loaded battery ``.mat`` struct into one DataFrame per cycle type.

    Parameters
    ----------
    mat_db : array-like
        Battery struct, e.g. ``scipy.io.loadmat(path)[name]``. The cycles are
        read from ``mat_db[0][0][0][0]``. For every cycle the first three
        fields are taken as ``type``, ``ambient_temperature`` and ``time``
        (only the first element of each field is kept) and the last field
        holds the measured series.

    Returns
    -------
    dict of str -> pd.DataFrame
        Keys ``'charge'``, ``'discharge'`` and ``'impedance'``. Each frame has
        one row per measurement sample, the feature columns of that cycle
        type, an ``id_cycle`` column (position of the cycle in the file) and
        the three common cycle columns. A cycle type that never occurs maps
        to an empty DataFrame.

    Raises
    ------
    KeyError
        If a cycle reports a type other than the three listed above.
    """

    # Features common for every cycle
    cycles_cols = ['type', 'ambient_temperature', 'time']

    # Features monitored during the cycle
    features_cols = {
        'charge': ['Voltage_measured', 'Current_measured', 'Temperature_measured',
                   'Current_charge', 'Voltage_charge', 'Time'],
        'discharge': ['Voltage_measured', 'Current_measured', 'Temperature_measured',
                      'Current_charge', 'Voltage_charge', 'Time', 'Capacity'],
        'impedance': ['Sense_current', 'Battery_current', 'Current_ratio',
                      'Battery_impedance', 'Rectified_impedance', 'Re', 'Rct']
    }

    # Per-cycle frames are gathered here and concatenated once per type at the
    # end. Concatenating inside the loop would re-copy every previously added
    # row for each new cycle.
    frames = {key: [] for key in features_cols}

    # Get every cycle (first element of each field)
    cycles = [[row.flat[0] for row in line] for line in mat_db[0][0][0][0]]

    # Get measures for every cycle
    for cycle_id, cycle_data in enumerate(cycles):
        cycle_type = cycle_data[0]

        # Feature names for this cycle type and the matching data series
        features = features_cols[cycle_type]
        features_x_cycle = cycle_data[-1]

        tmp = pd.DataFrame()
        for feature, data in zip(features, features_x_cycle):
            if len(data[0]) > 1:
                # One value per sample
                tmp[feature] = data[0]
            else:
                # Single value, so assign it to all rows
                tmp[feature] = data[0][0]

        # Add columns common to the cycle measurements
        tmp['id_cycle'] = cycle_id
        for k, col in enumerate(cycles_cols):
            tmp[col] = cycle_data[k]

        frames[cycle_type].append(tmp)

    # pd.concat rejects an empty list, so unseen cycle types get an empty frame
    return {
        key: pd.concat(parts, ignore_index=True) if parts else pd.DataFrame()
        for key, parts in frames.items()
    }


def Mat2List(dfs_mat):
    """Build (features, label) samples from one battery struct.

    For every charge cycle the last recorded temperature, current and voltage
    form the feature vector. The label is the mean ``Capacity`` of the
    discharge cycle found at ``id_cycle + 1`` (or at ``id_cycle + 2`` when
    there is none at ``+ 1``), divided by the ``Capacity`` of the first
    discharge row. Charge cycles with no such discharge cycle, or whose label
    is not strictly positive (including NaN), are skipped.

    Parameters
    ----------
    dfs_mat : array-like
        Battery struct accepted by ``to_df``.

    Returns
    -------
    tuple of (list of list of float, list of float)
        ``X`` holds one ``[temperature, current, voltage]`` row per kept
        charge cycle and ``y`` the matching labels. Both are empty when the
        battery has no charge or no discharge cycles.
    """
    dfs_B = to_df(dfs_mat)
    df_cycle_charge = dfs_B['charge']
    df_cycle_discharge = dfs_B['discharge']

    X = []
    y = []

    # Without both kinds of cycle no sample can be labelled; the frames are
    # column-less in that case, so bail out before indexing them.
    if df_cycle_charge.empty or df_cycle_discharge.empty:
        return X, y

    # Reference capacity: the very first discharge row
    init_cap = float(df_cycle_discharge['Capacity'].iloc[0])

    # Mean capacity of every discharge cycle, computed once instead of
    # re-filtering the whole discharge frame for each charge cycle.
    mean_capacity = df_cycle_discharge.groupby('id_cycle')['Capacity'].mean()

    # sort=False keeps the cycles in order of first appearance
    for i, charge in df_cycle_charge.groupby('id_cycle', sort=False):
        # Find the corresponding discharge cycle, falling back to the next id
        capacity = mean_capacity.get(i + 1)
        if capacity is None:
            capacity = mean_capacity.get(i + 2)
        if capacity is None:
            continue

        label = float(capacity / init_cap)

        # `not label > 0` also rejects NaN, which `label <= 0` would let through
        if not label > 0:
            continue

        # Last sample of the charge cycle
        X.append([
            float(charge['Temperature_measured'].iloc[-1]),
            float(charge['Current_measured'].iloc[-1]),
            float(charge['Voltage_measured'].iloc[-1]),
        ])
        y.append(label)

    return X, y

# Directory holding the battery .mat recordings. Each file stores its struct
# under a top-level key equal to the file's base name.
DATA_DIR = './DATA/1. BatteryAgingARC-FY08Q4'
BATTERY_NAMES = ['B0005', 'B0006', 'B0007', 'B0018']

# Load every battery once and unwrap the struct from the loadmat dictionary
raw_batteries = {
    name: scipy.io.loadmat(f'{DATA_DIR}/{name}.mat')[name]
    for name in BATTERY_NAMES
}

# Keep the individual notebook-level names available for other cells
B0005 = raw_batteries['B0005']
B0006 = raw_batteries['B0006']
B0007 = raw_batteries['B0007']
B0018 = raw_batteries['B0018']

# Per-cycle-type DataFrames for each battery
dfs_B0005 = to_df(B0005)
dfs_B0006 = to_df(B0006)
dfs_B0007 = to_df(B0007)
dfs_B0018 = to_df(B0018)

# Build the training samples from three of the four batteries.
# B0018 is loaded but deliberately not part of this list
# (presumably held out for testing - confirm).
batt_list = [B0005, B0006, B0007]
X_train = []
Y_train = []
for batt in batt_list:
    X, Y = Mat2List(batt)
    X_train += X
    Y_train += Y

# Backward-compatible alias for the original (misspelled) variable name
Y_trian = Y_train

# Assemble features (T, I, V) and the SOH label into a single training frame
tiv_df = pd.DataFrame(X_train, columns=['T', 'I', 'V'])
label_df = pd.DataFrame(Y_train, columns=['SOH'])
df_train = pd.concat([tiv_df, label_df], axis=1)

In [5]:
# Create the PyCaret regression experiment and configure it on the assembled
# training frame, with the 'SOH' column as the regression target.
s = RegressionExperiment()
# session_id is echoed as "Session id" in the setup summary; presumably it
# pins the random seed so the train/test split is repeatable - confirm
# against the PyCaret documentation.
# NOTE(review): use_gpu=True - the captured log shows LightGBM's GPU trainer
# running on an "Intel(R) UHD Graphics 770" device; confirm that the GPU path
# is actually wanted on this machine.
s.setup(df_train, target = 'SOH', session_id = 123, use_gpu = True) 


  File "c:\Users\ime203\anaconda3\envs\liion\lib\site-packages\joblib\externals\loky\backend\context.py", line 282, in _count_physical_cores
    raise ValueError(f"found {cpu_count_physical} physical cores < 1")


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Using GPU Device: Intel(R) UHD Graphics 770, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 16 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Start training from score 0.500000
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Using GPU Device: Intel(R) UHD Graphics 770, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 16 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Start training from score 0.500000
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] 

Unnamed: 0,Description,Value
0,Session id,123
1,Target,SOH
2,Target type,Regression
3,Original data shape,"(507, 4)"
4,Transformed data shape,"(507, 4)"
5,Transformed train set shape,"(354, 4)"
6,Transformed test set shape,"(153, 4)"
7,Numeric features,3
8,Preprocess,True
9,Imputation type,simple


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Using GPU Device: Intel(R) UHD Graphics 770, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 16 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Start training from score 0.500000
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Using GPU Device: Intel(R) UHD Graphics 770, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 16 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Start training from score 0.500000


<pycaret.regression.oop.RegressionExperiment at 0x2650b49f850>

In [6]:
# Evaluate the regressors available in the experiment (the resulting
# comparison table is displayed as the cell output) and keep the returned
# model in `best` - presumably the top-ranked one; confirm against the
# PyCaret documentation.
best = s.compare_models()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
gbr,Gradient Boosting Regressor,0.0715,0.0081,0.0897,0.3247,0.0496,0.0898,0.057
et,Extra Trees Regressor,0.0689,0.0083,0.0905,0.322,0.0503,0.0871,0.085
lightgbm,Light Gradient Boosting Machine,0.0728,0.0086,0.0923,0.3008,0.0512,0.0918,0.838
ada,AdaBoost Regressor,0.0807,0.0089,0.0942,0.2887,0.052,0.1009,0.041
rf,Random Forest Regressor,0.071,0.0087,0.0926,0.2855,0.0513,0.0893,0.102
catboost,CatBoost Regressor,0.0709,0.0086,0.0922,0.2842,0.0509,0.0888,5.807
xgboost,Extreme Gradient Boosting,0.0741,0.0094,0.0962,0.225,0.0532,0.0932,0.198
br,Bayesian Ridge,0.0843,0.0101,0.1001,0.205,0.0553,0.1054,0.008
lr,Linear Regression,0.0842,0.0101,0.1002,0.2035,0.0553,0.1052,0.009
lar,Least Angle Regression,0.0842,0.0101,0.1002,0.2035,0.0553,0.1052,0.008


In [None]:
# Persist the selected model under the name 'tuned_gbr'.
# NOTE(review): `best` comes straight from compare_models(); no tuning step is
# visible in this part of the notebook, so "tuned" in the name may be
# misleading. This cell also carries no execution count (In [None]), i.e. it
# had not been run when the notebook was saved.
s.save_model(best, 'tuned_gbr')