In [1]:
import pandas as pd
import numpy as np
import random
import os
import time
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import xgboost as xgb
from sklearn.model_selection import train_test_split

from numpy import hstack, vstack
import itertools
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from itertools import product

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score

from torch.utils.data import DataLoader, TensorDataset

import warnings
warnings.filterwarnings(action='ignore')

In [2]:
def find_directory(foldername, filename = None, back_num = 0):
    """Build a path relative to the current working directory, creating folders on demand.

    Parameters
    ----------
    foldername : sequence of str
        Folder names appended, in order, below the starting directory.
        An empty sequence means "the starting directory itself".
    filename : str, optional
        If given (anything other than ``None``), it is appended to the folder path.
        The file itself is never created or checked.
    back_num : int, default 0
        Number of parent-directory hops applied to ``os.getcwd()`` before
        ``foldername`` is appended.

    Returns
    -------
    str
        The resulting folder path, or file path when ``filename`` is given.

    Side effects
    ------------
    Creates the folder path when it does not exist and prints ``'<path> created'``.
    """
    cur = os.getcwd()
    for _ in range(back_num):
        cur = os.path.abspath(os.path.join(cur, os.pardir))

    # Append every folder component in one call; with an empty list this is a no-op.
    cur = os.path.join(cur, *foldername)

    if not os.path.exists(cur):
        # exist_ok guards against the folder appearing between the check and the call.
        os.makedirs(cur, exist_ok = True)
        print(f'{cur} created')

    # Identity comparison is the correct test for "no filename supplied".
    if filename is not None:
        cur = os.path.join(cur, filename)
    return cur

# Show the current working directory: find_directory() builds every path starting from it.
os.getcwd()

'c:\\Users\\Ansh\\Desktop\\MEST\\HMG3\\Rapid_fin\\deep learning modeling'

In [3]:
# Path of the input CSV directly inside the current working directory
# (empty folder list, no parent-directory hops).
csv_add = find_directory(foldername = [], filename = 'SOC_Point_Data.csv')
# The first five CSV columns become the row MultiIndex.
# NOTE(review): Get_Data() later addresses index levels named "Next", "Path", "Number"
# and "Time" and selects the first level by RPT mode; presumably those names come
# from the CSV header of these five columns, so confirm against the file.
dat = pd.read_csv(csv_add, index_col = (0,1,2,3,4))

In [4]:
def Get_Data(dat, rpt_mode = "0.1C", soc_range = (9, 10, 11, 12), time_range = range(6, 15, 2)) :
    """Build per-sample features and targets from the SOC point table.

    Parameters
    ----------
    dat : pandas.DataFrame
        Table with a row MultiIndex whose first level is selected by ``rpt_mode``
        and whose remaining levels include "Next", "Path", "Number" and "Time".
        It must contain the columns "0" ... "16" (sliced by label) and
        "SOH", "Next_SOH", "Ratio_SOH", "Ratio_CYC".
    rpt_mode : str, default "0.1C"
        Key selected on the first index level.
    soc_range : iterable, default (9, 10, 11, 12)
        Feature columns to keep; each entry is converted with ``str`` to obtain
        the column label.
    time_range : iterable, default range(6, 15, 2)
        Values of the "Time" level whose rows are kept for the features.

    Returns
    -------
    X_std : pandas.DataFrame
        Standard deviation (pandas ``groupby().std()``) of the selected columns
        over the kept "Time" rows, one row per (Next, Path, Number).
    Y : pandas.DataFrame
        Mean of the four target columns per (Next, Path, Number), plus a
        "Delta_SOH" column equal to ``Next_SOH - SOH``.
    """
    group_levels = ["Next", "Path", "Number"]
    soc_columns = [str(i) for i in soc_range]

    # Selecting the RPT mode drops the first index level; columns are sliced by label.
    X = dat.loc[rpt_mode, "0" : "16"]
    Y = dat.loc[rpt_mode, ["SOH", "Next_SOH", "Ratio_SOH", "Ratio_CYC"]].groupby(level = group_levels).mean()

    # SOH change towards the next step, appended as an extra target column.
    y = pd.Series(Y["Next_SOH"] - Y["SOH"], name = "Delta_SOH")
    Y = pd.concat([Y, y], axis = 1)

    # Keep only the requested time steps and SOC columns before aggregating.
    keep_rows = X.index.get_level_values("Time").isin(list(time_range))
    X_seek = X.loc[keep_rows, soc_columns]

    X_std = X_seek.groupby(level = group_levels).std()

    return X_std, Y

In [5]:
def Even_Split(X, y, test_size, rs) :
    """Split X and y into train/test parts, stratified by "Next" label and "Path" length.

    The rows are grouped by the value of the "Next" index level ("M", "D", "H")
    and, inside each of those, by the string length of the "Path" index level
    (1 to 4 characters). ``train_test_split`` is applied to every group with the
    same ``test_size`` and ``random_state``, and the pieces are concatenated in
    the order M, D, H and, within each, path length 1 to 4.

    Rows whose "Path" length is outside 1..4 are not part of any group and are
    therefore absent from all four outputs.
    NOTE(review): ``train_test_split`` presumably raises if a group is empty or
    too small for ``test_size``; confirm the data always fills every group.

    Parameters
    ----------
    X, y : pandas objects indexed by a MultiIndex with "Next" and "Path" levels.
    test_size : passed through to ``train_test_split``.
    rs : passed through as ``random_state``.

    Returns
    -------
    X_tn, X_te, y_tn, y_te
    """
    next_keys = ("M", "D", "H")
    path_lengths = range(1, 5)

    # Slice once per "Next" label, keeping the level so the index layout is unchanged.
    X_by_next = {key: X.xs(key = key, level = 'Next', drop_level = False) for key in next_keys}
    y_by_next = {key: y.xs(key = key, level = 'Next', drop_level = False) for key in next_keys}

    # Collect every (Next, path length) group in output order before splitting.
    groups = []
    for key in next_keys:
        X_key, y_key = X_by_next[key], y_by_next[key]
        for n_chars in path_lengths:
            X_rows = X_key.index.get_level_values(level = 'Path').str.len() == n_chars
            y_rows = y_key.index.get_level_values(level = 'Path').str.len() == n_chars
            groups.append((X_key.loc[X_rows], y_key.loc[y_rows]))

    train_X, test_X, train_y, test_y = [], [], [], []
    for X_group, y_group in groups:
        part_X_tn, part_X_te, part_y_tn, part_y_te = train_test_split(
            X_group, y_group, test_size = test_size, random_state = rs
        )
        train_X.append(part_X_tn)
        test_X.append(part_X_te)
        train_y.append(part_y_tn)
        test_y.append(part_y_te)

    return pd.concat(train_X), pd.concat(test_X), pd.concat(train_y), pd.concat(test_y)

In [6]:
def get_next_tensor(index_list):
    """One-hot encode the "Next" label found in the first position of each index entry.

    Parameters
    ----------
    index_list : iterable of sequences
        Each entry's element ``[0]`` must be one of 'M', 'D' or 'H'
        (any other value raises ``KeyError``).

    Returns
    -------
    torch.Tensor
        Float tensor of shape ``(len(index_list), 3)`` with columns ordered M, D, H.
        An empty input yields a ``(0, 3)`` tensor.
    """
    next_mapping = {'M': 0, 'D': 1, 'H': 2}
    next_index = [next_mapping[idx[0]] for idx in index_list]
    # Force an integer dtype: an empty list would otherwise become a float tensor,
    # which one_hot rejects.
    next_tensor = torch.tensor(next_index, dtype=torch.long)
    one_hot = torch.nn.functional.one_hot(next_tensor, num_classes=len(next_mapping)).float()
    return one_hot

In [7]:
def setRandomSeed(random_seed=0):
    """Seed Python, NumPy and PyTorch random generators with the same value.

    Also stores the seed in the ``PYTHONHASHSEED`` environment variable and
    switches cuDNN to deterministic mode with benchmarking disabled.

    Parameters
    ----------
    random_seed : int, default 0
        Seed applied to every generator.
    """
    # Python-side generators and the hash-seed environment variable.
    random.seed(random_seed)
    np.random.seed(random_seed)
    os.environ['PYTHONHASHSEED'] = str(random_seed)

    # PyTorch: CPU generator, current CUDA device, then all CUDA devices.
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(random_seed)

    # cuDNN: deterministic kernels, no auto-tuner.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

In [8]:
def mape(y_true, y_pred, eps=1e-7):
    """Mean absolute percentage error, ignoring targets that are (almost) zero.

    Parameters
    ----------
    y_true, y_pred : array-like
        Ground-truth and predicted values; both are converted to float arrays.
    eps : float, default 1e-7
        Entries with ``|y_true| <= eps`` are excluded to avoid dividing by ~0.

    Returns
    -------
    float
        The error in percent, or ``nan`` when no entry passes the ``eps`` filter.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    usable = np.abs(actual) > eps
    if not usable.any():
        return np.nan

    actual, predicted = actual[usable], predicted[usable]
    relative_error = np.abs((actual - predicted) / actual)
    return np.mean(relative_error) * 100

def mape_by_next(y_true, y_pred, next_labels, labels=('M', 'D', 'H')):
    """Compute MAPE separately for each "Next" label.

    Parameters
    ----------
    y_true, y_pred : array-like
        Ground-truth and predicted values, aligned element by element.
    next_labels : array-like
        Label of each element, aligned with ``y_true`` / ``y_pred``.
    labels : iterable, default ('M', 'D', 'H')
        Labels to report.

    Returns
    -------
    dict
        ``{label: mape(...)}`` for every entry of ``labels``. The value is whatever
        ``mape`` returns for the selected subset (``nan`` for a label with no rows).
    """
    # Coerce to arrays so the elementwise comparison and boolean masking below also
    # work for plain lists (a list compared to a str yields one scalar False).
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    next_labels = np.asarray(next_labels)

    out = {}
    for k in labels:
        m = (next_labels == k)
        out[k] = mape(y_true[m], y_pred[m])
    return out

In [9]:
# Grid search: every hyper-parameter combination is trained once per split seed.
random_states = [100, 120, 140, 160, 180]

Max_depth     = [2, 4, 6, 8, 10]
N_estimators  = [1000]
Subsamples    = [0.8, 0.9, 1.0]
Colsample     = [0.9, 1.0]
Learning_rate = [0.001, 0.01, 0.1]

result_columns = ['rs', 'depth', 'ne', 'sub', 'col', 'lr', 'Train MAPE', 'Val MAPE', 'Test MAPE', 'Next M MAPE', 'Next D MAPE', 'Next H MAPE']

next_map = {'M': 0, 'D': 1, 'H': 2}
def get_next(idx):
    """One-hot encode the "Next" level of a MultiIndex as an (n, 3) float array (M, D, H)."""
    labels = np.array([next_map[i] for i in idx.get_level_values("Next")])
    onehot = np.eye(3, dtype=float)[labels]
    return onehot

X, y = Get_Data(dat)

# Rows are collected in a list and turned into a DataFrame once at the end;
# growing a DataFrame with pd.concat inside the loop is quadratic and starts
# from an empty, dtype-less frame.
result_rows = []

for rs in random_states:
    setRandomSeed(rs)

    # The split, scaling and one-hot encoding depend only on the seed, so they are
    # prepared once per seed instead of once per hyper-parameter combination.
    X_tn, X_te, y_tn, y_te = Even_Split(X, y, 1/3, rs)
    X_tr, X_va, y_tr, y_va = Even_Split(X_tn, y_tn, 1/6, rs)

    # Scaler statistics come from the training part only.
    std_scaler = StandardScaler()
    X_tr_std   = std_scaler.fit_transform(X_tr)
    X_val_std  = std_scaler.transform(X_va)
    X_te_std   = std_scaler.transform(X_te)

    next_train = get_next(X_tr.index)
    next_val   = get_next(X_va.index)
    next_test  = get_next(X_te.index)

    # Final design matrix: scaled features followed by the one-hot "Next" columns.
    X_train    = np.concatenate([X_tr_std, next_train], axis = 1)
    X_val      = np.concatenate([X_val_std, next_val], axis = 1)
    X_test     = np.concatenate([X_te_std, next_test], axis = 1)

    y_train    = y_tr["Next_SOH"].values
    y_val      = y_va["Next_SOH"].values
    y_test     = y_te["Next_SOH"].values

    next_test_labels = X_te.index.get_level_values("Next").to_numpy()

    for depth, ne, sub, col, lr in product(Max_depth, N_estimators, Subsamples, Colsample, Learning_rate):
        # Re-seed before every fit, as the original per-iteration loop did.
        setRandomSeed(rs)

        model = xgb.XGBRegressor(
            max_depth = depth,
            n_estimators = ne,
            learning_rate = lr,
            subsample = sub,
            colsample_bytree = col,
            random_state = rs,
            eval_metric = 'rmse',
            early_stopping_rounds = 50
        )

        # Early stopping monitors the validation part only.
        _ = model.fit(
            X_train, y_train,
            eval_set = [(X_val, y_val)],
            verbose = False
        )

        y_pred_tr = model.predict(X_train)
        y_pred_va = model.predict(X_val)
        y_pred_te = model.predict(X_test)

        train_mape = mape(y_train, y_pred_tr)
        val_mape   = mape(y_val,   y_pred_va)
        test_mape  = mape(y_test,  y_pred_te)

        by_next = mape_by_next(y_test, y_pred_te, next_test_labels)

        result_rows.append([rs, depth, ne, sub, col, lr, train_mape, val_mape, test_mape, by_next['M'], by_next['D'], by_next['H']])

results_df = pd.DataFrame(result_rows, columns = result_columns)



In [10]:
# Average every metric over the split seeds: one row per hyper-parameter combination.
summary_df = (
    results_df
    .groupby(['depth', 'ne', 'sub', 'col', 'lr'])
    [['Train MAPE', 'Val MAPE', 'Test MAPE', 'Next M MAPE', 'Next D MAPE', 'Next H MAPE']]
    .mean()
    .reset_index()
)
# Rendered as cell output because ast_node_interactivity is set to "all".
summary_df
# Persist both the per-seed results and the seed-averaged summary in the working directory.
summary_df.to_csv('XGB_next_info_sum.csv')
results_df.to_csv('XGB_next_info.csv')

Unnamed: 0,depth,ne,sub,col,lr,Train MAPE,Val MAPE,Test MAPE,Next M MAPE,Next D MAPE,Next H MAPE
0,2,1000,0.8,0.9,0.001,1.016107,1.092332,1.060781,1.183655,0.984109,1.014577
1,2,1000,0.8,0.9,0.010,0.687852,0.808056,0.820977,0.935189,0.756396,0.771345
2,2,1000,0.8,0.9,0.100,0.640689,0.810043,0.827024,0.940312,0.781551,0.759210
3,2,1000,0.8,1.0,0.001,1.013764,1.091534,1.060064,1.181907,0.983255,1.015031
4,2,1000,0.8,1.0,0.010,0.669771,0.806887,0.817729,0.936964,0.751382,0.764841
...,...,...,...,...,...,...,...,...,...,...,...
85,10,1000,1.0,0.9,0.010,0.186024,0.890970,0.877410,0.983996,0.834794,0.813440
86,10,1000,1.0,0.9,0.100,0.185443,0.904276,0.894729,0.992270,0.863404,0.828514
87,10,1000,1.0,1.0,0.001,0.794794,1.150288,1.074844,1.190681,1.002371,1.031480
88,10,1000,1.0,1.0,0.010,0.220841,0.959940,0.926989,1.039692,0.869580,0.871694
