In [1]:
#Library
import pandas as pd
from datetime import timedelta
from sklearn.svm import SVR
import matplotlib.pyplot as plt
import joblib
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Training files, written as '<temperature folder>/<test id>_Mixed<k>' without
# the '.csv' suffix. In every temperature folder the Mixed1-Mixed2 files carry
# one test id and the Mixed3-Mixed8 files carry another.
_mixed_test_ids = {
    # folder: (id used for Mixed1-2, id used for Mixed3-8)
    '10degC': ('752', '756'),
    '25degC': ('734', '740'),
    '40degC': ('710', '722'),
}

train_names = [
    f"{folder}/{early_id if k <= 2 else late_id}_Mixed{k}"
    for folder, (early_id, late_id) in _mixed_test_ids.items()
    for k in range(1, 9)
]

# Test files, written as '<temperature folder>/<test id>_<cycle name>' without
# the '.csv' suffix.
# Each entry must be followed by a comma: adjacent string literals without one
# are silently concatenated by Python into a single (non-existent) file name.
test_names = [

        '10degC/752_LA92',
        '10degC/752_UDDS',

        '25degC/734_LA92',
        '25degC/734_UDDS',

        '40degC/710_LA92',
        '40degC/710_UDDS',

        ]

# Base directory of the dataset CSV files. _get_data builds each file name as
# path + name + '.csv', so the trailing '/' is required.
# NOTE(review): this is an absolute, machine-specific location; it has to be
# edited before the notebook can run anywhere else.
path = 'C:/Kuliah/Skripsi/Dataset/Samsung INR21700 30T/'

In [2]:
# Load the training and test cycles and min-max scale their features
def get_discharge_whole_cycle(train_names, test_names, scale_test=False):
    """Load both file lists and normalise their input features.

    Args:
        train_names: file names (relative to ``path``, without '.csv') of the
            training cycles.
        test_names: file names of the test cycles, same convention.
        scale_test: forwarded to ``_scale_x``; when true the test features are
            scaled with the statistics of the training cycles.

    Returns:
        A ``(train, test)`` tuple, each a list of ``(x, y)`` array pairs as
        produced by ``_get_data``.
    """
    loaded_train = _get_data(train_names)
    loaded_test = _get_data(test_names)
    scaled_train, scaled_test = _scale_x(loaded_train, loaded_test, scale_test=scale_test)
    return (scaled_train, scaled_test)

# Load every named CSV and turn it into per-second (features, target) arrays
def _get_data(names):
    """Read each cycle file and reduce it to one-second-averaged arrays.

    For every name the file ``path + name + '.csv'`` is read (``path`` is the
    module-level base directory), the first 30 lines are skipped and the 15
    columns are renamed. Only rows whose ``Status`` is ``"TABLE"`` or ``"DCH"``
    are kept (presumably the discharge steps -- confirm against the logger
    documentation).

    The target is derived from the ``Capacity`` column: it is shifted by the
    magnitude of its minimum and then divided by the maximum of the shifted
    values, i.e. it is a ratio relative to the largest value in the cycle (not
    multiplied by 100, despite the column name).

    ``Prog Time`` is converted to seconds, made relative to the first kept
    row, rounded to whole seconds, and all rows that fall on the same second
    are averaged.

    Args:
        names: iterable of file names relative to ``path``, without '.csv'.

    Returns:
        A list with one ``(x, y)`` tuple per file. ``x`` has the columns
        Voltage, Current, Temperature; ``y`` has the single column
        SoC Percentage (2-D, one column).

    Raises:
        ValueError: if a file contains no ``TABLE``/``DCH`` rows.
    """
    column_names = ['Time Stamp', 'Step', 'Status', 'Prog Time', 'Step Time', 'Cycle',
                    'Cycle Level', 'Procedure', 'Voltage', 'Current', 'Temperature',
                    'Capacity', 'WhAccu', 'Cnt', 'Empty']

    cycles = []
    for name in names:
        cycle = pd.read_csv(path + name + '.csv', skiprows=30)
        cycle.columns = column_names

        # .copy() gives an independent frame, so the columns added below do not
        # write into a slice of the raw data (avoids SettingWithCopyWarning).
        cycle = cycle[cycle["Status"].isin(["TABLE", "DCH"])].copy()
        if cycle.empty:
            raise ValueError("no TABLE/DCH rows found in '" + name + ".csv'")

        # Series.min()/max() are vectorised and ignore NaN, unlike the builtins.
        max_discharge = abs(cycle["Capacity"].min())
        cycle["SoC Capacity"] = max_discharge + cycle["Capacity"]
        cycle["SoC Percentage"] = cycle["SoC Capacity"] / cycle["SoC Capacity"].max()

        cycle['Prog Time'] = cycle['Prog Time'].apply(_time_string_to_seconds)
        # The filter above keeps the original row labels, so label 0 may no
        # longer exist; .iloc[0] always addresses the first remaining row.
        elapsed = cycle['Prog Time'] - cycle['Prog Time'].iloc[0]
        cycle['Time in Seconds'] = elapsed.round()

        # Collapse all samples that fall on the same whole second to their mean.
        cycle_per_second = cycle.groupby('Time in Seconds').agg({
            'Voltage': 'mean',
            'Current': 'mean',
            'Temperature': 'mean',
            'SoC Percentage': 'mean'
        }).reset_index()

        x = cycle_per_second[["Voltage", "Current", "Temperature"]].to_numpy()
        y = cycle_per_second[["SoC Percentage"]].to_numpy()

        cycles.append((x, y))

    return cycles

# Function to Transform Time Sampling into Seconds
def _time_string_to_seconds(input_string):
    time_parts = input_string.split(':')
    second_parts = time_parts[2].split('.')
    return timedelta(hours=int(time_parts[0]), 
        minutes=int(time_parts[1]), 
        seconds=int(second_parts[0]), 
        microseconds=int(second_parts[1])).total_seconds()

# Function to Normalize Dataset
def _scale_x(train, test, scale_test=False):
    for index_feature in range(len(train[0][0][0])):
        feature_min = min([min(cycle[0][:,index_feature]) for cycle in train])
        feature_max = max([max(cycle[0][:,index_feature]) for cycle in train])
        for i in range(len(train)):
            train[i][0][:,index_feature] = (train[i][0][:,index_feature]-feature_min)/(feature_max-feature_min)
        if scale_test:
            for i in range(len(test)):
                test[i][0][:,index_feature] = (test[i][0][:,index_feature]-feature_min)/(feature_max-feature_min)

    return train, test

In [3]:
# Run the whole loading pipeline: read the train and test files and scale the
# features of both sets with the training statistics.
cycles = get_discharge_whole_cycle(
    train_names=train_names,
    test_names=test_names,
    scale_test=True,
)

In [4]:
# Unpack the (train, test) pair and stack the per-cycle arrays of each set
# into one feature matrix and one flat target vector.
train_cycles, test_cycles = cycles

train_features = [features for features, _ in train_cycles]
train_targets = [target for _, target in train_cycles]
train_x = np.concatenate(train_features)
train_y = np.concatenate(train_targets).ravel()  # (n, 1) -> (n,)

test_features = [features for features, _ in test_cycles]
test_targets = [target for _, target in test_cycles]
test_x = np.concatenate(test_features)
test_y = np.concatenate(test_targets).ravel()  # (n, 1) -> (n,)

In [None]:
# Hold out 20 % of the stacked training samples as a validation set.
# shuffle=False keeps the samples in their original order (the validation part
# is the tail of the array); random_state has no effect without shuffling.
X_train, X_val, y_train, y_val = train_test_split(
    train_x,
    train_y,
    test_size=0.2,
    random_state=0,
    shuffle=False,
)

In [None]:
# Fit a support vector regressor with an RBF kernel on the training data.
# NOTE(review): the fit uses the full train_x / train_y; the X_train / X_val
# split created in this notebook is not used here -- confirm this is intended.
svr_params = dict(kernel='rbf', C=100, gamma=0.125)
svr_model = SVR(**svr_params)
svr_model.fit(train_x, train_y)

In [None]:
# Persist the fitted model to disk (file is written to the working directory)
model_file = 'svm_model8.pkl'
joblib.dump(svr_model, model_file)

In [None]:
# Predict the target for every test sample
predicted_test_y = svr_model.predict(test_x)

# Overlay the predicted and the actual series, sample by sample
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(test_y, label='Actual')
ax.plot(predicted_test_y, label='Predicted')
ax.set_title('SVR RBF Kernel: Predicted vs Actual')
ax.set_xlabel('Index')
ax.set_ylabel('SoC Percentage')
ax.legend()
plt.show()

In [None]:
# Error of the predictions on the test set:
# root-mean-squared error and mean absolute error.
rmse = np.sqrt(mean_squared_error(test_y, predicted_test_y))
mae = mean_absolute_error(test_y, predicted_test_y)

for label, value in (("RMSE:", rmse), ("MAE:", mae)):
    print(label, value)