In [27]:
import numpy as np 
import matplotlib.pylab as plt 
import scipy.io
import pandas as pd

def to_df(mat_db):
    """Returns one pd.DataFrame per cycle type"""

    # Features common for every cycle
    cycles_cols = ['type', 'ambient_temperature', 'time']

    # Features monitored during the cycle
    features_cols = {
        'charge': ['Voltage_measured', 'Current_measured', 'Temperature_measured', 
                   'Current_charge', 'Voltage_charge', 'Time'],
        'discharge': ['Voltage_measured', 'Current_measured', 'Temperature_measured', 
                      'Current_charge', 'Voltage_charge', 'Time', 'Capacity'],
        'impedance': ['Sense_current', 'Battery_current', 'Current_ratio',
                      'Battery_impedance', 'Rectified_impedance', 'Re', 'Rct']
    }

    # Define one pd.DataFrame per cycle type
    df = {key: pd.DataFrame() for key in features_cols.keys()}

    # Get every cycle
    cycles = [[row.flat[0] for row in line] for line in mat_db[0][0][0][0]]

    # Get measures for every cycle
    for cycle_id, cycle_data in enumerate(cycles):
        tmp = pd.DataFrame()

        # Data series for every cycle
        features_x_cycle = cycle_data[-1]

        # Get features for the specific cycle type
        features = features_cols[cycle_data[0]]
        
        for feature, data in zip(features, features_x_cycle):
            if len(data[0]) > 1:
                # Correct number of records
                tmp[feature] = data[0]
            else:
                # Single value, so assign it to all rows
                tmp[feature] = data[0][0]
        
        # Add columns common to the cycle measurements
        tmp['id_cycle'] = cycle_id
        for k, col in enumerate(cycles_cols):
            tmp[col] = cycle_data[k]
        
        # Append cycle data to the right pd.DataFrame using pd.concat()
        cycle_type = cycle_data[0]
        df[cycle_type] = pd.concat([df[cycle_type], tmp], ignore_index=True)
    
    return df

B0005 = scipy.io.loadmat('./DATA/1. BatteryAgingARC-FY08Q4/B0005.mat')
B0006 = scipy.io.loadmat('./DATA/1. BatteryAgingARC-FY08Q4/B0006.mat')
B0007 = scipy.io.loadmat('./DATA/1. BatteryAgingARC-FY08Q4/B0007.mat')
B0018 = scipy.io.loadmat('./DATA/1. BatteryAgingARC-FY08Q4/B0018.mat')

B0005 = B0005['B0005']
B0006 = B0006['B0006']
B0007 = B0007['B0007']
B0018 = B0018['B0018']
# Example usage
dfs_B0005 = to_df(B0005)
dfs_B0006 = to_df(B0006)
dfs_B0007 = to_df(B0007)
dfs_B0018 = to_df(B0018)


def Mat2List(dfs_mat):
    # Example usage
    dfs_B0005 = to_df(dfs_mat)

    df_cycle_charge = dfs_B0005['charge'] #['id_cycle']
    df_cycle_dicharge = dfs_B0005['discharge'] #['id_cycle']
    
    total_result = []

    for i in df_cycle_charge['id_cycle'].unique():
        # Filter charge data for the current cycle
        df = df_cycle_charge[df_cycle_charge['id_cycle'] == i]

        # Extract the required columns
        temperature = df['Temperature_measured'].tolist()
        current = df['Current_measured'].tolist()
        voltage = df['Voltage_measured'].tolist()

        # Find corresponding discharge data
        dis = df_cycle_dicharge[df_cycle_dicharge['id_cycle'] == i + 1]
        
        # Fallback to next cycle if discharge data is empty
        if dis.empty:
            dis = df_cycle_dicharge[df_cycle_dicharge['id_cycle'] == i + 2]

        # Calculate the label (mean capacity), handle if still empty
        label = dis['Capacity'].mean() if not dis.empty else None

        # Skip if label is None
        if label is None:
            continue

        # Create the feature-label tuple
        # result = [list(zip(temperature, current, voltage)), label]
        result = [[temperature, current, voltage], label]
        # result = np.array(np.array(zip(temperature, current, voltage)), label)
        total_result.append(result)

    # Check the resulting dataset
    print(f"Total results: {len(total_result)}")

    return total_result


batt_list = [B0005,B0006,B0007]
# batt_list = [B0005,B0006,]
df_train = []
for i in batt_list:
    df_train+=Mat2List(i)

df_test = Mat2List(B0018)

Total results: 169
Total results: 169
Total results: 169
Total results: 133


In [26]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import numpy as np
import pandas as pd


# 데이터셋 정의
class SequenceDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sequence, label = self.data[idx]
        sequence = torch.tensor(sequence, dtype=torch.float32).unsqueeze(0)  # Add channel dimension
        label = torch.tensor(label, dtype=torch.float32)
        return sequence, label


# NaN 값 제거
def clean_data(data):
    return [[seq, label] for seq, label in data if not (np.isnan(seq).any() or np.isnan(label))]


# 패딩을 적용하는 collate_fn
def collate_fn(batch):
    sequences, labels = zip(*batch)
    max_length = max(seq.shape[2] for seq in sequences)  # 최대 길이 계산 (seq_length 위치)
    padded_sequences = [
        torch.nn.functional.pad(seq, (0, max_length - seq.shape[2], 0, 0))  # Feature_dim 위치 고려
        for seq in sequences
    ]
    padded_sequences = torch.stack(padded_sequences)  # (batch_size, channels, seq_length, feature_dim)
    labels = torch.tensor(labels, dtype=torch.float32)
    return padded_sequences, labels


# 모델 정의
class Conv3D2D_LSTM(nn.Module):
    def __init__(self, input_channels, conv3d_out_channels, conv2d_out_channels,
                 lstm_hidden_dim, lstm_num_layers, output_dim, dropout=0.1):
        super(Conv3D2D_LSTM, self).__init__()
        self.conv3d = nn.Sequential(
            nn.Conv3d(input_channels, conv3d_out_channels, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=(1, 2, 2))  # Adjust pooling size to match dimensions
        )
        self.conv2d = nn.Sequential(
            nn.Conv2d(conv3d_out_channels, conv2d_out_channels, kernel_size=(3, 3), padding=(1, 1)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2))
        )
        self.lstm = nn.LSTM(input_size=conv2d_out_channels, hidden_size=lstm_hidden_dim,
                            num_layers=lstm_num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(lstm_hidden_dim, output_dim)

    def forward(self, x):
        # x: (batch_size, channels, seq_length, feature_dim)
        x = x.permute(0, 1, 3, 2)  # (batch_size, channels, feature_dim, seq_length)
        x = x.unsqueeze(2)  # Add depth dimension for Conv3D
        x = self.conv3d(x)  # (batch_size, conv3d_out_channels, 1, reduced_feature_dim, reduced_seq_length)
        x = x.squeeze(2)  # Remove the depth dimension
        x = self.conv2d(x)  # (batch_size, conv2d_out_channels, reduced_height, reduced_width)
        x = x.flatten(start_dim=2).permute(0, 2, 1)  # Prepare for LSTM: (batch_size, seq_length, feature_dim)
        x, _ = self.lstm(x)  # LSTM output: (batch_size, seq_length, lstm_hidden_dim)
        x = self.fc(x[:, -1, :])  # Use the last time step's output
        return x


# 모델 학습 함수
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device):
    model.to(device)
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for sequences, labels in train_loader:
            sequences, labels = sequences.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(sequences)
            loss = criterion(outputs.squeeze(), labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        train_loss /= len(train_loader)

        # Validation phase
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for sequences, labels in val_loader:
                sequences, labels = sequences.to(device), labels.to(device)
                outputs = model(sequences)
                loss = criterion(outputs.squeeze(), labels)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        print(f"Epoch [{epoch + 1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")


# 모델 평가 함수
def evaluate_model(model, data_loader, device):
    model.eval()
    predictions, true_labels = [], []
    with torch.no_grad():
        for sequences, labels in data_loader:
            sequences, labels = sequences.to(device), labels.to(device)
            outputs = model(sequences)
            predictions.extend(outputs.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    predictions, true_labels = np.array(predictions), np.array(true_labels)
    rmse = np.sqrt(np.mean((predictions - true_labels) ** 2))
    mape = np.mean(np.abs((true_labels - predictions) / true_labels)) * 100
    return rmse, mape


# 하이퍼파라미터 설정
input_channels = 1
conv3d_out_channels = 8
conv2d_out_channels = 16
lstm_hidden_dim = 32
lstm_num_layers = 2
output_dim = 1
dropout = 0.1
num_epochs = 20
learning_rate = 1e-3
batch_size = 5

# 장치 설정
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 데이터 로드 및 전처리
train_data = clean_data(df_train)
val_data = clean_data(df_test)

train_dataset = SequenceDataset(train_data)
val_dataset = SequenceDataset(val_data)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

# 모델, 손실 함수 및 최적화기 초기화
model = Conv3D2D_LSTM(input_channels, conv3d_out_channels, conv2d_out_channels,
                      lstm_hidden_dim, lstm_num_layers, output_dim, dropout)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# 모델 학습
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device)

# 모델 평가
rmse, mape = evaluate_model(model, val_loader, device)
print(f"RMSE: {rmse:.4f}, MAPE: {mape:.2f}%")


RuntimeError: Given input size: (16x1925x1). Calculated output size: (16x962x0). Output size is too small

In [24]:
# 학습 및 평가
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device)

# 평가
rmse, mape = evaluate_model(model, val_loader, device)
print(f"RMSE: {rmse:.4f}, MAPE: {mape:.2f}%")


  return torch.tensor(sequence, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)


RuntimeError: Given input size: (8x1x3x3843). Calculated output size: (8x0x1x1921). Output size is too small

In [25]:
for sequences, labels in train_loader:
    print(f"Input shape: {sequences.shape}")
    break


Input shape: torch.Size([5, 1, 3, 3900])


  return torch.tensor(sequence, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)


In [22]:
# 하이퍼파라미터 설정
input_dim = 3  # 각 데이터 포인트의 차원
hidden_dim = 16
num_layers = 2
dropout = 0.1
num_epochs = 100
learning_rate = 1e-3

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 모델 초기화
model = Conv3D2D_LSTM(input_dim, hidden_dim, num_layers, dropout)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 학습
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device)

# 평가
rmse, mape = evaluate_model(model, val_loader, device)
print(f"RMSE: {rmse:.4f}")
print(f"MAPE: {mape:.2f}%")

TypeError: Conv3D2D_LSTM.__init__() missing 2 required positional arguments: 'lstm_num_layers' and 'output_dim'

In [8]:
# 하이퍼파라미터 설정
input_dim = 3  # 각 데이터 포인트의 차원
hidden_dim = 16
num_layers = 8
dropout = 0.1
num_epochs = 200
learning_rate = 1e-3

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 모델 초기화
model = LSTMRegressionModel(input_dim, hidden_dim, num_layers, dropout)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 학습
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device)

# 평가
rmse, mape = evaluate_model(model, val_loader, device)
print(f"RMSE: {rmse:.4f}")
print(f"MAPE: {mape:.2f}%")

  return torch.tensor(sequence, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)
  sequences = [torch.tensor(seq, dtype=torch.float32) for seq in sequences]


Epoch [1/200], Train Loss: 1.1272, Val Loss: 0.0257
Epoch [2/200], Train Loss: 0.0455, Val Loss: 0.0255
Epoch [3/200], Train Loss: 0.0450, Val Loss: 0.0253
Epoch [4/200], Train Loss: 0.0447, Val Loss: 0.0270
Epoch [5/200], Train Loss: 0.0442, Val Loss: 0.0244
Epoch [6/200], Train Loss: 0.0461, Val Loss: 0.0246
Epoch [7/200], Train Loss: 0.0448, Val Loss: 0.0261
Epoch [8/200], Train Loss: 0.0454, Val Loss: 0.0243
Epoch [9/200], Train Loss: 0.0449, Val Loss: 0.0253
Epoch [10/200], Train Loss: 0.0452, Val Loss: 0.0315
Epoch [11/200], Train Loss: 0.0460, Val Loss: 0.0307
Epoch [12/200], Train Loss: 0.0441, Val Loss: 0.0265
Epoch [13/200], Train Loss: 0.0442, Val Loss: 0.0254
Epoch [14/200], Train Loss: 0.0450, Val Loss: 0.0299
Epoch [15/200], Train Loss: 0.0456, Val Loss: 0.0275
Epoch [16/200], Train Loss: 0.0457, Val Loss: 0.0242
Epoch [17/200], Train Loss: 0.0454, Val Loss: 0.0242
Epoch [18/200], Train Loss: 0.0462, Val Loss: 0.0255
Epoch [19/200], Train Loss: 0.0450, Val Loss: 0.0276
Ep