In [2]:
from bounds import bounds
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import ast
from collections import Counter
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Masking, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf

# %% [markdown]
# ### 1. Read Excel Data and Organize It

# %%
file_name = "DataOn2025Jan08.xlsx"
df1 = pd.read_excel(file_name, sheet_name="NES170K07Line2")
df2 = pd.read_excel(file_name, sheet_name="NES170K07Line1")
df = pd.concat([df1, df2], ignore_index=True)
print("Data shape:", df.shape)

# Get t5 thresholds from bounds dictionary
t5_lb = bounds["170K"][0]
t5_ub = bounds["170K"][1]

def safe_literal_eval(value):
    """Safely evaluate a string representation, replacing 'nan' with None."""
    if isinstance(value, str):
        value = value.replace("nan", "None")
    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError):
        return None

def organized_data(df, t5_lb, t5_ub):
    """
    Process each row to extract the time-series (MDR), target t5,
    and assign a region label ('low', 'normal', 'high') based on thresholds.
    """
    data = {}
    for index, row in df.iterrows():
        if pd.isna(row['t5']):
            continue
        batch_number = row["batch_number"]
        data[batch_number] = {"MDR": None, "t5": row["t5"], "class": None}

        t_S1 = safe_literal_eval(row["MDRTorqueS1"])
        t_S2 = safe_literal_eval(row["MDRTorqueS2"])
        if t_S1 is not None and t_S2 is not None:
            # Unpack the tuples (time, value)
            t_vals, S1 = zip(*t_S1)
            t_vals, S2 = zip(*t_S2)
            t_vals, S1, S2 = list(t_vals), list(S1), list(S2)
            # Exclude the first element as indicated
            MDR = pd.DataFrame({
                "time": t_vals[1:],
                "S1": S1[1:],
                "S2": S2[1:],
            })
            MDR.interpolate(method="linear", inplace=True, limit_direction="both")
            MDR.fillna(method="bfill", inplace=True)
            MDR.fillna(method="ffill", inplace=True)
        else:
            continue

        data[batch_number]["MDR"] = MDR

        # Assign class label based on t5 thresholds
        if row["t5"] < t5_lb:
            data[batch_number]["class"] = "low"
        elif row["t5"] > t5_ub:
            data[batch_number]["class"] = "high"
        else:
            data[batch_number]["class"] = "normal"

    # Remove batches with empty MDR
    data = {k: v for k, v in data.items() if v["MDR"] is not None and not v["MDR"].empty}
    return data

data = organized_data(df, t5_lb, t5_ub)
print(f"# low: {len({k: v for k, v in data.items() if v['class']=='low'})}")
print(f"# high: {len({k: v for k, v in data.items() if v['class']=='high'})}")
print(f"# normal: {len({k: v for k, v in data.items() if v['class']=='normal'})}")

# %% [markdown]
# ### 2. Prepare Data for Training
# We convert the variable-length sequences to a padded format.
# We also scale the valid (non-padded) parts of the sequences.

# %%
X = []
y = []
# Map the labels to integers: low -> 0, normal -> 1, high -> 2
label_map = {"low": 0, "normal": 1, "high": 2}

for key, item in data.items():
    df_mdr = item["MDR"]
    # Use only the S1 and S2 columns as features
    sequence = df_mdr[["S1", "S2"]].values
    X.append(sequence)
    y.append(label_map[item["class"]])
y = np.array(y)

# Pad sequences (using -10 as pad value, so that Masking will ignore these)
max_len = max(seq.shape[0] for seq in X)
X_padded = pad_sequences(X, maxlen=max_len, dtype='float32', padding='post', truncating='post', value=-10.)

# Scale the data using a global scaler, ignoring the padded values.
all_points = []
for seq in X_padded:
    valid_rows = seq[~np.all(seq == -10., axis=1)]
    all_points.append(valid_rows)
all_points = np.concatenate(all_points, axis=0)

scaler = StandardScaler()
scaler.fit(all_points)

X_scaled = []
for seq in X_padded:
    seq_scaled = seq.copy()
    valid_mask = ~np.all(seq == -10., axis=1)
    if np.sum(valid_mask) > 0:
        seq_scaled[valid_mask] = scaler.transform(seq[valid_mask])
    X_scaled.append(seq_scaled)
X_scaled = np.array(X_scaled)

# %% [markdown]
# ### 3. Split Data into Train and Test Sets
# We'll split the dataset while preserving the class distribution.

# %%
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)

# For later reporting, create an inverse label map
inv_label_map = {v: k for k, v in label_map.items()}

# %% [markdown]
# ### 4. Build and Train Individual Binary (One-vs-All) Models
# We build a binary classifier for each class.
# Each model outputs the probability that the input belongs to that class.
# We use binary cross-entropy loss and a sigmoid output.
# You can adjust the architecture, epochs, and other parameters as needed.

# %%
def build_binary_model(input_shape):
    model = Sequential()
    model.add(Masking(mask_value=-10., input_shape=input_shape))
    model.add(LSTM(64))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

binary_models = {}

# Train one binary model per class (one-vs-all)
for cls in range(3):
    print(f"\nTraining binary model for class {cls} ({inv_label_map[cls]})")
    # Create binary labels: 1 if the sample belongs to this class, else 0.
    y_train_bin = (y_train == cls).astype(int)
    
    # Compute class weights for the binary problem
    classes_bin = np.unique(y_train_bin)
    class_weights_bin = compute_class_weight(class_weight='balanced', classes=classes_bin, y=y_train_bin)
    class_weight_dict_bin = {cls_val: weight for cls_val, weight in zip(classes_bin, class_weights_bin)}
    
    model_bin = build_binary_model((max_len, 2))
    model_bin.fit(X_train, y_train_bin, 
                  epochs=20, 
                  batch_size=32, 
                  validation_split=0.1, 
                  class_weight=class_weight_dict_bin,
                  verbose=1)
    binary_models[cls] = model_bin

# %% [markdown]
# ### 5. Combine the Models for Final Decision
# For each test sample, we obtain the probability from each binary model.
# Then we select the class with the highest probability.
# (You could also apply thresholds if desired.)

# %%
# For each model, predict probability on test data.
y_pred_probs = np.zeros((len(X_test), 3))
for cls in range(3):
    y_pred_probs[:, cls] = binary_models[cls].predict(X_test).flatten()

# Final prediction: choose the class with the highest probability.
y_pred_combined = np.argmax(y_pred_probs, axis=1)

# %% [markdown]
# ### 6. Evaluate the Combined Model
# We compute the confusion matrix and report per-class accuracy.

# %%
cm = confusion_matrix(y_test, y_pred_combined)
print("\nConfusion Matrix:")
print(cm)

# Compute accuracy for each individual class
class_accuracies = {}
for i in range(3):
    if cm[i].sum() > 0:
        acc = cm[i, i] / cm[i].sum()
    else:
        acc = 0.0
    class_accuracies[i] = acc
    print(f"Accuracy for class {i} ({inv_label_map[i]}): {acc:.4f}")

avg_class_acc = np.mean(list(class_accuracies.values()))
print(f"\nAverage Classification Accuracy: {avg_class_acc:.4f}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred_combined, target_names=["low", "normal", "high"]))


Data shape: (20528, 43)
# low: 365
# high: 677
# normal: 7297

Training binary model for class 0 (low)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Training binary model for class 1 (normal)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Training binary model for class 2 (high)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Confusion Matrix:
[[  8  45  20]
 [ 54 966 440]
 [  3  82  50]]
Accuracy for class 0 (low): 0.1096
Accuracy for class

Data Augmentation

In [3]:
from bounds import bounds
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import ast
from collections import Counter
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Masking, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf

# %% [markdown]
# ### 1. Read Excel Data and Organize It

# %%
file_name = "DataOn2025Jan08.xlsx"
df1 = pd.read_excel(file_name, sheet_name="NES170K07Line2")
df2 = pd.read_excel(file_name, sheet_name="NES170K07Line1")
df = pd.concat([df1, df2], ignore_index=True)
print("Data shape:", df.shape)

# Get t5 thresholds from bounds dictionary
t5_lb = bounds["170K"][0]
t5_ub = bounds["170K"][1]

def safe_literal_eval(value):
    """Safely evaluate a string representation, replacing 'nan' with None."""
    if isinstance(value, str):
        value = value.replace("nan", "None")
    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError):
        return None

def organized_data(df, t5_lb, t5_ub):
    """
    Process each row to extract the time-series (MDR), target t5,
    and assign a region label ('low', 'normal', 'high') based on thresholds.
    """
    data = {}
    for index, row in df.iterrows():
        if pd.isna(row['t5']):
            continue
        batch_number = row["batch_number"]
        data[batch_number] = {"MDR": None, "t5": row["t5"], "class": None}

        t_S1 = safe_literal_eval(row["MDRTorqueS1"])
        t_S2 = safe_literal_eval(row["MDRTorqueS2"])
        if t_S1 is not None and t_S2 is not None:
            # Unpack the tuples (time, value)
            t_vals, S1 = zip(*t_S1)
            t_vals, S2 = zip(*t_S2)
            t_vals, S1, S2 = list(t_vals), list(S1), list(S2)
            # Exclude the first element as indicated
            MDR = pd.DataFrame({
                "time": t_vals[1:],
                "S1": S1[1:],
                "S2": S2[1:],
            })
            MDR.interpolate(method="linear", inplace=True, limit_direction="both")
            MDR.fillna(method="bfill", inplace=True)
            MDR.fillna(method="ffill", inplace=True)
        else:
            continue

        data[batch_number]["MDR"] = MDR

        # Assign class label based on t5 thresholds
        if row["t5"] < t5_lb:
            data[batch_number]["class"] = "low"
        elif row["t5"] > t5_ub:
            data[batch_number]["class"] = "high"
        else:
            data[batch_number]["class"] = "normal"

    # Remove batches with empty MDR
    data = {k: v for k, v in data.items() if v["MDR"] is not None and not v["MDR"].empty}
    return data

data = organized_data(df, t5_lb, t5_ub)
print(f"# low: {len({k: v for k, v in data.items() if v['class']=='low'})}")
print(f"# high: {len({k: v for k, v in data.items() if v['class']=='high'})}")
print(f"# normal: {len({k: v for k, v in data.items() if v['class']=='normal'})}")

# %% [markdown]
# ### 2. Prepare Data for Training
# Convert the variable-length time-series sequences into padded arrays.
# We use -10 as the pad value so that the Masking layer can ignore it.
# After padding, we scale the valid (non-padded) values with a global StandardScaler.

# %%
X = []
y = []
# Map labels to integers: low -> 0, normal -> 1, high -> 2
label_map = {"low": 0, "normal": 1, "high": 2}
for key, item in data.items():
    df_mdr = item["MDR"]
    # Use only the S1 and S2 columns as features
    sequence = df_mdr[["S1", "S2"]].values
    X.append(sequence)
    y.append(label_map[item["class"]])
y = np.array(y)

# Pad sequences (padding/truncating at the end)
max_len = max(seq.shape[0] for seq in X)
X_padded = pad_sequences(X, maxlen=max_len, dtype='float32', padding='post', truncating='post', value=-10.)

# Scale the data: first, gather all valid (non-padded) rows
all_points = []
for seq in X_padded:
    valid_rows = seq[~np.all(seq == -10., axis=1)]
    all_points.append(valid_rows)
all_points = np.concatenate(all_points, axis=0)

scaler = StandardScaler()
scaler.fit(all_points)

# Apply the scaler to each sequence (only the valid timesteps)
X_scaled = []
for seq in X_padded:
    seq_scaled = seq.copy()
    valid_mask = ~np.all(seq == -10., axis=1)
    if np.sum(valid_mask) > 0:
        seq_scaled[valid_mask] = scaler.transform(seq[valid_mask])
    X_scaled.append(seq_scaled)
X_scaled = np.array(X_scaled)

# %% [markdown]
# ### 3. Split Data into Train and Test Sets
# We use stratification so that the class imbalance is preserved in the split.

# %%
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)
print("Training set shape:", X_train.shape)
print("Test set shape:", X_test.shape)

# For reporting purposes, create an inverse label map:
inv_label_map = {v: k for k, v in label_map.items()}

# %% [markdown]
# ### 4. Data Augmentation for Minority Classes
# We define a simple jittering function to add Gaussian noise to the valid parts
# of a time-series. Then, we oversample the minority classes (here, "low" and "high")
# to match the number of samples in the majority class (normally "normal").

# %%
def augment_time_series(sequence, noise_level=0.1):
    """
    Add Gaussian noise to valid (non-padded) timesteps in the sequence.
    The noise_level parameter controls the standard deviation of the noise.
    """
    augmented = sequence.copy()
    # Identify valid rows (those not equal to the pad value)
    valid_mask = ~np.all(sequence == -10., axis=1)
    if np.any(valid_mask):
        noise = np.random.normal(loc=0.0, scale=noise_level, size=augmented[valid_mask].shape)
        augmented[valid_mask] += noise
    return augmented

def augment_minority_class(X, y, target_class, target_count, noise_level=0.1):
    """
    For the given target_class, generate augmented samples using jittering
    until the total count for that class reaches target_count.
    """
    indices = np.where(y == target_class)[0]
    current_count = len(indices)
    num_to_augment = target_count - current_count
    augmented_X = []
    augmented_y = []
    for _ in range(num_to_augment):
        # Randomly choose one sample from the existing target_class samples
        idx = np.random.choice(indices)
        sample = X[idx]
        aug_sample = augment_time_series(sample, noise_level)
        augmented_X.append(aug_sample)
        augmented_y.append(target_class)
    return np.array(augmented_X), np.array(augmented_y)

# Check current training class distribution
unique, counts = np.unique(y_train, return_counts=True)
train_counts = dict(zip(unique, counts))
print("Training class distribution before augmentation:", train_counts)

# Define the target count as the count of the majority class
target_count = max(train_counts.values())

# Augment the minority classes (for example, classes 0 and 2)
augmented_X = []
augmented_y = []

for cls in [0, 2]:
    if train_counts[cls] < target_count:
        X_aug, y_aug = augment_minority_class(X_train, y_train, cls, target_count, noise_level=0.1)
        augmented_X.append(X_aug)
        augmented_y.append(y_aug)

if augmented_X:
    X_augmented = np.concatenate(augmented_X, axis=0)
    y_augmented = np.concatenate(augmented_y, axis=0)
    # Combine with the original training set
    X_train_aug = np.concatenate([X_train, X_augmented], axis=0)
    y_train_aug = np.concatenate([y_train, y_augmented], axis=0)
else:
    X_train_aug = X_train
    y_train_aug = y_train

# Shuffle the augmented training set
shuffle_idx = np.random.permutation(len(X_train_aug))
X_train_aug = X_train_aug[shuffle_idx]
y_train_aug = y_train_aug[shuffle_idx]

# Print new training set distribution
unique, counts = np.unique(y_train_aug, return_counts=True)
print("Training class distribution after augmentation:", dict(zip(unique, counts)))

# %% [markdown]
# ### 5. Build and Train the Model
# We use an LSTM-based network with a Masking layer (ignoring the padded value)
# and a Dense output layer with softmax activation. We train on the augmented dataset.

# %%
model = Sequential()
model.add(Masking(mask_value=-10., input_shape=(max_len, 2)))
model.add(LSTM(64))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

history = model.fit(
    X_train_aug, y_train_aug, 
    epochs=20, 
    batch_size=32, 
    validation_split=0.1,
    verbose=1
)

# %% [markdown]
# ### 6. Evaluate the Model
# We calculate the confusion matrix, per-class accuracy, and a full classification report.

# %%
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

# Predict on the test set
y_pred_prob = model.predict(X_test)
y_pred = np.argmax(y_pred_prob, axis=1)

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(cm)

# Per-class accuracy
class_accuracies = {}
for i in range(3):
    if cm[i].sum() > 0:
        acc = cm[i, i] / cm[i].sum()
    else:
        acc = 0.0
    class_accuracies[i] = acc
    print(f"Accuracy for class {i} ({inv_label_map[i]}): {acc:.4f}")

avg_class_acc = np.mean(list(class_accuracies.values()))
print(f"\nAverage Classification Accuracy: {avg_class_acc:.4f}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=["low", "normal", "high"]))


Data shape: (20528, 43)
# low: 365
# high: 677
# normal: 7297
Training set shape: (6671, 304, 2)
Test set shape: (1668, 304, 2)
Training class distribution before augmentation: {0: 292, 1: 5837, 2: 542}
Training class distribution after augmentation: {0: 5837, 1: 5837, 2: 5837}
Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 masking_4 (Masking)         (None, 304, 2)            0         
                                                                 
 lstm_4 (LSTM)               (None, 64)                17152     
                                                                 
 dropout_4 (Dropout)         (None, 64)                0         
                                                                 
 dense_4 (Dense)             (None, 3)                 195       
                                                                 
Total params: 17,347
Trainable params: 

Focal Loss Function

In [None]:
from bounds import bounds
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import ast
from collections import Counter
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Masking, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf

# %% [markdown]
# ### 1. Define the Custom Focal Loss Function
# This focal loss is designed for sparse (integer) labels. It converts the labels to one-hot,
# applies clipping for numerical stability, computes cross-entropy, and weights it based on the focal loss idea.

def sparse_focal_loss(gamma=2., alpha=0.25):
    """
    Focal Loss for multi-class classification with sparse labels.
    
    Args:
        gamma (float): Focusing parameter for modulating factor (1-p).
        alpha (float): Weighting factor for the rare class.
    
    Returns:
        A loss function that computes the focal loss.
    """
    def loss_fn(y_true, y_pred):
        y_true = tf.cast(y_true, tf.int32)
        # Convert sparse labels to one-hot encoding.
        y_true_one_hot = tf.one_hot(y_true, depth=tf.shape(y_pred)[-1])
        epsilon = 1e-7
        y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)
        # Compute cross-entropy loss.
        cross_entropy = -y_true_one_hot * tf.math.log(y_pred)
        # Compute the modulating factor.
        weights = alpha * tf.pow(1 - y_pred, gamma)
        loss = weights * cross_entropy
        # Sum the loss over classes, then average over the batch.
        return tf.reduce_mean(tf.reduce_sum(loss, axis=1))
    return loss_fn

# %% [markdown]
# ### 2. Read Excel Data and Organize It
# We read two sheets from the Excel file, concatenate them, and process each row to extract:
# - The multivariate time-series (MDR) with two columns S1 and S2.
# - The t5 scalar value.
# - The class label based on t5 thresholds (low, normal, high).

file_name = "DataOn2025Jan08.xlsx"
df1 = pd.read_excel(file_name, sheet_name="NES170K07Line2")
df2 = pd.read_excel(file_name, sheet_name="NES170K07Line1")
df = pd.concat([df1, df2], ignore_index=True)
print("Data shape:", df.shape)

# Get t5 thresholds from bounds dictionary
t5_lb = bounds["170K"][0]
t5_ub = bounds["170K"][1]

def safe_literal_eval(value):
    """Safely evaluate a string representation, replacing 'nan' with None."""
    if isinstance(value, str):
        value = value.replace("nan", "None")
    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError):
        return None

def organized_data(df, t5_lb, t5_ub):
    """
    Process each row to extract the time-series (MDR), target t5,
    and assign a region label ('low', 'normal', 'high') based on thresholds.
    """
    data = {}
    for index, row in df.iterrows():
        if pd.isna(row['t5']):
            continue
        batch_number = row["batch_number"]
        data[batch_number] = {"MDR": None, "t5": row["t5"], "class": None}

        t_S1 = safe_literal_eval(row["MDRTorqueS1"])
        t_S2 = safe_literal_eval(row["MDRTorqueS2"])
        if t_S1 is not None and t_S2 is not None:
            # Unpack tuples (time, value)
            t_vals, S1 = zip(*t_S1)
            t_vals, S2 = zip(*t_S2)
            t_vals, S1, S2 = list(t_vals), list(S1), list(S2)
            # Exclude the first element as indicated
            MDR = pd.DataFrame({
                "time": t_vals[1:],
                "S1": S1[1:],
                "S2": S2[1:],
            })
            MDR.interpolate(method="linear", inplace=True, limit_direction="both")
            MDR.fillna(method="bfill", inplace=True)
            MDR.fillna(method="ffill", inplace=True)
        else:
            continue

        data[batch_number]["MDR"] = MDR

        # Assign class label based on t5 thresholds
        if row["t5"] < t5_lb:
            data[batch_number]["class"] = "low"
        elif row["t5"] > t5_ub:
            data[batch_number]["class"] = "high"
        else:
            data[batch_number]["class"] = "normal"

    # Remove batches with empty MDR
    data = {k: v for k, v in data.items() if v["MDR"] is not None and not v["MDR"].empty}
    return data

data = organized_data(df, t5_lb, t5_ub)
print(f"# low: {len({k: v for k, v in data.items() if v['class']=='low'})}")
print(f"# high: {len({k: v for k, v in data.items() if v['class']=='high'})}")
print(f"# normal: {len({k: v for k, v in data.items() if v['class']=='normal'})}")

# %% [markdown]
# ### 3. Prepare Data for Training
# We build a list of sequences (each sequence is a 2D array with S1 and S2) and a list of labels.
# Because the sequences have varying lengths, we pad them to the same length (using -10 as the pad value).
# Then, we scale the valid (non-padded) parts of the sequences using a global StandardScaler.

X = []
y = []
# Map labels to integers: low -> 0, normal -> 1, high -> 2
label_map = {"low": 0, "normal": 1, "high": 2}

for key, item in data.items():
    df_mdr = item["MDR"]
    sequence = df_mdr[["S1", "S2"]].values
    X.append(sequence)
    y.append(label_map[item["class"]])
y = np.array(y)

# Pad sequences so that all have the same length.
max_len = max(seq.shape[0] for seq in X)
X_padded = pad_sequences(X, maxlen=max_len, dtype='float32', 
                         padding='post', truncating='post', value=-10.)

# Scale valid (non-padded) points.
all_points = []
for seq in X_padded:
    valid_rows = seq[~np.all(seq == -10., axis=1)]
    all_points.append(valid_rows)
all_points = np.concatenate(all_points, axis=0)

scaler = StandardScaler()
scaler.fit(all_points)

X_scaled = []
for seq in X_padded:
    seq_scaled = seq.copy()
    valid_mask = ~np.all(seq == -10., axis=1)
    if np.sum(valid_mask) > 0:
        seq_scaled[valid_mask] = scaler.transform(seq[valid_mask])
    X_scaled.append(seq_scaled)
X_scaled = np.array(X_scaled)

# %% [markdown]
# ### 4. Split Data into Train and Test Sets
# We split the data using stratification to preserve the class imbalance in both sets.

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)
print("Training set shape:", X_train.shape)
print("Test set shape:", X_test.shape)

# Create an inverse label map for reporting.
inv_label_map = {v: k for k, v in label_map.items()}

# %% [markdown]
# ### 5. Build and Train the Model Using Focal Loss
# We build a simple LSTM model with a Masking layer to ignore the padded values.
# The output layer uses softmax activation for the three classes.
# We compile the model with our custom sparse focal loss function.

model = Sequential()
model.add(Masking(mask_value=-10., input_shape=(max_len, 2)))
model.add(LSTM(64))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))

# Use our custom focal loss in the compile step.
model.compile(loss=sparse_focal_loss(gamma=2.0, alpha=0.25), optimizer='adam', metrics=['accuracy'])
model.summary()

history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1, verbose=1)

# %% [markdown]
# ### 6. Evaluate the Model
# We evaluate on the test set, print the confusion matrix, per-class accuracies, and a full classification report.

test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

# Get predictions on the test set.
y_pred_prob = model.predict(X_test)
y_pred = np.argmax(y_pred_prob, axis=1)

# Compute confusion matrix.
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(cm)

# Calculate per-class accuracy.
class_accuracies = {}
for i in range(3):
    if cm[i].sum() > 0:
        acc = cm[i, i] / cm[i].sum()
    else:
        acc = 0.0
    class_accuracies[i] = acc
    print(f"Accuracy for class {i} ({inv_label_map[i]}): {acc:.4f}")

avg_class_acc = np.mean(list(class_accuracies.values()))
print(f"\nAverage Classification Accuracy: {avg_class_acc:.4f}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=["low", "normal", "high"]))


Ensemble Models

In [None]:
from bounds import bounds
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import ast
from collections import Counter
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Masking, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
import random

# Set seeds for reproducibility (across Python, NumPy and TensorFlow)
seed_value = 42
random.seed(seed_value)
np.random.seed(seed_value)
tf.random.set_seed(seed_value)

# %% [markdown]
# ### 1. Read Excel Data and Organize It
# We read the Excel sheets, combine them, and then process each row to extract:
# - The MDR time-series with S1 and S2
# - The t5 scalar value
# - The class label ("low", "normal", "high") based on t5 thresholds

file_name = "DataOn2025Jan08.xlsx"
df1 = pd.read_excel(file_name, sheet_name="NES170K07Line2")
df2 = pd.read_excel(file_name, sheet_name="NES170K07Line1")
df = pd.concat([df1, df2], ignore_index=True)
print("Data shape:", df.shape)

# Get t5 thresholds from bounds dictionary
t5_lb = bounds["170K"][0]
t5_ub = bounds["170K"][1]

def safe_literal_eval(value):
    """Safely evaluate a string representation, replacing 'nan' with None."""
    if isinstance(value, str):
        value = value.replace("nan", "None")
    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError):
        return None

def organized_data(df, t5_lb, t5_ub):
    """
    Process each row to extract the time-series (MDR), target t5,
    and assign a region label ('low', 'normal', 'high') based on thresholds.
    """
    data = {}
    for index, row in df.iterrows():
        if pd.isna(row['t5']):
            continue
        batch_number = row["batch_number"]
        data[batch_number] = {"MDR": None, "t5": row["t5"], "class": None}

        t_S1 = safe_literal_eval(row["MDRTorqueS1"])
        t_S2 = safe_literal_eval(row["MDRTorqueS2"])
        if t_S1 is not None and t_S2 is not None:
            # Unpack tuples (time, value)
            t_vals, S1 = zip(*t_S1)
            t_vals, S2 = zip(*t_S2)
            t_vals, S1, S2 = list(t_vals), list(S1), list(S2)
            # Exclude the first element as indicated
            MDR = pd.DataFrame({
                "time": t_vals[1:],
                "S1": S1[1:],
                "S2": S2[1:],
            })
            MDR.interpolate(method="linear", inplace=True, limit_direction="both")
            MDR.fillna(method="bfill", inplace=True)
            MDR.fillna(method="ffill", inplace=True)
        else:
            continue

        data[batch_number]["MDR"] = MDR

        # Assign class label based on t5 thresholds
        if row["t5"] < t5_lb:
            data[batch_number]["class"] = "low"
        elif row["t5"] > t5_ub:
            data[batch_number]["class"] = "high"
        else:
            data[batch_number]["class"] = "normal"

    # Remove batches with empty MDR
    data = {k: v for k, v in data.items() if v["MDR"] is not None and not v["MDR"].empty}
    return data

data = organized_data(df, t5_lb, t5_ub)
print(f"# low: {len({k: v for k, v in data.items() if v['class']=='low'})}")
print(f"# high: {len({k: v for k, v in data.items() if v['class']=='high'})}")
print(f"# normal: {len({k: v for k, v in data.items() if v['class']=='normal'})}")

# %% [markdown]
# ### 2. Prepare Data for Training
# Convert the variable-length sequences into padded arrays.
# We use -10 as the pad value so that the Masking layer ignores it.
# Then we scale only the valid (non-padded) data points using a global StandardScaler.

X = []
y = []
# Map labels to integers: low -> 0, normal -> 1, high -> 2
label_map = {"low": 0, "normal": 1, "high": 2}

for key, item in data.items():
    df_mdr = item["MDR"]
    sequence = df_mdr[["S1", "S2"]].values
    X.append(sequence)
    y.append(label_map[item["class"]])
y = np.array(y)

# Pad sequences
max_len = max(seq.shape[0] for seq in X)
X_padded = pad_sequences(X, maxlen=max_len, dtype='float32',
                         padding='post', truncating='post', value=-10.)

# Scale valid (non-padded) points
all_points = []
for seq in X_padded:
    valid_rows = seq[~np.all(seq == -10., axis=1)]
    all_points.append(valid_rows)
all_points = np.concatenate(all_points, axis=0)

scaler = StandardScaler()
scaler.fit(all_points)

X_scaled = []
for seq in X_padded:
    seq_scaled = seq.copy()
    valid_mask = ~np.all(seq == -10., axis=1)
    if np.sum(valid_mask) > 0:
        seq_scaled[valid_mask] = scaler.transform(seq[valid_mask])
    X_scaled.append(seq_scaled)
X_scaled = np.array(X_scaled)

# %% [markdown]
# ### 3. Split Data into Train and Test Sets
# We use stratification to preserve the class distribution.

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=seed_value, stratify=y
)
print("Training set shape:", X_train.shape)
print("Test set shape:", X_test.shape)

# For reporting, create an inverse label map.
inv_label_map = {v: k for k, v in label_map.items()}

# %% [markdown]
# ### 4. Define the Model Architecture and Ensemble Training
# We define a function to build our LSTM-based model.
# Then we train multiple models (ensemble members) with the same architecture.
# Their predictions will later be combined for a final decision.

def build_model(input_shape):
    model = Sequential()
    model.add(Masking(mask_value=-10., input_shape=input_shape))
    model.add(LSTM(64))
    model.add(Dropout(0.5))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Number of ensemble members
num_ensemble = 5
ensemble_models = []

for i in range(num_ensemble):
    print(f"\nTraining model {i+1}/{num_ensemble}")
    # To ensure some diversity, you could reinitialize seeds or use different hyperparameters.
    tf.random.set_seed(seed_value + i)
    model = build_model((max_len, 2))
    model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1, verbose=1)
    ensemble_models.append(model)

# %% [markdown]
# ### 5. Ensemble Predictions and Evaluation
# For each test sample, we average the predictions from all ensemble members
# and then choose the class with the highest averaged probability.
# We then compute the confusion matrix, per-class accuracies, and classification report.

# Get ensemble predictions: average softmax probabilities over models.
ensemble_probs = np.zeros((len(X_test), 3))
for model in ensemble_models:
    ensemble_probs += model.predict(X_test)
ensemble_probs /= num_ensemble

# Final prediction: choose the class with the highest probability.
y_pred_ensemble = np.argmax(ensemble_probs, axis=1)

# Evaluate the ensemble predictions.
cm = confusion_matrix(y_test, y_pred_ensemble)
print("\nConfusion Matrix:")
print(cm)

class_accuracies = {}
for i in range(3):
    if cm[i].sum() > 0:
        acc = cm[i, i] / cm[i].sum()
    else:
        acc = 0.0
    class_accuracies[i] = acc
    print(f"Accuracy for class {i} ({inv_label_map[i]}): {acc:.4f}")

avg_class_acc = np.mean(list(class_accuracies.values()))
print(f"\nAverage Classification Accuracy: {avg_class_acc:.4f}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred_ensemble, target_names=["low", "normal", "high"]))


Stacking Ensemble

In [None]:
from bounds import bounds
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import ast
from collections import Counter
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Masking, LSTM, Dense, Dropout, Input, Concatenate
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
import random

# -------------------------
# Set random seeds for reproducibility
seed_value = 42
random.seed(seed_value)
np.random.seed(seed_value)
tf.random.set_seed(seed_value)

# -------------------------
# 1. Read Excel Data and Organize It
file_name = "DataOn2025Jan08.xlsx"
df1 = pd.read_excel(file_name, sheet_name="NES170K07Line2")
df2 = pd.read_excel(file_name, sheet_name="NES170K07Line1")
df = pd.concat([df1, df2], ignore_index=True)
print("Data shape:", df.shape)

# Get t5 thresholds from bounds dictionary
t5_lb = bounds["170K"][0]
t5_ub = bounds["170K"][1]

def safe_literal_eval(value):
    """Safely evaluate a string representation, replacing 'nan' with None."""
    if isinstance(value, str):
        value = value.replace("nan", "None")
    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError):
        return None

def organized_data(df, t5_lb, t5_ub):
    """
    Process each row to extract the time-series (MDR), target t5,
    and assign a region label ('low', 'normal', 'high') based on thresholds.
    """
    data = {}
    for index, row in df.iterrows():
        if pd.isna(row['t5']):
            continue
        batch_number = row["batch_number"]
        data[batch_number] = {"MDR": None, "t5": row["t5"], "class": None}

        t_S1 = safe_literal_eval(row["MDRTorqueS1"])
        t_S2 = safe_literal_eval(row["MDRTorqueS2"])
        if t_S1 is not None and t_S2 is not None:
            # Unpack tuples (time, value)
            t_vals, S1 = zip(*t_S1)
            t_vals, S2 = zip(*t_S2)
            t_vals, S1, S2 = list(t_vals), list(S1), list(S2)
            # Exclude the first element as indicated
            MDR = pd.DataFrame({
                "time": t_vals[1:],
                "S1": S1[1:],
                "S2": S2[1:],
            })
            MDR.interpolate(method="linear", inplace=True, limit_direction="both")
            MDR.fillna(method="bfill", inplace=True)
            MDR.fillna(method="ffill", inplace=True)
        else:
            continue

        data[batch_number]["MDR"] = MDR

        # Assign class label based on t5 thresholds
        if row["t5"] < t5_lb:
            data[batch_number]["class"] = "low"
        elif row["t5"] > t5_ub:
            data[batch_number]["class"] = "high"
        else:
            data[batch_number]["class"] = "normal"

    # Remove batches with empty MDR
    data = {k: v for k, v in data.items() if v["MDR"] is not None and not v["MDR"].empty}
    return data

data = organized_data(df, t5_lb, t5_ub)
print(f"# low: {len({k: v for k, v in data.items() if v['class']=='low'})}")
print(f"# high: {len({k: v for k, v in data.items() if v['class']=='high'})}")
print(f"# normal: {len({k: v for k, v in data.items() if v['class']=='normal'})}")

# -------------------------
# 2. Prepare Data for Training
# Create sequences and labels. Map labels to integers: low -> 0, normal -> 1, high -> 2.
X = []
y = []
label_map = {"low": 0, "normal": 1, "high": 2}
for key, item in data.items():
    df_mdr = item["MDR"]
    sequence = df_mdr[["S1", "S2"]].values
    X.append(sequence)
    y.append(label_map[item["class"]])
y = np.array(y)

# Pad sequences with a pad value of -10 (so the Masking layer can ignore them)
max_len = max(seq.shape[0] for seq in X)
X_padded = pad_sequences(X, maxlen=max_len, dtype='float32', 
                         padding='post', truncating='post', value=-10.)

# Scale valid (non-padded) data using StandardScaler
all_points = []
for seq in X_padded:
    valid_rows = seq[~np.all(seq == -10., axis=1)]
    all_points.append(valid_rows)
all_points = np.concatenate(all_points, axis=0)

scaler = StandardScaler()
scaler.fit(all_points)

X_scaled = []
for seq in X_padded:
    seq_scaled = seq.copy()
    valid_mask = ~np.all(seq == -10., axis=1)
    if np.sum(valid_mask) > 0:
        seq_scaled[valid_mask] = scaler.transform(seq[valid_mask])
    X_scaled.append(seq_scaled)
X_scaled = np.array(X_scaled)

# -------------------------
# 3. Split Data into Train and Test Sets
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=seed_value, stratify=y
)
print("Full training set shape:", X_train_full.shape)
print("Test set shape:", X_test.shape)

# Further split training set into base training and stacking sets (for meta model)
X_train_base, X_train_stack, y_train_base, y_train_stack = train_test_split(
    X_train_full, y_train_full, test_size=0.2, random_state=seed_value, stratify=y_train_full
)
print("Base training set shape:", X_train_base.shape)
print("Stacking set shape:", X_train_stack.shape)

# Inverse label map for reporting
inv_label_map = {v: k for k, v in label_map.items()}

# -------------------------
# 4. Define Base Model Architecture
def build_base_model(input_shape):
    model = Sequential()
    model.add(Masking(mask_value=-10., input_shape=input_shape))
    model.add(LSTM(64))
    model.add(Dropout(0.5))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# -------------------------
# 5. Train Base Models
num_base_models = 3  # You can increase the number for more diversity
base_models = []
for i in range(num_base_models):
    print(f"\nTraining base model {i+1}/{num_base_models}")
    # Optionally vary the seed for diversity among base models
    tf.random.set_seed(seed_value + i)
    model = build_base_model((max_len, 2))
    model.fit(X_train_base, y_train_base, epochs=20, batch_size=32, validation_split=0.1, verbose=1)
    base_models.append(model)

# -------------------------
# 6. Generate Meta-Features for the Stacking Set
# For each base model, predict probabilities on the stacking set and then concatenate them.
meta_features_train = []
for model in base_models:
    preds = model.predict(X_train_stack)  # shape: (num_stack_samples, 3)
    meta_features_train.append(preds)
# Concatenate along the feature axis
meta_X_train = np.concatenate(meta_features_train, axis=1)  # shape: (num_stack_samples, 3*num_base_models)
meta_y_train = y_train_stack

print("Meta training features shape:", meta_X_train.shape)

# -------------------------
# 7. Train Meta Model
# Here we use a simple MLP as the meta learner.
def build_meta_model(input_shape):
    model = Sequential()
    model.add(Dense(32, activation='relu', input_shape=(input_shape,)))
    model.add(Dropout(0.3))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

meta_model = build_meta_model(meta_X_train.shape[1])
meta_model.fit(meta_X_train, meta_y_train, epochs=20, batch_size=16, validation_split=0.1, verbose=1)

# -------------------------
# 8. Evaluate the Stacking Ensemble on the Test Set
# First, generate meta features for the test set by obtaining predictions from each base model.
meta_features_test = []
for model in base_models:
    preds = model.predict(X_test)
    meta_features_test.append(preds)
meta_X_test = np.concatenate(meta_features_test, axis=1)

# Use the meta model to get final predictions.
y_pred_meta = meta_model.predict(meta_X_test)
y_pred_final = np.argmax(y_pred_meta, axis=1)

# Evaluate the final predictions
cm = confusion_matrix(y_test, y_pred_final)
print("\nConfusion Matrix:")
print(cm)

# Per-class accuracies
class_accuracies = {}
for i in range(3):
    if cm[i].sum() > 0:
        acc = cm[i, i] / cm[i].sum()
    else:
        acc = 0.0
    class_accuracies[i] = acc
    print(f"Accuracy for class {i} ({inv_label_map[i]}): {acc:.4f}")

avg_class_acc = np.mean(list(class_accuracies.values()))
print(f"\nAverage Classification Accuracy: {avg_class_acc:.4f}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred_final, target_names=["low", "normal", "high"]))


Attention Mechanism

In [None]:
from bounds import bounds
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import ast
from collections import Counter
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Masking, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences

# -------------------------
# 1. Define a Custom Attention Layer
class AttentionLayer(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)
    
    def build(self, input_shape):
        # input_shape: (batch_size, time_steps, hidden_size)
        self.W = self.add_weight(name="att_weight",
                                 shape=(input_shape[-1], input_shape[-1]),
                                 initializer="glorot_uniform",
                                 trainable=True)
        self.b = self.add_weight(name="att_bias",
                                 shape=(input_shape[-1],),
                                 initializer="zeros",
                                 trainable=True)
        self.u = self.add_weight(name="att_u",
                                 shape=(input_shape[-1], 1),
                                 initializer="glorot_uniform",
                                 trainable=True)
        super(AttentionLayer, self).build(input_shape)
    
    def call(self, inputs):
        # Compute u_t = tanh(W.h_t + b) for each timestep
        uit = tf.tanh(tf.tensordot(inputs, self.W, axes=1) + self.b)  # (batch, time_steps, hidden_size)
        # Compute scores for each timestep
        ait = tf.tensordot(uit, self.u, axes=1)  # (batch, time_steps, 1)
        ait = tf.squeeze(ait, -1)  # (batch, time_steps)
        a = tf.nn.softmax(ait, axis=1)  # (batch, time_steps)
        a = tf.expand_dims(a, -1)       # (batch, time_steps, 1)
        # Compute the weighted sum of the inputs
        output = tf.reduce_sum(inputs * a, axis=1)  # (batch, hidden_size)
        return output

# -------------------------
# 2. Read Excel Data and Organize It
file_name = "DataOn2025Jan08.xlsx"
df1 = pd.read_excel(file_name, sheet_name="NES170K07Line2")
df2 = pd.read_excel(file_name, sheet_name="NES170K07Line1")
df = pd.concat([df1, df2], ignore_index=True)
print("Data shape:", df.shape)

# Get t5 thresholds from bounds dictionary
t5_lb = bounds["170K"][0]
t5_ub = bounds["170K"][1]

def safe_literal_eval(value):
    """Safely evaluate a string representation, replacing 'nan' with None."""
    if isinstance(value, str):
        value = value.replace("nan", "None")
    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError):
        return None

def organized_data(df, t5_lb, t5_ub):
    """
    Process each row to extract the time-series (MDR), t5 value, and assign a label:
    'low' if t5 < t5_lb, 'high' if t5 > t5_ub, else 'normal'.
    """
    data = {}
    for index, row in df.iterrows():
        if pd.isna(row['t5']):
            continue
        batch_number = row["batch_number"]
        data[batch_number] = {"MDR": None, "t5": row["t5"], "class": None}
        
        t_S1 = safe_literal_eval(row["MDRTorqueS1"])
        t_S2 = safe_literal_eval(row["MDRTorqueS2"])
        if t_S1 is not None and t_S2 is not None:
            # Unpack tuples (time, value)
            t_vals, S1 = zip(*t_S1)
            t_vals, S2 = zip(*t_S2)
            t_vals, S1, S2 = list(t_vals), list(S1), list(S2)
            # Exclude the first element as indicated
            MDR = pd.DataFrame({
                "time": t_vals[1:],
                "S1": S1[1:],
                "S2": S2[1:],
            })
            MDR.interpolate(method="linear", inplace=True, limit_direction="both")
            MDR.fillna(method="bfill", inplace=True)
            MDR.fillna(method="ffill", inplace=True)
        else:
            continue
        
        data[batch_number]["MDR"] = MDR
        
        # Assign class label based on t5 thresholds
        if row["t5"] < t5_lb:
            data[batch_number]["class"] = "low"
        elif row["t5"] > t5_ub:
            data[batch_number]["class"] = "high"
        else:
            data[batch_number]["class"] = "normal"
    
    # Remove batches with empty MDR
    data = {k: v for k, v in data.items() if v["MDR"] is not None and not v["MDR"].empty}
    return data

data = organized_data(df, t5_lb, t5_ub)
print(f"# low: {len({k: v for k, v in data.items() if v['class']=='low'})}")
print(f"# high: {len({k: v for k, v in data.items() if v['class']=='high'})}")
print(f"# normal: {len({k: v for k, v in data.items() if v['class']=='normal'})}")

# -------------------------
# 3. Prepare Data for Training
# Build sequences (each with features S1 and S2) and corresponding labels.
X = []
y = []
# Map labels to integers: low -> 0, normal -> 1, high -> 2
label_map = {"low": 0, "normal": 1, "high": 2}
for key, item in data.items():
    df_mdr = item["MDR"]
    sequence = df_mdr[["S1", "S2"]].values
    X.append(sequence)
    y.append(label_map[item["class"]])
y = np.array(y)

# Pad sequences with a pad value of -10 (so that the Masking layer ignores them)
max_len = max(seq.shape[0] for seq in X)
X_padded = pad_sequences(X, maxlen=max_len, dtype='float32', 
                         padding='post', truncating='post', value=-10.)

# Scale valid (non-padded) points using StandardScaler
all_points = []
for seq in X_padded:
    valid_rows = seq[~np.all(seq == -10., axis=1)]
    all_points.append(valid_rows)
all_points = np.concatenate(all_points, axis=0)

scaler = StandardScaler()
scaler.fit(all_points)

X_scaled = []
for seq in X_padded:
    seq_scaled = seq.copy()
    valid_mask = ~np.all(seq == -10., axis=1)
    if np.sum(valid_mask) > 0:
        seq_scaled[valid_mask] = scaler.transform(seq[valid_mask])
    X_scaled.append(seq_scaled)
X_scaled = np.array(X_scaled)

# -------------------------
# 4. Split Data into Train and Test Sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)
print("Training set shape:", X_train.shape)
print("Test set shape:", X_test.shape)

# For reporting purposes, create an inverse label map.
inv_label_map = {v: k for k, v in label_map.items()}

# -------------------------
# 5. Build the Model with Attention Mechanism
model = Sequential()
model.add(Masking(mask_value=-10., input_shape=(max_len, 2)))
# Use return_sequences=True so that attention can operate over the timesteps.
model.add(LSTM(64, return_sequences=True))
# Apply the custom attention layer
model.add(AttentionLayer())
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# -------------------------
# 6. Train the Model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1, verbose=1)

# -------------------------
# 7. Evaluate the Model
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

# Predict on the test set
y_pred_prob = model.predict(X_test)
y_pred = np.argmax(y_pred_prob, axis=1)

# Compute confusion matrix and classification report
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(cm)

class_accuracies = {}
for i in range(3):
    if cm[i].sum() > 0:
        acc = cm[i, i] / cm[i].sum()
    else:
        acc = 0.0
    class_accuracies[i] = acc
    print(f"Accuracy for class {i} ({inv_label_map[i]}): {acc:.4f}")

avg_class_acc = np.mean(list(class_accuracies.values()))
print(f"\nAverage Classification Accuracy: {avg_class_acc:.4f}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=["low", "normal", "high"]))


CNN-LSTM model

In [None]:
from bounds import bounds
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import ast
from collections import Counter
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Masking, Conv1D, MaxPooling1D, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences

# -------------------------
# 1. Read Excel Data and Organize It

file_name = "DataOn2025Jan08.xlsx"
df1 = pd.read_excel(file_name, sheet_name="NES170K07Line2")
df2 = pd.read_excel(file_name, sheet_name="NES170K07Line1")
df = pd.concat([df1, df2], ignore_index=True)
print("Data shape:", df.shape)

# Get t5 thresholds from the bounds dictionary
t5_lb = bounds["170K"][0]
t5_ub = bounds["170K"][1]

def safe_literal_eval(value):
    """Safely evaluate a string representation, replacing 'nan' with None."""
    if isinstance(value, str):
        value = value.replace("nan", "None")
    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError):
        return None

def organized_data(df, t5_lb, t5_ub):
    """
    Process each row to extract the time-series (MDR), the t5 value, and assign a label:
    'low' if t5 < t5_lb, 'high' if t5 > t5_ub, else 'normal'.
    """
    data = {}
    for index, row in df.iterrows():
        if pd.isna(row['t5']):
            continue
        batch_number = row["batch_number"]
        data[batch_number] = {"MDR": None, "t5": row["t5"], "class": None}
        
        t_S1 = safe_literal_eval(row["MDRTorqueS1"])
        t_S2 = safe_literal_eval(row["MDRTorqueS2"])
        if t_S1 is not None and t_S2 is not None:
            # Unpack the tuples (time, value)
            t_vals, S1 = zip(*t_S1)
            t_vals, S2 = zip(*t_S2)
            t_vals, S1, S2 = list(t_vals), list(S1), list(S2)
            # Exclude the first element as indicated
            MDR = pd.DataFrame({
                "time": t_vals[1:],
                "S1": S1[1:],
                "S2": S2[1:],
            })
            MDR.interpolate(method="linear", inplace=True, limit_direction="both")
            MDR.fillna(method="bfill", inplace=True)
            MDR.fillna(method="ffill", inplace=True)
        else:
            continue
        
        data[batch_number]["MDR"] = MDR
        
        # Assign class label based on t5 thresholds
        if row["t5"] < t5_lb:
            data[batch_number]["class"] = "low"
        elif row["t5"] > t5_ub:
            data[batch_number]["class"] = "high"
        else:
            data[batch_number]["class"] = "normal"
    
    # Remove batches with empty MDR
    data = {k: v for k, v in data.items() if v["MDR"] is not None and not v["MDR"].empty}
    return data

data = organized_data(df, t5_lb, t5_ub)
print(f"# low: {len({k: v for k, v in data.items() if v['class']=='low'})}")
print(f"# high: {len({k: v for k, v in data.items() if v['class']=='high'})}")
print(f"# normal: {len({k: v for k, v in data.items() if v['class']=='normal'})}")

# -------------------------
# 2. Prepare Data for Training

X = []
y = []
# Map labels to integers: low -> 0, normal -> 1, high -> 2
label_map = {"low": 0, "normal": 1, "high": 2}

for key, item in data.items():
    df_mdr = item["MDR"]
    sequence = df_mdr[["S1", "S2"]].values
    X.append(sequence)
    y.append(label_map[item["class"]])
y = np.array(y)

# Pad sequences to have the same length using a pad value of -10 (for the Masking layer)
max_len = max(seq.shape[0] for seq in X)
X_padded = pad_sequences(X, maxlen=max_len, dtype='float32', 
                         padding='post', truncating='post', value=-10.)

# Scale valid (non-padded) points using a global StandardScaler
all_points = []
for seq in X_padded:
    valid_rows = seq[~np.all(seq == -10., axis=1)]
    all_points.append(valid_rows)
all_points = np.concatenate(all_points, axis=0)

scaler = StandardScaler()
scaler.fit(all_points)

X_scaled = []
for seq in X_padded:
    seq_scaled = seq.copy()
    valid_mask = ~np.all(seq == -10., axis=1)
    if np.sum(valid_mask) > 0:
        seq_scaled[valid_mask] = scaler.transform(seq[valid_mask])
    X_scaled.append(seq_scaled)
X_scaled = np.array(X_scaled)

# -------------------------
# 3. Split Data into Train and Test Sets

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)
print("Training set shape:", X_train.shape)
print("Test set shape:", X_test.shape)

# For reporting purposes, create an inverse label map.
inv_label_map = {v: k for k, v in label_map.items()}

# -------------------------
# 4. Build the CNN-LSTM Model

model = Sequential()
model.add(Masking(mask_value=-10., input_shape=(max_len, 2)))

# Convolutional block
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', padding='same'))
model.add(MaxPooling1D(pool_size=2))

# Optionally, add another Conv1D block
model.add(Conv1D(filters=32, kernel_size=3, activation='relu', padding='same'))
model.add(MaxPooling1D(pool_size=2))

# LSTM block to capture temporal dependencies
model.add(LSTM(64))
model.add(Dropout(0.5))

# Final classification layer
model.add(Dense(3, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# -------------------------
# 5. Train the Model

history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1, verbose=1)

# -------------------------
# 6. Evaluate the Model

test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

# Make predictions on the test set
y_pred_prob = model.predict(X_test)
y_pred = np.argmax(y_pred_prob, axis=1)

# Compute confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(cm)

# Compute per-class accuracy
class_accuracies = {}
for i in range(3):
    if cm[i].sum() > 0:
        acc = cm[i, i] / cm[i].sum()
    else:
        acc = 0.0
    class_accuracies[i] = acc
    print(f"Accuracy for class {i} ({inv_label_map[i]}): {acc:.4f}")

avg_class_acc = np.mean(list(class_accuracies.values()))
print(f"\nAverage Classification Accuracy: {avg_class_acc:.4f}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=["low", "normal", "high"]))


Bidirectional LSTMs

In [None]:
from bounds import bounds
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import ast
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Masking, Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences

# -------------------------
# 1. Read Excel Data and Organize It

file_name = "DataOn2025Jan08.xlsx"
df1 = pd.read_excel(file_name, sheet_name="NES170K07Line2")
df2 = pd.read_excel(file_name, sheet_name="NES170K07Line1")
df = pd.concat([df1, df2], ignore_index=True)
print("Data shape:", df.shape)

# Get t5 thresholds from bounds dictionary
t5_lb = bounds["170K"][0]
t5_ub = bounds["170K"][1]

def safe_literal_eval(value):
    """Safely evaluate a string representation, replacing 'nan' with None."""
    if isinstance(value, str):
        value = value.replace("nan", "None")
    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError):
        return None

def organized_data(df, t5_lb, t5_ub):
    """
    Process each row to extract:
    - The multivariate time-series (MDR) with columns S1 and S2.
    - The t5 value.
    - A class label ('low', 'normal', or 'high') based on thresholds.
    """
    data = {}
    for index, row in df.iterrows():
        if pd.isna(row['t5']):
            continue
        batch_number = row["batch_number"]
        data[batch_number] = {"MDR": None, "t5": row["t5"], "class": None}
        
        t_S1 = safe_literal_eval(row["MDRTorqueS1"])
        t_S2 = safe_literal_eval(row["MDRTorqueS2"])
        if t_S1 is not None and t_S2 is not None:
            # Unpack tuples (time, value)
            t_vals, S1 = zip(*t_S1)
            t_vals, S2 = zip(*t_S2)
            t_vals, S1, S2 = list(t_vals), list(S1), list(S2)
            # Exclude the first element as indicated
            MDR = pd.DataFrame({
                "time": t_vals[1:],
                "S1": S1[1:],
                "S2": S2[1:],
            })
            MDR.interpolate(method="linear", inplace=True, limit_direction="both")
            MDR.fillna(method="bfill", inplace=True)
            MDR.fillna(method="ffill", inplace=True)
        else:
            continue
        
        data[batch_number]["MDR"] = MDR
        
        # Assign class label based on t5 thresholds
        if row["t5"] < t5_lb:
            data[batch_number]["class"] = "low"
        elif row["t5"] > t5_ub:
            data[batch_number]["class"] = "high"
        else:
            data[batch_number]["class"] = "normal"
    
    # Remove batches with empty or invalid MDR data
    data = {k: v for k, v in data.items() if v["MDR"] is not None and not v["MDR"].empty}
    return data

data = organized_data(df, t5_lb, t5_ub)
print(f"# low: {len({k: v for k, v in data.items() if v['class']=='low'})}")
print(f"# high: {len({k: v for k, v in data.items() if v['class']=='high'})}")
print(f"# normal: {len({k: v for k, v in data.items() if v['class']=='normal'})}")

# -------------------------
# 2. Prepare Data for Training

X = []
y = []
# Map labels to integers: low -> 0, normal -> 1, high -> 2
label_map = {"low": 0, "normal": 1, "high": 2}

for key, item in data.items():
    df_mdr = item["MDR"]
    # Use only the S1 and S2 columns as features
    sequence = df_mdr[["S1", "S2"]].values
    X.append(sequence)
    y.append(label_map[item["class"]])
y = np.array(y)

# Pad sequences (using -10 as the pad value so that Masking can ignore these)
max_len = max(seq.shape[0] for seq in X)
X_padded = pad_sequences(X, maxlen=max_len, dtype='float32', 
                         padding='post', truncating='post', value=-10.)

# Scale only valid (non-padded) points using a global StandardScaler
all_points = []
for seq in X_padded:
    valid_rows = seq[~np.all(seq == -10., axis=1)]
    all_points.append(valid_rows)
all_points = np.concatenate(all_points, axis=0)

scaler = StandardScaler()
scaler.fit(all_points)

X_scaled = []
for seq in X_padded:
    seq_scaled = seq.copy()
    valid_mask = ~np.all(seq == -10., axis=1)
    if np.sum(valid_mask) > 0:
        seq_scaled[valid_mask] = scaler.transform(seq[valid_mask])
    X_scaled.append(seq_scaled)
X_scaled = np.array(X_scaled)

# -------------------------
# 3. Split Data into Train and Test Sets

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)
print("Training set shape:", X_train.shape)
print("Test set shape:", X_test.shape)

# For reporting, create an inverse label map.
inv_label_map = {v: k for k, v in label_map.items()}

# -------------------------
# 4. Build the Bidirectional LSTM Model

model = Sequential()
model.add(Masking(mask_value=-10., input_shape=(max_len, 2)))
# Use a Bidirectional LSTM to capture both forward and backward dependencies
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# -------------------------
# 5. Train the Model

history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1, verbose=1)

# -------------------------
# 6. Evaluate the Model

test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

# Generate predictions on the test set
y_pred_prob = model.predict(X_test)
y_pred = np.argmax(y_pred_prob, axis=1)

# Compute confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(cm)

# Calculate per-class accuracy
class_accuracies = {}
for i in range(3):
    if cm[i].sum() > 0:
        acc = cm[i, i] / cm[i].sum()
    else:
        acc = 0.0
    class_accuracies[i] = acc
    print(f"Accuracy for class {i} ({inv_label_map[i]}): {acc:.4f}")

avg_class_acc = np.mean(list(class_accuracies.values()))
print(f"\nAverage Classification Accuracy: {avg_class_acc:.4f}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=["low", "normal", "high"]))
