In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

# 1. Preprocessing Data

In [None]:
# Read the raw training data, parse the timestamp column, and order rows chronologically
df = (
    pd.read_csv("data/ilapak3/train.csv")
    .assign(times=lambda frame: pd.to_datetime(frame["times"]))
    .sort_values(by="times")
    .reset_index(drop=True)
)

# Optional filter (currently disabled): keep only rows from 2025
# df = df[df["times"].dt.year == 2025]
df.info()

In [None]:
# Number of rows per value of "Condition" (the column later used as the model target)
df["Condition"].value_counts()

# Feature Engineering

### Time Columns

In [None]:
# Take the timestamp column out of the frame and expand it into calendar parts.
# Popping first leaves the same final column set/order as adding the parts and then dropping "times".
timestamps = df.pop("times")
df["month"] = timestamps.dt.month
df["week"] = timestamps.dt.isocalendar().week
df["day"] = timestamps.dt.day
df["hour"] = timestamps.dt.hour
df["minute"] = timestamps.dt.minute
df.head(2)

### Diff in Sealing Vertical & Horizontal

In [5]:
# Difference between the paired sealing-temperature columns within the same row
df["diff_sealing_vertical"] = df["Suhu Sealing Vertical Atas (oC)"].sub(df["Suhu Sealing Vertikal Bawah (oC)"])
df["diff_sealing_horizontal"] = df["Suhu Sealing Horizontal Depan/Kanan (oC)"].sub(df["Suhu Sealing Horizontal Belakang/Kiri (oC )"])

# Row-to-row change of each temperature column; NaN results (e.g. the first row) become 0
step_change_sources = {
    "diff_sealing_top_vertical": "Suhu Sealing Vertical Atas (oC)",
    "diff_sealing_bottom_vertical": "Suhu Sealing Vertikal Bawah (oC)",
    "diff_sealing_top_horizontal": "Suhu Sealing Horizontal Depan/Kanan (oC)",
    "diff_sealing_bottom_horizontal": "Suhu Sealing Horizontal Belakang/Kiri (oC )",
}
for feature_name, sensor_col in step_change_sources.items():
    df[feature_name] = df[sensor_col].diff().fillna(0)

### Diff in Output & Reject

In [6]:
output_counter = df["Counter Output (pack)"]
reject_counter = df["Counter Reject (pack)"]

# Output counter minus reject counter within the same row
df["diff_output"] = output_counter - reject_counter

# Row-to-row increments of each counter; NaN results (e.g. the first row) become 0
df["diff_counter_output"] = output_counter.diff().fillna(0)
df["diff_counter_reject"] = reject_counter.diff().fillna(0)

### Diff in Output Time

In [7]:
# Row-to-row change of "Output Time_sec"; NaN results (e.g. the first row) become 0
df["diff_output_time"] = df["Output Time_sec"].diff().fillna(0)

### Statistical Features

In [None]:
# Rolling mean and standard deviation over a fixed-size window of consecutive rows.
ROLLING_WINDOW = 10

# Feature-name suffix -> source column.
# NOTE: "sealing_vertical"/"sealing_top_vertical" and "sealing_horizontal"/"sealing_top_horizontal"
# read the same source column, so each pair yields identical features; both names are kept
# because later cells refer to them.
rolling_sources = {
    "sealing_vertical": "Suhu Sealing Vertical Atas (oC)",
    "sealing_horizontal": "Suhu Sealing Horizontal Depan/Kanan (oC)",
    "sealing_top_vertical": "Suhu Sealing Vertical Atas (oC)",
    "sealing_bottom_vertical": "Suhu Sealing Vertikal Bawah (oC)",
    "sealing_top_horizontal": "Suhu Sealing Horizontal Depan/Kanan (oC)",
    "sealing_bottom_horizontal": "Suhu Sealing Horizontal Belakang/Kiri (oC )",
    "output": "Counter Output (pack)",
    "reject": "Counter Reject (pack)",
}

# All means first, then all standard deviations, so the resulting column order is stable.
for stat in ("mean", "std"):
    for suffix, source_col in rolling_sources.items():
        window = df[source_col].rolling(window=ROLLING_WINDOW)
        rolled = getattr(window, stat)()
        # The first ROLLING_WINDOW - 1 rows have no full window and come out as NaN.
        # `.bfill()` replaces the deprecated `fillna(method="bfill")`; note that it copies
        # the first complete window's value backwards, i.e. those rows see later data.
        df[f"rolling_{stat}_{suffix}"] = rolled.bfill()

### Interaction Features

In [10]:
# Infinite values appear whenever a denominator is 0 (division below, or pct_change after a 0).
# fillna() does not touch them, and a single inf turns correlations / scaling into NaN.
INFINITIES = [np.inf, -np.inf]

# +1 in the denominator avoids division by zero when the output counter is 0
df['reject_ratio'] = df['Counter Reject (pack)'] / (df['Counter Output (pack)'] + 1)
# Division by a zero total time is undefined -> store NaN instead of +/-inf
df['efficiency'] = (df['Output Time_sec'] / df['Total Time_sec']).replace(INFINITIES, np.nan)

# Lag Features (value of the previous row; the first row has none and gets 0)
df['lag_1_reject_ratio'] = df['reject_ratio'].shift(1).fillna(0)
df['lag_1_efficiency'] = df['efficiency'].shift(1).fillna(0)

# Rate of Change (ROC): relative change versus the previous row.
# Undefined results (NaN, or +/-inf when the previous value was 0) are set to 0.
for roc_name, source_col in {
    'roc_quality': 'Quality(%)',
    'roc_reject_ratio': 'reject_ratio',
    'roc_efficiency': 'efficiency',
}.items():
    df[roc_name] = df[source_col].pct_change().replace(INFINITIES, np.nan).fillna(0)

# Drop original columns
df.drop(columns=[
    "Suhu Sealing Vertical Atas (oC)", "Suhu Sealing Vertikal Bawah (oC)",
    "Suhu Sealing Horizontal Depan/Kanan (oC)", "Suhu Sealing Horizontal Belakang/Kiri (oC )",
    "Counter Output (pack)", "Counter Reject (pack)", "Output Time_sec"
], inplace=True)

In [None]:
# Pairwise correlation of every column in the engineered frame, drawn on an explicit axes
fig, ax = plt.subplots(figsize=(20, 12))
sns.heatmap(df.corr(), annot=True, cbar=False, cmap='coolwarm', linewidths=0.5, fmt='.2f', ax=ax)

### Select Features with Absolute Correlation >= 0.1 to `Condition`

In [None]:
# Rank every column by the absolute value of its correlation with 'Condition'
# and keep those at or above 0.1 ('Condition' itself is part of the result).
condition_corr = df.corr()['Condition']
features = condition_corr.abs().sort_values(ascending=False)
selected_features = features.loc[features.ge(0.1)].index.tolist()

# Independent copy so later edits do not write back into df
df_selected = df.loc[:, selected_features].copy()
df_selected

In [None]:
# Correlation heatmap restricted to the columns kept by the >= 0.1 threshold
sns.heatmap(df_selected.corr(), annot=True, cbar=False, cmap='coolwarm', linewidths=0.5, fmt='.2f')

In [None]:
# Columns judged redundant or not useful after inspecting the heatmap above
redundant_features = ["diff_sealing_vertical", "rolling_mean_sealing_horizontal", "diff_output"]
df_selected.drop(columns=redundant_features, inplace=True)

# Re-draw the correlation heatmap for the remaining columns
sns.heatmap(df_selected.corr(), annot=True, cbar=False, cmap='coolwarm', linewidths=0.5, fmt='.2f')

# LSTM Pipeline

In [15]:
# Input columns for the model; the two lists are concatenated to build the feature matrix.
categorical_cols = ['Status']

continuous_cols = ['rolling_mean_sealing_bottom_vertical',
                   'diff_sealing_horizontal', 'rolling_mean_sealing_top_horizontal',
                   'Quality(%)', 'rolling_std_sealing_bottom_vertical',
                   'rolling_std_sealing_bottom_horizontal', 'rolling_mean_output']

# Column the model is trained to predict
target_col = "Condition"

# Number of consecutive rows in each input window (also the LSTM input length)
timesteps = 10
# NOTE(review): n_splits is not referenced in the visible cells — confirm it is still needed
n_splits = 5
# Number of target classes: width of the one-hot labels and of the softmax output layer
n_classes = 3

In [None]:
def create_sequences(X: pd.DataFrame, y: pd.Series, timesteps: int = 10):
    """Turn row-wise features into fixed-length look-back windows.

    For every position ``i >= timesteps`` the sample is rows ``[i - timesteps, i)``
    of ``X`` and its label is ``y[i]`` (the row right after the window).

    Args:
        X: feature table (DataFrame or 2-D array), one row per time step.
        y: labels aligned row-by-row with ``X``.
        timesteps: number of consecutive rows per window.

    Returns:
        Tuple ``(Xs, ys)`` with shapes ``(n - timesteps, timesteps, n_features)``
        and ``(n - timesteps,)``. When there are not enough rows for a single
        window, both arrays are empty but keep those ranks.
    """
    # Work on plain arrays: slicing them is far cheaper than one .iloc call per window
    values = np.asarray(X)
    labels = np.asarray(y)

    if len(values) <= timesteps:
        empty_windows = np.empty((0, timesteps, values.shape[1]), dtype=values.dtype)
        return empty_windows, labels[:0]

    windows = [values[end - timesteps:end] for end in range(timesteps, len(values))]
    return np.stack(windows), labels[timesteps:]


# Time Series Split
def time_series_split(X, y, train_ratio=0.8):
    """Chronological hold-out split: the first ``train_ratio`` share of rows is the
    training part and the remaining rows are the test part (no shuffling).

    Args:
        X: feature table supporting ``.iloc``.
        y: labels supporting ``.iloc``, same length as ``X``.
        train_ratio: fraction of rows assigned to the training part.

    Returns:
        ``X_train, X_test, y_train, y_test``

    Raises:
        ValueError: if ``X`` and ``y`` differ in length.
    """
    if len(X) != len(y):
        raise ValueError(f"X and y must have the same length, got {len(X)} and {len(y)}")

    train_end = int(len(X) * train_ratio)
    return X.iloc[:train_end], X.iloc[train_end:], y.iloc[:train_end], y.iloc[train_end:]


X_raw = df[categorical_cols + continuous_cols]
y = df[target_col]

# Split BEFORE scaling: the scaler must only learn mean/std from the training rows,
# otherwise statistics of the held-out rows leak into the training data.
X_train_raw, X_test_raw, y_train, y_test = time_series_split(X_raw, y)

scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train_raw), columns=X_raw.columns, index=X_train_raw.index)
X_test = pd.DataFrame(scaler.transform(X_test_raw), columns=X_raw.columns, index=X_test_raw.index)

# Keep `X` as the full scaled feature table (now scaled with train-only statistics)
X = pd.concat([X_train, X_test])

# Sequence
X_train_seq, y_train_seq = create_sequences(X_train, y_train, timesteps)
X_test_seq, y_test_seq = create_sequences(X_test, y_test, timesteps)

# Print shape
print(f"X train seq: {X_train_seq.shape}, y train seq: {y_train_seq.shape}")
print(f"X test seq: {X_test_seq.shape}, y test seq: {y_test_seq.shape}")

In [None]:
# Feature count is the last axis of the windowed training array
n_features = X_train_seq.shape[2]

# One-hot encode the integer labels for the softmax output
y_train_cat = to_categorical(y_train_seq, n_classes)
y_test_cat = to_categorical(y_test_seq, n_classes)

model = Sequential([
    # Declare the input with an explicit Input object instead of passing
    # `input_shape=` to the first layer (deprecated for Sequential models).
    tf.keras.Input(shape=(timesteps, n_features)),

    # First recurrent block returns the full sequence so it can feed the next LSTM
    Bidirectional(LSTM(16, return_sequences=True)),
    Dropout(0.3),

    # Second recurrent block collapses the sequence to a single vector
    Bidirectional(LSTM(32, return_sequences=False)),
    Dropout(0.3),

    Dense(16, activation='relu'),
    Dropout(0.3),

    # One probability per class
    Dense(n_classes, activation='softmax'),
])

model.summary()

In [None]:
# Focal loss: shrinks the contribution of samples the model already classifies confidently,
# so training concentrates on hard samples; per-class `alpha` can additionally up-weight
# minority classes.
def focal_loss(gamma=2.0, alpha=0.75):
    """Build a categorical focal-loss function for one-hot targets and softmax outputs.

    Args:
        gamma: focusing exponent; the loss of each sample is multiplied by
            ``(1 - p_t) ** gamma`` where ``p_t`` is the probability predicted
            for the true class. ``gamma=0`` removes the focusing effect.
        alpha: either a scalar applied to every sample (a pure rescaling of the
            loss, it does not re-balance classes), or a sequence holding one
            weight per class, in which case each sample is weighted by the
            entry of its true class.

    Returns:
        ``loss_fn(y_true, y_pred)`` returning the mean focal loss over the batch.
    """
    per_class_alpha = np.ndim(alpha) > 0

    def loss_fn(y_true, y_pred):
        # Make the targets the same dtype as the predictions so the products below are valid
        y_true = tf.cast(y_true, y_pred.dtype)

        # Keep probabilities away from 0 and 1 so log() stays finite
        epsilon = K.epsilon()
        y_pred = K.clip(y_pred, epsilon, 1.0 - epsilon)

        # Cross entropy per sample
        cross_entropy = K.sum(-y_true * K.log(y_pred), axis=-1)

        # Probability assigned to the true class
        p_t = K.sum(y_true * y_pred, axis=-1)

        if per_class_alpha:
            # Pick the weight of each sample's true class via the one-hot target
            class_weights = tf.constant(alpha, dtype=y_pred.dtype)
            alpha_t = K.sum(y_true * class_weights, axis=-1)
        else:
            alpha_t = alpha

        focal_weight = alpha_t * K.pow(1 - p_t, gamma)
        return K.mean(focal_weight * cross_entropy)

    return loss_fn

# Training
model.compile(
    loss=focal_loss(gamma=2.0, alpha=0.75),
    optimizer=AdamW(learning_rate=0.0001, weight_decay=1e-5),
    metrics=['accuracy']
)

# Stop when val_loss has not improved for 5 epochs and roll the model back to the
# best epoch; without restore_best_weights the weights of the last epoch are kept.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# NOTE(review): the held-out test windows are also used as validation data here, so
# early stopping is tuned on the same rows that are later reported as test results.
# Consider carving a validation slice out of the end of the training period instead.
model.fit(
    X_train_seq,
    y_train_cat,
    epochs=10,
    batch_size=64,
    validation_data=(X_test_seq, y_test_cat),
    callbacks=[early_stopping],
)

# Evaluation