In [6]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Bidirectional
from tensorflow.keras.utils import to_categorical

In [None]:
# Load the dataset properly
def load_dataset(folder):
    """
    Loads QPO dataset from multiple CSV files, extracting sequences and labels.
    
    Parameters:
    - folder: Path to the dataset directory containing CSV files.

    Returns:
    - data: NumPy array of sequences
    - labels: NumPy array of labels (1 = QPO, 0 = Non-QPO)
    """
    data = []
    labels = []
    
    for filename in os.listdir(folder):
        if filename.endswith(".csv"):
            df = pd.read_csv(os.path.join(folder, filename))
            
            # Extract sequences and labels
            sequences = df.drop(columns=["label"]).values  # Extract time-series data
            file_labels = df["label"].values  # Extract labels from CSV

            data.append(sequences)
            labels.append(file_labels)

    # Convert lists to NumPy arrays
    return np.vstack(data), np.hstack(labels)

# Configuration: everything a reader might want to tune lives here.
DATASET_FOLDER = "qpo_dataset"
SEED = 42
NUM_CLASSES = 2  # QPO (1) and Non-QPO (0)
EPOCHS = 30
BATCH_SIZE = 64

# Seed Python, NumPy and TensorFlow RNGs so weight initialisation, dropout and
# batch shuffling are repeatable (the split below was already seeded).
tf.keras.utils.set_random_seed(SEED)

# Load data and labels
data, labels = load_dataset(DATASET_FOLDER)

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, random_state=SEED, stratify=labels
)

# Compute class weights to handle imbalance.
# Done BEFORE one-hot encoding, from the training labels only, and keyed by
# the actual class value (not its position in the weights array) so the
# mapping stays correct whatever the label values are.
train_classes = np.unique(y_train)
class_weights = compute_class_weight(class_weight="balanced", classes=train_classes, y=y_train)
class_weight_dict = {int(cls): float(weight) for cls, weight in zip(train_classes, class_weights)}

# Reshape data for LSTM input (LSTMs expect 3D input: samples, timesteps, features)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Convert labels to categorical (one-hot encoding for softmax output).
# num_classes is pinned so both splits always get the same width as the
# output layer, even if a class were absent from one of them.
y_train = to_categorical(y_train, num_classes=NUM_CLASSES)
y_test = to_categorical(y_test, num_classes=NUM_CLASSES)

# Define the improved LSTM model.
# The input shape is declared with an explicit Input object: passing
# `input_shape` to the LSTM wrapped inside Bidirectional triggers a Keras
# warning and leaves the model unbuilt until the first fit() call.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])),

    Bidirectional(LSTM(64, return_sequences=True)),
    BatchNormalization(),
    Dropout(0.3),

    Bidirectional(LSTM(64, return_sequences=True)),
    BatchNormalization(),
    Dropout(0.3),

    Bidirectional(LSTM(32)),
    Dropout(0.3),

    Dense(32, activation='relu'),
    Dropout(0.2),

    Dense(NUM_CLASSES, activation='softmax')  # Two classes: QPO (1) and Non-QPO (0)
])

# Compile the model with Adam optimizer
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model with class weights.
# NOTE(review): the held-out test split doubles as validation data here, so
# the val_* curves must only be used for monitoring. Do not use them for
# early stopping or hyper-parameter selection without carving a separate
# validation split out of the training data first.
history = model.fit(
    X_train, y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_test, y_test),
    class_weight=class_weight_dict,
    verbose=2
)


Epoch 1/30


  super().__init__(**kwargs)


2250/2250 - 359s - 159ms/step - accuracy: 0.7412 - loss: 0.5063 - val_accuracy: 0.7085 - val_loss: 0.6550
Epoch 2/30
2250/2250 - 342s - 152ms/step - accuracy: 0.8521 - loss: 0.3527 - val_accuracy: 0.7889 - val_loss: 0.4426
Epoch 3/30
2250/2250 - 376s - 167ms/step - accuracy: 0.8757 - loss: 0.3105 - val_accuracy: 0.8715 - val_loss: 0.3094
Epoch 4/30
2250/2250 - 387s - 172ms/step - accuracy: 0.8848 - loss: 0.2900 - val_accuracy: 0.8896 - val_loss: 0.2759
Epoch 5/30
2250/2250 - 517s - 230ms/step - accuracy: 0.8912 - loss: 0.2773 - val_accuracy: 0.8734 - val_loss: 0.3064
Epoch 6/30
2250/2250 - 381s - 169ms/step - accuracy: 0.8952 - loss: 0.2679 - val_accuracy: 0.9020 - val_loss: 0.2506
Epoch 7/30
2250/2250 - 344s - 153ms/step - accuracy: 0.8990 - loss: 0.2609 - val_accuracy: 0.8987 - val_loss: 0.2669
Epoch 8/30
2250/2250 - 2480s - 1s/step - accuracy: 0.9011 - loss: 0.2568 - val_accuracy: 0.9013 - val_loss: 0.2567
Epoch 9/30
2250/2250 - 457s - 203ms/step - accuracy: 0.9049 - loss: 0.2481 - 

In [3]:
# Score the trained model on the held-out test arrays. With the single
# 'accuracy' metric configured at compile time, evaluate() yields the loss
# followed by the accuracy.
test_scores = model.evaluate(x=X_test, y=y_test, verbose=2)
loss, accuracy = test_scores
print(f"Test Accuracy: {accuracy * 100:.2f}%")

1125/1125 - 28s - 25ms/step - accuracy: 0.9310 - loss: 0.1820
Test Accuracy: 93.10%


In [4]:
# Persist the trained model to 'my_model.keras' in the working directory.
model.save(filepath='my_model.keras')


In [7]:
# Read the saved model back from disk as an independent object.
saved_model_path = 'my_model.keras'
new_model = tf.keras.models.load_model(saved_model_path)

# Show the model architecture
new_model.summary()