In [23]:
import pandas as pd
from os import path
import numpy as np

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight


from regimetry.config import Config
from regimetry.logger_manager import LoggerManager


In [24]:

# Notebook-level logger obtained from the project's LoggerManager.
# NOTE(review): this variable shadows the name of the stdlib `logging` module, and the
# logger name spells "transistion" (presumably "transition"). Both are left untouched
# here in case later cells or log routing depend on the exact identifiers — confirm.
logging = LoggerManager.get_logger("regime_transistion_predictions")

In [25]:
cfg = Config()

# Echo every resolved directory so the locations used by later cells are
# visible in the cell output.
for label, attr_name in (
    ("PROJECT ROOT:", "PROJECT_ROOT"),
    ("BASE DIR:", "BASE_DIR"),
    ("RAW DATA:", "RAW_DATA_DIR"),
    ("PROCESSED DATA:", "PROCESSED_DATA_DIR"),
    ("EMBEDDINGS DATA:", "EMBEDDINGS_DIR"),
    ("REPORTS DATA:", "REPORTS_DIR"),
):
    print(label, getattr(cfg, attr_name))
print("REPORTS DATA:", cfg.REPORTS_DIR) 

PROJECT ROOT: /Users/kenneth/Public/projects/python/ai/regimetry
BASE DIR: /Users/kenneth/Public/projects/python/ai/regimetry/artifacts
RAW DATA: /Users/kenneth/Public/projects/python/ai/regimetry/artifacts/data/raw
PROCESSED DATA: /Users/kenneth/Public/projects/python/ai/regimetry/artifacts/data/processed
EMBEDDINGS DATA: /Users/kenneth/Public/projects/python/ai/regimetry/artifacts/embeddings
REPORTS DATA: /Users/kenneth/Public/projects/python/ai/regimetry/artifacts/reports


In [26]:
# Locate the cluster-assignment CSV inside the report directory of the run
# being analysed.
run_report_dir = path.join(cfg.REPORTS_DIR, "CAD_CHF_ws5_learnable80_nc12")
cluster_path = path.join(run_report_dir, "cluster_assignments.csv")


In [27]:
# Load the cluster assignments and flatten the Cluster_ID column into a plain
# list of ints; rows with no assigned cluster are dropped first.
df = pd.read_csv(cluster_path)
assigned_clusters = df["Cluster_ID"].dropna()
cluster_ids = assigned_clusters.astype(int).tolist()



In [28]:

# === Label Encode all cluster IDs consistently ===
# Fit once on the full sequence so every cluster ID gets a single, stable
# integer code, then encode that same sequence.
encoder = LabelEncoder().fit(cluster_ids)
encoded_ids = encoder.transform(cluster_ids)
# Number of distinct clusters seen by the encoder.
n_clusters = encoder.classes_.size



In [29]:
# === Build X, y sequences ===
# Every sample is `window_size` consecutive encoded cluster IDs; its target is
# the encoded ID that immediately follows the window.
window_size = 15
n_samples = len(encoded_ids) - window_size

X = np.array(
    [encoded_ids[start:start + window_size] for start in range(n_samples)]
)
y = np.array(
    [encoded_ids[start + window_size] for start in range(n_samples)]
)
# Peek at the first few targets as a sanity check
y[:5]

array([10, 10, 10, 10,  3])

In [30]:
# === Train-test split ===
# shuffle=False keeps the samples in their original order, so the final 20%
# of the windows form the test set. random_state is passed through unchanged;
# it only influences splits that shuffle.
split_options = dict(test_size=0.2, shuffle=False, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [31]:
# === Class weights to handle imbalance ===
# "balanced" weighting is computed only over the classes present in y_train.
train_classes = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=train_classes,
    y=y_train,
)
# Key each weight by its actual encoded class label. The weights come back in
# the same order as `train_classes`, so pairing them by position (enumerate)
# assigns weights to the wrong classes whenever some class is absent from
# y_train — which can happen with a non-shuffled split.
class_weight_dict = {
    int(label): float(weight)
    for label, weight in zip(train_classes, class_weights)
}

In [32]:
# Stop training once validation loss has gone 10 consecutive epochs without
# improving, and roll the model back to the weights of its best epoch.
early_stopping = EarlyStopping(
    monitor="val_loss", patience=10, restore_best_weights=True, verbose=1
)

In [33]:
# === Build Improved LSTM Model ===
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks and
# batch shuffling repeat across Restart & Run All (some GPU kernels may still
# be nondeterministic).
tf.keras.utils.set_random_seed(42)

# Encoded cluster IDs -> 64-d embeddings -> two stacked LSTMs -> softmax over
# the n_clusters possible next clusters.
model = Sequential([
    Embedding(input_dim=n_clusters, output_dim=64),         # richer embedding
    LSTM(64, return_sequences=True),                        # pass the full sequence to the next LSTM
    Dropout(0.2),
    LSTM(32),
    Dense(n_clusters, activation='softmax')
])

# Targets are integer class codes, hence the sparse categorical loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# === Train ===
# NOTE: class weighting is not applied in this run; pass
# `class_weight=class_weight_dict` to fit() to enable it.
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.1,          # hold out part of the training data for val_loss monitoring
    callbacks=[early_stopping],
    verbose=1
)

# === Evaluate ===
# Score on the held-out test windows, which were never seen during training.
loss, acc = model.evaluate(X_test, y_test, verbose=0)
print(f"\n✅ LSTM accuracy: {acc * 100:.2f}%")

Epoch 1/100
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.3540 - loss: 2.1872 - val_accuracy: 0.0604 - val_loss: 2.5758
Epoch 2/100
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5051 - loss: 1.3159 - val_accuracy: 0.2416 - val_loss: 2.5742
Epoch 3/100
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6835 - loss: 1.0097 - val_accuracy: 0.2953 - val_loss: 2.6129
Epoch 4/100
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7669 - loss: 0.8181 - val_accuracy: 0.3154 - val_loss: 2.2373
Epoch 5/100
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7708 - loss: 0.7477 - val_accuracy: 0.2886 - val_loss: 2.5224
Epoch 6/100
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8160 - loss: 0.6372 - val_accuracy: 0.3087 - val_loss: 2.4324
Epoch 7/100
[1m42/42[0m [32m━━