In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import random

# -------------------- Load Data --------------------
# Build the paths with os.path.join so the script also runs on non-Windows
# systems, where a backslash is an ordinary filename character rather than a
# directory separator. On Windows the resulting path is unchanged.
X = np.load(os.path.join("preprocessed", "ALL_X.npy"))
y = np.load(os.path.join("preprocessed", "ALL_y.npy"))

# Map the raw labels to consecutive integer ids (0 .. n_classes - 1);
# encoder.classes_ keeps the original label for each id.
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)
print("Classes:", encoder.classes_)

# Append a trailing axis of size 1 so each sample becomes (steps, 1), matching
# the input_shape=(X.shape[1], 1) given to the first Conv1D layer.
# assumes X is stored as a 2-D (samples, steps) array -- TODO confirm
X = X.reshape((X.shape[0], X.shape[1], 1))

print("Dataset shape:", X.shape)
print("Labels distribution:", np.unique(y_encoded, return_counts=True))

# -------------------- Prepare Cross Validation --------------------
# A new seed is drawn on every run; it is printed so that a given shuffle can
# be reproduced later by hard-coding the reported value.
random_state = np.random.randint(0, 10000)
print(f"🎲 Random state used for this run: {random_state}")

kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)
# Materialise the (train_idx, test_idx) pairs once so that exactly the same
# splits are both written to disk and consumed by the training loop.
fold_indices = list(kfold.split(X, y_encoded))

# Save indices for reproducibility.
# The index arrays are stored in an object array, so np.load needs
# allow_pickle=True to read this file back.
os.makedirs("results", exist_ok=True)
np.save("results/fold_indices.npy", np.array(fold_indices, dtype=object), allow_pickle=True)

# -------------------- Training --------------------
def _build_model(n_steps, n_classes):
    """Return a freshly initialised, compiled 1-D CNN classifier.

    Args:
        n_steps: Length of each input sequence; the model input shape is
            ``(n_steps, 1)``.
        n_classes: Number of target classes, i.e. the width of the final
            softmax layer.

    Returns:
        A compiled ``Sequential`` model using Adam (learning rate 1e-4) and
        sparse categorical cross-entropy, reporting accuracy.
    """
    model = Sequential([
        Conv1D(32, 5, activation='relu', input_shape=(n_steps, 1)),
        MaxPooling1D(2),
        Dropout(0.2),
        Conv1D(64, 5, activation='relu'),
        MaxPooling1D(2),
        Dropout(0.2),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(n_classes, activation='softmax')
    ])
    model.compile(optimizer=Adam(1e-4), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model


acc_per_fold = []
conf_matrices = []

# Take the class count from the fitted encoder instead of hard-coding 3, so the
# output layer always matches the labels actually present in the dataset.
num_classes = len(encoder.classes_)
# Explicit label ids keep every confusion matrix at (num_classes, num_classes),
# even if a class is absent from a fold's test labels and predictions. This
# keeps the matrices summable and aligned with the heatmap tick labels.
class_ids = np.arange(num_classes)

for fold_no, (train_val_idx, test_idx) in enumerate(fold_indices, start=1):
    print(f"\n🔹 Fold {fold_no}")

    # Split into train/val/test
    X_train_val, X_test = X[train_val_idx], X[test_idx]
    y_train_val, y_test = y_encoded[train_val_idx], y_encoded[test_idx]

    # 17.65% of the non-test portion is held out for validation. With 5 folds
    # the non-test portion is ~80% of the data, so validation is ~14% overall.
    # The fixed random_state makes this inner split depend only on the fold.
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val, y_train_val, test_size=0.1765, stratify=y_train_val, random_state=42
    )

    print(f"Train: {len(X_train)}, Val: {len(X_val)}, Test: {len(X_test)}")

    # Define model: a new, untrained network for every fold so that no weights
    # carry over between folds.
    model = _build_model(X.shape[1], num_classes)

    # Train model
    history = model.fit(X_train, y_train, epochs=40, batch_size=32, validation_data=(X_val, y_val), verbose=1)

    # Evaluate on the held-out fold
    test_loss, test_acc = model.evaluate(X_test, y_test)
    acc_per_fold.append(test_acc)
    print(f"Fold {fold_no} - Test Accuracy: {test_acc:.4f}")

    # Save model weights
    # NOTE(review): Keras 3 (bundled with TensorFlow >= 2.16) only accepts
    # weight filenames ending in ".weights.h5". The name is left unchanged so
    # existing consumers of these files keep working -- confirm against the
    # installed Keras version.
    weight_path = f"results/model_fold{fold_no}.h5"
    model.save_weights(weight_path)
    print(f"✅ Weights saved to {weight_path}")

    # Confusion Matrix: the predicted class is the arg-max of the softmax output
    y_pred = np.argmax(model.predict(X_test), axis=1)
    cm = confusion_matrix(y_test, y_pred, labels=class_ids)
    conf_matrices.append(cm)

    plt.figure(figsize=(5, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=encoder.classes_, yticklabels=encoder.classes_)
    plt.title(f"Fold {fold_no} Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.savefig(f"results/confusion_fold{fold_no}.png")
    # Close the figure so per-fold figures do not accumulate across the loop.
    plt.close()

# -------------------- Summary --------------------
print("\n📊 Mean Accuracy across folds:", np.mean(acc_per_fold))

# Combine confusion matrices: element-wise sum of the per-fold matrices.
total_cm = np.sum(conf_matrices, axis=0)
plt.figure(figsize=(5, 4))
sns.heatmap(total_cm, annot=True, fmt='d', cmap='Greens', xticklabels=encoder.classes_, yticklabels=encoder.classes_)
plt.title("Overall Confusion Matrix (All Folds)")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.tight_layout()
plt.savefig("results/confusion_overall.png")
# NOTE(review): unlike the per-fold figures, this one is not closed after
# saving -- presumably so a notebook still renders it inline; confirm.

print("✅ Training completed! All weights, folds, and confusion matrices saved in 'results/' folder.")


ModuleNotFoundError: No module named 'numpy'